South China Morning Post / robots.txt snapshot
← back to scmp.com · fetched 2026-06-20T01:10:30Z (15h ago) · HTTP 200 · 4349 bytes · sha256 3a4d4b15232a97ca · raw
final URL: https://www.scmp.com/robots.txt
| 1 | # |
| 2 | # robots.txt |
| 3 | # |
| 4 | # This file is to prevent the crawling and indexing of certain parts |
| 5 | # of your site by web crawlers and spiders run by sites like Yahoo! |
| 6 | # and Google. By telling these "robots" where not to go on your site, |
| 7 | # you save bandwidth and server resources. |
| 8 | # |
| 9 | # This file will be ignored unless it is at the root of your host: |
| 10 | # Used: http://example.com/robots.txt |
| 11 | # Ignored: http://example.com/site/robots.txt |
| 12 | # |
| 13 | # For more information about the robots.txt standard, see: |
| 14 | # http://www.robotstxt.org/robotstxt.html |
| 15 | # |
| 16 | # For syntax checking, see: |
| 17 | # http://www.sxw.org.uk/computing/robots/check.html |
| 18 | |
| 19 | # AmazonAdBot |
| 20 | User-agent: AmazonAdBot |
| 21 | Allow: / |
| 22 | |
| 23 | User-agent: * |
| 24 | Crawl-delay: 10 |
| 25 | |
| 26 | # PWA |
| 27 | |
| 28 | # Directories |
| 29 | Disallow: /public/ |
| 30 | Disallow: /static/ |
| 31 | # Path |
| 32 | Disallow: /login |
| 33 | Disallow: /signin |
| 34 | Disallow: /register |
| 35 | Disallow: /logout |
| 36 | Disallow: /login/facebook |
| 37 | Disallow: /login/facebook/* |
| 38 | Disallow: /styleguide/* |
| 39 | Disallow: /healthz |
| 40 | Disallow: /.well-known/* |
| 41 | Disallow: /*/firebase-messaging-sw.js |
| 42 | Disallow: /google97d8d43559c9b155.html |
| 43 | Allow: /.well-known/amphtml/apikey.pub |
| 44 | |
| 45 | # CSS, JS, Image |
| 46 | Allow: /static/*.css$ |
| 47 | Allow: /static/*.css? |
| 48 | Allow: /static/*.js$ |
| 49 | Allow: /static/*.js? |
| 50 | Allow: /static/*.gif |
| 51 | Allow: /static/*.jpg |
| 52 | Allow: /static/*.jpeg |
| 53 | Allow: /static/*.png |
| 54 | Allow: /public/*.css$ |
| 55 | Allow: /public/*.css? |
| 56 | Allow: /public/*.js$ |
| 57 | Allow: /public/*.js? |
| 58 | Allow: /public/*.gif |
| 59 | Allow: /public/*.jpg |
| 60 | Allow: /public/*.jpeg |
| 61 | Allow: /public/*.png |
| 62 | |
| 63 | # Directories |
| 64 | Disallow: /includes/ |
| 65 | Disallow: /misc/ |
| 66 | Disallow: /modules/ |
| 67 | Disallow: /profiles/ |
| 68 | Disallow: /scripts/ |
| 69 | Disallow: /themes/ |
| 70 | # Files |
| 71 | Disallow: /CHANGELOG.txt |
| 72 | Disallow: /cron.php |
| 73 | Disallow: /INSTALL.mysql.txt |
| 74 | Disallow: /INSTALL.pgsql.txt |
| 75 | Disallow: /INSTALL.sqlite.txt |
| 76 | Disallow: /install.php |
| 77 | Disallow: /INSTALL.txt |
| 78 | Disallow: /LICENSE.txt |
| 79 | Disallow: /MAINTAINERS.txt |
| 80 | Disallow: /update.php |
| 81 | Disallow: /UPGRADE.txt |
| 82 | Disallow: /xmlrpc.php |
| 83 | Disallow: /sites/default/files/*.pdf |
| 84 | Disallow: /sites/default/files/*.doc |
| 85 | Disallow: /sites/default/files/*.docx |
| 86 | Disallow: /sites/default/files/*.swf |
| 87 | Disallow: /sites/default/files/styles/*.jpg |
| 88 | Disallow: /_next/data/*.json |
| 89 | |
| 90 | # Paths (clean URLs) |
| 91 | Disallow: /admin/ |
| 92 | Disallow: /comment/reply/ |
| 93 | Disallow: /filter/tips/ |
| 94 | Disallow: /node/add/ |
| 95 | Disallow: /user/register/ |
| 96 | Disallow: /user/password/ |
| 97 | Disallow: /user/login/ |
| 98 | Disallow: /user/logout/ |
| 99 | Disallow: *?destination=* |
| 100 | Disallow: /ajax_comments/ |
| 101 | Disallow: /scmp_comments/ |
| 102 | Disallow: *Article_type=* |
| 103 | Disallow: *field_article* |
| 104 | Disallow: /label/ |
| 105 | Disallow: /node/*/nodequeue |
| 106 | Disallow: /node/*/edit |
| 107 | Disallow: /ajax |
| 108 | Disallow: /ajax/* |
| 109 | Disallow: /facebook-instant-articles/feed/* |
| 110 | Disallow: /epaper |
| 111 | Disallow: /epaper/* |
| 112 | Disallow: /story/style/* |
| 113 | |
| 114 | # Paths (no clean URLs) |
| 115 | Disallow: /?q=admin/ |
| 116 | Disallow: /?q=comment/reply/ |
| 117 | Disallow: /?q=filter/tips/ |
| 118 | Disallow: /?q=node/add/ |
| 119 | Disallow: /?q=user/password/ |
| 120 | Disallow: /?q=user/register/ |
| 121 | Disallow: /?q=user/login/ |
| 122 | Disallow: /?q=user/logout/ |
| 123 | Disallow: /?q=node/*/edit |
| 124 | Disallow: /?q=node/*/nodequeue |
| 125 | Disallow: /?q=epaper |
| 126 | Disallow: /?q=epaper/* |
| 127 | Disallow: /?q=facebook-instant-articles/feed/* |
| 128 | |
| 129 | Disallow: /*/logSend$ |
| 130 | Disallow: /*/spmException$ |
| 131 | Disallow: /*/spmact$ |
| 132 | Disallow: /*/antiSpam$ |
| 133 | Disallow: /*/nameStorage$ |
| 134 | Disallow: /*/spmMonitor$ |
| 135 | Disallow: /*/pvData$ |
| 136 | Disallow: /*/goldlog$ |
| 137 | Disallow: /*/initLoad$ |
| 138 | Disallow: /*/beforeUnload$ |
| 139 | Disallow: /*/util$ |
| 140 | Disallow: /*/metaInfo$ |
| 141 | Disallow: /*/beaconBase$ |
| 142 | Disallow: /*/spm$ |
| 143 | Disallow: /*/makeid$ |
| 144 | Disallow: /*/referrer$ |
| 145 | Disallow: /*/pvid$ |
| 146 | Disallow: /*/etag$ |
| 147 | Disallow: /*/iframe$ |
| 148 | Disallow: /*/client$ |
| 149 | Disallow: /*/windvane$ |
| 150 | Disallow: /*/cookie$ |
| 151 | Disallow: /*/sendpv$ |
| 152 | Disallow: /*/personality/index$ |
| 153 | Disallow: /*/misc$ |
| 154 | Disallow: /*/client$ |
| 155 | Disallow: /*/log$ |
| 156 | Disallow: /*/compose$ |
| 157 | Disallow: /*/lib_b/*$ |
| 158 | Disallow: /print/ |
| 159 | Disallow: /?q=print/ |
| 160 | |
| 161 | # NewsNow |
| 162 | User-agent: NewsNow |
| 163 | Allow: /print/ |
| 164 | Allow: /?q=print/ |
| 165 | |
| 166 | # GrapeShot |
| 167 | User-agent: grapeshot |
| 168 | Allow: /*/article/*$ |
| 169 | |
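| 170 | # Ads (NOTE(review): these rules follow "User-agent: grapeshot" with no new User-agent line, so they belong to that group rather than to "User-agent: *" - confirm intent) |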
| 171 | Disallow: /*?*cid=* |
| 172 | Disallow: /*?*showonlyads=* |
| 173 | Disallow: /*?*nograpeshot=* |
| 174 | Disallow: /*?*noixwrapper=* |
| 175 | Disallow: /*?*nogtm=* |
| 176 | Disallow: /*?*nochartbeat=* |
| 177 | Disallow: /*?*noga=* |
| 178 | Disallow: /*?*nomoatyi=* |
| 179 | Disallow: /*?*nomoat=* |
| 180 | |
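| 181 | # Bot score and country (NOTE(review): this rule is still inside the "User-agent: grapeshot" group, not "User-agent: *" - confirm intent) |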
| 182 | Disallow: /navigator-info |
| 183 | |
| 184 | # Disallow articles with query params, and everything with a campaign parameter, for Googlebot |
| 185 | User-agent: Googlebot |
| 186 | Disallow: /*?*campaign=* |
| 187 | |
| 188 | # Sitemap |
| 189 | |
| 190 | Sitemap: https://www.scmp.com/sitemap/sitemap.xml |
| 191 | Sitemap: https://www.scmp.com/sitemap/archives-0.xml |