NPR / robots.txt snapshot
← back to npr.org · fetched 2026-06-20T01:10:30Z (18h ago) · HTTP 200 · 4657 bytes · sha256 385d71521a5490c6 · raw
final URL: https://www.npr.org/robots.txt
| 1 | # robots.txt for www.npr.org |
| 2 | # Changes are tracked in www-render |
| 3 | |
| 4 | User-agent: * |
| 5 | Disallow: /mpx/ |
| 6 | Disallow: /cgi-bin |
| 7 | Disallow: /ramfiles/ |
| 8 | Disallow: /oauth2/ |
| 9 | Disallow: /account/ |
| 10 | Disallow: /proxy/ |
| 11 | Disallow: /*.smil |
| 12 | Disallow: /*.asx |
| 13 | Disallow: /*.ram |
| 14 | Disallow: /*.wav |
| 15 | Disallow: /*.rmm |
| 16 | Disallow: /*.js |
| 17 | Disallow: /*.au |
| 18 | Disallow: /stations/force/force_localization.php? |
| 19 | Disallow: /rundowns/segment.php? |
| 20 | Disallow: /templates/search/* |
| 21 | Disallow: /2013/03/21/174840895/ |
| 22 | Disallow: /sections/ombudsman/2008/01/frequently_asked_questions_1.html |
| 23 | Disallow: /sections/health-shots/2013/03/11/173816690/new-voices-for-the-voiceless-synthetic-speech-gets-an-upgrade |
| 24 | Disallow: /transcripts/470280334* |
| 25 | Disallow: /2015/07/04/419570939/chasing-memories-in-their-refugee-camp-40-years-after-they-fled-vietnam |
| 26 | Disallow: /transcripts/419570939* |
| 27 | Disallow: /sections/parallels/2016/08/15/480128005/for-french-teens-smoking-still-has-more-allure-than-stigma |
| 28 | Disallow: /transcripts/480128005* |
| 29 | Disallow: /2020/04/08/830237502/episode-989-what-if-no-one-pays-rent |
| 30 | Disallow: /transcripts/830237502* |
| 31 | Disallow: /sections/goatsandsoda/2015/06/09/406744975/a-gender-revolution-hits-the-streets-two-wheels-at-a-time |
| 32 | Disallow: /transcripts/406744975* |
| 33 | Disallow: /sureroute |
| 34 | Disallow: /*/partials* |
| 35 | Disallow: /*?* |
| 36 | Disallow: /proxy/* |
| 37 | Disallow: /player/* |
| 38 | Disallow: /get/* |
| 39 | Disallow: /geolocation |
| 40 | |
| 41 | # Disallowing the OpenAI web crawler |
| 42 | User-agent: GPTBot |
| 43 | Disallow: / |
| 44 | |
| 45 | # Disallowing OpenAI plugins |
| 46 | User-agent: ChatGPT-User |
| 47 | Disallow: / |
| 48 | |
| 49 | # Disallowing Common Crawl |
| 50 | User-agent: CCBot |
| 51 | Disallow: / |
| 52 | |
| 53 | User-agent: OAI-SearchBot |
| 54 | Disallow: / |
| 55 | |
| 56 | # Disallowing Google Bard and Vertex AI web crawlers |
| 57 | User-agent: Google-Extended |
| 58 | Disallow: / |
| 59 | |
| 60 | # Disallowing various bots |
| 61 | User-agent: anthropic-ai |
| 62 | Disallow: / |
| 63 | |
| 64 | User-agent: Applebot-Extended |
| 65 | Disallow: / |
| 66 | |
| 67 | User-agent: Bytespider |
| 68 | Disallow: / |
| 69 | |
| 70 | User-agent: ClaudeBot |
| 71 | Disallow: / |
| 72 | |
| 73 | User-agent: Claude-Web |
| 74 | Disallow: / |
| 75 | |
| 76 | User-agent: cohere-ai |
| 77 | Disallow: / |
| 78 | |
| 79 | User-agent: Diffbot |
| 80 | Disallow: / |
| 81 | |
| 82 | User-agent: FacebookBot |
| 83 | Disallow: / |
| 84 | |
| 85 | User-agent: omgili |
| 86 | Disallow: / |
| 87 | |
| 88 | User-agent: omgilibot |
| 89 | Disallow: / |
| 90 | |
| 91 | User-agent: PerplexityBot |
| 92 | Disallow: / |
| 93 | |
| 94 | User-agent: PerplexityUser |
| 95 | Disallow: / |
| 96 | |
| 97 | # Allow Google Search Console (Google-InspectionTool) and the Google crawlers listed below, including for sitemap crawling |
| 98 | User-agent: Google-InspectionTool |
| 99 | Allow: / |
| 100 | User-agent: Google-Image |
| 101 | Allow: / |
| 102 | User-agent: Google-Video |
| 103 | Allow: / |
| 104 | User-agent: Googlebot |
| 105 | Allow: / |
| 106 | |
| 107 | # Allowing the TTD contextual crawler for Sponsorship |
| 108 | User-agent: TTD-Content |
| 109 | Allow: / |
| 110 | |
| 111 | # Allowing the TTD ads fraud/IVT prevention crawler for Sponsorship |
| 112 | User-agent: Trade Desk ads.txt & sellers.json crawler |
| 113 | Allow: / |
| 114 | |
| 115 | # Allowing the AdsBot-Google crawler for Sponsorship |
| 116 | User-agent: AdsBot-Google |
| 117 | Allow: / |
| 118 | |
| 119 | # Allowing the AdsBot-Google-Mobile crawler for Sponsorship |
| 120 | User-agent: AdsBot-Google-Mobile |
| 121 | Allow: / |
| 122 | |
| 123 | # Allowing the Google-Mediapartners crawler for Sponsorship |
| 124 | User-agent: Mediapartners-Google |
| 125 | Allow: / |
| 126 | |
| 127 | # Allowing the Google-Display-Ads-Bot crawler for Sponsorship |
| 128 | User-agent: Google-Display-Ads-Bot |
| 129 | Allow: / |
| 130 | |
| 131 | # Allowing the IAB tech lab crawler for Sponsorship |
| 132 | User-agent: IAB-Tech-Lab |
| 133 | Allow: / |
| 134 | |
| 135 | # Allowing the IAS crawler for Sponsorship |
| 136 | User-agent: ias_crawler |
| 137 | Allow: / |
| 138 | |
| 139 | # Allowing the IAS Wombles crawler for Sponsorship |
| 140 | User-agent: ias_wombles |
| 141 | Allow: / |
| 142 | |
| 143 | # Allowing the Amazon Standards crawler for Sponsorship |
| 144 | User-agent: Amazon-Advertising-ad-standards-bot/1.0 |
| 145 | Allow: / |
| 146 | # (fetches ads.txt, app-ads.txt, sellers.json) |
| 147 | User-agent: APS-ad-standards-bot/1.0 |
| 148 | Allow: / |
| 149 | |
| 150 | # New requirement as of 3/2/26 to be eligible for demand from Amazon ads. |
| 151 | User-agent: AmazonAdBot |
| 152 | Allow: / |
| 153 | |
| 154 | # Allowing the Audigent crawler for Sponsorship |
| 155 | User-agent: AudigentAdBot |
| 156 | Allow: / |
| 157 | |
| 158 | # Allowing the Concert crawler for Sponsorship |
| 159 | User-agent: Concert/1.0 |
| 160 | Allow: / |
| 161 | |
| 162 | # Allowing Centro/Basis |
| 163 | User-agent: Centro |
| 164 | Allow: / |
| 165 | |
| 166 | # Allowing BidSwitch (AdX DSP) |
| 167 | User-agent: bidswitchbot/1.0 |
| 168 | Allow: / |
| 169 | |
| 170 | # Allowing Roku (AdX DSP) |
| 171 | User-agent: DataXu/1.0 |
| 172 | Allow: / |
| 173 | |
| 174 | # Allowing PubMatic |
| 175 | User-agent: PubMatic |
| 176 | Allow: / |
| 177 | |
| 178 | # Allowing AdForm |
| 179 | User-agent: IAB ATQ team adform robot site.adform.com/general/url-fetcher |
| 180 | Allow: / |
| 181 | |
| 182 | # Allowing StackAdapt |
| 183 | User-agent: StackAdapt |
| 184 | Allow: / |
| 185 | |
| 186 | # Allowing Quantcast |
| 187 | User-agent: Quantcastbot/1.0 |
| 188 | Allow: / |
| 189 | User-agent: Quantcastbot/2.0 |
| 190 | Allow: / |
| 191 | |
| 192 | # Facebook sharing |
| 193 | User-agent: facebookexternalhit |
| 194 | Allow: / |
| 195 | |
| 196 | # Ensures that crawlers use the correct sitemaps. Hosting them on googlecrawl*.npr.org is OK because the crawler will only accept |
| 197 | # URLs listed in these sitemaps that match www*.npr.org |
| 198 | Sitemap: https://googlecrawl.npr.org/standard/sitemap_index.xml |
| 199 | Sitemap: https://googlecrawl.npr.org/news/sitemap_news.xml |
| 200 | Sitemap: https://googlecrawl.npr.org/video/sitemap_index.xml |
| 201 | Sitemap: https://www.npr.org/live-updates/sitemap.xml |