NeuralCrawl

NPR / robots.txt snapshot

← back to npr.org · fetched 2026-06-20T01:10:30Z (18h ago) · HTTP 200 · 4657 bytes · sha256 385d71521a5490c6 · raw

final URL: https://www.npr.org/robots.txt

1# robots.txt for www.npr.org
2# Changes are tracked in www-render
3
4User-agent: *
5Disallow: /mpx/
6Disallow: /cgi-bin
7Disallow: /ramfiles/
8Disallow: /oauth2/
9Disallow: /account/
10Disallow: /proxy/
11Disallow: /*.smil
12Disallow: /*.asx
13Disallow: /*.ram
14Disallow: /*.wav
15Disallow: /*.rmm
16Disallow: /*.js
17Disallow: /*.au
18Disallow: /stations/force/force_localization.php?
19Disallow: /rundowns/segment.php?
20Disallow: /templates/search/*
21Disallow: /2013/03/21/174840895/
22Disallow: /sections/ombudsman/2008/01/frequently_asked_questions_1.html
23Disallow: /sections/health-shots/2013/03/11/173816690/new-voices-for-the-voiceless-synthetic-speech-gets-an-upgrade
24Disallow: /transcripts/470280334*
25Disallow: /2015/07/04/419570939/chasing-memories-in-their-refugee-camp-40-years-after-they-fled-vietnam
26Disallow: /transcripts/419570939*
27Disallow: /sections/parallels/2016/08/15/480128005/for-french-teens-smoking-still-has-more-allure-than-stigma
28Disallow: /transcripts/480128005*
29Disallow: /2020/04/08/830237502/episode-989-what-if-no-one-pays-rent
30Disallow: /transcripts/830237502*
31Disallow: /sections/goatsandsoda/2015/06/09/406744975/a-gender-revolution-hits-the-streets-two-wheels-at-a-time
32Disallow: /transcripts/406744975*
33Disallow: /sureroute
34Disallow: /*/partials*
35Disallow: /*?*
36Disallow: /proxy/*
37Disallow: /player/*
38Disallow: /get/*
39Disallow: /geolocation
40
41# Disallowing the OpenAI web crawler
42User-agent: GPTBot
43Disallow: /
44
45# Disallowing OpenAI plugins
46User-agent: ChatGPT-User
47Disallow: /
48
49# Disallowing Common Crawl
50User-agent: CCBot
51Disallow: /
52
53User-agent: OAI-SearchBot
54Disallow: /
55
56# Disallowing Google Bard and Vertex AI web crawlers
57User-agent: Google-Extended
58Disallow: /
59
60# Disallowing various bots
61User-agent: anthropic-ai
62Disallow: /
63
64User-agent: Applebot-Extended
65Disallow: /
66
67User-agent: Bytespider
68Disallow: /
69
70User-agent: ClaudeBot
71Disallow: /
72
73User-agent: Claude-Web
74Disallow: /
75
76User-agent: cohere-ai
77Disallow: /
78
79User-agent: Diffbot
80Disallow: /
81
82User-agent: FacebookBot
83Disallow: /
84
85User-agent: omgili
86Disallow: /
87
88User-agent: omgilibot
89Disallow: /
90
91User-agent: PerplexityBot
92Disallow: /
93
94User-agent: PerplexityUser
95Disallow: /
96
97# Allow Google Search Console for sitemap crawling
98User-agent: Google-InspectionTool
99Allow: /
100User-agent: Google-Image
101Allow: /
102User-agent: Google-Video
103Allow: /
104User-agent: Googlebot
105Allow: /
106
107# Allowing the TTD contextual crawler for Sponsorship
108User-agent: TTD-Content
109Allow: /
110
111# Allowing the TTD ads fraud/IVT prevention crawler for Sponsorship
112User-agent: Trade Desk ads.txt & sellers.json crawler
113Allow: /
114
115# Allowing the AdsBot-Google crawler for Sponsorship
116User-agent: AdsBot-Google
117Allow: /
118
119# Allowing the AdsBot-Google-Mobile crawler for Sponsorship
120User-agent: AdsBot-Google-Mobile
121Allow: /
122
123# Allowing the Google-Mediapartners crawler for Sponsorship
124User-agent: Mediapartners-Google
125Allow: /
126
127# Allowing the Google-Display-Ads-Bot crawler for Sponsorship
128User-agent: Google-Display-Ads-Bot
129Allow: /
130
131# Allowing the IAB tech lab crawler for Sponsorship
132User-agent: IAB-Tech-Lab
133Allow: /
134
135# Allowing the IAS crawler for Sponsorship
136User-agent: ias_crawler
137Allow: /
138
139# Allowing the IAS Wombles crawler for Sponsorship
140User-agent: ias_wombles
141Allow: /
142
143# Allowing the Amazon Standards crawler for Sponsorship
144User-agent: Amazon-Advertising-ad-standards-bot/1.0
145Allow: /
146# (fetches ads.txt, app-ads.txt, sellers.json)
147User-agent: APS-ad-standards-bot/1.0
148Allow: /
149
150# Allowing AmazonAdBot: a new requirement as of 3/2/26 to be eligible for demand from Amazon Ads.
151User-agent: AmazonAdBot
152Allow: /
153
154# Allowing the Audigent crawler for Sponsorship
155User-agent: AudigentAdBot
156Allow: /
157
158# Allowing the Concert crawler for Sponsorship
159User-agent: Concert/1.0
160Allow: /
161
162# Allowing Centro/Basis
163User-agent: Centro
164Allow: /
165
166# Allowing BidSwitch (AdX DSP)
167User-agent: bidswitchbot/1.0
168Allow: /
169
170# Allowing Roku (AdX DSP)
171User-agent: DataXu/1.0
172Allow: /
173
174# Allowing PubMatic
175User-agent: PubMatic
176Allow: /
177
178# Allowing AdForm
179User-agent: IAB ATQ team adform robot site.adform.com/general/url-fetcher
180Allow: /
181
182# Allowing StackAdapt
183User-agent: StackAdapt
184Allow: /
185
186# Allowing Quantcast
187User-agent: Quantcastbot/1.0
188Allow: /
189User-agent: Quantcastbot/2.0
190Allow: /
191
192# Facebook sharing
193User-agent: facebookexternalhit
194Allow: /
195
196# Ensures that crawlers use the correct sitemaps. Hosting them on googlecrawl*.npr.org is OK because the crawler will only accept
197# URLs listed in these sitemaps that match www*.npr.org.
198Sitemap: https://googlecrawl.npr.org/standard/sitemap_index.xml
199Sitemap: https://googlecrawl.npr.org/news/sitemap_news.xml
200Sitemap: https://googlecrawl.npr.org/video/sitemap_index.xml
201Sitemap: https://www.npr.org/live-updates/sitemap.xml