NPR / robots.txt snapshot

← back to npr.org · fetched 2026-06-20T01:10:30Z (18h ago) · HTTP 200 · 4657 bytes · sha256 385d71521a5490c6 · raw

final URL: https://www.npr.org/robots.txt

1	# robots.txt for www.npr.org
2	# Changes are tracked in www-render
3
4	User-agent: *
5	Disallow: /mpx/
6	Disallow: /cgi-bin
7	Disallow: /ramfiles/
8	Disallow: /oauth2/
9	Disallow: /account/
10	Disallow: /proxy/
11	Disallow: /*.smil
12	Disallow: /*.asx
13	Disallow: /*.ram
14	Disallow: /*.wav
15	Disallow: /*.rmm
16	Disallow: /*.js
17	Disallow: /*.au
18	Disallow: /stations/force/force_localization.php?
19	Disallow: /rundowns/segment.php?
20	Disallow: /templates/search/*
21	Disallow: /2013/03/21/174840895/
22	Disallow: /sections/ombudsman/2008/01/frequently_asked_questions_1.html
23	Disallow: /sections/health-shots/2013/03/11/173816690/new-voices-for-the-voiceless-synthetic-speech-gets-an-upgrade
24	Disallow: /transcripts/470280334*
25	Disallow: /2015/07/04/419570939/chasing-memories-in-their-refugee-camp-40-years-after-they-fled-vietnam
26	Disallow: /transcripts/419570939*
27	Disallow: /sections/parallels/2016/08/15/480128005/for-french-teens-smoking-still-has-more-allure-than-stigma
28	Disallow: /transcripts/480128005*
29	Disallow: /2020/04/08/830237502/episode-989-what-if-no-one-pays-rent
30	Disallow: /transcripts/830237502*
31	Disallow: /sections/goatsandsoda/2015/06/09/406744975/a-gender-revolution-hits-the-streets-two-wheels-at-a-time
32	Disallow: /transcripts/406744975*
33	Disallow: /sureroute
34	Disallow: //partials
35	Disallow: /?
36	Disallow: /proxy/*
37	Disallow: /player/*
38	Disallow: /get/*
39	Disallow: /geolocation
40
41	# Disallowing the OpenAI web crawler
42	User-agent: GPTBot
43	Disallow: /
44
45	# Disallowing OpenAI plugins
46	User-agent: ChatGPT-User
47	Disallow: /
48
49	# Disallowing Common Crawl
50	User-agent: CCBot
51	Disallow: /
52
53	User-agent: OAI-SearchBot
54	Disallow: /
55
56	# Disallowing Google Bard and Vertex AI web crawlers
57	User-agent: Google-Extended
58	Disallow: /
59
60	# Disallowing various bots
61	User-agent: anthropic-ai
62	Disallow: /
63
64	User-agent: Applebot-Extended
65	Disallow: /
66
67	User-agent: Bytespider
68	Disallow: /
69
70	User-agent: ClaudeBot
71	Disallow: /
72
73	User-agent: Claude-Web
74	Disallow: /
75
76	User-agent: cohere-ai
77	Disallow: /
78
79	User-agent: Diffbot
80	Disallow: /
81
82	User-agent: FacebookBot
83	Disallow: /
84
85	User-agent: omgili
86	Disallow: /
87
88	User-agent: omgilibot
89	Disallow: /
90
91	User-agent: PerplexityBot
92	Disallow: /
93
94	User-agent: PerplexityUser
95	Disallow: /
96
97	# Allow Google Search Console (Google-InspectionTool) and the Google crawlers listed below, including for sitemap crawling
98	User-agent: Google-InspectionTool
99	Allow: /
100	User-agent: Google-Image
101	Allow: /
102	User-agent: Google-Video
103	Allow: /
104	User-agent: Googlebot
105	Allow: /
106
107	# Allowing the TTD contextual crawler for Sponsorship
108	User-agent: TTD-Content
109	Allow: /
110
111	# Allowing the TTD ads fraud/IVT prevention crawler for Sponsorship
112	User-agent: Trade Desk ads.txt & sellers.json crawler
113	Allow: /
114
115	# Allowing the AdsBot-Google crawler for Sponsorship
116	User-agent: AdsBot-Google
117	Allow: /
118
119	# Allowing the AdsBot-Google-Mobile crawler for Sponsorship
120	User-agent: AdsBot-Google-Mobile
121	Allow: /
122
123	# Allowing the Mediapartners-Google crawler for Sponsorship
124	User-agent: Mediapartners-Google
125	Allow: /
126
127	# Allowing the Google-Display-Ads-Bot crawler for Sponsorship
128	User-agent: Google-Display-Ads-Bot
129	Allow: /
130
131	# Allowing the IAB tech lab crawler for Sponsorship
132	User-agent: IAB-Tech-Lab
133	Allow: /
134
135	# Allowing the IAS crawler for Sponsorship
136	User-agent: ias_crawler
137	Allow: /
138
139	# Allowing the IAS Wombles crawler for Sponsorship
140	User-agent: ias_wombles
141	Allow: /
142
143	# Allowing the Amazon Standards crawler for Sponsorship
144	User-agent: Amazon-Advertising-ad-standards-bot/1.0
145	Allow: /
146	# (fetches ads.txt, app-ads.txt, sellers.json)
147	User-agent: APS-ad-standards-bot/1.0
148	Allow: /
149
150	# New requirement as of 3/2/26 to be eligible for demand from Amazon ads.
151	User-agent: AmazonAdBot
152	Allow: /
153
154	# Allowing the Audigent crawler for Sponsorship
155	User-agent: AudigentAdBot
156	Allow: /
157
158	# Allowing the Concert crawler for Sponsorship
159	User-agent: Concert/1.0
160	Allow: /
161
162	# Allowing Centro/Basis
163	User-agent: Centro
164	Allow: /
165
166	# Allowing BidSwitch (AdX DSP)
167	User-agent: bidswitchbot/1.0
168	Allow: /
169
170	# Allowing Roku (AdX DSP)
171	User-agent: DataXu/1.0
172	Allow: /
173
174	# Allowing PubMatic
175	User-agent: PubMatic
176	Allow: /
177
178	# Allowing AdForm
179	User-agent: IAB ATQ team adform robot site.adform.com/general/url-fetcher
180	Allow: /
181
182	# Allowing StackAdapt
183	User-agent: StackAdapt
184	Allow: /
185
186	# Allowing Quantcast
187	User-agent: Quantcastbot/1.0
188	Allow: /
189	User-agent: Quantcastbot/2.0
190	Allow: /
191
192	# Facebook sharing
193	User-agent: facebookexternalhit
194	Allow: /
195
196	# Ensures that crawlers use the correct sitemaps. It is OK that these point to googlecrawl*.npr.org, because the crawler will only accept
197	# URLs from these sitemaps that match www*.npr.org
198	Sitemap: https://googlecrawl.npr.org/standard/sitemap_index.xml
199	Sitemap: https://googlecrawl.npr.org/news/sitemap_news.xml
200	Sitemap: https://googlecrawl.npr.org/video/sitemap_index.xml
201	Sitemap: https://www.npr.org/live-updates/sitemap.xml