wikiHow / robots.txt snapshot

← back to wikihow.com · fetched 2026-06-20T01:10:31Z (13h ago) · HTTP 200 · 4945 bytes · sha256 c977e2edd5010f1c · raw

final URL: https://www.wikihow.com/robots.txt

1	# robots.txt for wikiHow
2	# based on wikipedia.org's robots.txt
3	#
4	# Crawlers that are kind enough to obey, but which we'd rather not have
5	# unless they're feeding search engines.
6	# Sitemap: https://www.wikihow.com/sitemap_index.xml
7	# Or: /sitemap.xml on some other domains.
8	#
9	# If your bot supports rate limiting via the 'Crawl-delay' directive or a
10	# similar instruction, let us know and we can add it to our robots.txt.
11	#
12	# Friendly, low-speed bots are welcome to view article pages, but please
13	# do not crawl dynamically generated pages. Article pages contain our
14	# site's real content.
15	#
16	# NOTICE: All crawlers and bots, regardless of whether or not they are
17	# specified below, are strictly prohibited from using our content for the
18	# purposes of training or retrieval for AI models or similar machine learning
19	# systems, except where explicit prior permission has been granted by wikiHow
20	# through a contractual licensing agreement. Any use of our content for such
21	# purposes without our explicit contractual permission is a violation of our
22	# terms of service and intellectual property rights.
23
24	User-agent: Ai2Bot
25	Disallow: /
26
27	User-agent: AI2Bot
28	Disallow: /
29
30	User-agent: Ai2Bot-Dolma
31	Disallow: /
32
33	User-agent: Amazonbot
34	Disallow: /
35
36	User-agent: anthropic-ai
37	Disallow: /
38
39	User-agent: archive.org
40	Disallow: /api.php
41	Disallow: /index.php
42	Disallow: /Special:
43
44	User-agent: Applebot-Extended
45	Disallow: /
46
47	User-agent: Bytespider
48	Disallow: /
49
50	User-agent: CCBot
51	Disallow: /
52
53	User-agent: ChatGPT-User
54	Disallow: /
55
56	User-agent: ClaudeBot
57	Disallow: /
58
59	User-agent: Claude-SearchBot
60	Disallow: /
61
62	User-agent: Claude-User
63	Disallow: /
64
65	User-agent: Claude-Web
66	Disallow: /
67
68	User-agent: cohere-ai
69	Disallow: /
70
71	User-agent: Diffbot
72	Disallow: /
73
74	User-agent: DOC
75	Disallow: /
76
77	User-agent: Download Ninja
78	Disallow: /
79
80	User-agent: DuckAssistBot
81	Disallow: /
82
83	User-agent: FacebookBot
84	Disallow: /
85
86	User-agent: Facebookexternalhit
87	Disallow: /
88
89	User-agent: Fetch
90	Disallow: /
91
92	User-agent: Firecrawl
93	Disallow: /
94
95	User-agent: FirecrawlAgent
96	Disallow: /
97
98	User-agent: FriendlyCrawler
99	Disallow: /
100
101	User-agent: GPTBot
102	Disallow: /
103
104	User-agent: GrokApp
105	Disallow: /
106
107	User-agent: HMSE_Robot
108	Disallow: /
109
110	User-agent: HTTrack
111	Disallow: /
112
113	User-agent: ia_archiver
114	Disallow: /
115
116	User-agent: ICC-Crawler
117	Disallow: /
118
119	User-agent: ImagesiftBot
120	Disallow: /
121
122	User-agent: img2dataset
123	Disallow: /
124
125	User-agent: k2spider
126	Disallow: /
127
128	User-agent: larbin
129	Disallow: /
130
131	User-agent: libwww
132	Disallow: /
133
134	User-agent: linko
135	Disallow: /
136
137	User-agent: Meta-ExternalAgent
138	Disallow: /
139
140	User-agent: Meta-ExternalFetcher
141	Disallow: /
142
143	User-agent: Meta-WebIndexer
144	Disallow: /
145
146	User-agent: Microsoft.URL.Control
147	Disallow: /
148
149	User-agent: MistralAI-User
150	Disallow: /
151
152	User-agent: MSIECrawler
153	Disallow: /
154
155	# Requests many pages per second
156	# http://www.nameprotect.com/botinfo.html
157	User-agent: NPBot
158	Disallow: /
159
160	User-agent: OAI-SearchBot
161	Disallow: /
162
163	User-agent: Offline Explorer
164	Disallow: /
165
166	User-agent: omgili
167	Disallow: /
168
169	User-agent: OmgiliBot
170	Disallow: /
171
172	User-agent: PerplexityBot
173	Disallow: /
174
175	User-agent: PetalBot
176	Disallow: /
177
178	User-agent: Scrapy
179	Disallow: /
180
181	User-agent: Seekr
182	Disallow: /
183
184	User-agent: ShapBot
185	Disallow: /
186
187	# Some bots are known to be trouble, particularly those designed to copy
188	# entire sites. Please obey robots.txt.
189	User-agent: sitecheck.internetseer.com
190	Disallow: /
191
192	User-agent: SiteSnagger
193	Disallow: /
194
195	User-agent: Teleport
196	Disallow: /
197
198	User-agent: TeleportPro
199	Disallow: /
200
201	User-agent: TikTokSpider
202	Disallow: /
203
204	User-agent: Timpibot
205	Disallow: /
206
207	User-agent: UbiCrawler
208	Disallow: /
209
210	User-agent: VelenPublicWebCrawler
211	Disallow: /
212
213	User-agent: WebCopier
214	Disallow: /
215
216	User-agent: WebReaper
217	Disallow: /
218
219	User-agent: WebStripper
220	Disallow: /
221
222	User-agent: Webzio-Extended
223	Disallow: /
224
225	User-agent: WebZIP
226	Disallow: /
227
228	# wget in recursive mode uses too many resources for us.
229	# Please read the man page and use it properly; there is a
230	# --wait option you can use to set the delay between hits,
231	# for instance. Please wait 3 seconds between each request.
232	User-agent: wget
233	Disallow: /
234
235	User-agent: Xenu
236	Disallow: /
237
238	User-agent: YouBot
239	Disallow: /
240
241	User-agent: Zao
242	Disallow: /
243
244	User-agent: Zealbot
245	Disallow: /
246
247	User-agent: ZyBORG
248	Disallow: /
249
250	User-agent: AdsBot-Google
251	Allow: /
252
253	User-agent: Mediapartners-Google
254	Allow: /
255
256	User-agent: Googlebot
257	Allow: /Special:NewPages
258	Allow: /Special:Sitemap
259	Allow: /Special:CategoryListing
260	Allow: /
261
262	User-agent: *
263	Allow: /Special:Block
264	Allow: /Special:BlockList
265	Allow: /Special:Categorylisting
266	Allow: /Special:CategoryListing
267	Allow: /Special:Charity
268	Allow: /Special:EmailUser
269	Allow: /Special:LSearch
270	Allow: /Special:NewPages
271	Allow: /Special:QABox
272	Allow: /Special:SearchAd
273	Allow: /Special:Sitemap
274	Allow: /Special:ThankAuthors
275	Allow: /Special:UserLogin
276	Allow: /index.php?*action=credits
277	Allow: /index.php?*MathShowImage
278	Allow: /index.php?*printable
279	Disallow: /index.php
280	Disallow: /*feed=rss
281	Disallow: /*action=delete
282	Disallow: /*action=history
283	Disallow: /Special:
284	Disallow: /*platform=
285	Disallow: /*variant=