NeuralCrawl

wikiHow / robots.txt snapshot

← back to wikihow.com · fetched 2026-06-20T01:10:31Z (13h ago) · HTTP 200 · 4945 bytes · sha256 c977e2edd5010f1c · raw

final URL: https://www.wikihow.com/robots.txt

1# robots.txt for wikiHow
2# based on wikipedia.org's robots.txt
3#
4# Crawlers that are kind enough to obey, but which we'd rather not have
5# unless they're feeding search engines.
6# Sitemap: https://www.wikihow.com/sitemap_index.xml
7# Or: /sitemap.xml on some other domains.
8#
9# If your bot supports rate limiting through 'Crawl-delay' or another
10# instruction, we can add it to our robots.txt.
11#
12# Friendly, low-speed bots are welcome to view article pages, but please
13# do not crawl dynamically generated pages. Article pages contain our
14# site's real content.
15#
16# NOTICE: All crawlers and bots, regardless of whether or not they are
17# specified below, are strictly prohibited from using our content for the
18# purposes of training or retrieval for AI models or similar machine learning
19# systems, except where explicit prior permission has been granted by wikiHow
20# through a contractual licensing agreement. Any use of our content for such
21# purposes without our explicit contractual permission is a violation of our
22# terms of service and intellectual property rights.
23
24User-agent: Ai2Bot
25Disallow: /
26
27User-agent: AI2Bot
28Disallow: /
29
30User-agent: Ai2Bot-Dolma
31Disallow: /
32
33User-agent: Amazonbot
34Disallow: /
35
36User-agent: anthropic-ai
37Disallow: /
38
39User-agent: archive.org
40Disallow: /api.php
41Disallow: /index.php
42Disallow: /Special:
43
44User-agent: Applebot-Extended
45Disallow: /
46
47User-agent: Bytespider
48Disallow: /
49
50User-agent: CCBot
51Disallow: /
52
53User-agent: ChatGPT-User
54Disallow: /
55
56User-agent: ClaudeBot
57Disallow: /
58
59User-agent: Claude-SearchBot
60Disallow: /
61
62User-agent: Claude-User
63Disallow: /
64
65User-agent: Claude-Web
66Disallow: /
67
68User-agent: cohere-ai
69Disallow: /
70
71User-agent: Diffbot
72Disallow: /
73
74User-agent: DOC
75Disallow: /
76
77User-agent: Download Ninja
78Disallow: /
79
80User-agent: DuckAssistBot
81Disallow: /
82
83User-agent: FacebookBot
84Disallow: /
85
86User-agent: Facebookexternalhit
87Disallow: /
88
89User-agent: Fetch
90Disallow: /
91
92User-agent: Firecrawl
93Disallow: /
94
95User-agent: FirecrawlAgent
96Disallow: /
97
98User-agent: FriendlyCrawler
99Disallow: /
100
101User-agent: GPTBot
102Disallow: /
103
104User-agent: GrokApp
105Disallow: /
106
107User-agent: HMSE_Robot
108Disallow: /
109
110User-agent: HTTrack
111Disallow: /
112
113User-agent: ia_archiver
114Disallow: /
115
116User-agent: ICC-Crawler
117Disallow: /
118
119User-agent: ImagesiftBot
120Disallow: /
121
122User-agent: img2dataset
123Disallow: /
124
125User-agent: k2spider
126Disallow: /
127
128User-agent: larbin
129Disallow: /
130
131User-agent: libwww
132Disallow: /
133
134User-agent: linko
135Disallow: /
136
137User-agent: Meta-ExternalAgent
138Disallow: /
139
140User-agent: Meta-ExternalFetcher
141Disallow: /
142
143User-agent: Meta-WebIndexer
144Disallow: /
145
146User-agent: Microsoft.URL.Control
147Disallow: /
148
149User-agent: MistralAI-User
150Disallow: /
151
152User-agent: MSIECrawler
153Disallow: /
154
155# Requests many pages per second
156# http://www.nameprotect.com/botinfo.html
157User-agent: NPBot
158Disallow: /
159
160User-agent: OAI-SearchBot
161Disallow: /
162
163User-agent: Offline Explorer
164Disallow: /
165
166User-agent: omgili
167Disallow: /
168
169User-agent: OmigiliBot
170Disallow: /
171
172User-agent: PerplexityBot
173Disallow: /
174
175User-agent: PetalBot
176Disallow: /
177
178User-agent: Scrapy
179Disallow: /
180
181User-agent: Seekr
182Disallow: /
183
184User-agent: ShapBot
185Disallow: /
186
187# Some bots are known to be trouble, particularly those designed to copy
188# entire sites. Please obey robots.txt.
189User-agent: sitecheck.internetseer.com
190Disallow: /
191
192User-agent: SiteSnagger
193Disallow: /
194
195User-agent: Teleport
196Disallow: /
197
198User-agent: TeleportPro
199Disallow: /
200
201User-agent: TikTokSpider
202Disallow: /
203
204User-agent: Timpibot
205Disallow: /
206
207User-agent: UbiCrawler
208Disallow: /
209
210User-agent: VelenPublicWebCrawler
211Disallow: /
212
213User-agent: WebCopier
214Disallow: /
215
216User-agent: WebReaper
217Disallow: /
218
219User-agent: WebStripper
220Disallow: /
221
222User-agent: Webzio-Extended
223Disallow: /
224
225User-agent: WebZIP
226Disallow: /
227
228# wget in recursive mode uses too many resources for us.
229# Please read the man page and use it properly; there is a
230# --wait option you can use to set the delay between hits,
231# for instance. Please wait 3 seconds between each request.
232User-agent: wget
233Disallow: /
234
235User-agent: Xenu
236Disallow: /
237
238User-agent: YouBot
239Disallow: /
240
241User-agent: Zao
242Disallow: /
243
244User-agent: Zealbot
245Disallow: /
246
247User-agent: ZyBORG
248Disallow: /
249
250User-agent: AdsBot-Google
251Allow: /
252
253User-agent: Mediapartners-Google
254Allow: /
255
256User-agent: Googlebot
257Allow: /Special:NewPages
258Allow: /Special:Sitemap
259Allow: /Special:CategoryListing
260Allow: /
261
262User-agent: *
263Allow: /Special:Block
264Allow: /Special:BlockList
265Allow: /Special:Categorylisting
266Allow: /Special:CategoryListing
267Allow: /Special:Charity
268Allow: /Special:EmailUser
269Allow: /Special:LSearch
270Allow: /Special:NewPages
271Allow: /Special:QABox
272Allow: /Special:SearchAd
273Allow: /Special:Sitemap
274Allow: /Special:ThankAuthors
275Allow: /Special:UserLogin
276Allow: /index.php?*action=credits
277Allow: /index.php?*MathShowImage
278Allow: /index.php?*printable
279Disallow: /index.php
280Disallow: /*feed=rss
281Disallow: /*action=delete
282Disallow: /*action=history
283Disallow: /Special:
284Disallow: /*platform=
285Disallow: /*variant=