wikiHow / robots.txt snapshot
← back to wikihow.com · fetched 2026-06-20T01:10:31Z (13h ago) · HTTP 200 · 4945 bytes · sha256 c977e2edd5010f1c · raw
final URL: https://www.wikihow.com/robots.txt
| 1 | # robots.txt for wikiHow |
| 2 | # based on wikipedia.org's robots.txt |
| 3 | # |
| 4 | # Crawlers that are kind enough to obey, but which we'd rather not have |
| 5 | # unless they're feeding search engines. |
| 6 | # Sitemap: https://www.wikihow.com/sitemap_index.xml |
| 7 | # Or: /sitemap.xml on some other domains. |
| 8 | # |
| 9 | # If your bot supports request throttling via the 'Crawl-delay' directive |
| 10 | # or a similar instruction, we can add it to our robots.txt. |
| 11 | # |
| 12 | # Friendly, low-speed bots are welcome to view article pages, but please |
| 13 | # do not crawl dynamically generated pages. Article pages contain our site's |
| 14 | # real content. |
| 15 | # |
| 16 | # NOTICE: All crawlers and bots, regardless of whether or not they are |
| 17 | # specified below, are strictly prohibited from using our content for the |
| 18 | # purposes of training or retrieval for AI models or similar machine learning |
| 19 | # systems, except where explicit prior permission has been granted by wikiHow |
| 20 | # through a contractual licensing agreement. Any use of our content for such |
| 21 | # purposes without our explicit contractual permission is a violation of our |
| 22 | # terms of service and intellectual property rights. |
| 23 | |
| 24 | User-agent: Ai2Bot |
| 25 | Disallow: / |
| 26 | |
| 27 | User-agent: AI2Bot |
| 28 | Disallow: / |
| 29 | |
| 30 | User-agent: Ai2Bot-Dolma |
| 31 | Disallow: / |
| 32 | |
| 33 | User-agent: Amazonbot |
| 34 | Disallow: / |
| 35 | |
| 36 | User-agent: anthropic-ai |
| 37 | Disallow: / |
| 38 | |
| 39 | User-agent: archive.org |
| 40 | Disallow: /api.php |
| 41 | Disallow: /index.php |
| 42 | Disallow: /Special: |
| 43 | |
| 44 | User-agent: Applebot-Extended |
| 45 | Disallow: / |
| 46 | |
| 47 | User-agent: Bytespider |
| 48 | Disallow: / |
| 49 | |
| 50 | User-agent: CCBot |
| 51 | Disallow: / |
| 52 | |
| 53 | User-agent: ChatGPT-User |
| 54 | Disallow: / |
| 55 | |
| 56 | User-agent: ClaudeBot |
| 57 | Disallow: / |
| 58 | |
| 59 | User-agent: Claude-SearchBot |
| 60 | Disallow: / |
| 61 | |
| 62 | User-agent: Claude-User |
| 63 | Disallow: / |
| 64 | |
| 65 | User-agent: Claude-Web |
| 66 | Disallow: / |
| 67 | |
| 68 | User-agent: cohere-ai |
| 69 | Disallow: / |
| 70 | |
| 71 | User-agent: Diffbot |
| 72 | Disallow: / |
| 73 | |
| 74 | User-agent: DOC |
| 75 | Disallow: / |
| 76 | |
| 77 | User-agent: Download Ninja |
| 78 | Disallow: / |
| 79 | |
| 80 | User-agent: DuckAssistBot |
| 81 | Disallow: / |
| 82 | |
| 83 | User-agent: FacebookBot |
| 84 | Disallow: / |
| 85 | |
| 86 | User-agent: Facebookexternalhit |
| 87 | Disallow: / |
| 88 | |
| 89 | User-agent: Fetch |
| 90 | Disallow: / |
| 91 | |
| 92 | User-agent: Firecrawl |
| 93 | Disallow: / |
| 94 | |
| 95 | User-agent: FirecrawlAgent |
| 96 | Disallow: / |
| 97 | |
| 98 | User-agent: FriendlyCrawler |
| 99 | Disallow: / |
| 100 | |
| 101 | User-agent: GPTBot |
| 102 | Disallow: / |
| 103 | |
| 104 | User-agent: GrokApp |
| 105 | Disallow: / |
| 106 | |
| 107 | User-agent: HMSE_Robot |
| 108 | Disallow: / |
| 109 | |
| 110 | User-agent: HTTrack |
| 111 | Disallow: / |
| 112 | |
| 113 | User-agent: ia_archiver |
| 114 | Disallow: / |
| 115 | |
| 116 | User-agent: ICC-Crawler |
| 117 | Disallow: / |
| 118 | |
| 119 | User-agent: ImagesiftBot |
| 120 | Disallow: / |
| 121 | |
| 122 | User-agent: img2dataset |
| 123 | Disallow: / |
| 124 | |
| 125 | User-agent: k2spider |
| 126 | Disallow: / |
| 127 | |
| 128 | User-agent: larbin |
| 129 | Disallow: / |
| 130 | |
| 131 | User-agent: libwww |
| 132 | Disallow: / |
| 133 | |
| 134 | User-agent: linko |
| 135 | Disallow: / |
| 136 | |
| 137 | User-agent: Meta-ExternalAgent |
| 138 | Disallow: / |
| 139 | |
| 140 | User-agent: Meta-ExternalFetcher |
| 141 | Disallow: / |
| 142 | |
| 143 | User-agent: Meta-WebIndexer |
| 144 | Disallow: / |
| 145 | |
| 146 | User-agent: Microsoft.URL.Control |
| 147 | Disallow: / |
| 148 | |
| 149 | User-agent: MistralAI-User |
| 150 | Disallow: / |
| 151 | |
| 152 | User-agent: MSIECrawler |
| 153 | Disallow: / |
| 154 | |
| 155 | # Requests many pages per second |
| 156 | # http://www.nameprotect.com/botinfo.html |
| 157 | User-agent: NPBot |
| 158 | Disallow: / |
| 159 | |
| 160 | User-agent: OAI-SearchBot |
| 161 | Disallow: / |
| 162 | |
| 163 | User-agent: Offline Explorer |
| 164 | Disallow: / |
| 165 | |
| 166 | User-agent: omgili |
| 167 | Disallow: / |
| 168 | |
| 169 | User-agent: omgilibot |
| 170 | Disallow: / |
| 171 | |
| 172 | User-agent: PerplexityBot |
| 173 | Disallow: / |
| 174 | |
| 175 | User-agent: PetalBot |
| 176 | Disallow: / |
| 177 | |
| 178 | User-agent: Scrapy |
| 179 | Disallow: / |
| 180 | |
| 181 | User-agent: Seekr |
| 182 | Disallow: / |
| 183 | |
| 184 | User-agent: ShapBot |
| 185 | Disallow: / |
| 186 | |
| 187 | # Some bots are known to be trouble, particularly those designed to copy |
| 188 | # entire sites. Please obey robots.txt. |
| 189 | User-agent: sitecheck.internetseer.com |
| 190 | Disallow: / |
| 191 | |
| 192 | User-agent: SiteSnagger |
| 193 | Disallow: / |
| 194 | |
| 195 | User-agent: Teleport |
| 196 | Disallow: / |
| 197 | |
| 198 | User-agent: TeleportPro |
| 199 | Disallow: / |
| 200 | |
| 201 | User-agent: TikTokSpider |
| 202 | Disallow: / |
| 203 | |
| 204 | User-agent: Timpibot |
| 205 | Disallow: / |
| 206 | |
| 207 | User-agent: UbiCrawler |
| 208 | Disallow: / |
| 209 | |
| 210 | User-agent: VelenPublicWebCrawler |
| 211 | Disallow: / |
| 212 | |
| 213 | User-agent: WebCopier |
| 214 | Disallow: / |
| 215 | |
| 216 | User-agent: WebReaper |
| 217 | Disallow: / |
| 218 | |
| 219 | User-agent: WebStripper |
| 220 | Disallow: / |
| 221 | |
| 222 | User-agent: Webzio-Extended |
| 223 | Disallow: / |
| 224 | |
| 225 | User-agent: WebZIP |
| 226 | Disallow: / |
| 227 | |
| 228 | # wget in recursive mode uses too many resources for us. |
| 229 | # Please read the man page and use it properly; there is a |
| 230 | # --wait option you can use to set the delay between hits, |
| 231 | # for instance. Please wait 3 seconds between each request. |
| 232 | User-agent: wget |
| 233 | Disallow: / |
| 234 | |
| 235 | User-agent: Xenu |
| 236 | Disallow: / |
| 237 | |
| 238 | User-agent: YouBot |
| 239 | Disallow: / |
| 240 | |
| 241 | User-agent: Zao |
| 242 | Disallow: / |
| 243 | |
| 244 | User-agent: Zealbot |
| 245 | Disallow: / |
| 246 | |
| 247 | User-agent: ZyBORG |
| 248 | Disallow: / |
| 249 | |
| 250 | User-agent: AdsBot-Google |
| 251 | Allow: / |
| 252 | |
| 253 | User-agent: Mediapartners-Google |
| 254 | Allow: / |
| 255 | |
| 256 | User-agent: Googlebot |
| 257 | Allow: /Special:NewPages |
| 258 | Allow: /Special:Sitemap |
| 259 | Allow: /Special:CategoryListing |
| 260 | Allow: / |
| 261 | |
| 262 | User-agent: * |
| 263 | Allow: /Special:Block |
| 264 | Allow: /Special:BlockList |
| 265 | Allow: /Special:Categorylisting |
| 266 | Allow: /Special:CategoryListing |
| 267 | Allow: /Special:Charity |
| 268 | Allow: /Special:EmailUser |
| 269 | Allow: /Special:LSearch |
| 270 | Allow: /Special:NewPages |
| 271 | Allow: /Special:QABox |
| 272 | Allow: /Special:SearchAd |
| 273 | Allow: /Special:Sitemap |
| 274 | Allow: /Special:ThankAuthors |
| 275 | Allow: /Special:UserLogin |
| 276 | Allow: /index.php?*action=credits |
| 277 | Allow: /index.php?*MathShowImage |
| 278 | Allow: /index.php?*printable |
| 279 | Disallow: /index.php |
| 280 | Disallow: /*feed=rss |
| 281 | Disallow: /*action=delete |
| 282 | Disallow: /*action=history |
| 283 | Disallow: /Special: |
| 284 | Disallow: /*platform= |
| 285 | Disallow: /*variant= |