NeuralCrawl

The Hill / robots.txt snapshot

← back to thehill.com · fetched 2026-06-20T01:10:30Z (18h ago) · HTTP 200 · 2143 bytes · sha256 5e0c73fd451e413b · raw

final URL: https://thehill.com/robots.txt

1User-agent: *
2Disallow: /wp-admin/
3Allow: /wp-admin/admin-ajax.php
4
5Sitemap: https://thehill.com/sitemap.xml
6Sitemap: https://thehill.com/news-sitemap.xml
7Sitemap: https://thehill.com/video-sitemap.xml
8Disallow: */?s=
9Disallow: */search/
10Disallow: /*.woff
11Disallow: /*.woff2
12Disallow: /page/
13Disallow: /wp-content/plugins/
14Allow: /wp-content/plugins/liveblog/
15Disallow: /wp-content/client-mu-plugins/
16Disallow: /wp-includes/css/
17Disallow: /wp-includes/js/
18Disallow: /wp-content/themes/nexstar/client/build/js/
19Disallow: /wp-content/themes/nexstar/client/build/css/
20Disallow: /nxs-video-player/
21
22
23# --- Agents (configured to see /advertise/ ONLY) ---
24
25User-agent: GPTBot
26User-agent: OAI-SearchBot
27Disallow: /
28Allow: /advertise/
29
30User-Agent: PerplexityBot
31Disallow: /
32Allow: /advertise/
33
34User-agent: ia_archiver
35Disallow: /
36
37User-agent: Google-Extended
38Disallow: /
39Allow: /advertise/
40
41User-agent: ClaudeBot
42User-agent: Claude-Web
43User-agent: anthropic-ai
44Disallow: /
45Allow: /advertise/
46
47# --- Other Agents (Strict Block or Delays) ---
48
49User-agent: CCBot
50Disallow: /
51
52User-Agent: FacebookBot
53Disallow: /
54
55User-agent: proximic
56Crawl-Delay: 20
57
58User-agent: Moreover
59Crawl-Delay: 20
60
61User-agent: AhrefsBot
62Crawl-Delay: 20
63
64User-agent: Applebot-Extended
65Disallow: /
66
67User-agent: GumGum
68Crawl-Delay: 10
69
70User-agent: Verity
71Crawl-Delay: 10
72
73User-agent: AwarioRssBot
74User-agent: AwarioSmartBot
75Disallow: /
76
77User-agent: Bytespider
78Disallow: /
79
80User-agent: ChatGPT-User
81Disallow: /
82
83User-agent: cohere-ai
84Disallow: /
85
86User-agent: DataForSeoBot
87Disallow: /
88
89User-agent: Diffbot
90Disallow: /
91
92User-agent: FriendlyCrawler
93Disallow: /
94
95User-agent: Google-CloudVertexBot
96Disallow: /
97Allow: /wirecutter/
98
99User-agent: ImagesiftBot
100Disallow: /
101
102User-agent: magpie-crawler
103Disallow: /
104
105User-agent: Meta-ExternalAgent
106User-agent: meta-externalagent
107Disallow: /
108
109User-agent: NewsNow
110Disallow: /
111
112User-agent: news-please
113Disallow: /
114
115User-agent: omgili
116Disallow: /
117
118User-agent: omgilibot
119Disallow: /
120
121User-agent: peer39_crawler
122User-agent: peer39_crawler/1.0
123Disallow: /
124
125User-agent: Quora-Bot
126Disallow: /
127
128User-agent: Scrapy
129Disallow: /
130
131User-agent: TurnitinBot
132Disallow: /