NeuralCrawl

Arm Holdings / robots.txt snapshot

← back to arm.com · fetched 2026-06-20T14:27:25Z (7h ago) · HTTP 200 · 2000 bytes · sha256 6f77495354580b32 · raw

final URL: https://www.arm.com/robots.txt

1# ============================
2# robots.txt for www.arm.com
3# ============================
4
5# ----------------------------
6# ALLOW: Trusted AI/LLM Crawlers for Training & Indexing (explicit access to /llms.txt)
7# ----------------------------
8
9User-agent: anthropic-ai
10Allow: /llms.txt
11
12User-agent: GPTBot
13Allow: /llms.txt
14
15User-agent: ClaudeBot
16Allow: /llms.txt
17
18User-agent: Claude-web
19Allow: /llms.txt
20
21User-agent: CCBot
22Allow: /llms.txt
23
24User-agent: Google-Extended
25Allow: /llms.txt
26
27User-agent: Amazonbot
28Allow: /llms.txt
29
30User-agent: Applebot
31Allow: /llms.txt
32
33User-agent: Bingbot
34Allow: /llms.txt
35
36User-agent: ChatGPT-User
37Allow: /llms.txt
38
39User-agent: Bytespider
40Allow: /llms.txt
41
42User-agent: PerplexityBot
43Allow: /llms.txt
44
45User-agent: Sogou
46Allow: /llms.txt
47
48Sitemap: https://www.arm.com/sitemap_index.xml
49
50# ----------------------------
51# Internal Search Bot (CoveoEnterpriseSearch) - Limited Access: only /news/20* is explicitly allowed
52# ----------------------------
53User-agent: CoveoEnterpriseSearch
54Allow: /news/20*
55
56User-agent: *
57Disallow: /coveo/
58
59# ----------------------------
60# BLOCK: Crawlers with Low Value or High Server Load
61# ----------------------------
62User-agent: AhrefsBot
63Disallow: /
64
65User-agent: YandexBot
66Disallow: /
67
68User-agent: MegaIndex.ru
69Disallow: /
70
71User-agent: SemrushBot
72Disallow: /
73
74User-agent: Qwantify/Bleriot
75Disallow: /
76
77User-agent: DotBot
78Disallow: /
79
80User-agent: MJ12bot
81Disallow: /
82
83User-agent: SEOkicks
84Disallow: /
85
86# ----------------------------
87# GENERAL RULES - Internal/Private Paths
88# ----------------------------
89User-agent: *
90
91# Backend or dev/testing paths
92Disallow: /assets/fonts/
93Disallow: /assets/Fonts/
94Disallow: /includes/
95Disallow: /phpscripts/
96Disallow: /shouldremainempty/
97Disallow: /xml/
98Disallow: /zh/includes/
99Disallow: /zh/shouldremainempty/
100Disallow: /zh/xml/
101Disallow: /about/newsroom
102
103# Block legacy trademark-related assets
104Disallow: /-/media/global/company/policies/trademarks/incorrect-logo/
105
106# Block thank-you pages or soft redirects (any URL ending in "-ty")
107Disallow: /*-ty$
108
109# Block internal site search (both /search and /Search prefixes, since path matching is case-sensitive)
110Disallow: /search*
111Disallow: /Search*