Midjourney / robots.txt snapshot

← back to midjourney.com · fetched 2026-06-20T11:49:20Z (7h ago) · HTTP 200 · 6300 bytes · sha256 7918d83c331c7b7b · raw

final URL: manual:file

1	# www.midjourney.com — robots.txt
2	# Last reviewed: 2026-05-14
3	#
4	# Policy summary:
5	# - Open to verified search & social bots (Googlebot, Bingbot, Twitterbot, etc.) — default.
6	# - Auth-gated routes and tool surfaces disallowed to save crawl budget.
7	# - /jobs/{id} image pages disallowed entirely — internal policy decision.
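8	# - AI training bots blocked. AI retrieval/citation bots allowed (so we can show up in
9	# ChatGPT and Perplexity citations without being used as training data). Not Gemini: the Google-Extended opt-out in this file also covers grounding in Gemini Apps, per Google's crawler docs.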
10	# - Sitemap pointer at the bottom.
11	#
12	# Note: robots.txt is a polite request. Bad actors ignore it. For absolute blocking,
13	# layer Cloudflare WAF rules on top.
14
15	# ============================================================================
16	# DEFAULT — all bots not specifically named below
17	# ============================================================================
18
19	User-agent: *
20	Allow: /
21
22	# Auth-only & tool surfaces — these waste crawl budget today (GSC shows /imagine,
23	# /account, /editor each absorbing 500K–800K weekly impressions to a login wall).
24	Disallow: /api/
25	Disallow: /auth/
26	Disallow: /account
27	Disallow: /preferences
28	Disallow: /checkout/
29	Disallow: /editor/
30	Disallow: /organize/
31	Disallow: /imagine
32	Disallow: /personalize
33	Disallow: /app/
34
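35	# Image detail pages — internal policy: do not expose the full /jobs/ corpus to crawlers.
36	# NOTE: Disallow stops crawling, not indexing — already-indexed pages may persist as URL-only results rather than fall out of SERPs; de-indexing needs noindex or a removal request.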
37	Disallow: /jobs/
38
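39	# Allow CSS/JS/fonts so crawlers can render SSR'd content & SPA hydration.
40	# Without these, Google may see incomplete pages and downgrade rendering quality. NOTE(review): under longest-match precedence a longer Disallow path (e.g. /checkout/, /organize/) still wins over these, and they apply only to the `*` group — confirm where assets are served from.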
41	Allow: /*.css
42	Allow: /*.js
43	Allow: /*.woff2
44
45	# ============================================================================
46	# AI TRAINING BOTS — fully blocked
47	# These bots crawl primarily to feed LLM training datasets. Blocking here prevents
48	# our content (including /home marketing copy, /explore descriptions, blog posts)
49	# from entering future model training corpora.
50	# ============================================================================
51
52	User-agent: GPTBot
53	Disallow: /
54
55	User-agent: CCBot
56	Disallow: /
57
58	User-agent: anthropic-ai
59	Disallow: /
60
61	User-agent: Bytespider
62	Disallow: /
63
64	User-agent: FacebookBot
65	Disallow: /
66
67	User-agent: Diffbot
68	Disallow: /
69
70	User-agent: Omgilibot
71	Disallow: /
72
73	User-agent: Omgili
74	Disallow: /
75
76	User-agent: ImagesiftBot
77	Disallow: /
78
79	User-agent: PetalBot
80	Disallow: /
81
82	User-agent: cohere-ai
83	Disallow: /
84
85	User-agent: cohere-training-data-crawler
86	Disallow: /
87
88	# Amazon — Rufus (shopping AI) and Alexa retrieval. Training-vs-retrieval split is
89	# opaque, and image-platform referral value from Amazon AI products is negligible.
90	User-agent: Amazonbot
91	Disallow: /
92
93	# You.com — small AI search engine with similar training-retrieval ambiguity.
94	User-agent: YouBot
95	Disallow: /
96
97	# Google's training opt-out token. This is a "rules-only" directive (not a real
98	# User-Agent string) — Googlebot keeps crawling normally for Search, but Google will NOT use our
99	# content for Gemini model training or for grounding in Gemini Apps / Vertex AI. Search itself (ranking, inclusion, Search features) is unaffected.
100	User-agent: Google-Extended
101	Disallow: /
102
103	# Apple's training opt-out token. Same shape as Google-Extended — Applebot still
104	# crawls for Apple Intelligence retrieval/citations; Applebot-Extended blocks training.
105	User-agent: Applebot-Extended
106	Disallow: /
107
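108	# Meta's AI crawler (an actual crawler user agent, not a rules-only token); Meta documents it as fetching content for AI model training.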
109	User-agent: meta-externalagent
110	Disallow: /
111
112	# ============================================================================
113	# COMMERCIAL CRAWLERS — crawl-budget protection
114	# Not AI-training; these are commercial scrapers that feed paid backlink/keyword
115	# indexes (Ahrefs, Semrush, etc.). We leave Ahrefs and Semrush ALLOWED because the
116	# growth team uses those tools internally — blocking them would create a blind
117	# spot in our own SEO data. MJ12bot (Majestic) and DataForSeoBot blocked because
118	# we don't use those products internally, and they consume meaningful crawl bandwidth.
119	# ============================================================================
120
121	User-agent: MJ12bot
122	Disallow: /
123
124	User-agent: DataForSeoBot
125	Disallow: /
126
127	# ============================================================================
128	# AI RETRIEVAL / CITATION BOTS — allowed
129	# These bots crawl to support live answers in ChatGPT, Perplexity, Bing Copilot,
130	# Claude, etc. Being allowed here lets us appear as cited sources, which drives
131	# referral traffic from those products.
132	# ============================================================================
133
134	# OpenAI — search index for ChatGPT Search citations
135	User-agent: OAI-SearchBot
136	Allow: /
137	Disallow: /api/
138	Disallow: /auth/
139	Disallow: /account
140	Disallow: /preferences
141	Disallow: /checkout/
142	Disallow: /editor/
143	Disallow: /organize/
144	Disallow: /imagine
145	Disallow: /personalize
146	Disallow: /app/
147	Disallow: /jobs/
148
149	# OpenAI — real-time fetch when a ChatGPT user clicks "browse" or invokes web tool
150	User-agent: ChatGPT-User
151	Allow: /
152	Disallow: /api/
153	Disallow: /auth/
154	Disallow: /account
155	Disallow: /preferences
156	Disallow: /checkout/
157	Disallow: /editor/
158	Disallow: /organize/
159	Disallow: /imagine
160	Disallow: /personalize
161	Disallow: /app/
162	Disallow: /jobs/
163
164	# Perplexity — search index for Perplexity citations
165	User-agent: PerplexityBot
166	Allow: /
167	Disallow: /api/
168	Disallow: /auth/
169	Disallow: /account
170	Disallow: /preferences
171	Disallow: /checkout/
172	Disallow: /editor/
173	Disallow: /organize/
174	Disallow: /imagine
175	Disallow: /personalize
176	Disallow: /app/
177	Disallow: /jobs/
178
179	# Perplexity — real-time fetch when a Perplexity user submits a query
180	User-agent: Perplexity-User
181	Allow: /
182	Disallow: /api/
183	Disallow: /auth/
184	Disallow: /account
185	Disallow: /preferences
186	Disallow: /checkout/
187	Disallow: /editor/
188	Disallow: /organize/
189	Disallow: /imagine
190	Disallow: /personalize
191	Disallow: /app/
192	Disallow: /jobs/
193
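194	# Anthropic — NOTE(review): Anthropic's published crawler docs describe ClaudeBot as the training-data crawler;
195	# search indexing and user-initiated fetches use Claude-SearchBot / Claude-User. If confirmed, this allow group conflicts with the "AI training bots blocked" policy — also verify whether the `anthropic-ai` token above is still honored.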
196	User-agent: ClaudeBot
197	Allow: /
198	Disallow: /api/
199	Disallow: /auth/
200	Disallow: /account
201	Disallow: /preferences
202	Disallow: /checkout/
203	Disallow: /editor/
204	Disallow: /organize/
205	Disallow: /imagine
206	Disallow: /personalize
207	Disallow: /app/
208	Disallow: /jobs/
209
210	# ============================================================================
211	# Sitemap
212	# ============================================================================
213
214	Sitemap: https://www.midjourney.com/sitemap.xml