Snowflake / robots.txt snapshot
← back to snowflake.com · fetched 2026-06-20T01:10:30Z (18h ago) · HTTP 200 · 2805 bytes · sha256 be32bef083daa322 · raw
final URL: https://www.snowflake.com/robots.txt
| 1 | # ---------------------------------------------------------------------- |
| 2 | # Global default for all crawlers (rules apply to both WordPress and AEM wherever the paths match) |
| 3 | # ---------------------------------------------------------------------- |
| 4 | |
| 5 | User-agent: * |
| 6 | Allow: / |
| 7 | # Disallow WordPress admin area |
| 8 | Disallow: /wp-admin/ |
| 9 | # Disallow common search query parameters to prevent duplicate content |
| 10 | Disallow: *?s= |
| 11 | Disallow: *&s= |
| 12 | |
| 13 | # ---------------------------------------------------------------------- |
| 14 | # AEM (Adobe Experience Manager) specific rules |
| 15 | # ---------------------------------------------------------------------- |
| 16 | |
| 17 | # Standard AEM authoring, system, and temporary paths |
| 18 | Disallow: /apps/ |
| 19 | Disallow: /bin/ |
| 20 | Disallow: /crx/ |
| 21 | Disallow: /etc/ |
| 22 | Disallow: /etc/clientlibs/ |
| 23 | Disallow: /libs/ |
| 24 | Disallow: /system/ |
| 25 | Disallow: /tmp/ |
| 26 | Disallow: /var/ |
| 27 | # Disallow JCR content exposure |
| 28 | Disallow: /jcr:content/ |
| 29 | # Disallow JCR content exposure in subpages |
| 30 | Disallow: /*/_jcr_content/ |
| 31 | # Common AEM dispatcher cache invalidation path |
| 32 | Disallow: /_/ |
| 33 | # Disallow AEM's content-dump selectors (infinity.json, tidy.json, sysview.xml) |
| 34 | Disallow: /*.infinity.json$ |
| 35 | Disallow: /*.tidy.json$ |
| 36 | Disallow: /*.sysview.xml$ |
| 37 | # To allow specific feeds, add a more specific (longer) Allow rule; RFC 9309 crawlers use the longest match, not rule order |
| 38 | Disallow: /*.feed.xml$ |
| 39 | # RITM0480998: disallow experience fragments |
| 40 | Disallow: /content/experience-fragments/ |
| 41 | |
| 42 | # AEM DAM rules |
| 43 | # Allow PDFs under the English legal directory |
| 44 | Allow: /content/dam/snowflake-site/en/legal/*.pdf |
| 45 | |
| 46 | # Disallow other PDFs within the main snowflake-site DAM path |
| 47 | Disallow: /content/dam/snowflake-site/*.pdf |
| 48 | |
| 49 | # Allow crawling of the general DAM path (if it contains browsable content or other allowed assets) |
| 50 | Allow: /content/dam/snowflake-site/ |
| 51 | |
| 52 | # ---------------------------------------------------------------------- |
| 53 | # WordPress specific rules |
| 54 | # ---------------------------------------------------------------------- |
| 55 | |
| 56 | # Disallow theme directory |
| 57 | Disallow: /wp-content/themes/snowflake/ |
| 58 | |
| 59 | # Allow WordPress AJAX handler (important for some plugin/theme functionality) |
| 60 | Allow: /wp-admin/admin-ajax.php |
| 61 | |
| 62 | # Disallow other common WordPress paths that don't need indexing |
| 63 | Disallow: /wp-includes/ |
| 64 | Disallow: /wp-content/plugins/ |
| 65 | Disallow: /wp-content/cache/ |
| 66 | # WordPress REST API, usually not for direct crawling unless specific endpoints are public |
| 67 | Disallow: /wp-json/ |
| 68 | Disallow: /xmlrpc.php |
| 69 | Disallow: /readme.html |
| 70 | Disallow: /license.txt |
| 71 | Disallow: /trackback/ |
| 72 | # If you want specific feeds crawled, use Allow rules. Otherwise, sitemap is preferred. |
| 73 | Disallow: /feed/ |
| 74 | Disallow: */feed/$ |
| 75 | Disallow: */trackback/$ |
| 76 | Disallow: /*?replytocom= |
| 77 | |
| 78 | # ---------------------------------------------------------------------- |
| 79 | # Sitemap Directives |
| 80 | # ---------------------------------------------------------------------- |
| 81 | |
| 82 | Sitemap: https://www.snowflake.com/sitemap_index.xml |
| 83 | Sitemap: https://www.snowflake.com/content/snowflake-site/global.sitemap.xml |