# --------------------------------
# THE AGENTIC — robots.txt
# --------------------------------
# Format notes:
#   * Every directive must be on its own line; anything after "#" on a line
#     is a comment and is ignored by crawlers.
#   * A crawler obeys only the ONE group whose User-agent matches it most
#     specifically. Groups are not merged.
#   * No blank lines inside a group: some legacy parsers treat a blank line
#     as the end of the group.

# --------------------------------
# DEFAULT POLICY (ALL AGENTS, INCLUDING KNOWN GOOD ONES)
# --------------------------------
# The named agents share this group with "*" on purpose. Giving each of them
# a separate group containing only "Allow: /" would exempt them from every
# Disallow rule below.
User-agent: *
User-agent: Googlebot
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: PerplexityBot
# Important content, listed first so first-match parsers see it before any
# Disallow rule.
Allow: /rss.xml
Allow: /sitemap.xml
Allow: /about
Allow: /policies
# Low-value / abuse paths.
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /tmp/
Disallow: /drafts/
# Any URL with a query string ("*" and "$" are wildcard extensions honoured by
# major crawlers).
Disallow: /*?*
Disallow: /*.json$
Disallow: /*.log$
# Everything else is crawlable. Kept last so first-match parsers do not let it
# override the Disallow rules above; longest-match parsers are unaffected.
Allow: /
# Rate control (respectful signal). Non-standard: honoured by some crawlers,
# ignored by others (Googlebot ignores it).
Crawl-delay: 5

# --------------------------------
# BAD / AGGRESSIVE SCRAPERS
# --------------------------------
User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: SemrushBot
Disallow: /

# --------------------------------
# CORE DISCOVERY
# --------------------------------
# Sitemap is independent of User-agent groups, so it is kept outside them.
Sitemap: https://theagentic.news/sitemap.xml

# --------------------------------
# VERDICT
# --------------------------------
# ✔ open to real agents
# ✔ blocks junk crawling
# ✔ exposes sitemap + rss
# ✔ production-grade