# robots.txt for blockchain-development-company.skin
# Last updated: 2026-04-08
#
# Strategy (must match .htaccess access policy):
#   - Explicitly allow Google, Bing, Yandex and Mail.ru search crawlers
#   - Disallow AI training crawlers; allow AI citation / live-answer crawlers
#   - Disallow every SEO backlink / audit crawler
#   - Disallow every named non-whitelisted search engine
#   - Disallow every download tool / bulk scraper / archive crawler
#   - Any other bot falls under the default group: crawling is allowed
#     except for common dev/build paths
#
# .htaccess enforces the same policy at the HTTP layer (scrapers get 403,
# allowed search engines get 301 to canonical target, real browsers get
# served the page content). robots.txt is the polite-crawler declaration of
# the same rules for crawlers that respect it.
#
# Format notes:
#   - One directive per line; a group is one or more User-agent lines
#     followed by its rules. Groups are separated by blank lines.
#   - User-agent matching is case-insensitive, so each token is listed once.
#   - A crawler that matches a named group ignores the "User-agent: *" group
#     entirely, so anything a named crawler must obey has to be written in
#     its own group.

# =============================================================================
# ALLOWED SEARCH ENGINES (the only search crawlers that are whitelisted)
# =============================================================================

# Google
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Googlebot-Mobile
User-agent: APIs-Google
User-agent: Google-InspectionTool
User-agent: Storebot-Google
Allow: /

# Bing. Crawl-delay is repeated here because Bing crawlers match this group
# and therefore never read the Crawl-delay in the default group.
User-agent: bingbot
User-agent: BingPreview
User-agent: msnbot
User-agent: adidxbot
Allow: /
Crawl-delay: 2

# Yandex. No Crawl-delay: Yandex no longer honors the directive (crawl rate
# is configured in Yandex Webmaster instead).
User-agent: YandexBot
User-agent: YandexImages
User-agent: YandexNews
User-agent: YandexMobileBot
User-agent: YandexMetrika
Allow: /

# Mail.ru
User-agent: Mail.RU_Bot
Allow: /

# =============================================================================
# AI CRAWLERS. Policy: CITATION-ONLY.
# Live-answer / search-index crawlers are ALLOWED so the page can be cited in
# ChatGPT, Perplexity and Google AI surfaces. Pure-training crawlers stay
# disallowed (corpus ingestion only, no citation value).
# =============================================================================

# OpenAI: block training (GPTBot), allow citation (search index + live fetch)
User-agent: GPTBot
Disallow: /

User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Allow: /

# Anthropic: training/ingestion crawlers, disallow
User-agent: ClaudeBot
User-agent: claude-web
User-agent: anthropic-ai
Disallow: /

# Google-Extended is a control token, not a separate crawler: it governs use
# of crawled content for Gemini training and grounding. AI Overviews in
# Google Search follow the Googlebot rules above, not this token.
# NOTE(review): allowing it also permits Gemini training use, which differs
# from the training block applied to the other vendors. Confirm intended.
User-agent: Google-Extended
Allow: /

# Apple: Applebot-Extended controls Apple Intelligence training use and is
# disallowed under the training policy. Applebot itself is Apple's search
# crawler; it is disallowed because it is not on the search whitelist.
User-agent: Applebot-Extended
User-agent: Applebot
Disallow: /

# Perplexity: allow citation (index + live fetch)
User-agent: PerplexityBot
User-agent: Perplexity-User
Allow: /

# Common Crawl (bulk training dataset)
User-agent: CCBot
Disallow: /

# ByteDance / TikTok
User-agent: Bytespider
Disallow: /

# Meta AI
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
Disallow: /

# Amazon
User-agent: Amazonbot
Disallow: /

# Other AI/data crawlers
User-agent: Diffbot
User-agent: Omgilibot
User-agent: Omgili
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: YouBot
User-agent: Bravebot
User-agent: Neevabot
User-agent: FriendlyCrawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: TurnitinBot
User-agent: Timpibot
User-agent: PanguBot
User-agent: webzio-extended
User-agent: iaskspider
User-agent: AI2Bot
User-agent: AwarioSmartBot
User-agent: VelenPublicWebCrawler
User-agent: Kangaroo Bot
User-agent: Scoop.it
Disallow: /

# =============================================================================
# SEO / BACKLINK / AUDIT CRAWLERS (disallow, they extract for competitors)
# =============================================================================

User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: Semrush
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: rogerbot
User-agent: MegaIndex
User-agent: Serpstat
User-agent: SerpstatBot
User-agent: sistrix
User-agent: SiteAuditBot
User-agent: SeznamBot
User-agent: spbot
User-agent: linkdexbot
User-agent: SpyFu
User-agent: MajesticSEO
User-agent: Majestic-12
User-agent: BarkRowler
User-agent: SeekportBot
User-agent: Exabot
User-agent: Cliqzbot
User-agent: Searchmetricsbot
User-agent: BacklinkCrawler
User-agent: LinkpadBot
User-agent: Linkdex
User-agent: Cocolyzebot
User-agent: DataForSeoBot
Disallow: /

# =============================================================================
# NON-WHITELIST SEARCH ENGINES (disallow, only the 4 engines above are allowed)
# =============================================================================

User-agent: Baiduspider
User-agent: sogou
User-agent: coccoc
User-agent: Naver
User-agent: Yeti
User-agent: DuckDuckBot
User-agent: Qwantbot
User-agent: Qwantify
User-agent: Ecosia
User-agent: PetalBot
Disallow: /

# =============================================================================
# DOWNLOAD TOOLS / OFFLINE BROWSERS / BULK SCRAPERS (disallow)
# =============================================================================

User-agent: HTTrack
User-agent: Wget
User-agent: WebCopier
User-agent: WebStripper
User-agent: WebZIP
User-agent: Teleport
User-agent: TeleportPro
User-agent: GetRight
User-agent: FlashGet
User-agent: LeechGet
User-agent: MassDownloader
User-agent: Harvester
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: EmailWolf
User-agent: ExtractorPro
User-agent: LinkExtractorPro
Disallow: /

# =============================================================================
# ARCHIVE CRAWLERS (disallow, prevent Wayback caching of landing content)
# =============================================================================

User-agent: ia_archiver
User-agent: archive.org_bot
User-agent: Archive-It
User-agent: wayback
Disallow: /

# =============================================================================
# DEFAULT (any bot not listed above)
#
# Allows crawling by polite crawlers that are not explicitly blacklisted,
# but disallows dev/build artifacts. Note: the .htaccess Mozilla check
# already 403s any UA that is neither a real browser nor an allowed bot, so
# this default rule is a second line of defense for crawlers that DO spoof
# Mozilla but respect robots.txt.
# =============================================================================

User-agent: *
Disallow: /.env
Disallow: /.git/
Disallow: /.svn/
Disallow: /.hg/
Disallow: /.idea/
Disallow: /.vscode/
Disallow: /.DS_Store
Disallow: /backups/
Disallow: /node_modules/
Disallow: /vendor/
Disallow: /deploy.sh
Disallow: /up.sh
Disallow: /package.json
Disallow: /package-lock.json
Disallow: /yarn.lock
Disallow: /pnpm-lock.yaml
Disallow: /README.md
Disallow: /CHANGELOG.md
Disallow: /LICENSE
Disallow: /Makefile
Disallow: /*.log$
Disallow: /*.bak$
Disallow: /*.swp$
Disallow: /*.swo$
Disallow: /*.sql$
Disallow: /*.sqlite$
Disallow: /*.env$
Disallow: /*.sh$
Disallow: /*.md$
Disallow: /*.yml$
Disallow: /*.yaml$
Disallow: /*.toml$
Disallow: /*.sample$
Disallow: /*.example$
# Crawl-delay for unlisted polite crawlers that support the directive.
# It applies only to bots that fall through to this default group; crawlers
# with their own group above (Google, Bing, Yandex, ...) never read it, and
# Google ignores Crawl-delay altogether.
Crawl-delay: 2

# =============================================================================
# SITEMAP
# =============================================================================

Sitemap: https://blockchain-development-company.skin/sitemap.xml