crawler-tricker
v1.0.0
Published
Tricks webcrawlers via user-agent detection
Maintainers
Readme
crawler-tricker
Serve a static site only when a User-Agent rule hits.
Default behavior in createUserAgentSiteRouter is hit-only:
- requireSingleMatch: true
- allowDefaultTarget: false
So if there is no match (or multiple matches), nothing is served.
Install
npm install crawler-tricker
Quick start
const http = require("http");
const path = require("path");
const { createUserAgentSiteRouter } = require("crawler-tricker");
const router = createUserAgentSiteRouter({
useDefaultCrawlerRules: true,
crawlerRootDir: path.join(__dirname, "sites/bot")
});
http.createServer((req, res) => {
router.handle(req, res, () => {
res.statusCode = 404;
res.end("Not Found");
});
}).listen(3000);
Default crawler list
You can enable built-in crawler matching with:
- useDefaultCrawlerRules: true
- crawlerRootDir (required for static router)
Built-ins exported:
- DEFAULT_CRAWLER_KEYWORDS
- createCrawlerRegex(keywords)
Framework integration
Express:
app.use(router.express());
Connect-compatible:
app.use(router.connect());
Next custom server:
router.handle(req, res, () => nextHandler(req, res));
Matcher-only (no file serving):
const { createUserAgentMatcher } = require("crawler-tricker");
API
createUserAgentSiteRouter(config)
- rules (optional if useDefaultCrawlerRules is true)
  - rule.test: RegExp | string | (userAgent, req) => boolean
  - rule.rootDir: required per custom rule
- useDefaultCrawlerRules (default: false)
- crawlerRootDir (required when useDefaultCrawlerRules is true)
- crawlerRuleName (default: "crawler")
- crawlerKeywords (default: DEFAULT_CRAWLER_KEYWORDS)
- headerName (default: "user-agent")
- indexFile (default: "index.html")
- spaFallback (default: true)
- addResponseHeader (default: true)
- addSecurityHeaders (default: true, sets X-Content-Type-Options: nosniff)
- requireSingleMatch (default: true)
- allowDefaultTarget (default: false)
- defaultRootDir (used only if allowDefaultTarget is true)
Methods:
- handle(req, res, next?)
- middleware(req, res, next)
- express()
- connect()
- createHandler(next?)
- match(reqLike)
- selectByUserAgent(userAgent, reqLike?)
createUserAgentMatcher(config)
- rules (optional if useDefaultCrawlerRules is true)
- useDefaultCrawlerRules (default: false)
- crawlerRuleName (default: "crawler")
- crawlerKeywords (default: DEFAULT_CRAWLER_KEYWORDS)
- defaultTarget (optional)
- headerName (default: "user-agent")
- requireSingleMatch (default: false)
Methods:
- match(reqLike)
- selectByUserAgent(userAgent, reqLike?)
Production checks
npm run check
npm test