/*
 * Next.js website for Rocky Mountain Vending company featuring:
 *   - Product catalog with Stripe integration
 *   - Service areas and parts pages
 *   - Admin dashboard with Clerk authentication
 *   - SEO optimized pages with JSON-LD structured data
 *
 * Co-authored-by: Cursor <cursoragent@cursor.com>
 */
864 lines
22 KiB
Text
864 lines
22 KiB
Text
var robotsParser = require('../index');
|
|
var expect = require('chai').expect;
|
|
|
|
|
|
/**
 * Parses `contents` as the robots.txt located at `url` and asserts how the
 * resulting parser classifies each of the supplied URLs.
 *
 * @param {string} url - URL of the robots.txt file handed to the parser.
 * @param {string} contents - Raw robots.txt body.
 * @param {string[]} allowed - URLs for which `isAllowed` must return `true`.
 * @param {string[]} disallowed - URLs for which `isDisallowed` must return `true`.
 */
function testRobots(url, contents, allowed, disallowed) {
  const robots = robotsParser(url, contents);

  // The URL is passed as the assertion message so that a failure names the
  // entry that was misclassified instead of a bare "expected false to equal true".
  allowed.forEach(function (allowedUrl) {
    expect(robots.isAllowed(allowedUrl), `isAllowed(${JSON.stringify(allowedUrl)})`).to.equal(true);
  });

  disallowed.forEach(function (disallowedUrl) {
    expect(robots.isDisallowed(disallowedUrl), `isDisallowed(${JSON.stringify(disallowedUrl)})`).to.equal(true);
  });
}
|
|
|
|
/**
 * Behavioural test suite for the robots.txt parser.
 *
 * Most cases build a robots.txt body from an array of lines and hand it to
 * `testRobots` together with the URLs expected to be allowed and disallowed.
 * Cases that exercise other parser methods (crawl delay, sitemaps, preferred
 * host, matching line number) call `robotsParser` directly.
 */
describe('Robots', function () {
  it('should parse the disallow directive', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should parse the allow directive', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html',
      'Allow: /fish/test.html',
      'Allow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/fish/test.html',
      'http://www.example.com/Test.html',
      'http://www.example.com/test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should parse patterns', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish*.php',
      'Disallow: /*.dext$',
      'Disallow: /dir*'
    ].join('\n');

    const allowed = [
      'http://www.example.com/Fish.PHP',
      'http://www.example.com/Fish.dext1',
      'http://www.example.com/folder/dir.html',
      'http://www.example.com/folder/dir/test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish.php',
      'http://www.example.com/fishheads/catfish.php?parameters',
      'http://www.example.com/AnYthInG.dext',
      'http://www.example.com/Fish.dext.dext',
      'http://www.example.com/dir/test.html',
      'http://www.example.com/directory.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should have the correct order precedence for allow and disallow', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish*.php',
      'Allow: /fish/index.php',
      'Disallow: /test',
      'Allow: /test/',
      'Disallow: /aa/',
      'Allow: /aa/',
      'Allow: /bb/',
      'Disallow: /bb/',
    ].join('\n');

    const allowed = [
      'http://www.example.com/test/index.html',
      'http://www.example.com/fish/index.php',
      'http://www.example.com/test/',
      'http://www.example.com/aa/',
      'http://www.example.com/bb/',
      'http://www.example.com/x/'
    ];

    const disallowed = [
      'http://www.example.com/fish.php',
      'http://www.example.com/fishheads/catfish.php?parameters',
      'http://www.example.com/test'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should have the correct order precedence for wildcards', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /*/',
      'Allow: /x/',
    ].join('\n');

    const allowed = [
      'http://www.example.com/x/',
      'http://www.example.com/fish.php',
      'http://www.example.com/test'
    ];

    const disallowed = [
      'http://www.example.com/a/',
      'http://www.example.com/xx/',
      'http://www.example.com/test/index.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should parse lines delimitated by \\r', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\r');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should parse lines delimitated by \\r\\n', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\r\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should parse lines delimitated by mixed line endings', function () {
    // Each entry carries its own terminator, hence the empty join separator.
    const contents = [
      'User-agent: *\r',
      'Disallow: /fish/\r\n',
      'Disallow: /test.html\n\n'
    ].join('');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should ignore rules that are not in a group', function () {
    const contents = [
      'Disallow: /secret.html',
      'Disallow: /test',
    ].join('\n');

    const allowed = [
      'http://www.example.com/secret.html',
      'http://www.example.com/test/index.html',
      'http://www.example.com/test/'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, []);
  });

  it('should ignore comments', function () {
    const contents = [
      '#',
      '# This is a comment',
      '#',
      'User-agent: *',
      '# This is a comment',
      'Disallow: /fish/ # ignore',
      '# Disallow: fish',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should ignore invalid lines', function () {
    const contents = [
      'invalid line',
      'User-agent: *',
      'Disallow: /fish/',
      ':::::another invalid line:::::',
      'Disallow: /test.html',
      'Unknown: tule'
    ].join('\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should ignore empty user-agent lines', function () {
    const contents = [
      'User-agent:',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html',
      'http://www.example.com/fish/index.php',
      'http://www.example.com/fish/',
      'http://www.example.com/test.html'
    ];

    const disallowed = [];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should support groups with multiple user agents (case insensitive)', function () {
    const contents = [
      'User-agent: agenta',
      'User-agent: agentb',
      'Disallow: /fish',
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.isAllowed('http://www.example.com/fish', 'agenta')).to.equal(false);
  });

  it('should return undefined for invalid urls', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /secret.html',
      'Disallow: /test',
    ].join('\n');

    const invalidUrls = [
      'http://example.com/secret.html',
      'http://ex ample.com/secret.html',
      'http://www.example.net/test/index.html',
      'http://www.examsple.com/test/',
      'example.com/test/',
      ':::::;;`\\|/.example.com/test/'
    ];

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    invalidUrls.forEach(function (url) {
      // The URL doubles as the assertion message to pinpoint a failing entry.
      expect(robots.isAllowed(url), url).to.equal(undefined);
    });
  });

  it('should handle Unicode, urlencoded and punycode URLs', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /secret.html',
      'Disallow: /test',
    ].join('\n');

    const allowed = [
      'http://www.münich.com/index.html',
      'http://www.xn--mnich-kva.com/index.html',
      'http://www.m%C3%BCnich.com/index.html'
    ];

    const disallowed = [
      'http://www.münich.com/secret.html',
      'http://www.xn--mnich-kva.com/secret.html',
      'http://www.m%C3%BCnich.com/secret.html'
    ];

    testRobots('http://www.münich.com/robots.txt', contents, allowed, disallowed);
    testRobots('http://www.xn--mnich-kva.com/robots.txt', contents, allowed, disallowed);
    testRobots('http://www.m%C3%BCnich.com/robots.txt', contents, allowed, disallowed);
  });

  it('should handle Unicode and urlencoded paths', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /%CF%80',
      'Disallow: /%e2%9d%83',
      'Disallow: /%a%a',
      'Disallow: /💩',
      'Disallow: /✼*t$',
      'Disallow: /%E2%9C%A4*t$',
      'Disallow: /✿%a',
      'Disallow: /http%3A%2F%2Fexample.org'
    ].join('\n');

    const allowed = [
      'http://www.example.com/✼testing',
      'http://www.example.com/%E2%9C%BCtesting',
      'http://www.example.com/✤testing',
      'http://www.example.com/%E2%9C%A4testing',
      'http://www.example.com/http://example.org',
      'http://www.example.com/http:%2F%2Fexample.org'
    ];

    const disallowed = [
      'http://www.example.com/%CF%80',
      'http://www.example.com/%CF%80/index.html',
      'http://www.example.com/π',
      'http://www.example.com/π/index.html',
      'http://www.example.com/%e2%9d%83',
      'http://www.example.com/%E2%9D%83/index.html',
      'http://www.example.com/❃',
      'http://www.example.com/❃/index.html',
      'http://www.example.com/%F0%9F%92%A9',
      'http://www.example.com/%F0%9F%92%A9/index.html',
      'http://www.example.com/💩',
      'http://www.example.com/💩/index.html',
      'http://www.example.com/%a%a',
      'http://www.example.com/%a%a/index.html',
      'http://www.example.com/✼test',
      'http://www.example.com/%E2%9C%BCtest',
      'http://www.example.com/✤test',
      'http://www.example.com/%E2%9C%A4testt',
      'http://www.example.com/✿%a',
      'http://www.example.com/%E2%9C%BF%atest',
      'http://www.example.com/http%3A%2F%2Fexample.org'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should handle lone high / low surrogates', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /\uD800',
      'Disallow: /\uDC00'
    ].join('\n');

    // These are invalid so can't be disallowed
    const allowed = [
      'http://www.example.com/\uDC00',
      'http://www.example.com/\uD800'
    ];

    const disallowed = [];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should ignore host case', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /secret.html',
      'Disallow: /test',
    ].join('\n');

    const allowed = [
      'http://www.example.com/index.html',
      'http://www.ExAmPlE.com/index.html',
      'http://www.EXAMPLE.com/index.html'
    ];

    const disallowed = [
      'http://www.example.com/secret.html',
      'http://www.ExAmPlE.com/secret.html',
      'http://www.EXAMPLE.com/secret.html'
    ];

    testRobots('http://www.eXample.com/robots.txt', contents, allowed, disallowed);
  });

  it('should handle relative paths', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish',
      'Allow: /fish/test',
    ].join('\n');

    const robots = robotsParser('/robots.txt', contents);
    expect(robots.isAllowed('/fish/test')).to.equal(true);
    expect(robots.isAllowed('/fish')).to.equal(false);
  });

  it('should not allow relative paths if domain specified', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish',
      'Allow: /fish/test',
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);
    expect(robots.isAllowed('/fish/test')).to.equal(undefined);
    expect(robots.isAllowed('/fish')).to.equal(undefined);
  });

  it('should not treat invalid robots.txt URLs as relative', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish',
      'Allow: /fish/test',
    ].join('\n');

    const robots = robotsParser('https://ex ample.com/robots.txt', contents);
    expect(robots.isAllowed('/fish/test')).to.equal(undefined);
    expect(robots.isAllowed('/fish')).to.equal(undefined);
  });

  it('should not allow URls if domain specified and robots.txt is relative', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish',
      'Allow: /fish/test',
    ].join('\n');

    const robots = robotsParser('/robots.txt', contents);
    expect(robots.isAllowed('http://www.example.com/fish/test')).to.equal(undefined);
    expect(robots.isAllowed('http://www.example.com/fish')).to.equal(undefined);
  });

  it('should allow all if empty robots.txt', function () {
    const allowed = [
      'http://www.example.com/secret.html',
      'http://www.example.com/test/index.html',
      'http://www.example.com/test/'
    ];

    const robots = robotsParser('http://www.example.com/robots.txt', '');

    allowed.forEach(function (url) {
      // The URL doubles as the assertion message to pinpoint a failing entry.
      expect(robots.isAllowed(url), url).to.equal(true);
    });
  });

  it('should treat null as allowing all', function () {
    const robots = robotsParser('http://www.example.com/robots.txt', null);

    expect(robots.isAllowed('http://www.example.com/', 'userAgent')).to.equal(true);
    expect(robots.isAllowed('http://www.example.com/')).to.equal(true);
  });

  it('should handle invalid robots.txt urls', function () {
    const contents = [
      'user-agent: *',
      'disallow: /',

      'host: www.example.com',
      'sitemap: /sitemap.xml'
    ].join('\n');

    // Values passed as the robots.txt location itself, none of them usable.
    const robotsTxtUrls = [
      undefined,
      null,
      'null',
      ':/wom/test/'
    ];

    robotsTxtUrls.forEach(function (url) {
      const robots = robotsParser(url, contents);
      expect(robots.isAllowed('http://www.example.com/index.html')).to.equal(undefined);
      expect(robots.getPreferredHost()).to.equal('www.example.com');
      expect(robots.getSitemaps()).to.eql(['/sitemap.xml']);
    });
  });

  it('should parse the crawl-delay directive', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1',

      'user-agent: b',
      'disallow: /d',

      'user-agent: c',
      'user-agent: d',
      'crawl-delay: 10'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getCrawlDelay('a')).to.equal(1);
    expect(robots.getCrawlDelay('b')).to.equal(undefined);
    expect(robots.getCrawlDelay('c')).to.equal(10);
    expect(robots.getCrawlDelay('d')).to.equal(10);
    expect(robots.getCrawlDelay()).to.equal(undefined);
  });

  it('should ignore invalid crawl-delay directives', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1.2.1',

      'user-agent: b',
      'crawl-delay: 1.a0',

      'user-agent: c',
      'user-agent: d',
      'crawl-delay: 10a'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getCrawlDelay('a')).to.equal(undefined);
    expect(robots.getCrawlDelay('b')).to.equal(undefined);
    expect(robots.getCrawlDelay('c')).to.equal(undefined);
    expect(robots.getCrawlDelay('d')).to.equal(undefined);
  });

  it('should parse the sitemap directive', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1',
      'sitemap: http://example.com/test.xml',

      'user-agent: b',
      'disallow: /d',

      'sitemap: /sitemap.xml',
      // The trailing space is deliberate; the expected value below has none.
      'sitemap: http://example.com/test/sitemap.xml '
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getSitemaps()).to.eql([
      'http://example.com/test.xml',
      '/sitemap.xml',
      'http://example.com/test/sitemap.xml'
    ]);
  });

  it('should parse the host directive', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1',
      'host: www.example.net',

      'user-agent: b',
      'disallow: /d',

      'host: example.com'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getPreferredHost()).to.equal('example.com');
  });

  it('should parse empty and invalid directives', function () {
    const contents = [
      'user-agent:',
      'user-agent:::: a::',
      'crawl-delay:',
      'crawl-delay:::: 0:',
      'host:',
      'host:: example.com',
      'sitemap:',
      'sitemap:: site:map.xml',
      'disallow:',
      'disallow::: /:',
      'allow:',
      'allow::: /:',
    ].join('\n');

    // No assertion: the case passes as long as parsing does not throw.
    robotsParser('http://www.example.com/robots.txt', contents);
  });

  it('should treat only the last host directive as valid', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1',
      'host: www.example.net',

      'user-agent: b',
      'disallow: /d',

      'host: example.net',
      'host: example.com'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getPreferredHost()).to.equal('example.com');
  });

  it('should return null when there is no host directive', function () {
    const contents = [
      'user-agent: a',
      'crawl-delay: 1',

      'user-agent: b',
      'disallow: /d',
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getPreferredHost()).to.equal(null);
  });

  it('should fallback to * when a UA has no rules of its own', function () {
    const contents = [
      'user-agent: *',
      'crawl-delay: 1',

      'user-agent: b',
      'crawl-delay: 12',

      'user-agent: c',
      'user-agent: d',
      'crawl-delay: 10'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getCrawlDelay('should-fall-back')).to.equal(1);
    expect(robots.getCrawlDelay('d')).to.equal(10);
    expect(robots.getCrawlDelay('dd')).to.equal(1);
  });

  it('should not fallback to * when a UA has rules', function () {
    const contents = [
      'user-agent: *',
      'crawl-delay: 1',

      'user-agent: b',
      'disallow:'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getCrawlDelay('b')).to.equal(undefined);
  });

  it('should handle UAs with object property names', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish',
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);
    expect(robots.isAllowed('http://www.example.com/fish', 'constructor')).to.equal(false);
    expect(robots.isAllowed('http://www.example.com/fish', '__proto__')).to.equal(false);
  });

  it('should ignore version numbers in the UA string', function () {
    const contents = [
      'user-agent: *',
      'crawl-delay: 1',

      'user-agent: b',
      'crawl-delay: 12',

      'user-agent: c',
      'user-agent: d',
      'crawl-delay: 10'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getCrawlDelay('should-fall-back/1.0.0')).to.equal(1);
    expect(robots.getCrawlDelay('d/12')).to.equal(10);
    expect(robots.getCrawlDelay('dd / 0-32-3')).to.equal(1);
    expect(robots.getCrawlDelay('b / 1.0')).to.equal(12);
  });

  it('should return the line number of the matching directive', function () {
    // Empty strings are real (blank) lines of the file and count towards the
    // line numbers asserted below.
    const contents = [
      '',
      'User-agent: *',
      '',
      'Disallow: /fish/',
      'Disallow: /test.html',
      'Allow: /fish/test.html',
      'Allow: /test.html',
      '',
      'User-agent: a',
      'allow: /',
      '',
      'User-agent: b',
      'disallow: /test',
      'disallow: /t*t',
      '',
      'User-agent: c',
      'Disallow: /fish*.php',
      'Allow: /fish/index.php'
    ].join('\n');

    const robots = robotsParser('http://www.example.com/robots.txt', contents);

    expect(robots.getMatchingLineNumber('http://www.example.com/fish')).to.equal(-1);
    expect(robots.getMatchingLineNumber('http://www.example.com/fish/test.html')).to.equal(6);
    expect(robots.getMatchingLineNumber('http://www.example.com/Test.html')).to.equal(-1);

    expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php')).to.equal(4);
    expect(robots.getMatchingLineNumber('http://www.example.com/fish/')).to.equal(4);
    expect(robots.getMatchingLineNumber('http://www.example.com/test.html')).to.equal(7);

    expect(robots.getMatchingLineNumber('http://www.example.com/test.html', 'a')).to.equal(10);

    expect(robots.getMatchingLineNumber('http://www.example.com/fish.php', 'c')).to.equal(17);
    expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php', 'c')).to.equal(18);
  });

  it('should handle large wildcards efficiently', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /' + '*'.repeat(2048) + '.html',
    ].join('\n');

    const allowed = [
      'http://www.example.com/' + 'sub'.repeat(2048) + 'folder/index.php',
    ];

    const disallowed = [
      'http://www.example.com/secret.html'
    ];

    const start = Date.now();
    testRobots('http://www.eXample.com/robots.txt', contents, allowed, disallowed);
    const end = Date.now();

    // Should take less than 500 ms (high to allow for variableness of
    // machines running the test, should normally be much less)
    expect(end - start).to.be.lessThan(500);
  });

  it('should honor given port number', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com:8080/fish',
      'http://www.example.com:8080/Test.html'
    ];

    const disallowed = [
      'http://www.example.com/fish',
      'http://www.example.com/Test.html',
      'http://www.example.com:80/fish',
      'http://www.example.com:80/Test.html'
    ];

    testRobots('http://www.example.com:8080/robots.txt', contents, allowed, disallowed);
  });

  it('should default to port 80 for http: if no port given', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'http://www.example.com:80/fish',
      'http://www.example.com:80/Test.html'
    ];

    const disallowed = [
      'http://www.example.com:443/fish',
      'http://www.example.com:443/Test.html',
      'http://www.example.com:80/fish/index.php',
      'http://www.example.com:80/fish/',
      'http://www.example.com:80/test.html'
    ];

    testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
  });

  it('should default to port 443 for https: if no port given', function () {
    const contents = [
      'User-agent: *',
      'Disallow: /fish/',
      'Disallow: /test.html'
    ].join('\n');

    const allowed = [
      'https://www.example.com:443/fish',
      'https://www.example.com:443/Test.html',
      'https://www.example.com/fish',
      'https://www.example.com/Test.html'
    ];

    const disallowed = [
      'http://www.example.com:80/fish',
      'http://www.example.com:80/Test.html',
      'http://www.example.com:443/fish/index.php',
      'http://www.example.com:443/fish/',
      'http://www.example.com:443/test.html'
    ];

    testRobots('https://www.example.com/robots.txt', contents, allowed, disallowed);
  });
});
|