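Added a --random switch to the test crawler to choose between crawl modes: with --random the next URL is picked at random from the pending URLs; without it, URLs are taken in dict popitem() order.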

- Legacy-Id: 9893
This commit is contained in:
Henrik Levkowetz 2015-07-27 16:52:26 +00:00
parent 2da90c8ab3
commit 224fef557c

View file

@ -28,6 +28,8 @@ parser.add_argument('--validator-nu', dest='validator_nu', action='store_true',
help='Use validator.nu instead of html5lib for HTML validation')
parser.add_argument('--pedantic', action='store_true',
help='Stop the crawl on the first HTML validation issue')
parser.add_argument('--random', action='store_true',
help='Crawl URLs randomly')
parser.add_argument('--validate-all', dest='validate_all', action='store_true', default=False,
help='Run html 5 validation on all pages, without skipping similar urls. '
'(The default is to only run validation on one of /foo/1/, /foo/2/, /foo/3/, etc.)')
@ -232,9 +234,12 @@ if __name__ == "__main__":
sys.exit(1)
while urls:
# popitem() is documented to be random, but really isn't
url = random.choice(urls.keys())
referrer = urls.pop(url)
if args.random:
# popitem() is documented to be random, but really isn't
url = random.choice(urls.keys())
referrer = urls.pop(url)
else:
url, referrer = urls.popitem()
visited.add(url)