From 25af6fbfad940e1e4d260d09c033e5d0b9ff3c6e Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Mon, 8 Jul 2019 19:37:10 +0000 Subject: [PATCH] Updated the test crawler for python3. - Legacy-Id: 16438 --- bin/test-crawl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/test-crawl b/bin/test-crawl index 0a8d17977..6226d6837 100755 --- a/bin/test-crawl +++ b/bin/test-crawl @@ -1,4 +1,5 @@ #!/usr/bin/env python +# Copyright The IETF Trust 2013-2019, All Rights Reserved import os, sys, re, datetime, argparse, traceback, json, subprocess import html5lib @@ -62,6 +63,7 @@ import debug # pyflakes:ignore from ietf.name.models import DocTypeName from ietf.utils.html import unescape +from ietf.utils.test_utils import unicontent # --- Constants --- @@ -387,7 +389,7 @@ if __name__ == "__main__": if ctype == "text/html": try: if args.follow and not skip_extract_from(url): - for u in extract_html_urls(r.content): + for u in extract_html_urls(unicontent(r)): if u not in visited and u not in urls: urls[u] = url referrers[u] = url @@ -403,7 +405,7 @@ if __name__ == "__main__": elif ctype == "application/json": try: if args.follow: - for u in extract_tastypie_urls(r.content): + for u in extract_tastypie_urls(unicontent(r)): if u not in visited and u not in urls: urls[u] = url referrers[u] = url