From 948804f73fbabf9507b78715e99467633564885c Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Wed, 29 Jul 2015 17:03:32 +0000 Subject: [PATCH] Added static javascript and image files to the URLs crawled by the test-crawler. - Legacy-Id: 9913 --- bin/test-crawl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/test-crawl b/bin/test-crawl index c8e36c0e3..66ae5abca 100755 --- a/bin/test-crawl +++ b/bin/test-crawl @@ -65,7 +65,7 @@ def strip_url(url): return url def extract_html_urls(content): - for m in re.finditer(r'(<(?:a|link) [^>]*href=[\'"]([^"]+)[\'"][^>]*>)', content): + for m in re.finditer(r'(<(?:(?:a|link) [^>]*href|(?:img|script) [^>]*src)=[\'"]([^"]+)[\'"][^>]*>)', content): if re.search(r'rel=["\']?nofollow["\']', m.group(1)): continue