Added static JavaScript and image files to the URLs crawled by the test-crawler.

- Legacy-Id: 9913
This commit is contained in:
Henrik Levkowetz 2015-07-29 17:03:32 +00:00
parent 1b36eec887
commit 948804f73f

View file

@@ -65,7 +65,7 @@ def strip_url(url):
return url
def extract_html_urls(content):
for m in re.finditer(r'(<(?:a|link) [^>]*href=[\'"]([^"]+)[\'"][^>]*>)', content):
for m in re.finditer(r'(<(?:(?:a|link) [^>]*href|(?:img|script) [^>]*src)=[\'"]([^"]+)[\'"][^>]*>)', content):
if re.search(r'rel=["\']?nofollow["\']', m.group(1)):
continue