Added static JavaScript and image files to the URLs crawled by the test crawler.
- Legacy-Id: 9913
This commit is contained in:
parent
1b36eec887
commit
948804f73f
|
@ -65,7 +65,7 @@ def strip_url(url):
|
|||
return url
|
||||
|
||||
def extract_html_urls(content):
|
||||
for m in re.finditer(r'(<(?:a|link) [^>]*href=[\'"]([^"]+)[\'"][^>]*>)', content):
|
||||
for m in re.finditer(r'(<(?:(?:a|link) [^>]*href|(?:img|script) [^>]*src)=[\'"]([^"]+)[\'"][^>]*>)', content):
|
||||
if re.search(r'rel=["\']?nofollow["\']', m.group(1)):
|
||||
continue
|
||||
|
||||
|
|
Loading…
Reference in a new issue