From f48452853fc41c0be6ffe8ab0707f96e81b16a34 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Sat, 1 Aug 2015 12:47:03 +0000 Subject: [PATCH] Changed test-crawl to avoid unnecessary repetitions of the blacklisting message. - Legacy-Id: 9933 --- bin/test-crawl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/bin/test-crawl b/bin/test-crawl index a67eab477..83b03e24e 100755 --- a/bin/test-crawl +++ b/bin/test-crawl @@ -2,7 +2,6 @@ import os, sys, re, datetime, argparse, traceback, tempfile, json, subprocess import html5lib -import debug # pyflakes:ignore import random # Set up import path to find our own Django @@ -42,6 +41,9 @@ import django.test django.setup() +# This needs to come after we set up sys path to include the local django +import debug # pyflakes:ignore + # prevent memory from leaking when settings.DEBUG=True from django.db import connection class DontSaveQueries(object): @@ -103,10 +105,6 @@ def extract_tastypie_urls(content): def check_html_valid(url, response, args): global parser, validated_urls, doc_types, warnings - # These URLs have known issues, skip them until those are fixed - if re.search('(/secr|admin/)|/doc/.*/edit/info/', url): - log("%s blacklisted; skipping HTML validation" % url) - return key = url if not args.validate_all: # derive a key for urls like this by replacing primary keys @@ -123,6 +121,13 @@ def check_html_valid(url, response, args): key = re.sub("/%s-.*/"%slug, "/%s-nnnn/"%slug, key) if not key in validated_urls: + + # These URLs have known issues, skip them until those are fixed + if re.search('(/secr|admin/)|/doc/.*/edit/info/', url): + log("%s blacklisted; skipping HTML validation" % url) + validated_urls[key] = True + return + if hasattr(response, "content"): content = response.content else: