From 754ba193cac337a93f9c43d1ea2ea4a89e554ee5 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Wed, 13 Jun 2007 17:26:04 +0000 Subject: [PATCH] A small script to run a diff against the master for one single django URL specified in any of the testurl.list files. Uses environment variable DJANGO_SERVER if set, or http://merlot.tools.ietf.org:31415/ otherwise. - Legacy-Id: 375 --- ietf/utils/soup2text.py | 14 +++++++----- test/check_url | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) create mode 100755 test/check_url diff --git a/ietf/utils/soup2text.py b/ietf/utils/soup2text.py index 9c6f9f33b..fe04349c9 100755 --- a/ietf/utils/soup2text.py +++ b/ietf/utils/soup2text.py @@ -7,10 +7,11 @@ try: except: from BeautifulSoup import Tag, BeautifulSoup, NavigableString -block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"] -space_tags = ["th", "td", "br"] +block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li", "option"] +space_tags = ["th", "td"] +break_tags = ["br"] ignore_tags = ["head", "script", "style"] -pre_tags = ["pre"] +pre_tags = ["pre", "option"] entities = [("<", "<"), (">", ">"), (""", '"'), ("'", "'"), (" ", " "), @@ -85,8 +86,11 @@ def render(node, encoding='latin-1', pre=False): node.is_block = True else: words.append(child.text) - if child.name in space_tags and not (words and words[-1] and words[-1][-1] in [" ", "\t", "\n"]): - words.append(" ") + if child.text[-1] not in [" ", "\t", "\n"]: + if child.name in space_tags: + words.append(" ") + if child.name in break_tags: + words.append("\n") else: raise ValueError("Unexpected node type: '%s'" % child) if words: diff --git a/test/check_url b/test/check_url new file mode 100755 index 000000000..f3f7b8333 --- /dev/null +++ b/test/check_url @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +import sys +import 
os
+
+# Warning: The following code assumes that this file is located in the svn
+# checkout directory, and hasn't been moved:
+ietfpath = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
+sys.path.append(ietfpath)
+
+os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"
+
+from ietf.utils.soup2text import soup2text as html2text
+from difflib import unified_diff
+import urllib2 as urllib
+from ietf.tests import read_testurls
+
+# str.rstrip() returns a new string: keep it, or a trailing "/" yields "//" in url2.
+django_server = os.environ.get("DJANGO_SERVER", "http://merlot.tools.ietf.org:31415").rstrip("/")
+
+# Index the entries of every testurl.list / testurls.list below ietfpath by entry[1].
+testtuples = []
+for root, dirs, files in os.walk(ietfpath):
+    for listname in ("testurl.list", "testurls.list"):
+        if listname in files:
+            testtuples += read_testurls(root + "/" + listname)
+testurls = dict([(entry[1], entry) for entry in testtuples])
+
+def fetch(url):
+    """Return the content read from url, closing the connection even if the read fails."""
+    response = urllib.urlopen(url)
+    try:
+        return response.read()
+    finally:
+        response.close()
+
+for url in sys.argv[1:]:
+    entry = testurls[url]    # KeyError if url is not listed in any testurl file
+    if len(entry) > 2:       # entries without a third element have nothing to diff against
+        url1 = entry[2]
+        url2 = django_server + entry[1]
+        print "Fetching %s ..." % url1
+        text1 = html2text(fetch(url1)).split("\n")
+        print "Fetching %s ..." % url2
+        text2 = html2text(fetch(url2)).split("\n")
+        print "\n".join(unified_diff(text1, text2, url1, url2, "", "", 3, lineterm=""))
+    
\ No newline at end of file