review decisions. Add migration to split up iesg_approve/disapprove events of the form "IESG has approved ... and state has been changed ..." into the approve/disapprove event and a synthesized state change event. Also regularize the descriptions a bit. This simplifies the code in the new page. - Legacy-Id: 6340
102 lines
2.8 KiB
Python
Executable file
102 lines
2.8 KiB
Python
Executable file
#!/usr/bin/env python
|
|
|
|
import os, sys, re, datetime, optparse, traceback
|
|
import syslog
|
|
|
|
# Boilerplate: make the code base importable and configure Django before any
# Django component is used.  The statements below are order-dependent.

# Directory two levels above the one containing this script
# (presumably the project root -- confirm against the repository layout).
basedir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "../..")
)
# Put it first on the import path so the ``ietf`` import below resolves there.
sys.path = [basedir] + sys.path

from ietf import settings
from django.core import management

# Configure Django from the project's settings module.
management.setup_environ(settings)

import django.test

# NOTE: this rebinds the name ``settings``; from here on it no longer refers
# to the ``ietf.settings`` module imported above.
from django.conf import settings
|
|
|
|
# prevent memory from leaking when settings.DEBUG=True
|
|
from django.db import connection
|
|
class DontSaveQueries(object):
    """Minimal sink object whose ``append`` throws its argument away.

    The script assigns an instance of this class to ``connection.queries``
    so that whatever gets appended there is not accumulated in memory.
    Only ``append`` is provided; no other list behaviour is emulated.
    """

    def append(self, x):
        """Discard *x* and return ``None``; nothing is stored."""
        return None
|
|
# Replace the connection's query log with a sink that stores nothing.
connection.queries = DontSaveQueries()

# Links longer than this many characters are skipped when extracting URLs.
MAX_URL_LENGTH = 500
# A response that takes more than this many seconds is tagged "SLOW".
SLOW_THRESHOLD = 1.0

# Entry points from which the crawl starts.
initial = [
    "/doc/all/",
    "/doc/in-last-call/",
    "/iesg/decisions/",
]

# URLs that have already been requested.
visited = set()
# Work queue of URLs still to request: url -> referrer
urls = dict()
|
|
|
|
|
|
def strip_url(url):
    """Return *url* with a leading ``http://testserver`` removed.

    The check is a plain string-prefix test (presumably matching the host
    the Django test client puts into absolute URLs).  URLs that do not
    start with that exact prefix are returned unchanged.
    """
    prefix = "http://testserver"
    return url[len(prefix):] if url.startswith(prefix) else url
|
|
|
|
def extract_html_urls(content):
    """Yield the site-local link targets of ``<a>`` tags found in *content*.

    *content* is the HTML text to scan.  Each double-quoted ``href`` value
    is passed through ``strip_url()``; the result is yielded only if it
    starts with ``/`` and is no longer than ``MAX_URL_LENGTH`` characters.
    Duplicates are not filtered here.

    The pattern is confined to a single ``<a ...>`` tag and matches
    lazily.  The earlier greedy ``<a.*href="...">`` form returned only the
    last link when several anchors shared a line, missed anchors that had
    attributes after ``href``, and also matched tags that merely start
    with "a" (e.g. ``<abbr``).
    """
    # ``<a`` + whitespace, optionally other attributes ending in whitespace
    # (so ``data-href`` is not mistaken for ``href``), then the href value.
    for m in re.finditer(r'<a\s+(?:[^>]*?\s)?href="([^"]+)"', content):
        url = strip_url(m.group(1))

        if len(url) > MAX_URL_LENGTH:
            continue  # avoid infinite GET parameter appendages

        if not url.startswith("/"):
            continue  # external, relative, mailto:, fragment-only, ...

        yield url
|
|
|
|
# Crawl the site through Django's test client.  Starting from ``initial``,
# each queued URL is requested once; links found in HTML responses and
# redirect targets are queued in turn.  One status line is printed per URL:
#   <status> <seconds>s <url> [FAIL (from <referrer>)] [SLOW]
#
# Every print passes a single pre-formatted string in parentheses, which
# produces the same output as a Python 2 print statement and is also valid
# for the Python 3 print function.
client = django.test.Client()

for url in initial:
    urls[url] = "[initial]"

while urls:
    url, referrer = urls.popitem()

    visited.add(url)

    # Taken before the ``try`` so the failure report below can always
    # compute how long the request ran.
    timestamp = datetime.datetime.now()
    try:
        r = client.get(url)
        elapsed = datetime.datetime.now() - timestamp
    except KeyboardInterrupt:
        print("was fetching %s" % url)
        sys.exit(1)
    except Exception:
        # An exception escaping the request is reported as a 500 and the
        # crawl goes on.  ``Exception`` rather than a bare ``except`` so
        # that SystemExit is not swallowed.
        failed_after = (datetime.datetime.now() - timestamp).total_seconds()
        print("500 %.3fs %s FAIL (from %s)" % (failed_after, url, referrer))
        print("=============")
        print(traceback.format_exc())
        print("=============")
    else:
        tags = []

        if r.status_code in (301, 302):
            u = strip_url(r["Location"])
            # Apply the same filter as for links found in HTML: only
            # site-local targets of bounded length are followed, so a
            # redirect to another host is not fetched from the test server.
            if (u.startswith("/") and len(u) <= MAX_URL_LENGTH
                    and u not in visited and u not in urls):
                urls[u] = referrer  # referrer is original referrer, not redirected url

        elif r.status_code == 200:
            # Drop parameters such as "; charset=utf-8".
            ctype = r["Content-Type"].split(";", 1)[0].strip()

            if ctype == "text/html":
                try:
                    for u in extract_html_urls(r.content):
                        if u not in visited and u not in urls:
                            urls[u] = url
                except Exception:
                    # Report and keep crawling; Ctrl-C is no longer caught
                    # here and aborts the run.
                    print("error extracting HTML urls from %s" % url)
                    print("=============")
                    print(traceback.format_exc())
                    print("=============")

        else:
            # Plain (non-unicode) literal so the status line below is built
            # from one string type.
            tags.append("FAIL (from %s)" % referrer)

        if elapsed.total_seconds() > SLOW_THRESHOLD:
            tags.append("SLOW")

        print("%s %.3fs %s %s" % (r.status_code, elapsed.total_seconds(), url, " ".join(tags)))
|
|
|