Changed a bunch of regexes to use r strings; also miscellaneous smaller fixes.

- Legacy-Id: 16376
Henrik Levkowetz 2019-07-04 15:51:05 +00:00
parent 1225f8af6b
commit 0589d0b313
10 changed files with 63 additions and 64 deletions
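Background (an illustrative aside, not part of the commit): in a plain Python string literal, regex escapes such as \d, \w or \s are not valid string escapes; Python 3.6+ keeps them verbatim but emits a DeprecationWarning. Numeric escapes such as \1, on the other hand, are silently converted to control characters, which breaks backreferences in re.sub replacement strings. Raw string literals sidestep both issues. A minimal sketch, using made-up sample text:

import re

# Illustrative only, not from the commit. With raw pattern and replacement
# literals, the backreference \1 reaches re.sub intact:
text = "Contact <joe@example.org> please"
fixed = re.sub(r"<([\w\-.]+@[\w\-.]+)>", r"&lt;\1&gt;", text)
print(fixed)            # Contact &lt;joe@example.org&gt; please

# In a plain (non-raw) replacement literal, Python parses "\1" as chr(1),
# so re.sub inserts a control character instead of the captured group:
broken = re.sub(r"<([\w\-.]+@[\w\-.]+)>", "&lt;\1&gt;", text)
print(broken == fixed)  # False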

View file

@@ -4,8 +4,6 @@ import bleach
import datetime
import re
import types
from email.utils import parseaddr
from django import template
@@ -47,8 +45,8 @@ def parse_email_list(value):
Splitting a string of email addresses should return a list:
>>> unicode(parse_email_list('joe@example.org, fred@example.com'))
u'<a href="mailto:joe@example.org">joe@example.org</a>, <a href="mailto:fred@example.com">fred@example.com</a>'
>>> parse_email_list('joe@example.org, fred@example.com')
'<a href="mailto:joe@example.org">joe@example.org</a>, <a href="mailto:fred@example.com">fred@example.com</a>'
Parsing a non-string should return the input value, rather than fail:
@@ -88,7 +86,7 @@ def strip_email(value):
@register.filter(name='fix_angle_quotes')
def fix_angle_quotes(value):
if "<" in value:
value = re.sub("<([\w\-\.]+@[\w\-\.]+)>", "&lt;\1&gt;", value)
value = re.sub(r"<([\w\-\.]+@[\w\-\.]+)>", "&lt;\1&gt;", value)
return value
# there's an "ahref -> a href" in GEN_UTIL
@@ -213,13 +211,13 @@ def urlize_ietf_docs(string, autoescape=None):
"""
if autoescape and not isinstance(string, SafeData):
string = escape(string)
string = re.sub("(?<!>)(RFC ?)0{0,3}(\d+)", "<a href=\"/doc/rfc\\2/\">\\1\\2</a>", string)
string = re.sub("(?<!>)(BCP ?)0{0,3}(\d+)", "<a href=\"/doc/bcp\\2/\">\\1\\2</a>", string)
string = re.sub("(?<!>)(STD ?)0{0,3}(\d+)", "<a href=\"/doc/std\\2/\">\\1\\2</a>", string)
string = re.sub("(?<!>)(FYI ?)0{0,3}(\d+)", "<a href=\"/doc/fyi\\2/\">\\1\\2</a>", string)
string = re.sub("(?<!>)(draft-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
string = re.sub("(?<!>)(conflict-review-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
string = re.sub("(?<!>)(status-change-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
string = re.sub(r"(?<!>)(RFC ?)0{0,3}(\d+)", "<a href=\"/doc/rfc\\2/\">\\1\\2</a>", string)
string = re.sub(r"(?<!>)(BCP ?)0{0,3}(\d+)", "<a href=\"/doc/bcp\\2/\">\\1\\2</a>", string)
string = re.sub(r"(?<!>)(STD ?)0{0,3}(\d+)", "<a href=\"/doc/std\\2/\">\\1\\2</a>", string)
string = re.sub(r"(?<!>)(FYI ?)0{0,3}(\d+)", "<a href=\"/doc/fyi\\2/\">\\1\\2</a>", string)
string = re.sub(r"(?<!>)(draft-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
string = re.sub(r"(?<!>)(conflict-review-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
string = re.sub(r"(?<!>)(status-change-[-0-9a-zA-Z._+]+)", "<a href=\"/doc/\\1/\">\\1</a>", string)
return mark_safe(string)
urlize_ietf_docs = stringfilter(urlize_ietf_docs)
@@ -461,8 +459,8 @@ def capfirst_allcaps(text):
"""Like capfirst, except it doesn't lowercase words in ALL CAPS."""
result = text
i = False
for token in re.split("(\W+)", striptags(text)):
if not re.match("^[A-Z]+$", token):
for token in re.split(r"(\W+)", striptags(text)):
if not re.match(r"^[A-Z]+$", token):
if not i:
result = result.replace(token, token.capitalize())
i = True
@@ -474,8 +472,8 @@ def capfirst_allcaps(text):
def lower_allcaps(text):
"""Like lower, except it doesn't lowercase words in ALL CAPS."""
result = text
for token in re.split("(\W+)", striptags(text)):
if not re.match("^[A-Z]+$", token):
for token in re.split(r"(\W+)", striptags(text)):
if not re.match(r"^[A-Z]+$", token):
result = result.replace(token, token.lower())
return result
@@ -515,7 +513,7 @@ def zaptmp(s):
@register.filter()
def rfcbis(s):
m = re.search('^.*-rfc(\d+)-?bis(-.*)?$', s)
m = re.search(r'^.*-rfc(\d+)-?bis(-.*)?$', s)
return None if m is None else 'rfc' + m.group(1)
@register.filter

View file

@@ -829,7 +829,7 @@ def build_doc_meta_block(doc, path):
line = re.sub(r'Errata exist', r'<a class="text-warning" href="%s">Errata exist</a>'%(errata_url, ), line)
if is_hst or not rfcnum:
# make current draft rev bold
line = re.sub(r'>(%s)<'%rev, '><b>\g<1></b><', line)
line = re.sub(r'>(%s)<'%rev, r'><b>\g<1></b><', line)
line = re.sub(r'IPR declarations', r'<a class="text-warning" href="%s">IPR declarations</a>'%(ipr_url, ), line)
line = line.replace(r'[txt]', r'[<a href="%s">txt</a>]' % doc.href())
lines[i] = line

View file

@@ -623,7 +623,7 @@ def document_main(request, name, rev=None):
def document_html(request, name, rev=None):
if name.startswith('rfc0'):
name = "rfc" + name[3:].lstrip('0')
if name.startswith('review-') and re.search('-\d\d\d\d-\d\d$', name):
if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name):
name = "%s-%s" % (name, rev)
if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev):
name = "%s-%s" % (name, rev[:-3])
@@ -658,7 +658,7 @@ def document_html(request, name, rev=None):
return render(request, "doc/document_html.html", {"doc":doc, "top":top, "navbar_mode":"navbar-static-top", })
def check_doc_email_aliases():
pattern = re.compile('^expand-(.*?)(\..*?)?@.*? +(.*)$')
pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$')
good_count = 0
tot_count = 0
with open(settings.DRAFT_VIRTUAL_PATH,"r") as virtual_file:
@@ -673,9 +673,9 @@ def check_doc_email_aliases():
def get_doc_email_aliases(name):
if name:
pattern = re.compile('^expand-(%s)(\..*?)?@.*? +(.*)$'%name)
pattern = re.compile(r'^expand-(%s)(\..*?)?@.*? +(.*)$'%name)
else:
pattern = re.compile('^expand-(.*?)(\..*?)?@.*? +(.*)$')
pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$')
aliases = []
with open(settings.DRAFT_VIRTUAL_PATH,"r") as virtual_file:
for line in virtual_file.readlines():

View file

@@ -67,7 +67,7 @@ def initials(name):
given += " "+middle
# Don't use non-word characters as initials.
# Example: The Bulgarian transcribed name "'Rnest Balkanska" should not have an initial of "'".
given = re.sub('[^ .\w]', '', given)
given = re.sub(r'[^ .\w]', '', given)
initials = " ".join([ n[0].upper()+'.' for n in given.split() ])
return initials

View file

@@ -30,7 +30,7 @@ def redirect(request, path="", script=""):
continue
if fc[0] in rparam:
remove_args.append(fc[0])
num = re.match('(\d+)', rparam[fc[0]])
num = re.match(r'(\d+)', rparam[fc[0]])
if (num and int(num.group(1))) or (num is None):
cmd = flag
break
@@ -64,8 +64,8 @@ def redirect(request, path="", script=""):
# contains non-ASCII characters. The old scripts didn't support
# non-ASCII characters anyway, so there's no need to handle
# them fully correctly in these redirects.
url += str(rest % rparam)
url += "/"
(rest % rparam).encode('ascii')
url += (rest % rparam) + "/"
except:
# rest had something in it that request didn't have, so just
# redirect to the root of the tool.

View file

@@ -56,7 +56,7 @@ def parse_queue(response):
events.expandNode(node)
node.normalize()
draft_name = get_child_text(node, "draft").strip()
draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
draft_name = re.sub(r"(-\d\d)?(.txt){1,2}$", "", draft_name)
date_received = get_child_text(node, "date-received")
state = ""
@@ -306,7 +306,7 @@ def parse_index(response):
abstract = get_child_text(abstract, "p")
draft = get_child_text(node, "draft")
if draft and re.search("-\d\d$", draft):
if draft and re.search(r"-\d\d$", draft):
draft = draft[0:-3]
if len(node.getElementsByTagName("errata-url")) > 0:

View file

@@ -191,7 +191,7 @@ class Draft():
name, __ = base.split(".", 1)
else:
name = base
revmatch = re.search("\d\d$", name)
revmatch = re.search(r"\d\d$", name)
if revmatch:
filename = name[:-3]
revision = name[-2:]
@@ -243,36 +243,36 @@ class Draft():
for line in self.rawlines:
linecount += 1
line = line.rstrip()
if re.search("\[?page [0-9ivx]+\]?[ \t\f]*$", line, re.I):
if re.search(r"\[?page [0-9ivx]+\]?[ \t\f]*$", line, re.I):
pages, page, newpage = endpage(pages, page, newpage, line)
continue
if re.search("\f", line, re.I):
if re.search(r"\f", line, re.I):
pages, page, newpage = begpage(pages, page, newpage)
continue
if re.search("^ *Internet.Draft.+ .+[12][0-9][0-9][0-9] *$", line, re.I):
if re.search(r"^ *Internet.Draft.+ .+[12][0-9][0-9][0-9] *$", line, re.I):
pages, page, newpage = begpage(pages, page, newpage, line)
continue
# if re.search("^ *Internet.Draft +", line, re.I):
# newpage = True
# continue
if re.search("^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I):
if re.search(r"^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I):
pages, page, newpage = begpage(pages, page, newpage, line)
continue
if re.search("^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I):
if re.search(r"^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I):
pages, page, newpage = begpage(pages, page, newpage, line)
continue
if re.search("^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I):
if re.search(r"^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I):
pages, page, newpage = endpage(pages, page, newpage, line)
continue
if linecount > 15 and re.search(".{58,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I):
if linecount > 15 and re.search(r".{58,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I):
pages, page, newpage = begpage(pages, page, newpage, line)
continue
if newpage and re.search("^ *draft-[-a-z0-9_.]+ *$", line, re.I):
if newpage and re.search(r"^ *draft-[-a-z0-9_.]+ *$", line, re.I):
pages, page, newpage = begpage(pages, page, newpage, line)
continue
if re.search("^[^ \t]+", line):
if re.search(r"^[^ \t]+", line):
sentence = True
if re.search("[^ \t]", line):
if re.search(r"[^ \t]", line):
if newpage:
# 36 is a somewhat arbitrary count for a 'short' line
shortthis = len(line.strip()) < 36 # 36 is a somewhat arbitrary count for a 'short' line
@@ -300,7 +300,7 @@ class Draft():
# ----------------------------------------------------------------------
def get_pagecount(self):
if self._pagecount == None:
label_pages = len(re.findall("\[page [0-9ixldv]+\]", self.text, re.I))
label_pages = len(re.findall(r"\[page [0-9ixldv]+\]", self.text, re.I))
count_pages = len(self.pages)
if label_pages > count_pages/2:
self._pagecount = label_pages
@@ -343,7 +343,7 @@ class Draft():
def get_status(self):
if self._status == None:
for line in self.lines[:10]:
status_match = re.search("^\s*Intended [Ss]tatus:\s*(.*?) ", line)
status_match = re.search(r"^\s*Intended [Ss]tatus:\s*(.*?) ", line)
if status_match:
self._status = status_match.group(1)
break
@@ -416,8 +416,8 @@ class Draft():
def get_abstract(self):
if self._abstract:
return self._abstract
abstract_re = re.compile('^(\s*)abstract', re.I)
header_re = re.compile("^(\s*)([0-9]+\.? |Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index|Disclaimer).*", re.I)
abstract_re = re.compile(r'^(\s*)abstract', re.I)
header_re = re.compile(r"^(\s*)([0-9]+\.? |Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index|Disclaimer).*", re.I)
begin = False
abstract = []
abstract_indent = 0
@@ -446,7 +446,7 @@ class Draft():
def _check_abstract_indent(self, abstract, indent):
indentation_re = re.compile('^(\s)*')
indentation_re = re.compile(r'^(\s)*')
indent_lines = []
for line in abstract.split('\n'):
if line:
@@ -807,7 +807,7 @@ class Draft():
_debug( "Cut: '%s'" % form[beg:end])
author_match = re.search(authpat, columns[col].strip()).group(1)
_debug( "AuthMatch: '%s'" % (author_match,))
if re.search('\(.*\)$', author_match.strip()):
if re.search(r'\(.*\)$', author_match.strip()):
author_match = author_match.rsplit('(',1)[0].strip()
if author_match in companies_seen:
companies[i] = authors[i]
@@ -887,7 +887,7 @@ class Draft():
# for a in authors:
# if a and a not in companies_seen:
# _debug("Search for: %s"%(r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"))
authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub("[. ]+", ".*", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )]
authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub(r"[. ]+", ".*", a)+r"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )]
if authmatch:
_debug(" ? Other author or company ? : %s" % authmatch)
@@ -915,9 +915,9 @@ class Draft():
column = l.replace('\t', 8 * ' ')[max(0, beg - 1):end].strip()
except:
column = l
column = re.sub(" *(?:\(at\)| <at> | at ) *", "@", column)
column = re.sub(" *(?:\(dot\)| <dot> | dot ) *", ".", column)
column = re.sub("&cisco.com", "@cisco.com", column)
column = re.sub(r" *(?:\(at\)| <at> | at ) *", "@", column)
column = re.sub(r" *(?:\(dot\)| <dot> | dot ) *", ".", column)
column = re.sub(r"&cisco.com", "@cisco.com", column)
column = column.replace("\xa0", " ")
return column
@@ -1003,13 +1003,13 @@ class Draft():
def get_title(self):
if self._title:
return self._title
match = re.search('(?:\n\s*\n\s*)((.+\n){0,2}(.+\n*))(\s+<?draft-\S+\s*\n)\s*\n', self.pages[0])
match = re.search(r'(?:\n\s*\n\s*)((.+\n){0,2}(.+\n*))(\s+<?draft-\S+\s*\n)\s*\n', self.pages[0])
if not match:
match = re.search('(?:\n\s*\n\s*)<?draft-\S+\s*\n*((.+\n){1,3})\s*\n', self.pages[0])
match = re.search(r'(?:\n\s*\n\s*)<?draft-\S+\s*\n*((.+\n){1,3})\s*\n', self.pages[0])
if not match:
match = re.search('(?:\n\s*\n\s*)((.+\n){0,2}(.+\n*))(\s*\n){2}', self.pages[0])
match = re.search(r'(?:\n\s*\n\s*)((.+\n){0,2}(.+\n*))(\s*\n){2}', self.pages[0])
if not match:
match = re.search('(?i)(.+\n|.+\n.+\n)(\s*status of this memo\s*\n)', self.pages[0])
match = re.search(r'(?i)(.+\n|.+\n.+\n)(\s*status of this memo\s*\n)', self.pages[0])
if match:
title = match.group(1)
title = title.strip()
@@ -1147,10 +1147,10 @@ class Draft():
para += " "
para += line
refs += [ para ]
rfc_match = re.search("(?i)rfc ?\d+", para)
rfc_match = re.search(r"(?i)rfc ?\d+", para)
if rfc_match:
rfcrefs += [ rfc_match.group(0).replace(" ","").lower() ]
draft_match = re.search("draft-[a-z0-9-]+", para)
draft_match = re.search(r"draft-[a-z0-9-]+", para)
if draft_match:
draft = draft_match.group(0).lower()
if not draft in draftrefs:
@@ -1185,7 +1185,7 @@ def getmeta(fn):
if not os.path.exists(filename):
fn = filename
while not "-00." in fn:
revmatch = re.search("-(\d\d)\.", fn)
revmatch = re.search(r"-(\d\d)\.", fn)
if revmatch:
rev = revmatch.group(1)
prev = "%02d" % (int(rev)-1)
@@ -1312,7 +1312,7 @@ def _main(outfile=sys.stdout):
# Option processing
# ----------------------------------------------------------------------
options = ""
for line in re.findall("\n +(if|elif) +opt in \[(.+)\]:\s+#(.+)\n", open(sys.argv[0]).read()):
for line in re.findall(r"\n +(if|elif) +opt in \[(.+)\]:\s+#(.+)\n", open(sys.argv[0]).read()):
if not options:
options += "OPTIONS\n"
options += " %-16s %s\n" % (line[1].replace('"', ''), line[2])

View file

@@ -1,8 +1,8 @@
# Copyright The IETF Trust 2007, All Rights Reserved
# Copyright The IETF Trust 2007-2019, All Rights Reserved
import re
def normalize_draftname(string):
string = string.strip()
string = re.sub("\.txt$","",string)
string = re.sub("-\d\d$","",string)
string = re.sub(r"\.txt$","",string)
string = re.sub(r"-\d\d$","",string)
return string

View file

@@ -74,11 +74,11 @@ def markup(content, width=None):
# expand tabs + escape
content = escape(content.expandtabs())
content = re.sub("\n(.+\[Page \d+\])\n\f\n(.+)\n", """\n<span class="m_ftr">\g<1></span>\n<span class="m_hdr">\g<2></span>\n""", content)
content = re.sub("\n(.+\[Page \d+\])\n\s*$", """\n<span class="m_ftr">\g<1></span>\n""", content)
content = re.sub(r"\n(.+\[Page \d+\])\n\f\n(.+)\n", r"""\n<span class="m_ftr">\g<1></span>\n<span class="m_hdr">\g<2></span>\n""", content)
content = re.sub(r"\n(.+\[Page \d+\])\n\s*$", r"""\n<span class="m_ftr">\g<1></span>\n""", content)
# remove remaining FFs (to be valid XHTML)
content = content.replace("\f","\n")
content = re.sub("\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", """\n\n<span class="m_h">\g<1>\g<2></span>""", content)
content = re.sub(r"\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", r"""\n\n<span class="m_h">\g<1>\g<2></span>""", content)
return "<pre>" + content + "</pre>\n"

View file

@@ -1,3 +1,4 @@
# Copyright The IETF Trust 2015-2019, All Rights Reserved
import re
def pdf_pages(filename):
@@ -7,7 +8,7 @@ def pdf_pages(filename):
except IOError:
return 0
for line in infile:
m = re.match('\] /Count ([0-9]+)',line)
m = re.match(r'\] /Count ([0-9]+)',line)
if m:
return int(m.group(1))
return 0
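
Closing aside (illustrative, not part of the commit): the invalid-escape warnings that motivate changes like these are emitted when the source is compiled, so one way to check code for remaining non-raw regex literals is to compile it with warnings promoted to errors. A minimal sketch with an inline sample source; note that the warning category depends on the Python version (DeprecationWarning through 3.11, SyntaxWarning from 3.12):

import warnings

# SAMPLE is a stand-in source snippet, not code from this repository.
SAMPLE = r'''
import re
m = re.search("-\d\d$", "draft-example-01")   # non-raw pattern containing \d
'''

warnings.simplefilter("error")   # turn every warning into an exception
try:
    compile(SAMPLE, "<sample>", "exec")
    print("no invalid escape sequences flagged")
except (SyntaxError, Warning) as err:
    print("flagged:", err)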