From 0589d0b313db09d2a3ab4a3602b469047fde1451 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Thu, 4 Jul 2019 15:51:05 +0000 Subject: [PATCH] Changed a bunch of regexes to use r strings; also miscellaneous smaller fixes. - Legacy-Id: 16376 --- ietf/doc/templatetags/ietf_filters.py | 32 +++++++-------- ietf/doc/utils.py | 2 +- ietf/doc/views_doc.py | 8 ++-- ietf/person/name.py | 2 +- ietf/redirects/views.py | 6 +-- ietf/sync/rfceditor.py | 4 +- ietf/utils/draft.py | 58 +++++++++++++-------------- ietf/utils/draft_search.py | 6 +-- ietf/utils/markup_txt.py | 6 +-- ietf/utils/pdf.py | 3 +- 10 files changed, 63 insertions(+), 64 deletions(-) diff --git a/ietf/doc/templatetags/ietf_filters.py b/ietf/doc/templatetags/ietf_filters.py index b4d9113f8..90851df25 100644 --- a/ietf/doc/templatetags/ietf_filters.py +++ b/ietf/doc/templatetags/ietf_filters.py @@ -4,8 +4,6 @@ import bleach import datetime import re -import types - from email.utils import parseaddr from django import template @@ -47,8 +45,8 @@ def parse_email_list(value): Splitting a string of email addresses should return a list: - >>> unicode(parse_email_list('joe@example.org, fred@example.com')) - u'joe@example.org, fred@example.com' + >>> parse_email_list('joe@example.org, fred@example.com') + 'joe@example.org, fred@example.com' Parsing a non-string should return the input value, rather than fail: @@ -88,7 +86,7 @@ def strip_email(value): @register.filter(name='fix_angle_quotes') def fix_angle_quotes(value): if "<" in value: - value = re.sub("<([\w\-\.]+@[\w\-\.]+)>", "<\1>", value) + value = re.sub(r"<([\w\-\.]+@[\w\-\.]+)>", "<\1>", value) return value # there's an "ahref -> a href" in GEN_UTIL @@ -213,13 +211,13 @@ def urlize_ietf_docs(string, autoescape=None): """ if autoescape and not isinstance(string, SafeData): string = escape(string) - string = re.sub("(?)(RFC ?)0{0,3}(\d+)", "\\1\\2", string) - string = re.sub("(?)(BCP ?)0{0,3}(\d+)", "\\1\\2", string) - string = re.sub("(?)(STD ?)0{0,3}(\d+)", "\\1\\2", string) - string = re.sub("(?)(FYI ?)0{0,3}(\d+)", "\\1\\2", string) - string = re.sub("(?)(draft-[-0-9a-zA-Z._+]+)", "\\1", string) - string = re.sub("(?)(conflict-review-[-0-9a-zA-Z._+]+)", "\\1", string) - string = re.sub("(?)(status-change-[-0-9a-zA-Z._+]+)", "\\1", string) + string = re.sub(r"(?)(RFC ?)0{0,3}(\d+)", "\\1\\2", string) + string = re.sub(r"(?)(BCP ?)0{0,3}(\d+)", "\\1\\2", string) + string = re.sub(r"(?)(STD ?)0{0,3}(\d+)", "\\1\\2", string) + string = re.sub(r"(?)(FYI ?)0{0,3}(\d+)", "\\1\\2", string) + string = re.sub(r"(?)(draft-[-0-9a-zA-Z._+]+)", "\\1", string) + string = re.sub(r"(?)(conflict-review-[-0-9a-zA-Z._+]+)", "\\1", string) + string = re.sub(r"(?)(status-change-[-0-9a-zA-Z._+]+)", "\\1", string) return mark_safe(string) urlize_ietf_docs = stringfilter(urlize_ietf_docs) @@ -461,8 +459,8 @@ def capfirst_allcaps(text): """Like capfirst, except it doesn't lowercase words in ALL CAPS.""" result = text i = False - for token in re.split("(\W+)", striptags(text)): - if not re.match("^[A-Z]+$", token): + for token in re.split(r"(\W+)", striptags(text)): + if not re.match(r"^[A-Z]+$", token): if not i: result = result.replace(token, token.capitalize()) i = True @@ -474,8 +472,8 @@ def capfirst_allcaps(text): def lower_allcaps(text): """Like lower, except it doesn't lowercase words in ALL CAPS.""" result = text - for token in re.split("(\W+)", striptags(text)): - if not re.match("^[A-Z]+$", token): + for token in re.split(r"(\W+)", striptags(text)): + if not re.match(r"^[A-Z]+$", token): result = result.replace(token, token.lower()) return result @@ -515,7 +513,7 @@ def zaptmp(s): @register.filter() def rfcbis(s): - m = re.search('^.*-rfc(\d+)-?bis(-.*)?$', s) + m = re.search(r'^.*-rfc(\d+)-?bis(-.*)?$', s) return None if m is None else 'rfc' + m.group(1) @register.filter diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 4b935b454..601efc0d8 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -829,7 +829,7 @@ def build_doc_meta_block(doc, path): line = re.sub(r'Errata exist', r'Errata exist'%(errata_url, ), line) if is_hst or not rfcnum: # make current draft rev bold - line = re.sub(r'>(%s)<'%rev, '>\g<1><', line) + line = re.sub(r'>(%s)<'%rev, r'>\g<1><', line) line = re.sub(r'IPR declarations', r'IPR declarations'%(ipr_url, ), line) line = line.replace(r'[txt]', r'[txt]' % doc.href()) lines[i] = line diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index 73df69018..f4b4cff20 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -623,7 +623,7 @@ def document_main(request, name, rev=None): def document_html(request, name, rev=None): if name.startswith('rfc0'): name = "rfc" + name[3:].lstrip('0') - if name.startswith('review-') and re.search('-\d\d\d\d-\d\d$', name): + if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name): name = "%s-%s" % (name, rev) if rev and not name.startswith('charter-') and re.search('[0-9]{1,2}-[0-9]{2}', rev): name = "%s-%s" % (name, rev[:-3]) @@ -658,7 +658,7 @@ def document_html(request, name, rev=None): return render(request, "doc/document_html.html", {"doc":doc, "top":top, "navbar_mode":"navbar-static-top", }) def check_doc_email_aliases(): - pattern = re.compile('^expand-(.*?)(\..*?)?@.*? +(.*)$') + pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$') good_count = 0 tot_count = 0 with open(settings.DRAFT_VIRTUAL_PATH,"r") as virtual_file: @@ -673,9 +673,9 @@ def check_doc_email_aliases(): def get_doc_email_aliases(name): if name: - pattern = re.compile('^expand-(%s)(\..*?)?@.*? +(.*)$'%name) + pattern = re.compile(r'^expand-(%s)(\..*?)?@.*? +(.*)$'%name) else: - pattern = re.compile('^expand-(.*?)(\..*?)?@.*? +(.*)$') + pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$') aliases = [] with open(settings.DRAFT_VIRTUAL_PATH,"r") as virtual_file: for line in virtual_file.readlines(): diff --git a/ietf/person/name.py b/ietf/person/name.py index e01f0c343..74bb5cd64 100644 --- a/ietf/person/name.py +++ b/ietf/person/name.py @@ -67,7 +67,7 @@ def initials(name): given += " "+middle # Don't use non-word characters as initials. # Example: The Bulgarian transcribed name "'Rnest Balkanska" should not have an initial of "'". - given = re.sub('[^ .\w]', '', given) + given = re.sub(r'[^ .\w]', '', given) initials = " ".join([ n[0].upper()+'.' for n in given.split() ]) return initials diff --git a/ietf/redirects/views.py b/ietf/redirects/views.py index db557a6f1..bd98a641c 100644 --- a/ietf/redirects/views.py +++ b/ietf/redirects/views.py @@ -30,7 +30,7 @@ def redirect(request, path="", script=""): continue if fc[0] in rparam: remove_args.append(fc[0]) - num = re.match('(\d+)', rparam[fc[0]]) + num = re.match(r'(\d+)', rparam[fc[0]]) if (num and int(num.group(1))) or (num is None): cmd = flag break @@ -64,8 +64,8 @@ def redirect(request, path="", script=""): # contains non-ASCII characters. The old scripts didn't support # non-ASCII characters anyway, so there's no need to handle # them fully correctly in these redirects. - url += str(rest % rparam) - url += "/" + (rest % rparam).encode('ascii') + url += (rest % rparam) + "/" except: # rest had something in it that request didn't have, so just # redirect to the root of the tool. diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py index 17fa929a1..c4a60ceac 100644 --- a/ietf/sync/rfceditor.py +++ b/ietf/sync/rfceditor.py @@ -56,7 +56,7 @@ def parse_queue(response): events.expandNode(node) node.normalize() draft_name = get_child_text(node, "draft").strip() - draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name) + draft_name = re.sub(r"(-\d\d)?(.txt){1,2}$", "", draft_name) date_received = get_child_text(node, "date-received") state = "" @@ -306,7 +306,7 @@ def parse_index(response): abstract = get_child_text(abstract, "p") draft = get_child_text(node, "draft") - if draft and re.search("-\d\d$", draft): + if draft and re.search(r"-\d\d$", draft): draft = draft[0:-3] if len(node.getElementsByTagName("errata-url")) > 0: diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index e8781052e..be7f9aae8 100755 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -191,7 +191,7 @@ class Draft(): name, __ = base.split(".", 1) else: name = base - revmatch = re.search("\d\d$", name) + revmatch = re.search(r"\d\d$", name) if revmatch: filename = name[:-3] revision = name[-2:] @@ -243,36 +243,36 @@ class Draft(): for line in self.rawlines: linecount += 1 line = line.rstrip() - if re.search("\[?page [0-9ivx]+\]?[ \t\f]*$", line, re.I): + if re.search(r"\[?page [0-9ivx]+\]?[ \t\f]*$", line, re.I): pages, page, newpage = endpage(pages, page, newpage, line) continue - if re.search("\f", line, re.I): + if re.search(r"\f", line, re.I): pages, page, newpage = begpage(pages, page, newpage) continue - if re.search("^ *Internet.Draft.+ .+[12][0-9][0-9][0-9] *$", line, re.I): + if re.search(r"^ *Internet.Draft.+ .+[12][0-9][0-9][0-9] *$", line, re.I): pages, page, newpage = begpage(pages, page, newpage, line) continue # if re.search("^ *Internet.Draft +", line, re.I): # newpage = True # continue - if re.search("^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I): + if re.search(r"^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I): pages, page, newpage = begpage(pages, page, newpage, line) continue - if re.search("^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I): + if re.search(r"^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I): pages, page, newpage = begpage(pages, page, newpage, line) continue - if re.search("^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I): + if re.search(r"^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I): pages, page, newpage = endpage(pages, page, newpage, line) continue - if linecount > 15 and re.search(".{58,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I): + if linecount > 15 and re.search(r".{58,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I): pages, page, newpage = begpage(pages, page, newpage, line) continue - if newpage and re.search("^ *draft-[-a-z0-9_.]+ *$", line, re.I): + if newpage and re.search(r"^ *draft-[-a-z0-9_.]+ *$", line, re.I): pages, page, newpage = begpage(pages, page, newpage, line) continue - if re.search("^[^ \t]+", line): + if re.search(r"^[^ \t]+", line): sentence = True - if re.search("[^ \t]", line): + if re.search(r"[^ \t]", line): if newpage: # 36 is a somewhat arbitrary count for a 'short' line shortthis = len(line.strip()) < 36 # 36 is a somewhat arbitrary count for a 'short' line @@ -300,7 +300,7 @@ class Draft(): # ---------------------------------------------------------------------- def get_pagecount(self): if self._pagecount == None: - label_pages = len(re.findall("\[page [0-9ixldv]+\]", self.text, re.I)) + label_pages = len(re.findall(r"\[page [0-9ixldv]+\]", self.text, re.I)) count_pages = len(self.pages) if label_pages > count_pages/2: self._pagecount = label_pages @@ -343,7 +343,7 @@ class Draft(): def get_status(self): if self._status == None: for line in self.lines[:10]: - status_match = re.search("^\s*Intended [Ss]tatus:\s*(.*?) ", line) + status_match = re.search(r"^\s*Intended [Ss]tatus:\s*(.*?) ", line) if status_match: self._status = status_match.group(1) break @@ -416,8 +416,8 @@ class Draft(): def get_abstract(self): if self._abstract: return self._abstract - abstract_re = re.compile('^(\s*)abstract', re.I) - header_re = re.compile("^(\s*)([0-9]+\.? |Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index|Disclaimer).*", re.I) + abstract_re = re.compile(r'^(\s*)abstract', re.I) + header_re = re.compile(r"^(\s*)([0-9]+\.? |Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index|Disclaimer).*", re.I) begin = False abstract = [] abstract_indent = 0 @@ -446,7 +446,7 @@ class Draft(): def _check_abstract_indent(self, abstract, indent): - indentation_re = re.compile('^(\s)*') + indentation_re = re.compile(r'^(\s)*') indent_lines = [] for line in abstract.split('\n'): if line: @@ -807,7 +807,7 @@ class Draft(): _debug( "Cut: '%s'" % form[beg:end]) author_match = re.search(authpat, columns[col].strip()).group(1) _debug( "AuthMatch: '%s'" % (author_match,)) - if re.search('\(.*\)$', author_match.strip()): + if re.search(r'\(.*\)$', author_match.strip()): author_match = author_match.rsplit('(',1)[0].strip() if author_match in companies_seen: companies[i] = authors[i] @@ -887,7 +887,7 @@ class Draft(): # for a in authors: # if a and a not in companies_seen: # _debug("Search for: %s"%(r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)")) - authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub("[. ]+", ".*", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )] + authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub(r"[. ]+", ".*", a)+r"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )] if authmatch: _debug(" ? Other author or company ? : %s" % authmatch) @@ -915,9 +915,9 @@ class Draft(): column = l.replace('\t', 8 * ' ')[max(0, beg - 1):end].strip() except: column = l - column = re.sub(" *(?:\(at\)| | at ) *", "@", column) - column = re.sub(" *(?:\(dot\)| | dot ) *", ".", column) - column = re.sub("&cisco.com", "@cisco.com", column) + column = re.sub(r" *(?:\(at\)| | at ) *", "@", column) + column = re.sub(r" *(?:\(dot\)| | dot ) *", ".", column) + column = re.sub(r"&cisco.com", "@cisco.com", column) column = column.replace("\xa0", " ") return column @@ -1003,13 +1003,13 @@ class Draft(): def get_title(self): if self._title: return self._title - match = re.search('(?:\n\s*\n\s*)((.+\n){0,2}(.+\n*))(\s+\g<1>\n\g<2>\n""", content) - content = re.sub("\n(.+\[Page \d+\])\n\s*$", """\n\g<1>\n""", content) + content = re.sub(r"\n(.+\[Page \d+\])\n\f\n(.+)\n", r"""\n\g<1>\n\g<2>\n""", content) + content = re.sub(r"\n(.+\[Page \d+\])\n\s*$", r"""\n\g<1>\n""", content) # remove remaining FFs (to be valid XHTML) content = content.replace("\f","\n") - content = re.sub("\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", """\n\n\g<1>\g<2>""", content) + content = re.sub(r"\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", r"""\n\n\g<1>\g<2>""", content) return "
" + content + "
\n" diff --git a/ietf/utils/pdf.py b/ietf/utils/pdf.py index c863d4db1..5b5ca2371 100644 --- a/ietf/utils/pdf.py +++ b/ietf/utils/pdf.py @@ -1,3 +1,4 @@ +# Copyright The IETF Trust 2015-2019, All Rights Reserved import re def pdf_pages(filename): @@ -7,7 +8,7 @@ def pdf_pages(filename): except IOError: return 0 for line in infile: - m = re.match('\] /Count ([0-9]+)',line) + m = re.match(r'\] /Count ([0-9]+)',line) if m: return int(m.group(1)) return 0