From 5dd4ef6742bd2f287dec00d7b7c4a0c957c3db12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jacobo=20Tarrag=C3=B3n?=
Date: Tue, 8 Feb 2011 15:25:59 +0000
Subject: [PATCH] make plain parser pep8 compliant. See #585.
 - Legacy-Id: 2827
---
 ietf/submit/parsers/plain_parser.py | 73 ++++++++++++++---------------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/ietf/submit/parsers/plain_parser.py b/ietf/submit/parsers/plain_parser.py
index 38f6d75da..2ed1481cf 100644
--- a/ietf/submit/parsers/plain_parser.py
+++ b/ietf/submit/parsers/plain_parser.py
@@ -15,7 +15,7 @@ class PlainParser(FileParser):
         super(PlainParser, self).__init__(fd)
         self.lines = fd.file.readlines()
         fd.file.seek(0)
-        self.full_text= self.normalize_text(''.join(self.lines))
+        self.full_text = self.normalize_text(''.join(self.lines))
 
     def normalize_text(self, text):
         text = re.sub(".\x08", "", text)    # Get rid of inkribbon backspace-emphasis
@@ -111,18 +111,18 @@ class PlainParser(FileParser):
         self.parsed_info.metadraft.title = title
 
     def parse_normal_002_num_pages(self):
-        pagecount = len(re.findall("\[[Pp]age [0-9ixldv]+\]", self.full_text)) or len(self.lines)/58
+        pagecount = len(re.findall("\[[Pp]age [0-9ixldv]+\]", self.full_text)) or len(self.lines) / 58
         self.parsed_info.metadraft.pagecount = pagecount
 
     def parse_normal_003_creation_date(self):
-        month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]
+        month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
         date_regexes = [
             r'\s{3,}(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})',
             r'\s{3,}(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})',
             r'\s{3,}(?P<day>\d{1,2})-(?P<month>\w+)-(?P<year>\d{4})',
             # 'October 2008' - default day to today's.
             r'\s{3,}(?P<month>\w+)\s+(?P<year>\d{4})',
-            ]
+        ]
 
         first = self.parsed_info.metadraft.first_two_pages or self.full_text
         for regex in date_regexes:
@@ -130,10 +130,10 @@ class PlainParser(FileParser):
             if match:
                 md = match.groupdict()
                 mon = md['month'][0:3].lower()
-                day = int( md.get( 'day', datetime.date.today().day ) )
-                year = int( md['year'] )
+                day = int(md.get('day', datetime.date.today().day))
+                year = int(md['year'])
                 try:
-                    month = month_names.index( mon ) + 1
+                    month = month_names.index(mon) + 1
                     self.parsed_info.metadraft.creation_date = datetime.date(year, month, day)
                     return
                 except ValueError:
@@ -142,12 +142,11 @@ class PlainParser(FileParser):
                     continue
         self.parsed_info.add_warning('creation_date', 'Creation Date field is empty or the creation date is not in a proper format.')
 
-        
     def parse_normal_004_authors(self):
         """
         comes from http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py
         """
-        
+
         def _stripheaders(rawlines):
             stripped = []
             pages = []
@@ -160,15 +159,16 @@ class PlainParser(FileParser):
 
             def endpage(pages, page, line):
                 if line:
-                    page += [ line ]
+                    page += [line]
                 return begpage(pages, page)
+
             def begpage(pages, page, line=None):
                 if page and len(page) > 5:
-                    pages += [ "\n".join(page) ]
+                    pages += ["\n".join(page)]
                     page = []
                     newpage = True
                 if line:
-                    page += [ line ]
+                    page += [line]
                 return pages, page
 
             for line in rawlines:
@@ -213,13 +213,13 @@ class PlainParser(FileParser):
                     sentence = True
                 if re.search("^[ \t]*$", line):
                     haveblank = True
-                    page += [ line ]
+                    page += [line]
                     continue
-                page += [ line ]
-                stripped += [ line ]
+                page += [line]
+                stripped += [line]
             pages, page = begpage(pages, page)
             return stripped, pages
-        
+
         self.fd.file.seek(0)
         raw_lines = self.fd.file.read().split("\n")
         draft_lines, draft_pages = _stripheaders(raw_lines)
@@ -233,15 +233,15 @@ class PlainParser(FileParser):
             "Jerry": "Gerald",
             "Liz": "Elizabeth",
             "Lynn": "Carolyn",
-            "Ned": "Edward" ,
-            "Ted":"Edward",
+            "Ned": "Edward",
+            "Ted": "Edward",
         }
         aux = {
-            "honor" : r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)",
+            "honor": r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)",
             "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)",
             "suffix": r"(jr|II|2nd|III|3rd|IV|4th)",
-            "first" : r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)",
-            "last" : r"([-A-Za-z']{2,})",
+            "first": r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)",
+            "last": r"([-A-Za-z']{2,})",
         }
         authformats = [
             r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux,
@@ -272,7 +272,7 @@ class PlainParser(FileParser):
                 match = re.search(authformat, line)
                 if match:
                     author = match.group(1)
-                    authors += [ author ]
+                    authors += [author]
             if line.strip() == "":
                 if prev_blankline:
                     break
@@ -291,7 +291,7 @@ class PlainParser(FileParser):
             if author == None:
                 continue
             if "," in author:
-                last, first = author.split(",",1)
+                last, first = author.split(",", 1)
                 author = "%s %s" % (first.strip(), last.strip())
             if not " " in author:
                 if "." in author:
@@ -303,7 +303,7 @@ class PlainParser(FileParser):
             else:
                 first, last = author.rsplit(" ", 1)
 
-            for author in [ "%s %s"%(first,last), "%s %s"%(last,first), ]:
+            for author in ["%s %s" % (first, last), "%s %s" % (last, first)]:
                 # Pattern for full author information search, based on first page author name:
                 authpat = author
                 # Permit expansion of first name
@@ -315,7 +315,7 @@ class PlainParser(FileParser):
                 authpat = re.sub("-", ".*?-", authpat)
                 # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than
                 # a single-letter(latin) abbreviation:
-                authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat) 
+                authpat = re.sub("^([A-Z])[A-Z]+\.\*", r"\1[-\w]+", authpat)
                 authpat = "^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" % (aux["honor"], authpat)
                 start = 0
                 col = None
@@ -323,24 +323,24 @@ class PlainParser(FileParser):
                 # Find start of author info for this author (if any).
                 # Scan from the end of the file, looking for a match to authpath
                 try:
-                    for j in range(len(draft_lines)-1, 15, -1):
+                    for j in range(len(draft_lines) - 1, 15, -1):
                         line = draft_lines[j].strip()
-                        forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ]
+                        forms = [line] + [line.replace(short, longform[short]) for short in longform if short in line]
                         for line in forms:
                             if re.search(authpat, line):
                                 start = j
                                 columns = re.split("( +)", line)
                                 # Find which column:
-                                cols = [ c for c in range(len(columns)) if re.search(authpat+r"$", columns[c].strip()) ]
+                                cols = [c for c in range(len(columns)) if re.search(authpat + r"$", columns[c].strip())]
                                 if cols:
                                     col = cols[0]
                                     if not (start, col) in found_pos:
-                                        found_pos += [ (start, col) ]
+                                        found_pos += [(start, col)]
                                         beg = len("".join(columns[:col]))
-                                        if col == len(columns) or col == len(columns)-1:
+                                        if col == len(columns) or col == len(columns) - 1:
                                             end = None
                                         else:
-                                            end = beg + len("".join(columns[col:col+2]))
+                                            end = beg + len("".join(columns[col:col + 2]))
                                         author = re.search(authpat, columns[col].strip()).group(1)
                                         if author in companies:
                                             authors[i] = None
@@ -360,7 +360,7 @@ class PlainParser(FileParser):
                 count = 0
                 keyword = False
                 blanklines = 0
-                for line in draft_lines[start+1:]:
+                for line in draft_lines[start + 1:]:
                     # Break on the second blank line
                     if not line:
                         blanklines += 1
@@ -370,14 +370,14 @@ class PlainParser(FileParser):
                        continue
                     else:
                         count += 1
-                    authmatch = [ a for a in authors[i+1:] if a and not a in companies and re.search((r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) ]
+                    authmatch = [a for a in authors[i + 1:] if a and not a in companies and re.search((r"(^|\W)" + re.sub("\.? ", ".* ", a) + "(\W|$)"), line.strip())]
                     if authmatch:
                         if count == 1 or (count == 2 and not blanklines):
                             # First line after an author -- this is a company
                             companies += authmatch
-                            companies += [ line.strip() ] # XXX fix this for columnized author list
+                            companies += [line.strip()]  # XXX fix this for columnized author list
                             companies = list(set(companies))
-                            for k in range(i+1, len(authors)):
+                            for k in range(i + 1, len(authors)):
                                 if authors[k] in companies:
                                     authors[k] = None
                         elif not "@" in line:
@@ -399,7 +399,7 @@ class PlainParser(FileParser):
             else:
                 authors[i] = None
 
-        authors = [ re.sub(r" +"," ", a) for a in authors if a != None ]
+        authors = [re.sub(r" +", " ", a) for a in authors if a != None]
         if authors:
             authors.sort()
             self.parsed_info.metadraft.authors = authors
@@ -408,6 +408,5 @@ class PlainParser(FileParser):
 
         return authors
 
-
     def parse_normal_005_abstract(self):
         pass