Updated utils/draft.py and modified the submit app code accordingly.

New features (keep in mind that utils/draft.py can be run standalone
to do extraction of draft author data, too):

  * The handling of author info formatted in columns causes problems
    in the face of an author named for instance A. Author with the
    company 'Al Author and Associates', causing breakage of email
    addresses longer than 'Al Author and'.  Tweaked the recognition
    of column data to require multiple (not only one) space around
    'and'.

  * Added support for extraction of author affiliation.

  * Tweaked the meaning of -t, --timestamp and added --notimestamp; and
    made the default be to emit leading timestamps based ont the draft
    file time.

  * Added support for running author extraction on RFCs, by not bailing
    out on not finding a draft name when RFC information is available.

  * Added support for additional date formats and author name formats.

  * Improved creation date extraction -- previously, the first supported
    date format which was recognized on the first page of the draft would
    be used, rather than the first date in a supported format.  This could
    cause errors if the Status of Memo section or Abstract contained a
    date occurring at the start of a line.

  * Tweaked the honorific regex to make things work better for the case
    when the full name in the author's address section includes a first
    name which isn't part of the first-page abbreviated name.  Fixes
    problems with draft-chiappa-lisp-introduction and similar.

  * Added a special case for people who provide their email address as
    'foo&cisco.com' instead of 'foo@cisco.com'.  Bah.

  * Added an alternative, more human-readable key-value-pair attribute
    output mode with a '-a' switch.

  * Tweaded the first-name regex to capture cases where the first name
    is indicated with an alternate first letter: 'Y(J) Stein'.  Fixes
    problems with draft-anavi-tdmoip and similar.
 - Legacy-Id: 4612
This commit is contained in:
Henrik Levkowetz 2012-07-11 12:51:33 +00:00
parent cddec8fc48
commit 0c49999fc9
2 changed files with 280 additions and 75 deletions

View file

@ -200,7 +200,7 @@ class UploadForm(forms.Form):
return self.draft
txt_file = self.cleaned_data['txt']
txt_file.seek(0)
self.draft = Draft(txt_file.read())
self.draft = Draft(txt_file.read(), txt_file.name)
txt_file.seek(0)
return self.draft
@ -274,8 +274,8 @@ class UploadForm(forms.Form):
file_type=','.join(self.file_type),
)
order = 0
for author in draft.get_author_info():
full_name, first_name, middle_initial, last_name, name_suffix, email = author
for author in draft.get_author_list():
full_name, first_name, middle_initial, last_name, name_suffix, email, company = author
order += 1
if settings.USE_DB_REDESIGN_PROXY_CLASSES:
# save full name

View file

@ -40,7 +40,7 @@ import stat
import sys
import time
version = "0.23"
version = "0.26"
program = os.path.basename(sys.argv[0])
progdir = os.path.dirname(sys.argv[0])
@ -52,6 +52,11 @@ progdir = os.path.dirname(sys.argv[0])
opt_debug = False
opt_timestamp = False
opt_trace = False
opt_authorinfo = False
opt_getauthors = False
opt_attributes = False
# Don't forget to add the option variable to the globals list in _main below
# The following is an alias list for short forms which starts with a
# different letter than the long form.
@ -70,6 +75,7 @@ longform = {
}
longform = dict([ (short+" ", longform[short]+" ") for short in longform ])
month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]
# ----------------------------------------------------------------------
# Functions
@ -114,7 +120,8 @@ def acronym_match(s, l):
class Draft():
def __init__(self, text):
def __init__(self, text, source):
self.source = source
self.rawtext = text
text = re.sub(".\x08", "", text) # Get rid of inkribbon backspace-emphasis
@ -135,9 +142,27 @@ class Draft():
for pagestart in range(0, len(self.lines), 58):
self.pages += [ "\n".join(self.lines[pagestart:pagestart+54]) ]
self.filename, self.revision = self._parse_draftname()
try:
self.filename, self.revision = self._parse_draftname()
except ValueError, e:
_warn("While processing '%s': %s" % (self.source, e))
try:
path, base = self.source.rsplit("/", 1)
except ValueError:
path, base = "", self.source
if base.startswith("draft-"):
name, ext = base.split(".", 1)
revmatch = re.search("\d\d$", name)
if revmatch:
self.filename = name[:-3]
self.revision = name[-2:]
else:
raise ValueError(e+"\n"+self.source)
else:
raise ValueError(e+"\n"+self.source)
self._authors = None
self._authors_with_firm = None
self._author_info = None
self._abstract = None
self._pagecount = None
@ -149,10 +174,15 @@ class Draft():
def _parse_draftname(self):
draftname_regex = r"(draft-[a-z0-9-]*)-(\d\d)(\w|\.txt|\n|$)"
draftname_match = re.search(draftname_regex, self.pages[0])
rfcnum_regex = r"(Re[qg]uests? [Ff]or Commm?ents?:? +|Request for Comments: RFC |RFC-|RFC )((# ?)?[0-9]+)( |,|\n|$)"
rfcnum_match = re.search(rfcnum_regex, self.pages[0])
if draftname_match:
return (draftname_match.group(1), draftname_match.group(2) )
elif rfcnum_match:
return ("rfc"+rfcnum_match.group(2), None )
else:
self.errors["draftname"] = "Could not find the draft name and revision on the first page."
raise ValueError, self.errors["draftname"] + "\n'"+self.text.strip()[:240] + "'..."
return ("", "")
# ----------------------------------------------------------------------
@ -252,7 +282,6 @@ class Draft():
def get_creation_date(self):
if self._creation_date:
return self._creation_date
month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]
date_regexes = [
r'^(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})',
r'^(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})',
@ -265,9 +294,16 @@ class Draft():
r'\s{3,}(?P<month>\w+)\s+(?P<year>\d{4})',
]
dates = []
text = self.pages[0]
for regex in date_regexes:
match = re.search(regex, self.pages[0], re.MULTILINE)
match = re.search(regex, text, re.MULTILINE)
if match:
start = match.start()
if not "expires" in text[start-10:start].lower():
dates += [(start, match)]
dates.sort()
for start, match in dates:
md = match.groupdict()
mon = md['month'][0:3].lower()
day = int( md.get( 'day', 0 ) )
@ -370,8 +406,17 @@ class Draft():
self.extract_authors()
return self._authors
def get_author_info(self):
"""Returns a list of tuples, with each tuple containing (given_names, surname, email). Email will be None if unknown."""
def get_authors_with_firm(self):
"""Returns a list of strings with author name and email within angle brackets"""
if self._authors_with_firm == None:
self.extract_authors()
return self._authors_with_firm
def get_author_list(self):
"""Returns a list of tuples, with each tuple containing (given_names,
surname, email, company). Email will be None if unknown.
"""
if self._author_info == None:
self.extract_authors()
return self._author_info
@ -381,17 +426,23 @@ class Draft():
"""
aux = {
"honor" : r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame|Sri)",
"honor" : r"(?:[A-Z]\.|Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame|Sri)",
"prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von|[Ee]l)",
"suffix": r"(jr.?|Jr.?|II|2nd|III|3rd|IV|4th)",
"first" : r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)",
"first" : r"([A-Z][-A-Za-z]*)(( ?\([A-Z][-A-Za-z]*\))?(\.?[- ]{1,2}[A-Za-z]+)*)",
"last" : r"([-A-Za-z']{2,})",
"months": r"(January|February|March|April|May|June|July|August|September|October|November|December)",
"mabbr" : r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.?",
}
authcompanyformats = [
r" {6}(?P<author>(%(first)s[ \.]{1,3})+((%(prefix)s )?%(last)s)( %(suffix)s)?), (?P<company>[^.]+\.?)$" % aux,
r" {6}(?P<author>(%(first)s[ \.]{1,3})+((%(prefix)s )?%(last)s)( %(suffix)s)?) *\((?P<company>[^.]+\.?)\)$" % aux,
]
authformats = [
r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux,
r" {6}((%(first)s[ \.]{1,3})+((%(prefix)s )?%(last)s)( %(suffix)s)?)(, ([^.]+\.?|\([^.]+\.?|\)))?,?$" % aux,
r" {6}(((%(prefix)s )?%(last)s)( %(suffix)s)?, %(first)s)?$" % aux,
r" {6}(%(last)s)$" % aux,
]
]
multiauthformats = [
(
r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)(, ?%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)+$" % aux,
@ -400,13 +451,25 @@ class Draft():
]
editorformats = [
r"(?:, | )([Ee]d\.?|\([Ee]d\.?\)|[Ee]ditor)$",
]
]
companyformats = [
r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(,? ?Inc\.?))$",
r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(,? ?Ltd\.?))$",
r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(/([A-Za-z'][-A-Za-z0-9.& ']+))+)$",
r" {6}([a-z0-9.-]+)$",
r" {6}(([A-Za-z'][-A-Za-z0-9.&']+)( [A-Za-z'][-A-Za-z0-9.&']+)*)$",
r" {6}(([A-Za-z'][-A-Za-z0-9.']+)( & [A-Za-z'][-A-Za-z0-9.']+)*)$",
r" {6}\((.+)\)$",
r" {6}(\w+\s?\(.+\))$",
]
dateformat = r"(((%(month)s|%(mabbr)s) \d+, |\d+ (%(month)s|%(mabbr)s),? |\d+/\d+/)\d\d\d\d|\d\d\d\d-\d\d-\d\d)$"
address_section = r"^ *([0-9]+\.)? *(Author|Editor)('s|s'|s|\(s\)) (Address|Addresses|Information)"
ignore = [
"Standards Track", "Current Practice", "Internet Draft", "Working Group",
"No Affiliation",
"Expiration Date",
]
def make_authpat(hon, first, last, suffix):
@ -416,6 +479,7 @@ class Draft():
return s
first = dotexp(first)
last = dotexp(last)
first = re.sub("[()]", " ", first)
if " " in first:
# if there's a middle part, let it be optional
first, middle = first.split(" ", 1)
@ -430,15 +494,19 @@ class Draft():
return authpat
authors = []
author_info = []
companies = []
author_info = []
companies_seen = []
self._docheader = ""
# Collect first-page author information first
have_blankline = False
have_draftline = False
prev_blankline = False
for line in self.lines[:30]:
#_debug( "**" + line)
self._docheader += line+"\n"
author_on_line = False
_debug( "**" + line)
leading_space = len(re.findall("^ *", line)[0])
line_len = len(line.rstrip())
trailing_space = line_len <= 72 and 72 - line_len or 0
@ -451,6 +519,9 @@ class Draft():
if (leading_space > 5 and abs(leading_space - trailing_space) < 5):
_debug("Breaking for centered line")
break
if re.search(dateformat, line):
if authors:
_debug("Breaking for dateformat after author name")
for editorformat in editorformats:
if re.search(editorformat, line):
line = re.sub(editorformat, "", line)
@ -458,35 +529,86 @@ class Draft():
for lineformat, authformat in multiauthformats:
match = re.search(lineformat, line)
if match:
#_debug("Multiauth format: '%s'" % lineformat)
_debug("Multiauth format: '%s'" % lineformat)
author_list = re.findall(authformat, line)
authors += [ a[0] for a in author_list ]
companies += [ None for a in author_list ]
author_on_line = True
#_debug("\nLine: " + line)
#_debug("Format: " + authformat)
for author in author_list:
_debug("Author: '%s'" % author[0])
else:
for authformat in authformats:
match = re.search(authformat, line)
break
if not author_on_line:
for lineformat in authcompanyformats:
match = re.search(lineformat, line)
if match:
#_debug("Auth format: '%s'" % authformat)
author = match.group(1)
authors += [ author ]
_debug("Line format: '%s'" % lineformat)
author = match.group("author")
company = match.group("company")
authors += [ author, '']
companies += [ None, company ]
#_debug("\nLine: " + line)
#_debug("Format: " + authformat)
_debug("Author: '%s'" % author)
_debug("Company: '%s'" % company)
author_on_line = True
break
if not author_on_line:
for authformat in authformats:
match = re.search(authformat, line)
if match:
_debug("Auth format: '%s'" % authformat)
author = match.group(1)
authors += [ author ]
companies += [ None ]
#_debug("\nLine: " + line)
#_debug("Format: " + authformat)
_debug("Author: '%s'" % author)
author_on_line = True
break
if not author_on_line:
for authformat in companyformats:
match = re.search(authformat, line)
if match:
_debug("Auth format: '%s'" % authformat)
company = match.group(1)
authors += [ "" ]
companies += [ company ]
#_debug("\nLine: " + line)
#_debug("Format: " + authformat)
_debug("Company: '%s'" % company)
break
if authors and not author_on_line:
# Retain information about blank lines in author list
authors += [""]
companies += [ "" ]
if line.strip() == "":
if prev_blankline and authors:
_debug("Breaking for having found consecutive blank lines after author name")
break
have_blankline = True
prev_blankline = True
if authors:
have_blankline = True
prev_blankline = True
else:
prev_blankline = False
if "draft-" in line:
have_draftline = True
if have_blankline and have_draftline:
_debug("Breaking for having found both blank line and draft-name line")
break
# remove trailing blank entries in the author list:
for i in range(len(authors)-1,-1,-1):
if authors[i] == "" and companies[i] == "":
del authors[i]
del companies[i]
else:
break
_debug("A:companies : %s" % str(companies))
#companies = [ None if a else '' for a in authors ]
#_debug("B:companies : %s" % str(companies))
#find authors' addresses section if it exists
last_line = len(self.lines)-1
address_section_pos = last_line/2
@ -499,8 +621,9 @@ class Draft():
found_pos = []
for i in range(len(authors)):
_debug("1: authors[%s]: %s" % (i, authors[i]))
_debug(" company[%s]: %s" % (i, companies[i]))
author = authors[i]
if author == None:
if author in [ None, '', ]:
continue
suffix_match = re.search(" %(suffix)s$" % aux, author)
if suffix_match:
@ -537,7 +660,6 @@ class Draft():
start = 0
col = None
# Find start of author info for this author (if any).
# Scan from the end of the file, looking for a match to authpath
# Scan towards the front from the end of the file, looking for a match to authpath
for j in range(last_line, address_section_pos, -1):
line = self.lines[j]
@ -552,12 +674,12 @@ class Draft():
found_pos += [ start ]
_debug( " ==> start %s, normalized '%s'" % (start, form.strip()))
# The author info could be formatted in multiple columns...
columns = re.split("( +| and )", form)
columns = re.split("( +| and )", form)
# _debug( "Columns:" + str(columns))
# Find which column:
# _debug( "Col range:" + str(range(len(columns))))
cols = [ c for c in range(len(columns)) if re.search(authpat+r"( and |, |$)", columns[c].strip()) ]
cols = [ c for c in range(len(columns)) if re.search(authpat+r"( and |, |$)", columns[c].strip()) ]
if cols:
col = cols[0]
if not (start, col) in found_pos:
@ -575,7 +697,8 @@ class Draft():
_debug( "Cut: '%s'" % form[beg:end])
author_match = re.search(authpat, columns[col].strip()).group(1)
_debug( "AuthMatch: '%s'" % (author_match,))
if author_match in companies:
if author_match in companies_seen:
companies[i] = authors[i]
authors[i] = None
else:
if casefixname in author_match:
@ -595,6 +718,7 @@ class Draft():
if not " ".join([ n for n in names if n ]) == fullname:
_err("Author tuple doesn't match text in draft: %s, %s" % (authors[i], fullname))
authors[i] = (fullname, first, middle, surname, suffix)
companies[i] = None
#_debug( "Author: %s: %s" % (author_match, authors[author_match]))
break
except AssertionError, e:
@ -631,20 +755,21 @@ class Draft():
# Maybe break on author name
# _debug("Line: %s"%line.strip())
# for a in authors:
# if a and a not in companies:
# if a and a not in companies_seen:
# _debug("Search for: %s"%(r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"))
authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies and (re.search((r"(?i)(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )]
authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )]
if authmatch:
_debug(" ? Other author or company ? : %s" % authmatch)
_debug(" Line: "+line.strip())
if nonblank_count == 1 or (nonblank_count == 2 and not blanklines):
# First line after an author -- this is a company
companies += [ c.lower() for c in authmatch ]
companies += [ line.strip().lower() ] # XXX fix this for columnized author list
companies = list(set(companies))
_debug(" -- Companies: " + ", ".join(companies))
companies_seen += [ c.lower() for c in authmatch ]
companies_seen += [ line.strip().lower() ] # XXX fix this for columnized author list
companies_seen = list(set(companies_seen))
_debug(" -- Companies: " + ", ".join(companies_seen))
for k in range(i+1, len(authors)):
if authors[k] and authors[k].lower() in companies:
if authors[k] and authors[k].lower() in companies_seen:
companies[k] = authors[k]
authors[k] = None
elif blanklines and not "@" in line:
# Break on an author name
@ -661,6 +786,7 @@ class Draft():
column = re.sub(" *\(dot\) *", ".", column)
column = re.sub(" +at +", "@", column)
column = re.sub(" +dot +", ".", column)
column = re.sub("&cisco.com", "@cisco.com", column)
# if re.search("^\w+: \w+", column):
# keyword = True
@ -679,14 +805,32 @@ class Draft():
break
authors[i] = authors[i] + ( email, )
else:
authors[i] = None
if not author in ignore:
companies[i] = authors[i]
_debug("Not an author? '%s'" % (author))
authors[i] = None
authors = [ a for a in authors if a != None ]
assert(len(authors) == len(companies))
_debug('Author list: %s' % authors)
_debug('Company list: %s' % companies)
for i in range(len(authors)):
if authors[i]:
_debug('authors[%s]: %s' % (i, authors[i]))
company = ''
for k in range(i+1, len(companies)):
_debug('companies[%s]: %s' % (k, companies[k]))
if companies[k] != None:
company = companies[k]
break
authors[i] = authors[i] + ( company, )
authors = [ a for a in authors if a ]
_debug(" * Final author tuples: %s" % (authors,))
_debug(" * Final company list: %s" % (companies,))
_debug(" * Final companies_seen: %s" % (companies_seen,))
self._author_info = authors
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email in authors ]
self._authors_with_firm = [ "%s <%s> (%s)"%(full,email,company) for full,first,middle,last,suffix,email,company in authors ]
self._authors = [ "%s <%s>"%(full,email) if email else full for full,first,middle,last,suffix,email,company in authors ]
self._authors.sort()
_debug(" * Final author list: " + ", ".join(self._authors))
_debug("-"*72)
@ -764,17 +908,35 @@ def getmeta(fn):
if os.path.exists(fn):
filename = fn
fn = os.path.basename(fn)
elif fn.lower().startswith('rfc'):
filename = os.path.join("/www/tools.ietf.org/rfc", fn)
else:
filename = os.path.join("/www/tools.ietf.org/id", fn)
if fn.lower().startswith('rfc'):
filename = os.path.join("/www/tools.ietf.org/rfc", fn)
elif not "/" in fn:
filename = os.path.join("/www/tools.ietf.org/id", fn)
if not os.path.exists(filename):
fn = filename
while not "-00." in fn:
revmatch = re.search("-(\d\d)\.", fn)
if revmatch:
rev = revmatch.group(1)
prev = "%02d" % (int(rev)-1)
fn = fn.replace("-%s."%rev, "-%s."%prev)
if os.path.exists(fn):
_warn("Using rev %s instead: '%s'" % (prev, filename))
filename = fn
fn = os.path.basename(fn)
break
else:
break
else:
filename = fn
if not os.path.exists(filename):
_warn("Could not find file: '%s'" % (filename))
return None
_warn("Could not find file: '%s'" % (filename))
return
timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(filename)[stat.ST_MTIME]))
text = _gettext(filename)
draft = Draft(text)
draft = Draft(text, filename)
#_debug("\n".join(draft.lines))
fields["eventdate"] = timestamp
@ -785,7 +947,10 @@ def getmeta(fn):
fields["doctitle"] = draft.get_title()
fields["docpages"] = str(draft.get_pagecount())
fields["docauthors"] = ", ".join(draft.get_authors())
# fields["docauthinfo"] = str(draft.get_author_info())
fields["_authorlist"] = draft.get_author_list()
fields["docaffiliations"] = ", ".join(draft.get_authors_with_firm())
if opt_debug:
fields["docheader"] = draft._docheader
normrefs, rfcrefs, refs = draft.get_refs()
fields["docrfcrefs"] = ", ".join(rfcrefs)
fields["doccreationdate"] = str(draft.get_creation_date())
@ -801,22 +966,50 @@ def getmeta(fn):
# ----------------------------------------------------------------------
def _output(docname, fields, outfile=sys.stdout):
if opt_timestamp:
outfile.write("%s " % (fields["eventdate"]))
outfile.write("%s" % (os.path.basename(docname.strip())))
if opt_getauthors:
# Output an (incomplete!) getauthors-compatible format. Country
# information is always UNKNOWN, and information about security and
# iana sections presence is missing.
for full,first,middle,last,suffix,email,company in fields["_authorlist"]:
if company in company_domain:
company = company_domain[company]
else:
if email and '@' in email:
company = email.split('@')[1]
if company.endswith(".com"):
company = company[:-4]
fields["name"] = full
fields["email"] = email
fields["company"] = company
fields["country"] = "UNKNOWN"
try:
year, month, day = fields["doccreationdate"].split("-")
except ValueError:
year, month, day = "UNKNOWN", "UNKNOWN", "UNKNOWN"
fields["day"] = day
fields["month"] = month_names[int(month)] if month != "UNKNOWN" else "UNKNOWN"
fields["year"] = year
print "%(doctag)s:%(name)s:%(company)s:%(email)s:%(country)s:%(docpages)s:%(month)s:%(year)s:%(day)s:" % fields
else:
if opt_attributes:
def outputkey(key, fields):
outfile.write("%-24s: %s\n" % ( key, fields[key].strip().replace("\\", "\\\\" ).replace("'", "\\x27" )))
else:
def outputkey(key, fields):
outfile.write(" %s='%s'" % ( key.lower(), fields[key].strip().replace("\\", "\\\\" ).replace("'", "\\x27" ).replace("\n", "\\n")))
if opt_timestamp:
outfile.write("%s " % (fields["eventdate"]))
outfile.write("%s" % (os.path.basename(docname.strip())))
def outputkey(key, fields):
outfile.write(" %s='%s'" % ( key.lower(), fields[key].strip().replace("\\", "\\\\" ).replace("'", "\\x27" ).replace("\n", "\\n")))
keys = fields.keys()
keys.sort()
for key in keys:
if fields[key] and not key in ["eventdate", ]:
outputkey(key, fields)
outfile.write("\n")
keys = fields.keys()
keys.sort()
for key in keys:
if fields[key] and not key in ["eventdate", ] and not key.startswith("_"):
outputkey(key, fields)
outfile.write("\n")
# ----------------------------------------------------------------------
def _printmeta(timestamp, fn, outfile=sys.stdout):
def _printmeta(fn, outfile=sys.stdout):
if opt_trace:
t = time.time()
sys.stderr.write("%-58s" % fn[:-4])
@ -828,13 +1021,12 @@ def _printmeta(timestamp, fn, outfile=sys.stdout):
if opt_trace:
sys.stderr.write("%5.1f\n" % ((time.time() - t)))
# ----------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------
def _main(outfile=sys.stdout):
global opt_debug, opt_timestamp, opt_trace, files
global opt_debug, opt_timestamp, opt_trace, opt_authorinfo, opt_getauthors, files, company_domain, opt_attributes
# set default values, if any
# ----------------------------------------------------------------------
# Option processing
@ -854,14 +1046,14 @@ def _main(outfile=sys.stdout):
sys.exit(1)
try:
opts, files = getopt.gnu_getopt(sys.argv[1:], "dhtTv", ["debug", "help", "timestamp", "trace", "version",])
opts, files = getopt.gnu_getopt(sys.argv[1:], "dhatTv", ["debug", "getauthors", "attribs", "attributes", "help", "timestamp", "notimestamp", "trace", "version",])
except Exception, e:
print "%s: %s" % (program, e)
sys.exit(1)
# parse options
for opt, value in opts:
if opt in ["-d", "--debug"]: # Output debug information
if opt in ["-d", "--debug"]: # Output debug information
opt_debug = True
elif opt in ["-h", "--help"]: # Output this help text, then exit
vars = globals()
@ -871,11 +1063,29 @@ def _main(outfile=sys.stdout):
elif opt in ["-v", "--version"]: # Output version information, then exit
print program, version
sys.exit(0)
elif opt in ["-t", "--timestamp"]: # Emit leading timestamp information
elif opt in ["--getauthors"]: # Output an (incomplete) getauthors-compatible format
opt_getauthors = True
elif opt in ["-a", "--attribs"]: # Output key-value attribute pairs
opt_attributes = True
elif opt in ["-t", ]: # Toggle leading timestamp information
opt_timestamp = not opt_timestamp
elif opt in ["--timestamp"]: # Emit leading timestamp information
opt_timestamp = True
elif opt in ["--notimestamp"]: # Omit leading timestamp information
opt_timestamp = False
elif opt in ["-T", "--trace"]: # Emit trace information while working
opt_trace = True
company_domain = {}
if opt_getauthors:
gadata = open("/www/tools.ietf.org/tools/getauthors/getauthors.data")
for line in gadata:
if line.startswith("company:"):
try:
kword, name, abbrev = line.strip().split(':')
company_domain[name] = abbrev
except ValueError:
pass
if not files:
files = [ "-" ]
@ -888,16 +1098,11 @@ def _main(outfile=sys.stdout):
else:
file = open(file)
if os.path.exists(file.name):
timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime(os.stat(file.name)[stat.ST_MTIME]))
else:
timestamp = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime())
basename = os.path.basename(file.name)
if basename.startswith("draft-"):
draft = basename
_debug( "** Processing '%s'" % draft)
_printmeta(timestamp, file.name, outfile)
_printmeta(file.name, outfile)
else:
for line in file:
draft = line.strip()
@ -905,7 +1110,7 @@ def _main(outfile=sys.stdout):
continue
if draft:
_debug( "** Processing '%s'" % draft)
_printmeta(timestamp, draft, outfile)
_printmeta(draft, outfile)
if __name__ == "__main__":
try: