Added rfcmarkup code. Will probably be migrated to a separate lib in time.
- Legacy-Id: 12994
This commit is contained in:
parent
58dae78a43
commit
319a4850d6
379
ietf/utils/rfcmarkup.py
Normal file
379
ietf/utils/rfcmarkup.py
Normal file
|
@ -0,0 +1,379 @@
|
|||
import re
|
||||
import cgi
|
||||
import urllib
|
||||
|
||||
def markup(text, path=".", script="", extra=""):
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Start of markup handling
|
||||
|
||||
# Convert \r which is not followed or preceded by a \n to \n
|
||||
# (in case this is a mac document)
|
||||
text = re.sub("([^\n])\r([^\n])", "\g<1>\n\g<2>", text)
|
||||
# Strip \r (in case this is a ms format document):
|
||||
text = text.replace("\r","")
|
||||
|
||||
# -------------
|
||||
# Normalization
|
||||
|
||||
# Remove whitespace at the end of lines
|
||||
text = re.sub("[\t ]+\n", "\n", text)
|
||||
|
||||
# Remove whitespace (including formfeeds) at the end of the document.
|
||||
# (Trailing formfeeds will result in trailing blank pages.)
|
||||
text = re.sub("[\t \r\n\f]+$", "\n", text)
|
||||
|
||||
text = text.expandtabs()
|
||||
|
||||
# Remove extra blank lines at the start of the document
|
||||
text = re.sub("^\n*", "", text, 1)
|
||||
|
||||
# Fix up page breaks:
|
||||
# \f should aways be preceeded and followed by \n
|
||||
text = re.sub("([^\n])\f", "\g<1>\n\f", text)
|
||||
text = re.sub("\f([^\n])", "\f\n\g<1>", text)
|
||||
|
||||
# [Page nn] should be followed by \n\f\n
|
||||
text = re.sub("(?i)(\[Page [0-9ivxlc]+\])[\n\f\t ]*(\n *[^\n\f\t ])", "\g<1>\n\f\g<2>", text)
|
||||
|
||||
# Normalize indentation
|
||||
linestarts = re.findall("(?m)^([ ]*)\S", text);
|
||||
prefixlen = 72
|
||||
for start in linestarts:
|
||||
if len(start) < prefixlen:
|
||||
prefixlen = len(start)
|
||||
if prefixlen:
|
||||
text = re.sub("\n"+(" "*prefixlen), "\n", text)
|
||||
|
||||
# reference name tag markup
|
||||
reference = {}
|
||||
ref_url = {}
|
||||
|
||||
## Locate the start of the References section as the first reference
|
||||
## definition after the last reference usage
|
||||
## Incomplete 05 Aug 2010 17:05:27 XXXX Complete this!!
|
||||
|
||||
##ref_usages = re.findall("(\W)(\[)([-\w.]+)((, ?[-\w.]+)*\])", text)
|
||||
ref_defs = re.findall("(?sm)^( *\n *)\[([-\w.]+?)\]( +)(.*?)(\n *)$", text)
|
||||
|
||||
##ref_pos = [ match.start() for match in ref_usages ]
|
||||
##def_pos = [ match.start() for match in ref_defs ]
|
||||
##ref_pos = [ pos for pos in ref_pos if not pos in ref_defs ]
|
||||
##last_ref_pos = ref_pos[-1] if ref_pos else None
|
||||
|
||||
#sys.stderr.write("ref_defs: %s\n" % repr(ref_defs))
|
||||
for tuple in ref_defs:
|
||||
title_match = re.search("(?sm)^(.*?(\"[^\"]+?\").+?|.*?(,[^,]+?,)[^,]+?)$", tuple[3])
|
||||
if title_match:
|
||||
reftitle = title_match.group(2) or title_match.group(3).strip("[ ,]+")
|
||||
# Get rid of page break information inside the title
|
||||
reftitle = re.sub("(?s)\n\n\S+.*\n\n", "", reftitle)
|
||||
reftitle = cgi.escape(reftitle, quote=True)
|
||||
reftitle = re.sub("[\n\t ]+", " ", reftitle) # Remove newlines and tabs
|
||||
reference[tuple[1]] = reftitle
|
||||
url_match = re.search(r"(http|https|ftp)://\S+", tuple[3])
|
||||
if url_match:
|
||||
ref_url[tuple[1]] = url_match.group(0)
|
||||
|
||||
# -------------
|
||||
# escape any html significant characters
|
||||
text = cgi.escape(text);
|
||||
|
||||
|
||||
# -------------
|
||||
# Adding markup
|
||||
|
||||
text = "<pre>"+text+"</pre>"
|
||||
|
||||
# Typewriter-style underline:
|
||||
text = re.sub("_[\b](.)", "<u>\g<1></u>", text)
|
||||
|
||||
# Line number markup goes here
|
||||
|
||||
|
||||
# Obsoletes: ... markup
|
||||
|
||||
def rfclist_replace(keyword, text):
|
||||
def replacement(match):
|
||||
group = list(match.groups(""))
|
||||
group[3] = re.sub("\d+", """<a href=\"%s?%srfc=\g<0>\">\g<0></a>""" % (script, extra), group[3])
|
||||
if group[8]:
|
||||
group[8] = re.sub("\d+", """<a href=\"%s?%srfc=\g<0>\">\g<0></a>""" % (script, extra), group[8])
|
||||
else:
|
||||
group[8] = ""
|
||||
return "\n%s%s%s\n%s%s" % (group[0], group[3], group[5], group[7], group[8])
|
||||
text = re.sub("\n(%s( RFCs| RFC)?: ?( RFCs| RFC)?)(( \d+,| \d+)+)(.*)\n(( *)((\d+, )*(\d+)))*" % keyword, replacement, text, 1)
|
||||
return text
|
||||
|
||||
text = rfclist_replace("Obsoletes", text)
|
||||
text = rfclist_replace("Updates", text)
|
||||
|
||||
lines = text.splitlines(True)
|
||||
head = "".join(lines[:28])
|
||||
rest = "".join(lines[28:])
|
||||
|
||||
# title markup
|
||||
head = re.sub("""(?im)(([12][0-9][0-9][0-9]|^Obsoletes.*|^Category: (Standards Track|Informational|Experimental|Best Current Practice)) *\n\n+ +)([A-Z][^\n]+)$""", """\g<1><span class=\"h1\">\g<4></span>""", head, 1)
|
||||
head = re.sub("""(?i)(<span class="h1".+</span>)(\n +)([^<\n]+)\n""", """\g<1>\g<2><span class="h1">\g<3></span>\n""", head, 1)
|
||||
head = re.sub("""(?i)(<span class="h1".+</span>)(\n +)([^<\n]+)\n""", """\g<1>\g<2><span class="h1">\g<3></span>\n""", head, 1)
|
||||
|
||||
text = head + rest
|
||||
|
||||
# http link markup
|
||||
# link crossing a line. Not permitting ":" after the line break will
|
||||
# result in some URLs broken across lines not being recognized, but
|
||||
# will on the other hand correctly handle a series of URL listed line
|
||||
# by line, one on each line.
|
||||
# Link crossing a line, where the continuation contains '.' or '/'
|
||||
text = re.sub("(?im)(\s|^|[^=]\"|\()((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[./][A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)\"\s]|$)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\">\g<6></a>\g<7>", text)
|
||||
text = re.sub("(?im)(<)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(>)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\">\g<6></a>\g<7>", text)
|
||||
# Link crossing a line, where first line ends in '-' or '/'
|
||||
text = re.sub("(?im)(\s|^|[^=]\"|\()((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?[-/])(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)\"\s]|$)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\">\g<6></a>\g<7>", text)
|
||||
text = re.sub("(?im)(<)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(>)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\">\g<6></a>\g<7>", text)
|
||||
# link crossing a line, enclosed in "<" ... ">"
|
||||
text = re.sub("(?im)<((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])>",
|
||||
"<\g<1><a href=\"\g<1>\g<5>\">\g<1></a>\g<4><a href=\"\g<1>\g<5>\">\g<5></a>>", text)
|
||||
text = re.sub("(?im)(<)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&;?#~=-]+[A-Za-z0-9_/@%&;?#~=-])(>)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\">\g<6></a>\g<7>", text)
|
||||
# link crossing two lines, enclosed in "<" ... ">"
|
||||
text = re.sub("(?im)<((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])>",
|
||||
"<\g<1><a href=\"\g<1>\g<5>\g<7>\">\g<1></a>\g<4><a href=\"\g<1>\g<5>\g<7>\">\g<5></a>\g<6><a href=\"\g<1>\g<5>\g<7>\">\g<7></a>>", text)
|
||||
text = re.sub("(?im)(<)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(\n +)([A-Za-z0-9_./@%&;?#~=-]+[A-Za-z0-9_/@%&;?#~=-])(>)",
|
||||
"\g<1><a href=\"\g<2>\g<6>\g<8>\">\g<2></a>\g<5><a href=\"\g<2>\g<6>\g<8>\">\g<6></a>\g<7><a href=\"\g<2>\g<6>\g<8>\">\g<8></a>\g<9>", text)
|
||||
# link on a single line
|
||||
text = re.sub("(?im)(\s|^|[^=]\"|<|\()((http|https|ftp)://[:A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)\"\s]|>|$)",
|
||||
"\g<1><a href=\"\g<2>\">\g<2></a>\g<4>", text)
|
||||
# # Special case for licensing boilerplate
|
||||
# text = text.replace('<a href="http://trustee.ietf.org/">http://trustee.ietf.org/</a>\n license-info',
|
||||
# '<a href="http://trustee.ietf.org/licence-info">http://trustee.ietf.org/</a>\n <a href="http://trustee.ietf.org/licence-info">licence-info</a>')
|
||||
|
||||
# undo markup if RFC2606 domain
|
||||
text = re.sub("""(?i)<a href="[a-z]*?://([a-z0-9_-]+?\.)?example(\.(com|org|net))?(/.*?)?">(.*?)</a>""", "\g<5>", text)
|
||||
|
||||
# draft markup
|
||||
# draft name crossing line break
|
||||
text = re.sub("([^/#=\?\w-])(draft-([-a-zA-Z0-9]+-)?)(\n +)([-a-zA-Z0-9]+[a-zA-Z0-9](.txt)?)",
|
||||
"\g<1><a href=\"%s?%sdraft=\g<2>\g<5>\">\g<2></a>\g<4><a href=\"%s?%sdraft=\g<2>\g<5>\">\g<5></a>" % (script, extra, script, extra), text)
|
||||
# draft name on one line (but don't mess with what we just did above)
|
||||
text = re.sub("([^/#=\?\w>=-])(draft-[-a-zA-Z0-9]+[a-zA-Z0-9](.txt)?)",
|
||||
"\g<1><a href=\"%s?%sdraft=\g<2>\">\g<2></a>" % (script, extra), text)
|
||||
|
||||
# rfc markup
|
||||
# rfc and number on the same line
|
||||
text = re.sub("""(?i)([^[/\w-])(rfc([- ]?))([0-9]+)(\W)""",
|
||||
"""\g<1><a href=\"%s?%srfc=\g<4>\">\g<2>\g<4></a>\g<5>""" % (script, extra), text)
|
||||
# rfc and number on separate lines
|
||||
text = re.sub("(?i)([^[/\w-])(rfc([-]?))(\n +)([0-9]+)(\W)",
|
||||
"\g<1><a href=\"%s?%srfc=\g<5>\">\g<2></a>\g<4><a href=\"%s?%srfc=\g<5>\">\g<5></a>\g<6>" % (script, extra, script, extra), text)
|
||||
# spelled out Request For Comments markup
|
||||
text = re.sub("(?i)(\s)(Request\s+For\s+Comments\s+\([^)]+\)\s+)([0-9]+)",
|
||||
"\g<1>\g<2><a href=\"%s?%srfc=\g<3>\">\g<3></a>" % (script, extra), text)
|
||||
# bcp markup
|
||||
text = re.sub("(?i)([^[/\w-])(bcp([- ]?))([0-9]+)(\W)",
|
||||
"\g<1><a href=\"%s?%sbcp=\g<4>\">\g<2>\g<4></a>\g<5>" % (script, extra), text)
|
||||
text = re.sub("(?i)([^[/\w-])(bcp([-]?))(\n +)([0-9]+)(\W)",
|
||||
"\g<1><a href=\"%s?%sbcp=\g<5>\">\g<2></a>\g<4><a href=\"%s?%sbcp=\g<5>\">\g<5></a>\g<6>" % (script, extra, script, extra), text)
|
||||
|
||||
def workinprogress_replacement(match):
|
||||
g1 = match.group(1)
|
||||
g2 = match.group(2)
|
||||
g3 = match.group(3)
|
||||
# eliminate embedded hyperlinks in text we'll use as anchor text
|
||||
g4 = match.group(4)
|
||||
g4 = re.sub("<a.+?>(.+?)</a>", "\g<1>", g4)
|
||||
g4url = urllib.quote_plus(g4)
|
||||
g5 = match.group(5)
|
||||
return """%s[<a id=\"ref-%s\">%s</a>]%s<a style=\"text-decoration: none\" href='https://www.google.com/search?sitesearch=tools.ietf.org%%2Fhtml%%2F&q=inurl:draft-+%s'>%s</a>%s""" % (g1, g2, g2, g3, g4url, g4, g5)
|
||||
|
||||
text = re.sub("(\n *\n *)\[([-\w.]+)\](\s+.*?)(\".+\")(,\s+Work\s+in\s+Progress.)", workinprogress_replacement, text)
|
||||
text = re.sub("(\n *\n *)\[([-\w.]+)\](\s)", "\g<1>[<a id=\"ref-\g<2>\">\g<2></a>]\g<3>", text)
|
||||
|
||||
text = re.sub("(\n *\n *)\[(RFC [-\w.]+)\](\s)", "\g<1>[<a id=\"ref-\g<2>\">\g<2></a>]\g<3>", text)
|
||||
|
||||
ref_targets = re.findall('<a id="ref-(.*?)"', text)
|
||||
|
||||
# reference link markup
|
||||
def reference_replacement(match):
|
||||
pre = match.group(1)
|
||||
beg = match.group(2)
|
||||
tag = match.group(3)
|
||||
end = match.group(4)
|
||||
isrfc = re.match("(?i)^rfc[ -]?([0-9]+)$", tag)
|
||||
if isrfc:
|
||||
rfcnum = isrfc.group(1)
|
||||
if tag in reference:
|
||||
return """%s%s<a href="%s?%srfc=%s" title="%s">%s</a>%s""" % (pre, beg, script, extra, rfcnum, reference[tag], tag, end)
|
||||
else:
|
||||
return """%s%s<a href="%s?%srfc=%s">%s</a>%s""" % (pre, beg, script, extra, rfcnum , tag, end)
|
||||
else:
|
||||
if tag in ref_targets:
|
||||
if tag in reference:
|
||||
return """%s%s<a href="#ref-%s" title="%s">%s</a>%s""" % (pre, beg, tag, reference[tag], tag, end)
|
||||
else:
|
||||
return """%s%s<a href="#ref-%s">%s</a>%s""" % (pre, beg, tag, tag, end)
|
||||
else:
|
||||
return match.group(0)
|
||||
|
||||
# Group: 1 2 3 45
|
||||
text = re.sub("(\W)(\[)([-\w.]+)((, ?[-\w.]+)*\])", reference_replacement, text)
|
||||
text = re.sub("(\W)(\[)(RFC [0-9]+)((, ?RFC [0-9]+)*\])", reference_replacement, text)
|
||||
while True:
|
||||
old = text
|
||||
text = re.sub("(\W)(\[(?:<a.*?>.*?</a>, ?)+)([-\w.]+)((, ?[-\w.]+)*\])", reference_replacement, text)
|
||||
if text == old:
|
||||
break
|
||||
while True:
|
||||
old = text
|
||||
text = re.sub("(\W)(\[(?:<a.*?>.*?</a>, ?)+)(RFC [-\w.]+)((, ?RFC [-\w.]+)*\])", reference_replacement, text)
|
||||
if text == old:
|
||||
break
|
||||
|
||||
# greying out the page headers and footers
|
||||
text = re.sub("\n(.+\[Page \w+\])\n\f\n(.+)\n", """\n<span class="grey">\g<1></span>\n\f\n<span class="grey">\g<2></span>\n""", text)
|
||||
|
||||
# contents link markup: section links
|
||||
# 1 2 3 4 5 6 7
|
||||
text = re.sub("(?m)^(\s*)(\d+(\.\d+)*)(\.?[ ]+)(.*[^ .])( *\. ?\.)(.*[0-9])$", """\g<1><a href="#section-\g<2>">\g<2></a>\g<4>\g<5>\g<6>\g<7>""", text)
|
||||
text = re.sub("(?m)^(\s*)(Appendix |)([A-Z](\.\d+)*)(\.?[ ]+)(.*[^ .])( *\. ?\.)(.*[0-9])$", """\g<1><a href="#appendix-\g<3>">\g<2>\g<3></a>\g<5>\g<6>\g<7>\g<8>""", text)
|
||||
|
||||
# page number markup
|
||||
multidoc_separator = "========================================================================"
|
||||
if re.search(multidoc_separator, text):
|
||||
parts = re.split(multidoc_separator, text)
|
||||
for i in range(len(parts)):
|
||||
parts[i] = re.sub("(?si)(\f)([^\f]*\[Page (\w+)\])", "\g<1><div id=\"%(page)s-\g<3>\" class=\"anchor-target\"></div>\g<2>"%{"page": "page-%s"%(i+1)}, parts[i])
|
||||
parts[i] = re.sub("(?i)(\. ?\. +|\. \. \.|\.\.\. *)([0-9ivxlc]+)( *\n)", "\g<1><a href=\"#%(page)s-\g<2>\">\g<2></a>\g<3>"%{"page": "page-%s"%(i+1)}, parts[i])
|
||||
text = multidoc_separator.join(parts)
|
||||
else:
|
||||
# page name tag markup
|
||||
text = re.sub("(?si)(\f)([^\f]*\[Page (\w+)\])", "\g<1><div id=\"page-\g<3>\" class=\"anchor-target\"></div>\g<2>", text)
|
||||
# contents link markup: page numbers
|
||||
text = re.sub("(?i)(\. ?\. +|\. \. \.|\.\.\. *)([0-9ivxlc]+)( *\n)", "\g<1><a href=\"#page-\g<2>\">\g<2></a>\g<3>", text)
|
||||
|
||||
# section number tag markup
|
||||
def section_anchor_replacement(match):
|
||||
# exclude TOC entries
|
||||
mstring = match.group(0)
|
||||
if " \. \. " in mstring or "\.\.\." in mstring:
|
||||
return mstring
|
||||
|
||||
level = len(re.findall("[^\.]+", match.group(1)))+1
|
||||
if level > 6:
|
||||
level = 6
|
||||
html = """<span class="h%s anchor-target"><a class=\"selflink\" id=\"section-%s\" href=\"#section-%s\">%s</a>%s</span>""" % (level, match.group(1), match.group(1), match.group(1), match.group(3))
|
||||
html = html.replace("\n", """</span>\n<span class="h%s anchor-target">""" % level)
|
||||
return html
|
||||
|
||||
|
||||
text = re.sub("(?im)^(\d+(\.\d+)*)(\.?[ ]+\S.*?(\n +\w+.*)?( |$))", section_anchor_replacement, text)
|
||||
#text = re.sub("(?i)(\n *\n *)(\d+(\.\d+)*)(\.?[ ].*)", section_replacement, text)
|
||||
# section number link markup
|
||||
text = re.sub("(?i)(section\s)(\d+(\.\d+)*)", "<a href=\"#section-\g<2>\">\g<1>\g<2></a>", text)
|
||||
text = re.sub("(?i)(section)\n(\s+)(\d+(\.\d+)*)", "<a href=\"#section-\g<3>\">\g<1></a>\n\g<2><a href=\"#section-\g<3>\">\g<3></a>", text)
|
||||
|
||||
# Special cases for licensing boilerplate
|
||||
text = text.replace('<a href="#section-4">Section 4</a>.e of the Trust Legal Provisions',
|
||||
'Section 4.e of the <a href="https://trustee.ietf.org/license-info">Trust Legal Provisions</a>')
|
||||
|
||||
while True:
|
||||
old = text
|
||||
text = re.sub("(?i)(sections\s(<a.*?>.*?</a>(,\s|\s?-\s?|\sthrough\s|\sor\s|\sto\s|,?\sand\s))*)(\d+(\.\d+)*)", "\g<1><a href=\"#section-\g<4>\">\g<4></a>", text)
|
||||
if text == old:
|
||||
break
|
||||
|
||||
# appendix number tag markup
|
||||
def appendix_replacement(match):
|
||||
# exclude TOC entries
|
||||
mstring = match.group(0)
|
||||
if " \. \. " in mstring or "\.\.\." in mstring:
|
||||
return mstring
|
||||
|
||||
txt = match.group(4)
|
||||
num = match.group(2).rstrip('.')
|
||||
if num != match.group(2):
|
||||
txt = "." + txt
|
||||
level = len(re.findall("[^\.]+", num))+1
|
||||
if level > 6:
|
||||
level = 6
|
||||
return """<span class="h%s anchortarget"><a class=\"selflink\" id=\"appendix-%s\" href=\"#appendix-%s\">%s%s</a>%s</span>""" % (level, num, num, match.group(1), num, txt)
|
||||
|
||||
text = re.sub("(?m)^(Appendix |)([A-Z](\.|\.\d+)+)(\.?[ ].*)$", appendix_replacement, text)
|
||||
#text = re.sub("(?i)(\n *\n *)(\d+(\.\d+)*)(\.?[ ].*)", appendix_replacement, text)
|
||||
# appendix number link markup
|
||||
text = re.sub(" ([Aa]ppendix\s)([A-Z](\.\d+)*)", " <a href=\"#appendix-\g<2>\">\g<1>\g<2></a>", text)
|
||||
text = re.sub(" ([Aa]ppendix)\n(\s+)([A-Z](\.\d+)*)", " <a href=\"#appendix-\g<3>\">\g<1></a>\n\g<2><a href=\"#appendix-\g<3>\">\g<3></a>", text)
|
||||
|
||||
# # section x of draft-y markup
|
||||
# text = re.sub("(?i)<a href=\"[^\"]*\">(section)\s(\d+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href=\"[^\"]*\">(draft-[-.a-zA-Z0-9]+[a-zA-Z0-9])</a>", "<a href=\"%s?%surl=%s/rfc\g<7>.txt#section-\g<2>\">\g<1> \g<2>\g<4>\g<6>\g<7></a>" % (script, extra, rfcs), text)
|
||||
# # draft-y, section x markup
|
||||
# text = re.sub("(?i)<a href=\"[^\"]*\">(draft-[-.a-zA-Z0-9]+[a-zA-Z0-9])</a>(,?\s)<a href=\"[^\"]*\">(section)\s(\d+(\.\d+)*)</a>", "<a href=\"%s?%surl=%s/rfc\g<2>.txt#section-\g<5>\">\g<1>\g<2>\g<3>\g<4> \g<5></a>" % (script, extra, rfcs), text)
|
||||
# # [draft-y], section x markup
|
||||
# text = re.sub("(?i)\[<a href=\"[^>\"]+\">(draft-[-.a-zA-Z0-9]+[a-zA-Z0-9])</a>\](,?\s)<a href=\"[^>\"]*\">(section)\s(\d+(\.\d+)*)</a>", "<a href=\"%s?%surl=%s/rfc\g<2>.txt#section-\g<5>\">[\g<1>\g<2>]\g<3>\g<4> \g<5></a>" % (script, extra, rfcs), text)
|
||||
|
||||
for n in ['rfc', 'bcp', 'fyi', 'std']:
|
||||
# section x of rfc y markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(section)\s(\d+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<9>\g<1>\">\g<2> \g<3>\g<5>\g<8>\g<9></a>" % (script, extra, n), text)
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(section)</a>(\n\s+)<a href=\"(?:[^\"]*)\"[^>]*>(\d+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<10>\g<1>\">\g<2></a>\g<3><a href=\"%s?%s%s=\g<10>\g<1>\">\g<4>\g<5>\g<9>\g<10></a>" % (script, extra, n, script, extra, n), text)
|
||||
# appendix x of rfc y markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<9>\g<1>\">\g<2> \g<3>\g<5>\g<8>\g<9></a>" % (script, extra, n), text)
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(appendix)</a>(\n\s+)<a href=\"(?:[^\"]*)\"[^>]*>([A-Z]+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<10>\g<1>\">\g<2></a>\g<3><a href=\"%s?%s%s=\g<10>\g<1>\">\g<4>\g<5>\g<9>\g<10></a>" % (script, extra, n, script, extra, n), text)
|
||||
|
||||
# rfc y, section x markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>(,?\s+)<a href=\"([^\"]*)\"[^>]*>(section)\s?(([^<]*))</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<3>\g<5>\">\g<2>\g<3>\g<4>\g<6> \g<7></a>" % (script, extra, n), text)
|
||||
# rfc y, appendix x markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>(,?\s+)<a href=\"([^\"]*)\"[^>]*>(appendix)\s?(([^<]*))</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<3>\g<5>\">\g<2>\g<3>\g<4>\g<6> \g<7></a>" % (script, extra, n), text)
|
||||
|
||||
# section x of? [rfc y] markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(section)\s(\d+(\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>\]"%n,
|
||||
"<a href=\"%s?%s%s=\g<9>\g<1>\">\g<2> \g<3>\g<5>[\g<8>\g<9>]</a>" % (script, extra, n), text)
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(section)</a>(\n\s+)<a href=\"(?:[^\"]*)\"[^>]*>(\d+(\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>\]"%n,
|
||||
"<a href=\"%s?%s%s=\g<10>\g<1>\">\g<2></a>\g<3><a href=\"%s?%s%s=\g<10>\g<1>\">\g<4>\g<6>[\g<9>\g<10>]</a>" % (script, extra, n, script, extra, n), text)
|
||||
# appendix x of? [rfc y] markup
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>\]"%n,
|
||||
"<a href=\"%s?%s%s=\g<9>\g<1>\">\g<2> \g<3>\g<5>[\g<8>\g<9>]</a>" % (script, extra, n), text)
|
||||
text = re.sub("(?i)<a href=\"([^\"]*)\"[^>]*>(appendix)</a>(\n\s+)<a href=\"(?:[^\"]*)\"[^>]*>([A-Z](\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href=\"([^\"]*)\"[^>]*>(%s[- ]?)([0-9]+)</a>\]"%n,
|
||||
"<a href=\"%s?%s%s=\g<10>\g<1>\">\g<2></a>\g<3><a href=\"%s?%s%s=\g<10>\g<1>\">\g<4>\g<6>[\g<9>\g<10>]</a>" % (script, extra, n, script, extra, n), text)
|
||||
|
||||
# [rfc y], section x markup
|
||||
text = re.sub("(?i)\[<a href=\"([^>\"]+)\"[^>]*>(%s[- ]?)([0-9]+)</a>\](,?\s+)<a href=\"([^>\"]*)\"[^>]*>(section)\s(\d+(\.\d+)*)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<3>\g<5>\">[\g<2>\g<3>]\g<4>\g<6> \g<7></a>" % (script, extra, n), text)
|
||||
# [rfc y], appendix x markup
|
||||
text = re.sub("(?i)\[<a href=\"([^>\"]+)\"[^>]*>(%s[- ]?)([0-9]+)</a>\](,?\s+)<a href=\"([^>\"]*)\"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>"%n,
|
||||
"<a href=\"%s?%s%s=\g<3>\g<5>\">[\g<2>\g<3>]\g<4>\g<6> \g<7></a>" % (script, extra, n), text)
|
||||
|
||||
|
||||
# remove section link for section x.x (of|in) <something else>
|
||||
old = text
|
||||
text = re.sub("(?i)<a href=\"[^\"]*\"[^>]*>(section\s)(\d+(\.\d+)*)</a>(\.?[a-z]*\s+(of|in)\s+)(\[?)<a href=\"([^\"]*)\"([^>]*)>(.*)</a>(\]?)",
|
||||
'\g<1>\g<2>\g<4>\g<6><a href="\g<7>"\g<8>>\g<9></a>\g<10>', text)
|
||||
text = re.sub('(?i)(\[?)<a href="([^"]*#ref[^"]*)"([^>]*)>(.*?)</a>(\]?,\s+)<a href="[^"]*"[^>]*>(section\s)(\d+(\.\d+)*)</a>',
|
||||
'\g<1><a href="\g<2>"\g<3>>\g<4></a>\g<5>\g<6>\g<7>', text)
|
||||
|
||||
# Special fix for referring to the trust legal provisons in
|
||||
# boilerplate text:
|
||||
text = re.sub("(?i)<a href=\"[^\"]*\"[^>]*>(section\s)(\d+(\.\d+)*)</a>(\.?[a-z]*\s+(of|in)\s*\n\s*the Trust Legal Provisions)",
|
||||
'\g<1>\g<2>\g<4>', text)
|
||||
|
||||
#
|
||||
#text = re.sub("\f", "<div class=\"newpage\" />", text)
|
||||
text = re.sub("\n?\f\n?", "</pre>\n<hr class='noprint' style='width: 78ex; margin-left: 0;'/><!--NewPage--><pre class='newpage'>", text)
|
||||
|
||||
# restore indentation
|
||||
if prefixlen:
|
||||
text = re.sub("\n", "\n"+(" "*prefixlen), text)
|
||||
|
||||
if path:
|
||||
text = re.sub("%s\?(rfc|bcp|std)=" % script, "%s/\g<1>" % path, text)
|
||||
text = re.sub("%s\?draft=" % script, "%s/" % path, text)
|
||||
|
||||
return text
|
Loading…
Reference in a new issue