Merged in ^/personal/henrik/6.47.3-htmlize@13040:

Htmlized drafts, rfcs, and charters using the htmlization code developed for
tools.ietf.org.
 - Legacy-Id: 13041
This commit is contained in:
Henrik Levkowetz 2017-03-20 15:05:25 +00:00
commit bca5951251
23 changed files with 957 additions and 68 deletions

View file

@ -155,10 +155,19 @@ def check_html_valid(url, response, args):
if not key in validated_urls:
note('Validate: %-32s: %s' % (url[:32], key))
# These URLs have known issues, skip them until those are fixed
if re.search('(/secr|admin/|/doc/.*/edit/info/|rfc542$|rfc776$|draft-leroux-pce-pcecp-interarea-reqs)', url):
log("%s blacklisted; skipping HTML validation" % url)
validated_urls[key] = True
return
for pattern in (
'/secr',
'admin/',
'/doc/.*/edit/info/',
'rfc542$',
'rfc776$',
'draft-leroux-pce-pcecp-interarea-reqs',
'draft-fujiwara-dnsop-resolver-update',
):
if re.search(pattern, url):
validated_urls[key] = True
log("%s blacklisted; skipping HTML validation" % url)
return
if hasattr(response, "content"):
content = response.content
@ -193,6 +202,7 @@ def skip_extract_from(url):
for pattern in (
r'^/doc/html/[a-z0-9-]+',
r'^/meeting/[a-z0-9-]+/agenda/[a-z0-9-]+',
r'^/static/coverage/',
):
if re.search(pattern, url):
return True
@ -209,6 +219,17 @@ def skip_url(url):
r"/site/ietfdhcwg/_/rsrc/1311005436000/system/app/css/overlay.css\?cb=simple100%250150goog-ws-left",
r"/dir/tsvdir/reviews/",
r"draft-touch-msword-template-v2\.0",
# These will always 404:
r"^/doc/html/charter-ietf-cicm",
r"^/doc/html/charter-ietf-dcon",
r"^/doc/html/charter-ietf-fun",
r"^/doc/html/charter-ietf-multrans",
r"^/doc/html/charter-ietf-sdn",
r"^/doc/html/charter-ietf-woes",
r"^/doc/html/draft-floyd-cc-alt",
r"^/doc/html/draft-ietf-sipping-overload-design",
r"^/doc/html/status-change-icmpv6-dns-ipv6-to-internet-standard",
r"^/static/coverage/",
):
if re.search(pattern, url):
return True

View file

@ -3,7 +3,6 @@
import datetime
import logging
import os
#import re
from django.db import models
from django.core import checks
@ -22,7 +21,7 @@ from ietf.name.models import ( DocTypeName, DocTagName, StreamName, IntendedStdL
from ietf.person.models import Email, Person
from ietf.utils import log
from ietf.utils.admin import admin_link
#from ietf.utils.rfcmarkup import markup
from ietf.utils.rfcmarkup import markup
from ietf.utils.validators import validate_no_control_chars
logger = logging.getLogger('django')
@ -100,10 +99,13 @@ class DocumentInfo(models.Model):
def get_file_path(self):
if not hasattr(self, '_cached_file_path'):
if self.type_id == "draft":
if self.get_state_slug() == "rfc":
self._cached_file_path = settings.RFC_PATH
else:
if self.is_dochistory():
self._cached_file_path = settings.INTERNET_DRAFT_PATH
else:
if self.get_state_slug() == "rfc":
self._cached_file_path = settings.RFC_PATH
else:
self._cached_file_path = settings.INTERNET_DRAFT_PATH
elif self.type_id in ("agenda", "minutes", "slides", "bluesheets") and self.meeting_related():
doc = self.doc if isinstance(self, DocHistory) else self
if doc.session_set.exists():
@ -124,16 +126,21 @@ class DocumentInfo(models.Model):
def get_base_name(self):
if not hasattr(self, '_cached_base_name'):
if self.type_id == 'draft':
if self.get_state_slug() == 'rfc':
self._cached_base_name = "%s.txt" % self.canonical_name()
if self.is_dochistory():
self._cached_base_name = "%s-%s.txt" % (self.doc.name, self.rev)
else:
self._cached_base_name = "%s-%s.txt" % (self.name, self.rev)
if self.get_state_slug() == 'rfc':
self._cached_base_name = "%s.txt" % self.canonical_name()
else:
self._cached_base_name = "%s-%s.txt" % (self.name, self.rev)
elif self.type_id in ["slides", "agenda", "minutes", "bluesheets", ] and self.meeting_related():
if self.external_url:
# we need to remove the extension for the globbing below to work
self._cached_base_name = self.external_url
else:
self._cached_base_name = "%s.txt" % self.canonical_name() # meeting materials are unversioned at the moment
elif self.type_id == 'review':
self._cached_base_name = "%s.txt" % self.name
else:
if self.rev:
self._cached_base_name = "%s-%s.txt" % (self.canonical_name(), self.rev)
@ -146,6 +153,16 @@ class DocumentInfo(models.Model):
self._cached_file_name = os.path.join(self.get_file_path(), self.get_base_name())
return self._cached_file_name
def revisions(self):
    """
    Return the list of revision strings known for this document.

    Revisions are collected from the document's 'new_revision' events,
    ordered by time and id, without duplicates and skipping events with
    an empty rev.  The document's current rev is appended if no event
    mentioned it.  When called on a DocHistory instance, the parent
    document (self.doc) is consulted instead.
    """
    doc = self.doc if isinstance(self, DocHistory) else self
    new_rev_events = doc.docevent_set.filter(type='new_revision').distinct().order_by("time", "id")
    seen = []
    for event in new_rev_events:
        rev = event.rev
        if not rev or rev in seen:
            continue
        seen.append(rev)
    if doc.rev not in seen:
        seen.append(doc.rev)
    return seen
def href(self, meeting=None):
"""
Returns an url to the document text. This differs from .get_absolute_url(),
@ -290,6 +307,11 @@ class DocumentInfo(models.Model):
else:
return state.name
def is_rfc(self):
    """
    Tell whether this is a saved draft that has reached the 'rfc' state.

    The answer is computed once and memoized on the instance as
    ``_cached_is_rfc``.  Note that the value is the result of an ``and``
    chain, so for an unsaved object it is the (falsy) primary key rather
    than a strict ``False``.
    """
    if hasattr(self, '_cached_is_rfc'):
        return self._cached_is_rfc
    # Evaluate the conditions in the same short-circuit order as an
    # ``a and b and c`` expression, so the states query only runs when needed.
    result = self.pk
    if result:
        result = self.type_id == 'draft'
    if result:
        result = self.states.filter(type='draft',slug='rfc').exists()
    self._cached_is_rfc = result
    return result
def author_list(self):
return ", ".join(email.address for email in self.authors.all())
@ -383,8 +405,35 @@ class DocumentInfo(models.Model):
def all_related_that_doc(self, relationship, related=None):
return list(set([x.target for x in self.all_relations_that_doc(relationship)]))
def replaces(self):
    """Return the set of documents reached through this document's "replaces" relations."""
    relations = self.related_that_doc("replaces")
    return {relation.document for relation in relations}
def replaced_by(self):
return [ r.document for r in self.related_that("replaces") ]
return set([ r.document for r in self.related_that("replaces") ])
def text(self):
    """
    Return the document's file content as unicode text, or None.

    The file named by get_file_name() is read as bytes and decoded as
    UTF-8, falling back to latin-1 when the bytes are not valid UTF-8.
    If the file cannot be read, the IOError is logged and None is returned.
    """
    path = self.get_file_name()
    try:
        with open(path, 'rb') as stream:
            raw = stream.read()
    except IOError as e:
        logger.error("IOError for %s: %s", path, e, exc_info=e)
        return None
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Older documents are not necessarily UTF-8; latin-1 accepts any byte.
        return raw.decode('latin-1')
def htmlized(self):
    """
    Return the document text run through markup(), or None.

    None is returned when text() yields nothing (missing or empty file).
    """
    text = self.text()
    if not text:
        return None
    # The path here has to match the urlpattern for htmlized documents
    return markup(text, path=settings.HTMLIZER_URL_PREFIX)
class Meta:
abstract = True
@ -477,7 +526,6 @@ class Document(DocumentInfo):
return url
return urlreverse('ietf.doc.views_doc.document_main', kwargs={ 'name': name }, urlconf="ietf.urls")
def file_tag(self):
return u"<%s>" % self.filename_with_rev()
@ -497,9 +545,9 @@ class Document(DocumentInfo):
if not hasattr(self, '_canonical_name'):
name = self.name
if self.type_id == "draft" and self.get_state_slug() == "rfc":
a = self.docalias_set.filter(name__startswith="rfc")
a = self.docalias_set.filter(name__startswith="rfc").first()
if a:
name = a[0].name
name = a.name
elif self.type_id == "charter":
from ietf.doc.utils_charter import charter_name_for_group # Imported locally to avoid circular imports
try:
@ -593,8 +641,15 @@ class Document(DocumentInfo):
return mark_safe('<a href="%s">%s-%s</a>' % (self.get_absolute_url(), self.name , self.rev))
def rfc_number(self):
n = self.canonical_name()
return n[3:] if n.startswith("rfc") else None
if not hasattr(self, '_cached_rfc_number'):
self._cached_rfc_number = None
if self.is_rfc():
n = self.canonical_name()
if n.startswith("rfc"):
self._cached_rfc_number = n[3:]
else:
logger.error("Document self.is_rfc() is True but self.canonical_name() is %s" % n)
return self._cached_rfc_number
def ipr(self,states=('posted','removed')):
"""Returns the IPR disclosures against this document (as a queryset over IprDocRel)."""
@ -640,25 +695,36 @@ class Document(DocumentInfo):
event = self.latest_event(type='new_revision')
return event.time
def text(self):
path = self.get_file_name()
try:
with open(path, 'rb') as file:
text = file.read().decode('utf-8')
except (IOError, UnicodeDecodeError) as e:
text = None
logger.error("Failure to read document text for %s", self.name, exc_info=e)
#
return text
def is_dochistory(self):
    """Return False: a Document is the current object, not a DocHistory entry."""
    return False
def fake_history_obj(self, rev):
    """
    Return a DocHistory object for revision `rev` of this document.

    If the DocHistory table has entries for the revision, the earliest
    one (by time) is returned.  Otherwise an unsaved stand-in is built
    from this document's name, type, title, stream and group.  Its time
    is taken from the first 'new_revision' event for the rev, else from
    the first event of any type for the rev, else the epoch.

    XXX TODO: Add missing objects to DocHistory instead
    """
    existing = DocHistory.objects.filter(doc=self, rev=rev).order_by("time").first()
    if existing is not None:
        return existing
    # Nothing stored for this revision: fake one
    rev_events = self.docevent_set.order_by("time", "id").filter(rev=rev)
    first_event = rev_events.filter(type='new_revision').first()
    if first_event is None:
        first_event = rev_events.first()
    if first_event is not None:
        time = first_event.time
    else:
        time = datetime.datetime.fromtimestamp(0)
    return DocHistory(
        name=self.name,
        rev=rev,
        doc=self,
        time=time,
        type=self.type,
        title=self.title,
        stream=self.stream,
        group=self.group,
    )
# def htmlized(self):
# text = self.text()
# html = None
# if text:
# html = markup(text, path="/doc")
# html = re.sub(r'<hr[^>]*/>','', html)
# return html
class RelatedDocHistory(models.Model):
source = models.ForeignKey('DocHistory')
@ -712,6 +778,12 @@ class DocHistory(DocumentInfo):
def docalias_set(self):
return self.doc.docalias_set
def is_dochistory(self):
    """Return True: this object is a DocHistory entry, not the current Document."""
    return True
def related_ipr(self):
    """Delegate to the parent document's related_ipr() and return its result."""
    parent = self.doc
    return parent.related_ipr()
class Meta:
verbose_name = "document history"
verbose_name_plural = "document histories"

View file

@ -197,7 +197,7 @@ def rfclink(string):
URL for that RFC.
"""
string = str(string);
return "https://tools.ietf.org/html/rfc" + string;
return "/doc/html/rfc" + string;
@register.filter(name='urlize_ietf_docs', is_safe=True, needs_autoescape=True)
def urlize_ietf_docs(string, autoescape=None):

View file

@ -493,6 +493,15 @@ Man Expires September 22, 2015 [Page 3]
self.assertTrue("Show full document text" in unicontent(r))
self.assertFalse("Deimos street" in unicontent(r))
r = self.client.get(urlreverse("ietf.doc.views_doc.document_html", kwargs=dict(name=draft.name)))
self.assertEqual(r.status_code, 200)
self.assertTrue("Versions:" in unicontent(r))
self.assertTrue("Deimos street" in unicontent(r))
q = PyQuery(r.content)
self.assertEqual(len(q('.rfcmarkup pre')), 4)
self.assertEqual(len(q('span.h1')), 2)
self.assertEqual(len(q('a[href]')), 116)
# expired draft
draft.set_state(State.objects.get(type="draft", slug="expired"))

View file

@ -59,13 +59,14 @@ urlpatterns = [
url(r'^stats/newrevisiondocevent/data/?$', views_stats.chart_data_newrevisiondocevent),
url(r'^stats/person/(?P<id>[0-9]+)/drafts/conf/?$', views_stats.chart_conf_person_drafts),
url(r'^stats/person/(?P<id>[0-9]+)/drafts/data/?$', views_stats.chart_data_person_drafts),
url(r'^html/%(name)s(?:-%(rev)s)?(\.txt|\.html)?$' % settings.URL_REGEXPS, views_doc.document_html),
url(r'^all/$', views_search.index_all_drafts),
url(r'^active/$', views_search.index_active_drafts),
url(r'^select2search/(?P<model_name>(document|docalias))/(?P<doc_type>draft)/$', views_search.ajax_select2_search_docs),
url(r'^%(name)s/(?:%(rev)s/)?$' % settings.URL_REGEXPS, views_doc.document_main),
url(r'^%(name)s/(?:%(rev)s/)?bibtex/$' % settings.URL_REGEXPS, views_doc.document_bibtex),
url(r'^%(name)s(?:/%(rev)s)?/$' % settings.URL_REGEXPS, views_doc.document_main),
url(r'^%(name)s(?:/%(rev)s)?/bibtex/$' % settings.URL_REGEXPS, views_doc.document_bibtex),
url(r'^%(name)s/history/$' % settings.URL_REGEXPS, views_doc.document_history),
url(r'^%(name)s/writeup/$' % settings.URL_REGEXPS, views_doc.document_writeup),
url(r'^%(name)s/email/$' % settings.URL_REGEXPS, views_doc.document_email),

View file

@ -1,7 +1,5 @@
# Copyright The IETF Trust 2011, All Rights Reserved
from django.conf import settings
from ietf.doc import views_charter, views_doc
from ietf.utils.urls import url
@ -17,5 +15,5 @@ urlpatterns = [
url(r'^ballotwriteupnotes/$', views_charter.ballot_writeupnotes),
url(r'^approve/$', views_charter.approve),
url(r'^submit/(?:(?P<option>initcharter|recharter)/)?$', views_charter.submit),
url(r'^withmilestones-%(rev)s.txt$' % settings.URL_REGEXPS, views_charter.charter_with_milestones_txt),
url(r'^withmilestones-(?P<rev>[0-9-]{2,5}).txt$', views_charter.charter_with_milestones_txt),
]

View file

@ -12,6 +12,8 @@ from django.forms import ValidationError
from django.utils.html import escape
from django.core.urlresolvers import reverse as urlreverse
import debug # pyflakes:ignore
from ietf.doc.models import Document, DocHistory, State, DocumentAuthor, DocHistoryAuthor
from ietf.doc.models import DocAlias, RelatedDocument, RelatedDocHistory, BallotType, DocReminder
from ietf.doc.models import DocEvent, ConsensusDocEvent, BallotDocEvent, NewRevisionDocEvent, StateDocEvent
@ -705,3 +707,130 @@ def get_search_cache_key(params):
key = "doc:document:search:" + hashlib.sha512(json.dumps(kwargs, sort_keys=True)).hexdigest()
return key
def label_wrap(label, items, joiner=',', max=50):
    """
    Lay out `items` as wrapped text lines introduced by "<label>: ".

    Items are separated by `joiner` plus a space.  When adding the next
    item would make the current line longer than `max` characters, the
    line is closed (with a trailing `joiner`) and a new line is started,
    indented to align under the first item.  Returns a list of strings;
    an empty `items` gives an empty list.
    """
    if not items:
        return []
    separator = joiner + ' '
    indent = ' ' * len(label + ': ')
    wrapped = []
    current = '%s: %s' % (label, items[0])
    for entry in items[1:]:
        if len(current) + len(separator) + len(entry) <= max:
            current = current + separator + entry
            continue
        wrapped.append(current + joiner)
        current = indent + entry
    wrapped.append(current)
    return wrapped
def join_justified(left, right, width=72):
    """
    Combine two columns of text lines into one list of `width`-wide lines.

    Each output line is a left entry, a space, and the matching right
    entry right-justified so the line is `width` characters wide.  The
    shorter column is padded with empty strings.  When a left and right
    entry do not fit on one line together, the right entry (and all
    right entries after it) is pushed down one row.

    `left` and `right` are lists of strings and are not modified.
    Two empty columns give an empty list.
    """
    count = max(len(left), len(right))
    left = left + [''] * (count - len(left))
    right = right + [''] * (count - len(right))
    lines = []
    i = 0
    # Test the bound before indexing so that empty input yields [] instead
    # of an IndexError.
    while i < count:
        lhs = left[i]
        rhs = right[i]
        if len(lhs) + 1 + len(rhs) > width:
            # No room for both: leave the right side blank on this row and
            # shift the remaining right-hand entries down by one.
            left = left + ['']
            right = right[:i] + [''] + right[i:]
            rhs = right[i]
            count += 1
        lines.append(lhs + ' ' + rhs.rjust(width - len(lhs) - 1))
        i += 1
    return lines
def build_doc_meta_block(doc, path):
    """
    Build the meta-information text block for an htmlized document.

    `doc` is a Document or DocHistory object and `path` is the URL prefix
    used for the generated document links.  For drafts, the left column
    holds 'From', 'Versions', 'Obsoleted by' and 'Updated by' entries and
    the right column the standards status, activity/state notes and the
    IPR and errata markers.  For charters only the versions are listed.
    The columns are combined with join_justified(), decorated with HTML
    links by add_markup(), and returned as one newline-joined string.
    """
    def add_markup(path, doc, lines):
        """Turn names, RFC numbers, revisions and markers in `lines` into HTML links (in place)."""
        is_hst = doc.is_dochistory()
        rev = doc.rev
        if is_hst:
            doc = doc.doc
        name = doc.name
        rfcnum = doc.rfc_number()
        errata_url = settings.RFC_EDITOR_ERRATA_URL.format(rfc_number=rfcnum) if not is_hst else ""
        ipr_url = "%s?submit=draft&amp;id=%s" % (urlreverse('ietf.ipr.views.search'), name)
        for i, line in enumerate(lines):
            # add draft links
            line = re.sub(r'\b(draft-[-a-z0-9]+)\b', r'<a href="%s/\g<1>">\g<1></a>'%(path, ), line)
            # add rfcXXXX to RFC links
            line = re.sub(r' (rfc[0-9]+)\b', r' <a href="%s/\g<1>">\g<1></a>'%(path, ), line)
            # add XXXX to RFC links
            line = re.sub(r' ([0-9]{3,5})\b', r' <a href="%s/rfc\g<1>">\g<1></a>'%(path, ), line)
            # add draft revision links
            line = re.sub(r' ([0-9]{2})\b', r' <a href="%s/%s-\g<1>">\g<1></a>'%(path, name, ), line)
            if rfcnum:
                # add errata link
                line = re.sub(r'Errata exist', '<a class="text-warning" href="%s">Errata exist</a>'%(errata_url, ), line)
            if is_hst or not rfcnum:
                # make current draft rev bold
                line = re.sub(r'>(%s)<'%rev, r'><b>\g<1></b><', line)
            line = re.sub(r'IPR declarations', '<a class="text-warning" href="%s">IPR declarations</a>'%(ipr_url, ), line)
            line = line.replace(r'[txt]', '[<a href="%s">txt</a>]' % doc.href())
            lines[i] = line
        return lines
    #
    now = datetime.datetime.now()
    draft_state = doc.get_state('draft')
    block = ''
    meta = {}
    if doc.type_id == 'draft':
        revisions = []
        ipr = doc.related_ipr()
        if ipr:
            meta['ipr'] = [ "IPR declarations" ]
        if doc.is_rfc() and not doc.is_dochistory():
            if not doc.name.startswith('rfc'):
                meta['from'] = [ "%s-%s"%(doc.name, doc.rev) ]
            meta['errata'] = [ "Errata exist" ] if doc.tags.filter(slug='errata').exists() else []
            meta['obsoletedby'] = [ alias.document.rfc_number() for alias in doc.related_that('obs') ]
            meta['obsoletedby'].sort()
            meta['updatedby'] = [ alias.document.rfc_number() for alias in doc.related_that('updates') ]
            meta['updatedby'].sort()
            meta['stdstatus'] = [ doc.std_level.name ]
        else:
            dd = doc.doc if doc.is_dochistory() else doc
            # Replaced drafts are shown in parentheses, padded with spaces
            # according to the length of their name.
            revisions += [ '(%s)%s'%(d.name, ' '*(2-((len(d.name)-1)%3))) for d in dd.replaces() ]
            revisions += doc.revisions()
            if doc.is_dochistory() and doc.doc.is_rfc():
                revisions += [ doc.doc.canonical_name() ]
            else:
                revisions += [ d.name for d in doc.replaced_by() ]
            meta['versions'] = revisions
            # is_dochistory is a method and must be called; testing the bound
            # method itself is always true, which disabled this branch.  The
            # extra checks avoid failing on a missing state or expiry time.
            if (not doc.is_dochistory()
                    and draft_state and draft_state.slug == 'active'
                    and doc.expires and now > doc.expires):
                # Active past expiration date
                meta['active'] = [ 'Document is active' ]
                meta['state' ] = [ doc.friendly_state() ]
            intended_std = doc.intended_std_level if doc.intended_std_level else None
            if intended_std:
                if intended_std.slug in ['ps', 'ds', 'std']:
                    meta['stdstatus'] = [ "Standards Track" ]
                else:
                    meta['stdstatus'] = [ intended_std.name ]
    elif doc.type_id == 'charter':
        meta['versions'] = doc.revisions()
    #
    # Add labels and line wrapping to the items that need it.
    if 'versions' in meta:
        meta['versions'] = label_wrap('Versions', meta['versions'], joiner="")
    for label in ['Obsoleted by', 'Updated by', 'From' ]:
        item = label.replace(' ','').lower()
        if item in meta and meta[item]:
            meta[item] = label_wrap(label, meta[item])
    #
    left = []
    right = []
    for item in [ 'from', 'versions', 'obsoletedby', 'updatedby', ]:
        if item in meta and meta[item]:
            left += meta[item]
    for item in ['stdstatus', 'active', 'state', 'ipr', 'errata', ]:
        if item in meta and meta[item]:
            right += meta[item]
    lines = join_justified(left, right)
    block = '\n'.join(add_markup(path, doc, lines))
    #
    return block

View file

@ -50,7 +50,7 @@ from ietf.doc.utils import ( add_links_in_new_revision_events, augment_events_wi
can_adopt_draft, get_chartering_type, get_document_content, get_tags_for_stream_id,
needed_ballot_positions, nice_consensus, prettify_std_name, update_telechat, has_same_ballot,
get_initial_notify, make_notify_changed_event, crawl_history, default_consensus,
add_events_message_info, get_unicode_document_content)
add_events_message_info, get_unicode_document_content, build_doc_meta_block)
from ietf.community.utils import augment_docs_with_tracking_info
from ietf.group.models import Role
from ietf.group.utils import can_manage_group_type, can_manage_materials
@ -67,9 +67,13 @@ from ietf.review.models import ReviewRequest
from ietf.review.utils import can_request_review_of_doc, review_requests_to_list_for_docs
from ietf.review.utils import no_review_from_teams_on_doc
def render_document_top(request, doc, tab, name):
tabs = []
tabs.append(("Document", "document", urlreverse("ietf.doc.views_doc.document_main", kwargs=dict(name=name)), True, None))
tabs.append(("Status", "status", urlreverse("ietf.doc.views_doc.document_main", kwargs=dict(name=name)), True, None))
if doc.type_id in ["draft", "charter", ]:
tabs.append(("Document", "document", urlreverse("ietf.doc.views_doc.document_html", kwargs=dict(name=name)), True, None))
ballot = doc.latest_event(BallotDocEvent, type="created_ballot")
if doc.type_id in ("draft","conflrev", "statchg"):
@ -141,7 +145,7 @@ def document_main(request, name, rev=None):
# set this after we've found the right doc instance
group = doc.group
top = render_document_top(request, doc, "document", name)
top = render_document_top(request, doc, "status", name)
telechat = doc.latest_event(TelechatDocEvent, type="scheduled_for_telechat")
@ -597,9 +601,39 @@ def document_main(request, name, rev=None):
other_reviews=other_reviews,
))
raise Http404
raise Http404("Document not found: %s" % (name + ("-%s"%rev if rev else "")))
def document_html(request, name, rev=None):
    """
    Render the htmlized text of a document.

    `name` and the optional `rev` come from the URL.  The document is
    looked up through its aliases.  If that fails and a rev was given,
    "name-rev" is tried as the full name, for names that themselves end
    in something that looks like a revision.  Raises Http404 when no
    document matches or when its text file does not exist.
    """
    # Accept zero-padded RFC names such as rfc0791
    if name.startswith('rfc0'):
        name = "rfc" + name[3:].lstrip('0')
    if name.startswith('review-') and re.search(r'-\d\d\d\d-\d\d$', name):
        name = "%s-%s" % (name, rev)
    docs = Document.objects.filter(docalias__name=name)
    if rev and not docs.exists():
        # handle some special cases, like draft-ietf-tsvwg-ieee-802-11
        name = '%s-%s' % (name, rev)
        rev = None
        docs = Document.objects.filter(docalias__name=name)
    try:
        doc = docs.get()
    except Document.DoesNotExist:
        # An unknown name is a client error, not a server error
        raise Http404("Document not found: %s" % name)
    if not os.path.exists(doc.get_file_name()):
        raise Http404("Document not found: %s" % doc.get_base_name())

    top = render_document_top(request, doc, "document", name)
    if not rev and not name.startswith('rfc'):
        rev = doc.rev
    if rev:
        # Show the requested revision through a history object, real or faked
        docs = DocHistory.objects.filter(doc=doc, rev=rev)
        if docs.exists():
            doc = docs.first()
        else:
            doc = doc.fake_history_obj(rev)
    if doc.type_id in ['draft',]:
        doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX)

    return render(request, "doc/document_html.html", {"doc":doc, "top":top, "navbar_mode":"navbar-static-top", })
def check_doc_email_aliases():
pattern = re.compile('^expand-(.*?)(\..*?)?@.*? +(.*)$')
good_count = 0

View file

@ -61,7 +61,7 @@ ALLOWED_HOSTS = [".ietf.org", ".ietf.org.", "209.208.19.216", "4.31.198.44", ]
TOOLS_SERVER = 'tools.' + IETF_DOMAIN
TOOLS_SERVER_URL = 'https://' + TOOLS_SERVER
TOOLS_ID_PDF_URL = TOOLS_SERVER_URL + '/pdf/'
TOOLS_ID_HTML_URL = TOOLS_SERVER_URL + '/html/'
TOOLS_ID_HTML_URL = '/doc/html/'
# Override this in the settings_local.py file:
SERVER_EMAIL = 'Django Server <django-project@' + TOOLS_SERVER + '>'
@ -524,12 +524,14 @@ GROUP_STATES_WITH_EXTRA_PROCESSING = ["sub-pub", "rfc-edit", ]
DATE_FORMAT = "Y-m-d"
DATETIME_FORMAT = "Y-m-d H:i T"
DRAFT_NAMES_WITH_DOT = "(draft-[a-z-]+-(ion-sig-uni4.0|pilc-2.5g3g|trade-iotp-v1.0-[a-z]+|msword-template-v2.0))"
URL_REGEXPS = {
"acronym": r"(?P<acronym>[-a-z0-9]+)",
"charter": r"(?P<name>charter-[-a-z0-9]+)",
"date": r"(?P<date>\d{4}-\d{2}-\d{2})",
"name": r"(?P<name>[A-Za-z0-9._+-]+)",
"rev": r"(?P<rev>[0-9-]+)",
"name": r"(?P<name>([A-Za-z0-9_+-]+?|%s))" % DRAFT_NAMES_WITH_DOT,
"rev": r"(?P<rev>[0-9-]{2})",
"owner": r"(?P<owner>[-A-Za-z0-9\'+._]+@[A-Za-z0-9-._]+)",
"schedule_name": r"(?P<name>[A-Za-z0-9-:_]+)",
}
@ -595,9 +597,22 @@ CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
'LOCATION': '127.0.0.1:11211',
}
'OPTIONS': {
'MAX_ENTRIES': 10000, # 10,000
},
},
'htmlized': {
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
'LOCATION': '/var/cache/datatracker/htmlized',
'OPTIONS': {
'MAX_ENTRIES': 100000, # 100,000
},
},
}
HTMLIZER_VERSION = 1
HTMLIZER_URL_PREFIX = "/doc/html"
IPR_EMAIL_FROM = 'ietf-ipr@ietf.org'
AUDIO_IMPORT_EMAIL = ['agenda@ietf.org']
IANA_EVAL_EMAIL = "drafts-eval@icann.org"
@ -613,6 +628,7 @@ RFC_EDITOR_SYNC_PASSWORD="secret"
RFC_EDITOR_SYNC_NOTIFICATION_URL = "https://www.rfc-editor.org/parser/parser.php"
RFC_EDITOR_QUEUE_URL = "https://www.rfc-editor.org/queue2.xml"
RFC_EDITOR_INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml"
RFC_EDITOR_ERRATA_URL = "https://www.rfc-editor.org/errata_search.php?rfc={rfc_number}&amp;rec_status=0"
# NomCom Tool settings
ROLODEX_URL = ""
@ -889,7 +905,15 @@ if SERVER_MODE != 'production':
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
}
},
'htmlized': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
'LOCATION': '/var/cache/datatracker/htmlized',
'OPTIONS': {
'MAX_ENTRIES': 1000,
},
}
}
SESSION_ENGINE = "django.contrib.sessions.backends.db"

View file

@ -19,7 +19,18 @@ TEMPLATES[0]['OPTIONS']['loaders'] = (
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
}
'OPTIONS': {
'MAX_ENTRIES': 10000,
},
},
'htmlized': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
'LOCATION': '/var/cache/datatracker/htmlized',
'OPTIONS': {
'MAX_ENTRIES': 100000,
},
},
}
PASSWORD_HASHERS = ( 'django.contrib.auth.hashers.MD5PasswordHasher', )

View file

@ -6,7 +6,7 @@
*/
/* Padding for static navbar; see http://getbootstrap.com/components/#navbar-fixed-top */
body { padding-top: 70px; }
body { padding-top: 72px; }
/* Browse Happy prompt */
.browsehappy {
@ -101,7 +101,12 @@ body { padding-top: 70px; }
h2.anchor-target:before,
h3.anchor-target:before,
h4.anchor-target:before,
.h1.anchor-target:before,
.h2.anchor-target:before,
.h3.anchor-target:before,
.h4.anchor-target:before,
tr th.anchor-target:before,
span.anchor-target:before,
div.anchor-target:before {
content: '';
display: block;
@ -110,7 +115,10 @@ div.anchor-target:before {
height: 65px;
margin-top: -65px;
}
div.anchor-target { z-index: 0; }
div.anchor-target {
z-index: 0;
height: 0px;
}
/* Make the panel title font normally large */
.panel-title { font-size: 14px }
@ -715,3 +723,107 @@ blockquote {
padding-top: 4px;
padding-right: 4px;
}
.symbol-link,
.symbol-link a:link,
.symbol-link a:visited {
padding-right: 0.5em;
}
.rfcmarkup div {
margin-top: 1em;
}
.rfcmarkup pre {
font-size: 10.5pt;
margin-right: 0;
margin-left: 0;
border: 0;
margin: 0;
padding: 0;
padding-bottom: 1em;
background-color: white;
line-height: 1.12;
}
.rfcmarkup pre span.h1,
.rfcmarkup pre span.h2,
.rfcmarkup pre span.h3,
.rfcmarkup pre span.h4,
.rfcmarkup pre span.h5,
.rfcmarkup pre span.h6 {
font-weight: bold;
line-height: 0pt;
display: inline;
white-space: pre;
font-family: monospace;
font-size: 1em;
font-weight: bold;
}
.rfcmarkup pre span.invisible {
text-decoration: none;
color: white;
}
.rfcmarkup pre a { text-decoration: underline; }
.rfcmarkup pre .grey,
.rfcmarkup pre .grey a:link,
.rfcmarkup pre .grey a:visited {
color: #777;
}
.rfcmarkup pre.meta-info {
padding: 0.5em;
margin-left: -0.5em;
margin-bottom: 0.5em;
background-color: #f8f8f8;
border: 1px solid #e0e0e0;
width: 89ex;
}
.rfcmarkup hr {
margin: 0;
width: 80ex;
}
.rfcmarkup .text-warning,
.rfcmarkup a.text-warning,
.rfcmarkup a.text-warning:focus,
.rfcmarkup a.text-warning:active,
.rfcmarkup a.text-warning:visited,
.rfcmarkup a.text-warning:hover
{
color: #d9534f; /* brand-danger colour */
}
@media print {
.rfcmarkup .noprint { display: none; }
.rfcmarkup a,
.rfcmarkup a:visited,
.rfcmarkup pre a,
.rfcmarkup pre a:visited {
text-decoration: none;
}
.rfcmarkup a[href]:after {
content: "";
}
.rfcmarkup abbr[title]:after {
content: "";
}
.rfcmarkup pre {
font-size: 10pt;
margin-top: 0;
margin-bottom: 0;
margin-left: auto;
margin-right: auto;
}
.rfcmarkup .newpage {
page-break-before: always;
}
}

View file

@ -430,6 +430,8 @@ def ensure_person_email_info_exists(name, email):
email.active = active
email.person = person
if email.time is None:
email.time = datetime.datetime.now()
email.save()
return email

View file

@ -44,7 +44,7 @@
</head>
<body {% block bodyAttrs %}{%endblock%} data-group-menu-data-url="{% url 'ietf.group.views_ajax.group_menu_data' %}">
<nav class="navbar {% if server_mode and server_mode != "production" %}navbar-default{% else %}navbar-inverse{% endif %} navbar-fixed-top">
<nav class="navbar {% if server_mode and server_mode != "production" %}navbar-default{% else %}navbar-inverse{% endif %} {% if navbar_mode %}{{ navbar_mode }}{% else %}navbar-fixed-top{% endif %}">
<div class="container-fluid">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-collapse">
@ -57,7 +57,7 @@
<img alt="IETF Logo" src="{% static 'ietf/images/ietflogo-small-transparent.png' %}">
{% if not user.is_authenticated %}
{% if server_mode and server_mode != "production" %}
<b><i>Development mode</i></b>
<b><i><small>Development mode</small></i></b>
{% else %}
Datatracker
{% endif %}
@ -115,6 +115,7 @@
</div>
{% endif %}
{% block footer %}
<hr>
<div class="col-md-12">
<div class="text-center padded">
@ -131,6 +132,7 @@
</div>
</div>
<footer class="row">
<div class="col-md-12">
<div class="text-center">
@ -151,8 +153,12 @@
</div>
</div>
</footer>
{% include "debug.html" %}
{% endblock %}
</div>
{% include "debug.html" %}
<script src="{% static 'jquery/jquery.min.js' %}"></script>
{% comment %}
<!-- Remove the *-nojs attributes if we are running js. This depends on jQuery's removeClass(): -->

View file

@ -23,7 +23,7 @@
institution = {% templatetag openbrace %}Internet Engineering Task Force{% templatetag closebrace %},
publisher = {% templatetag openbrace %}Internet Engineering Task Force{% templatetag closebrace %},
note = {% templatetag openbrace %}Work in Progress{% templatetag closebrace %},
url = {% templatetag openbrace %}https://tools.ietf.org/html/{{doc.name}}-{{doc.rev}}{% templatetag closebrace %},{% endif %}
url = {% templatetag openbrace %}https://datatracker.ietf.org/doc/html/{{doc.name}}-{{doc.rev}}{% templatetag closebrace %},{% endif %}
author = {% templatetag openbrace %}{% for entry in doc.authors.all %}{% with entry.person as author %}{{author.name}}{% endwith %}{% if not forloop.last %} and {% endif %}{% endfor %}{% templatetag closebrace %},
title = {% templatetag openbrace %}{% templatetag openbrace %}{{doc.title}}{% templatetag closebrace %}{% templatetag closebrace %},
pagetotal = {{ doc.pages }},

View file

@ -3,6 +3,7 @@
{% load origin %}
{% load staticfiles %}
{% load ietf_filters %}
{% load cache %}
{% block pagehead %}
<link rel="alternate" type="application/atom+xml" title="Document changes" href="/feed/document-changes/{{ name }}/">
@ -547,10 +548,24 @@
</div>
{% if doc.get_state_slug == "active" or doc.get_state_slug == "rfc" %}
{{ content|safe }}
{% if split_content %}
{{ content|safe }}
<a class="btn btn-default btn-block" href="?include_text=1"><span class="fa fa-caret-down"></span> Show full document text</a>
{% else %}
<div class="col-md-12">
<div class="col-md-2"></div>
<div class="col-md-8 rfcmarkup">
{% with 1209600 as two_weeks %}
{% cache two_weeks htmlized doc.name doc.rev using="htmlized" %}
<div>
{{ doc.htmlized|default:"Generation of htmlized text failed"|safe }}
</div>
{% endcache %}
{% endwith %}
</div>
<div class="col-md-2"></div>
</div>
{% endif %}
{% else %}
@ -559,7 +574,8 @@
<div class="panel-heading">
This Internet-Draft is no longer active. A copy of
the expired Internet-Draft can be found at<br/>
<a href="{{doc.href}}">{{doc.href}}</a>
{% url 'ietf.doc.views_doc.document_html' name=doc.name rev=doc.rev as html_url %}
<a href="{{ html_url }}">{{ html_url }}</a>
</div>
<div class="panel-body">
<p>

View file

@ -0,0 +1,69 @@
{% extends "base.html" %}
{# Copyright The IETF Trust 2016, All Rights Reserved #}
{% load origin %}
{% load staticfiles %}
{% load ietf_filters %}
{% load cache %}
{% block pagehead %}
<link rel="alternate" type="application/atom+xml" title="Document changes" href="/feed/document-changes/{{ doc.name }}/">
<meta name="description" content="{{ doc.title }} {% if doc.get_state_slug == "rfc" %}(RFC {{ rfc_number }}{% if published %}, {{ published.time|date:"F Y" }}{% endif %}{% if obsoleted_by %}; obsoleted by {{ obsoleted_by|join:", " }}{% endif %}){% else %}(Internet-Draft, {{ doc.time|date:"Y" }}){% endif %}">
<script src="{% static 'd3/d3.min.js' %}"></script>
<script src="{% static 'jquery/jquery.min.js' %}"></script>
{% endblock %}
{% block morecss %}
.inline { display: inline; }
{% endblock %}
{% block title %}
{% if doc.get_state_slug == "rfc" %}
{{ doc.canonical_name }}
{% else %}
{{ doc.name }}-{{ doc.rev }}
{% endif %}
{% endblock %}
{% block bodyAttrs %}style="padding-top: 0;"{% endblock %}
{% block content %}
{% origin %}
<div class="hidden-print">
{{ top | safe }}
</div>
{# {% include "doc/revisions_list.html" %} #}
<div class="col-md-2"></div>
<div class="col-md-8 rfcmarkup">
{% if doc.meta %}
<div class="hidden-print">
<pre class="meta-info">{{ doc.meta|safe }}</pre>
</div>
{% endif %}
{% comment %}
{% if doc.is_dochistory %}
{% if doc.rev != doc.doc.rev %}
<pre class="meta-info alert-warning text-center">A newer version of the document below exists</pre>
{% elif doc.doc.is_rfc %}
<pre class="meta-info alert-info text-center">The draft below has been published as <a href="{% url 'ietf.doc.views_doc.document_html' name=doc.doc.canonical_name %}">RFC {{doc.doc.rfc_number}}</a></pre>
{% endif %}
{% endif %}
{% endcomment %}
{% with 1209600 as two_weeks %}
{% cache two_weeks htmlized doc.name doc.rev using="htmlized" %}
<div>
{{ doc.htmlized|default:"Generation of htmlized text failed"|safe }}
</div>
{% endcache %}
{% endwith %}
</div>
<div class="col-md-1"></div>
<div class="col-md-1"></div>
{% endblock %}
{% block footer %}
<div></div>
{% endblock %}

View file

@ -1,6 +1,6 @@
{# Copyright The IETF Trust 2015, All Rights Reserved #}{% load origin %}{% origin %}
{% load ietf_filters %}
<h1>{{ doc.title }}<br><small>{{ name }}</small></h1>
<h2>{{ doc.title }}<br><small>{{ name }}</small></h2>
<ul class="nav nav-tabs" role="tablist">
{% for name, t, url, active, tooltip in tabs %}

View file

@ -24,8 +24,8 @@
<td class="doc">
<div>
<a href="{{ doc.get_absolute_url }}">{% if doc.get_state_slug == "rfc" %}RFC {{ doc.rfc_number }}{% else %}{{ doc.name }}-{{ doc.rev }}{% endif %}</a>
<a class="symbol-link" href="{{ doc.get_absolute_url }}"><span class="fa fa-info-circle fa-lg"></span></a>
<a href="{% url 'ietf.doc.views_doc.document_html' name=doc.canonical_name %}">{% if doc.get_state_slug == "rfc" %}RFC {{ doc.rfc_number }}{% else %}{{ doc.name }}-{{ doc.rev }}{% endif %}</a>
{% if doc.get_state_slug == "rfc" and "draft" in doc.name %}
<i>(was {{ doc.name }})</i>
{% endif %}

View file

@ -20,7 +20,7 @@
{% if group.type_id == "wg" %}
<p class="help-block">You can see the default Working Group I-D State Diagram
in <a href="https://tools.ietf.org/html/rfc6174#section-4.1">Section 4.1 of RFC6174</a>.</p>
in <a href="/doc/html/rfc6174#section-4.1">Section 4.1 of RFC6174</a>.</p>
{% endif %}
<h3>States</h3>

View file

@ -12,7 +12,7 @@ Diff from previous version:
{{rfcdiff_base_url}}?url2={{ submission.name }}-{{ submission.rev }}
Please note that it may take a couple of minutes from the time of submission
until the htmlized version and diff are available at tools.ietf.org.
until the diff is available at tools.ietf.org.
IETF Secretariat.
{% endautoescape %}

View file

@ -11,7 +11,7 @@ Group: {{ group }}
Pages: {{ submission.pages }}
URL: https://www.ietf.org/internet-drafts/{{ submission.name }}-{{ submission.rev }}.txt
Status: https://datatracker.ietf.org/doc/{{ submission.name }}/
Htmlized: https://tools.ietf.org/html/{{ submission.name }}-{{ submission.rev }}
Htmlized: https://datatracker.ietf.org/doc/html/{{ submission.name }}-{{ submission.rev }}
{% if submission.rev != "00" %}Diff: {{rfcdiff_base_url}}?url2={{ submission.name }}-{{ submission.rev }}{% endif %}
Abstract:
@ -20,7 +20,7 @@ Abstract:
{{ submission.note|default:"" }}
Please note that it may take a couple of minutes from the time of submission
until the htmlized version and diff are available at tools.ietf.org.
until the diff is available at tools.ietf.org.
The IETF Secretariat
{% endautoescape %}

View file

@ -16,14 +16,14 @@ The IETF datatracker status page for this draft is:
https://datatracker.ietf.org/doc/{{ submission.name }}/
There's also a htmlized version available at:
https://tools.ietf.org/html/{{ submission.name }}-{{ submission.rev }}
https://datatracker.ietf.org/doc/html/{{ submission.name }}-{{ submission.rev }}
{% if submission.rev != "00" %}
A diff from the previous version is available at:
{{settings.RFCDIFF_BASE_URL}}?url2={{ submission.name }}-{{ submission.rev }}
{% endif %}
Please note that it may take a couple of minutes from the time of submission
until the htmlized version and diff are available at tools.ietf.org.
until the diff is available at tools.ietf.org.
Internet-Drafts are also available by anonymous FTP at:
ftp://ftp.ietf.org/internet-drafts/

385
ietf/utils/rfcmarkup.py Normal file
View file

@ -0,0 +1,385 @@
import re
import cgi
import urllib
def markup(text, path=".", script="", extra=""):
    """Return *text* (a plain-text RFC or Internet-Draft) as htmlized markup.

    The text is normalized (line endings, trailing whitespace, page breaks,
    common indentation), HTML-escaped and wrapped in ``<pre>``.  A sequence
    of regular-expression passes then adds hyperlinks and anchors for URLs,
    draft names, RFC/BCP numbers, bibliographic references, table-of-contents
    entries, page numbers, section headings and appendix headings.  The
    passes are order dependent: later passes match markup produced by
    earlier ones.

    Works on both Python 2 and Python 3.

    Args:
        text: The document as plain text.
        path: When truthy, generated ``<script>?rfc=N`` / ``?bcp=N`` /
            ``?std=N`` links are rewritten to ``<path>/rfcN`` (etc.) and
            ``<script>?draft=NAME`` links to ``<path>/NAME``.
        script: Text placed before the ``?`` in generated document links.
        extra: Text placed between the ``?`` and the ``rfc=`` / ``bcp=`` /
            ``draft=`` key in generated links.  Links built with a non-empty
            *extra* are not matched by the *path* rewrite.

    Returns:
        The marked-up document as a string of HTML.
    """
    # cgi.escape() and urllib.quote_plus() do not exist on current Python 3;
    # import the equivalents locally so the function runs on either version.
    try:  # Python 3
        from html import escape as html_escape
        from urllib.parse import quote_plus
    except ImportError:  # Python 2
        from cgi import escape as html_escape
        from urllib import quote_plus

    def sub_until_stable(pattern, repl, string):
        # Re-apply a substitution until it no longer changes the string.
        while True:
            changed = re.sub(pattern, repl, string)
            if changed == string:
                return changed
            string = changed

    def looks_like_toc_entry(fragment):
        # Table-of-contents lines carry dot leaders ("....." or ". . . .").
        return " . . " in fragment or "..." in fragment

    # ------------------------------------------------------------------------
    # Start of markup handling

    # Convert \r which is not followed or preceded by a \n to \n
    # (in case this is a mac document)
    text = re.sub(r"([^\n])\r([^\n])", r"\g<1>\n\g<2>", text)
    # Strip \r (in case this is a ms format document):
    text = text.replace("\r", "")

    # -------------
    # Normalization

    # Remove whitespace at the end of lines
    text = re.sub(r"[\t ]+\n", "\n", text)
    # Remove whitespace (including formfeeds) at the end of the document.
    # (Trailing formfeeds will result in trailing blank pages.)
    text = re.sub(r"[\t \r\n\f]+$", "\n", text)
    text = text.expandtabs()
    # Remove extra blank lines at the start of the document
    text = re.sub(r"^\n*", "", text, count=1)

    # Fix up page breaks:
    # \f should always be preceded and followed by \n
    text = re.sub(r"([^\n])\f", r"\g<1>\n\f", text)
    text = re.sub(r"\f([^\n])", r"\f\n\g<1>", text)
    # Limit the number of blank lines after page break
    text = re.sub(r"\f\n+", "\f\n", text)
    # [Page nn] should be followed by \n\f\n
    text = re.sub(r"(?i)(\[Page [0-9ivxlc]+\])[\n\f\t ]*(\n *[^\n\f\t ])", r"\g<1>\n\f\g<2>", text)

    # Normalize indentation: strip the smallest indentation shared by all
    # non-blank lines (capped at 72); it is restored at the very end.
    linestarts = re.findall(r"(?m)^([ ]*)\S", text)
    prefixlen = min([72] + [len(start) for start in linestarts])
    if prefixlen:
        text = re.sub("\n" + (" " * prefixlen), "\n", text)

    # reference name tag markup: collect a title per reference tag, used
    # below as the title= attribute of links to that reference.
    reference = {}
    # TODO: locate the start of the References section as the first
    # reference definition after the last reference usage (left incomplete
    # by the original author); for now the first "References" heading is used.
    ref_start = re.search(r"(?im)^(\d+(\.\d+)*)(\.?[ ]+)(References?|Normative References?|Informative References?)", text)
    ref_text = text[ref_start.end():] if ref_start else text
    ref_defs = re.findall(r"(?sm)^( *\n *)\[([-\w.]+?)\]( +)(.*?)(\n *)$", ref_text)
    for ref_def in ref_defs:
        tag, entry = ref_def[1], ref_def[3]
        title_match = re.search(r'(?sm)^(.*?("[^"]+?").+?|.*?(,[^,]+?,)[^,]+?)$', entry)
        if title_match:
            reftitle = title_match.group(2) or title_match.group(3).strip("[ ,]+")
            # Get rid of page break information inside the title
            reftitle = re.sub(r"(?s)\n\n\S+.*\n\n", "", reftitle)
            reftitle = html_escape(reftitle, quote=True)
            reftitle = re.sub(r"[\n\t ]+", " ", reftitle)  # Remove newlines and tabs
            # A "title" mentioning page/section/appendix is not a real title.
            reference[tag] = reftitle if not re.search(r'(?i)(page|section|appendix)[- ]', reftitle) else ''

    # -------------
    # escape any html significant characters.  Quotes are deliberately left
    # alone: several patterns below match a literal '"' around URLs/titles.
    text = html_escape(text, quote=False)

    # -------------
    # Adding markup

    text = "<pre>" + text + "</pre>"

    # Typewriter-style underline:
    text = re.sub(r"_[\b](.)", r"<u>\g<1></u>", text)

    # Obsoletes: ... markup
    def rfclist_replace(keyword, text):
        rfc_link = r'<a href="%s?%srfc=\g<0>">\g<0></a>' % (script, extra)

        def replacement(match):
            # Unmatched optional groups come back as "" instead of None.
            group = list(match.groups(""))
            group[3] = re.sub(r"\d+", rfc_link, group[3])
            group[8] = re.sub(r"\d+", rfc_link, group[8])
            return "\n%s%s%s\n%s%s" % (group[0], group[3], group[5], group[7], group[8])

        return re.sub(r"\n(%s( RFCs| RFC)?: ?( RFCs| RFC)?)(( \d+,| \d+)+)(.*)\n(( *)((\d+, )*(\d+)))*" % keyword, replacement, text, count=1)

    text = rfclist_replace("Obsoletes", text)
    text = rfclist_replace("Updates", text)

    # title markup: only the first 28 lines are searched for the title
    lines = text.splitlines(True)
    head = "".join(lines[:28])
    rest = "".join(lines[28:])
    head = re.sub(r"(?im)(([12][0-9][0-9][0-9]|^Obsoletes.*|^Category: (Standards Track|Informational|Experimental|Best Current Practice)) *\n\n+ +)([A-Z][^\n]+)$", r'\g<1><span class="h1">\g<4></span>', head, count=1)
    # Up to two continuation lines of the title get the same markup.
    for _ in range(2):
        head = re.sub(r'(?i)(<span class="h1".+</span>)(\n +)([^<\n]+)\n', r'\g<1>\g<2><span class="h1">\g<3></span>\n', head, count=1)
    text = head + rest

    # http link markup

    # link crossing a line.  Not permitting ":" after the line break will
    # result in some URLs broken across lines not being recognized, but
    # will on the other hand correctly handle a series of URL listed line
    # by line, one on each line.
    split_link = r'\g<1><a href="\g<2>\g<6>">\g<2></a>\g<5><a href="\g<2>\g<6>">\g<6></a>\g<7>'
    split_in_angles = r"(?im)(&lt;)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(&gt;)"
    #  Link crossing a line, where the continuation contains '.' or '/'
    text = re.sub(r'(?im)(\s|^|[^=]"|\()((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[./][A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)"\s]|$)',
        split_link, text)
    text = re.sub(split_in_angles, split_link, text)
    #  Link crossing a line, where first line ends in '-' or '/'
    text = re.sub(r'(?im)(\s|^|[^=]"|\()((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?[-/])(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)"\s]|$)',
        split_link, text)
    text = re.sub(split_in_angles, split_link, text)
    # link crossing a line, enclosed in "<" ... ">"
    text = re.sub(r"(?im)<((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])>",
        r'<\g<1><a href="\g<1>\g<5>">\g<1></a>\g<4><a href="\g<1>\g<5>">\g<5></a>>', text)
    text = re.sub(r"(?im)(&lt;)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&;?#~=-]+[A-Za-z0-9_/@%&;?#~=-])(&gt;)",
        split_link, text)
    # link crossing two lines, enclosed in "<" ... ">"
    text = re.sub(r"(?im)<((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])>",
        r'<\g<1><a href="\g<1>\g<5>\g<7>">\g<1></a>\g<4><a href="\g<1>\g<5>\g<7>">\g<5></a>\g<6><a href="\g<1>\g<5>\g<7>">\g<7></a>>', text)
    text = re.sub(r"(?im)(&lt;)((http|https|ftp)://([:A-Za-z0-9_./@%&?#~=-]+)?)(\n +)([A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])(\n +)([A-Za-z0-9_./@%&;?#~=-]+[A-Za-z0-9_/@%&;?#~=-])(&gt;)",
        r'\g<1><a href="\g<2>\g<6>\g<8>">\g<2></a>\g<5><a href="\g<2>\g<6>\g<8>">\g<6></a>\g<7><a href="\g<2>\g<6>\g<8>">\g<8></a>\g<9>', text)
    # link on a single line
    text = re.sub(r'(?im)(\s|^|[^=]"|&lt;|\()((http|https|ftp)://[:A-Za-z0-9_./@%&?#~=-]+[A-Za-z0-9_/@%&?#~=-])([.,)"\s]|&gt;|$)',
        r'\g<1><a href="\g<2>">\g<2></a>\g<4>', text)

    # undo markup if RFC2606 domain
    text = re.sub(r'(?i)<a href="[a-z]*?://([a-z0-9_-]+?\.)?example(\.(com|org|net))?(/.*?)?">(.*?)</a>', r"\g<5>", text)

    # draft markup
    # draft name crossing line break
    text = re.sub(r"([^/#=\?\w-])(draft-([-a-zA-Z0-9]+-)?)(\n +)([-a-zA-Z0-9]+[a-zA-Z0-9](.txt)?)",
        r'\g<1><a href="%s?%sdraft=\g<2>\g<5>">\g<2></a>\g<4><a href="%s?%sdraft=\g<2>\g<5>">\g<5></a>' % (script, extra, script, extra), text)
    # draft name on one line (but don't mess with what we just did above)
    text = re.sub(r"([^/#=\?\w>=-])(draft-[-a-zA-Z0-9]+[a-zA-Z0-9](.txt)?)",
        r'\g<1><a href="%s?%sdraft=\g<2>">\g<2></a>' % (script, extra), text)

    # rfc markup
    # rfc and number on the same line
    text = re.sub(r"(?i)([^[/>\w-])(rfc([- ]?))([0-9]+)(\W)",
        r'\g<1><a href="%s?%srfc=\g<4>">\g<2>\g<4></a>\g<5>' % (script, extra), text)
    # rfc and number on separate lines
    text = re.sub(r"(?i)([^[/>\w-])(rfc([-]?))(\n +)([0-9]+)(\W)",
        r'\g<1><a href="%s?%srfc=\g<5>">\g<2></a>\g<4><a href="%s?%srfc=\g<5>">\g<5></a>\g<6>' % (script, extra, script, extra), text)
    # spelled out Request For Comments markup
    text = re.sub(r"(?i)(\s)(Request\s+For\s+Comments\s+\([^)]+\)\s+)([0-9]+)",
        r'\g<1>\g<2><a href="%s?%srfc=\g<3>">\g<3></a>' % (script, extra), text)
    # bcp markup
    text = re.sub(r"(?i)([^[/>\w.-])(bcp([- ]?))([0-9]+)(\W)",
        r'\g<1><a href="%s?%sbcp=\g<4>">\g<2>\g<4></a>\g<5>' % (script, extra), text)
    text = re.sub(r"(?i)([^[/>\w.-])(bcp([-]?))(\n +)([0-9]+)(\W)",
        r'\g<1><a href="%s?%sbcp=\g<5>">\g<2></a>\g<4><a href="%s?%sbcp=\g<5>">\g<5></a>\g<6>' % (script, extra, script, extra), text)

    # reference definitions: give each "[TAG]" definition an id, and turn
    # the title of a work-in-progress reference into a search link.
    def workinprogress_replacement(match):
        g1 = match.group(1)
        g2 = match.group(2)
        g3 = match.group(3)
        # eliminate embedded hyperlinks in text we'll use as anchor text
        g4 = re.sub(r"<a.+?>(.+?)</a>", r"\g<1>", match.group(4))
        g4url = quote_plus(g4)
        g5 = match.group(5)
        return """%s[<a id="ref-%s">%s</a>]%s<a style="text-decoration: none" href='https://www.google.com/search?sitesearch=datatracker.ietf.org%%2Fdoc%%2Fhtml%%2F&amp;q=inurl:draft-+%s'>%s</a>%s""" % (g1, g2, g2, g3, g4url, g4, g5)

    text = re.sub(r'(\n *\n *)\[([-\w.]+)\](\s+.*?)(".+")(,\s+Work\s+in\s+Progress.)', workinprogress_replacement, text)
    text = re.sub(r"(\n *\n *)\[([-\w.]+)\](\s)", r'\g<1>[<a id="ref-\g<2>">\g<2></a>]\g<3>', text)
    text = re.sub(r"(\n *\n *)\[(RFC [-\w.]+)\](\s)", r'\g<1>[<a id="ref-\g<2>">\g<2></a>]\g<3>', text)

    ref_targets = set(re.findall('<a id="ref-(.*?)"', text))

    # reference link markup
    def reference_replacement(match):
        pre, beg, tag, end = match.group(1, 2, 3, 4)
        title = (' title="%s"' % reference[tag]) if tag in reference else ""
        isrfc = re.match(r"(?i)^rfc[ -]?([0-9]+)$", tag)
        if isrfc:
            # An RFC tag links to the RFC itself rather than to the definition.
            return '%s%s<a href="%s?%srfc=%s"%s>%s</a>%s' % (pre, beg, script, extra, isrfc.group(1), title, tag, end)
        if tag in ref_targets:
            return '%s%s<a href="#ref-%s"%s>%s</a>%s' % (pre, beg, tag, title, tag, end)
        # Unknown tag: leave the text as it was.
        return match.group(0)

    # Group:       1   2   3        45
    text = re.sub(r"(\W)(\[)([-\w.]+)((, ?[-\w.]+)*\])", reference_replacement, text)
    text = re.sub(r"(\W)(\[)(RFC [0-9]+)((, ?RFC [0-9]+)*\])", reference_replacement, text)
    # Each pass links one more tag of a "[A, B, C]" list, so repeat until
    # nothing changes.
    text = sub_until_stable(r"(\W)(\[(?:<a.*?>.*?</a>, ?)+)([-\w.]+)((, ?[-\w.]+)*\])", reference_replacement, text)
    text = sub_until_stable(r"(\W)(\[(?:<a.*?>.*?</a>, ?)+)(RFC [-\w.]+)((, ?RFC [-\w.]+)*\])", reference_replacement, text)

    # greying out the page headers and footers
    text = re.sub(r"\n(.+\[Page \w+\])\n\f\n(.+)\n", r'\n<span class="grey">\g<1></span>\n\f\n<span class="grey">\g<2></span>\n', text)

    # contents link markup: section links
    #                   1    2   3        4        5        6         7
    text = re.sub(r"(?m)^(\s*)(\d+(\.\d+)*)(\.?[ ]+)(.*[^ .])( *\. ?\.)(.*[0-9])$", r'\g<1><a href="#section-\g<2>">\g<2></a>\g<4>\g<5>\g<6>\g<7>', text)
    text = re.sub(r"(?m)^(\s*)(Appendix |)([A-Z](\.\d+)*)(\.?[ ]+)(.*[^ .])( *\. ?\.)(.*[0-9])$", r'\g<1><a href="#appendix-\g<3>">\g<2>\g<3></a>\g<5>\g<6>\g<7>\g<8>', text)

    # page number markup
    def page_markup(chunk, page):
        # page name tag markup
        chunk = re.sub(r"(?si)(\f)([^\f]*\[Page (\w+)\])", r'\g<1><span id="%s-\g<3>" ></span>\g<2>' % page, chunk)
        # contents link markup: page numbers
        chunk = re.sub(r"(?i)(\. ?\. +|\. \. \.|\.\.\. *)([0-9ivxlc]+)( *\n)", r'\g<1><a href="#%s-\g<2>">\g<2></a>\g<3>' % page, chunk)
        return chunk

    multidoc_separator = "========================================================================"
    if multidoc_separator in text:
        # Several documents in one file: page ids are made unique per
        # document ("page-1-N", "page-2-N", ...).
        parts = text.split(multidoc_separator)
        parts = [page_markup(part, "page-%s" % (i + 1)) for i, part in enumerate(parts)]
        text = multidoc_separator.join(parts)
    else:
        text = page_markup(text, "page")

    # section number tag markup
    def section_anchor_replacement(match):
        mstring = match.group(0)
        # exclude TOC entries (e.g. a wrapped entry whose dot leaders are on
        # the continuation line); they must not get a duplicate section id
        if looks_like_toc_entry(mstring):
            return mstring
        num = match.group(1)
        level = min(len(re.findall(r"[^\.]+", num)) + 1, 6)
        html = '<span class="h%s"><a class="selflink" id="section-%s" href="#section-%s">%s</a>%s</span>' % (level, num, num, num, match.group(3))
        # A heading wrapped over two lines gets one span per line.
        return html.replace("\n", '</span>\n<span class="h%s">' % level)

    # The heading text ends at a run of two spaces or at end of line; a
    # single-space terminator would cut the heading after its first word.
    text = re.sub(r"(?im)^(\d+(\.\d+)*)(\.?[ ]+\S.*?(\n +\w+.*)?(  |$))", section_anchor_replacement, text)

    # section number link markup
    text = re.sub(r"(?i)(section\s)(\d+(\.\d+)*)", r'<a href="#section-\g<2>">\g<1>\g<2></a>', text)
    text = re.sub(r"(?i)(section)\n(\s+)(\d+(\.\d+)*)", r'<a href="#section-\g<3>">\g<1></a>\n\g<2><a href="#section-\g<3>">\g<3></a>', text)

    # Special cases for licensing boilerplate
    text = text.replace('<a href="#section-4">Section 4</a>.e of the Trust Legal Provisions',
                        'Section 4.e of the <a href="https://trustee.ietf.org/license-info">Trust Legal Provisions</a>')

    # "sections 1, 2 and 3": each pass links one more number of the list
    text = sub_until_stable(r"(?i)(sections\s(<a.*?>.*?</a>(,\s|\s?-\s?|\sthrough\s|\sor\s|\sto\s|,?\sand\s))*)(\d+(\.\d+)*)", r'\g<1><a href="#section-\g<4>">\g<4></a>', text)

    # appendix number tag markup
    def appendix_replacement(match):
        mstring = match.group(0)
        # exclude TOC entries
        if looks_like_toc_entry(mstring):
            return mstring
        txt = match.group(4)
        num = match.group(2).rstrip('.')
        if num != match.group(2):
            # The trailing dot belongs to the heading text, not to the id.
            txt = "." + txt
        level = min(len(re.findall(r"[^\.]+", num)) + 1, 6)
        return '<span class="h%s"><a class="selflink" id="appendix-%s" href="#appendix-%s">%s%s</a>%s</span>' % (level, num, num, match.group(1), num, txt)

    text = re.sub(r"(?m)^(Appendix |)([A-Z](\.|\.\d+)+)(\.?[ ].*)$", appendix_replacement, text)

    # appendix number link markup
    text = re.sub(r" ([Aa]ppendix\s)([A-Z](\.\d+)*)", r' <a href="#appendix-\g<2>">\g<1>\g<2></a>', text)
    text = re.sub(r" ([Aa]ppendix)\n(\s+)([A-Z](\.\d+)*)", r' <a href="#appendix-\g<3>">\g<1></a>\n\g<2><a href="#appendix-\g<3>">\g<3></a>', text)

    # Merge "<section/appendix link> of <document link>" pairs (in either
    # order) into one link that points at the section of the other document.
    for n in ['rfc', 'bcp', 'fyi', 'std']:
        part_of_doc = r'<a href="%s?%s%s=\g<9>\g<1>">\g<2>&nbsp;\g<3>\g<5>\g<8>\g<9></a>' % (script, extra, n)
        split_part_of_doc = r'<a href="%s?%s%s=\g<10>\g<1>">\g<2></a>\g<3><a href="%s?%s%s=\g<10>\g<1>">\g<4>\g<6>\g<9>\g<10></a>' % (script, extra, n, script, extra, n)
        doc_then_part = r'<a href="%s?%s%s=\g<3>\g<5>">\g<2>\g<3>\g<4>\g<6>&nbsp;\g<7></a>' % (script, extra, n)
        part_of_ref = r'<a href="%s?%s%s=\g<9>\g<1>">\g<2>&nbsp;\g<3>\g<5>[\g<8>\g<9>]</a>' % (script, extra, n)
        split_part_of_ref = r'<a href="%s?%s%s=\g<10>\g<1>">\g<2></a>\g<3><a href="%s?%s%s=\g<10>\g<1>">\g<4>\g<6>[\g<9>\g<10>]</a>' % (script, extra, n, script, extra, n)
        ref_then_part = r'<a href="%s?%s%s=\g<3>\g<5>">[\g<2>\g<3>]\g<4>\g<6>&nbsp;\g<7></a>' % (script, extra, n)

        # section x of rfc y markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(section)\s(\d+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>' % n,
            part_of_doc, text)
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(section)</a>(\n\s+)<a href="(?:[^"]*)"[^>]*>(\d+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>' % n,
            split_part_of_doc, text)
        # appendix x of rfc y markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>' % n,
            part_of_doc, text)
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(appendix)</a>(\n\s+)<a href="(?:[^"]*)"[^>]*>([A-Z]+(\.\d+)*)</a>(\.?\s+(of|in)\s+)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>' % n,
            split_part_of_doc, text)
        # rfc y, section x markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>(,?\s+)<a href="([^"]*)"[^>]*>(section)\s?(([^<]*))</a>' % n,
            doc_then_part, text)
        # rfc y, appendix x markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>(,?\s+)<a href="([^"]*)"[^>]*>(appendix)\s?(([^<]*))</a>' % n,
            doc_then_part, text)
        # section x of? [rfc y] markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(section)\s(\d+(\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>\]' % n,
            part_of_ref, text)
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(section)</a>(\n\s+)<a href="(?:[^"]*)"[^>]*>(\d+(\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>\]' % n,
            split_part_of_ref, text)
        # appendix x of? [rfc y] markup
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>\]' % n,
            part_of_ref, text)
        text = re.sub(r'(?i)<a href="([^"]*)"[^>]*>(appendix)</a>(\n\s+)<a href="(?:[^"]*)"[^>]*>([A-Z](\.\d+)*)</a>(\.?\s+(of\s+|in\s+)?)\[<a href="([^"]*)"[^>]*>(%s[- ]?)([0-9]+)</a>\]' % n,
            split_part_of_ref, text)
        # [rfc y], section x markup
        text = re.sub(r'(?i)\[<a href="([^>"]+)"[^>]*>(%s[- ]?)([0-9]+)</a>\](,?\s+)<a href="([^>"]*)"[^>]*>(section)\s(\d+(\.\d+)*)</a>' % n,
            ref_then_part, text)
        # [rfc y], appendix x markup
        text = re.sub(r'(?i)\[<a href="([^>"]+)"[^>]*>(%s[- ]?)([0-9]+)</a>\](,?\s+)<a href="([^>"]*)"[^>]*>(appendix)\s([A-Z](\.\d+)*)</a>' % n,
            ref_then_part, text)

    # remove section link for section x.x (of|in) <something else>: the
    # section belongs to the other document, not to this one
    text = re.sub(r'(?i)<a href="[^"]*"[^>]*>(section\s)(\d+(\.\d+)*)</a>(\.?[a-z]*\s+(of|in)\s+)(\[?)<a href="([^"]*)"([^>]*)>(.*)</a>(\]?)',
        r'\g<1>\g<2>\g<4>\g<6><a href="\g<7>"\g<8>>\g<9></a>\g<10>', text)
    text = re.sub(r'(?i)(\[?)<a href="([^"]*#ref[^"]*)"([^>]*)>(.*?)</a>(\]?,\s+)<a href="[^"]*"[^>]*>(section\s)(\d+(\.\d+)*)</a>',
        r'\g<1><a href="\g<2>"\g<3>>\g<4></a>\g<5>\g<6>\g<7>', text)

    # Special fix for referring to the trust legal provisions in
    # boilerplate text:
    text = re.sub(r'(?i)<a href="[^"]*"[^>]*>(section\s)(\d+(\.\d+)*)</a>(\.?[a-z]*\s+(of|in)\s*\n\s*the Trust Legal Provisions)',
        r'\g<1>\g<2>\g<4>', text)

    # Each page break closes the current <pre> and opens a new one.
    text = re.sub(r"\n?\f\n?", "</pre>\n<hr class='noprint'/><!--NewPage--><pre class='newpage'>", text)

    # restore indentation
    if prefixlen:
        text = text.replace("\n", "\n" + (" " * prefixlen))

    if path:
        # script is literal text, not a pattern, so escape it for the regex.
        script_re = re.escape(script)
        text = re.sub(r"%s\?(rfc|bcp|std)=" % script_re, r"%s/\g<1>" % path, text)
        text = re.sub(r"%s\?draft=" % script_re, "%s/" % path, text)

    return text