Improve robustness of pdfization. Tune the test crawler. Commit ready for merge.

- Legacy-Id: 19813
This commit is contained in:
Robert Sparks 2022-01-06 20:17:55 +00:00
parent 7d4ea4e37a
commit b1585124d6
3 changed files with 10 additions and 3 deletions

View file

@@ -232,6 +232,8 @@ def skip_url(url):
# Skip most html conversions, not worth the time
"^/doc/html/draft-[0-9ac-z]",
"^/doc/html/draft-b[0-9b-z]",
"^/doc/pdf/draft-[0-9ac-z]",
"^/doc/pdf/draft-b[0-9b-z]",
"^/doc/html/charter-.*",
"^/doc/html/status-.*",
"^/doc/html/rfc.*",

View file

@@ -577,7 +577,11 @@ class DocumentInfo(models.Model):
pdf = None
if not pdf:
html = rfc2html.markup(text, path=settings.PDFIZER_URL_PREFIX)
pdf = wpHTML(string=html).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
try:
pdf = wpHTML(string=html.replace('\xad','')).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
except AssertionError:
log.log(f'weasyprint failed with an assert on {self.name}')
pdf = None
if pdf:
cache.set(cache_key, pdf, settings.PDFIZER_CACHE_TIME)
return pdf

View file

@@ -1051,6 +1051,7 @@ def build_file_urls(doc):
label = "plain text" if t == "txt" else t
file_urls.append((label, base + doc.name + "-" + doc.rev + "." + t))
if doc.text():
file_urls.append(("htmlized", urlreverse('ietf.doc.views_doc.document_html', kwargs=dict(name=doc.name, rev=doc.rev))))
file_urls.append(("pdfized", urlreverse('ietf.doc.views_doc.document_pdfized', kwargs=dict(name=doc.name, rev=doc.rev))))
file_urls.append(("bibtex", urlreverse('ietf.doc.views_doc.document_bibtex',kwargs=dict(name=doc.name,rev=doc.rev))))