Provide PDFs of htmlized (pdfized) documents at /doc/pdf, replacing tools.ietf.org/pdf/. Commit ready for merge.
- Legacy-Id: 19753
This commit is contained in:
parent
024cfc3f20
commit
adbf8acb81
|
@ -147,6 +147,12 @@ class IndividualRfcFactory(IndividualDraftFactory):
|
|||
else:
|
||||
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
||||
|
||||
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove a ``_canonical_name`` attribute from the generated object, if present.

    Registered as a factory post-generation hook. ``create``, ``extracted``
    and ``kwargs`` are accepted to satisfy the hook signature but are not
    used. Always returns None.

    NOTE(review): ``_canonical_name`` looks like a memoised canonical name
    that has to be recomputed after the RFC state is set -- confirm.
    """
    if not hasattr(obj, '_canonical_name'):
        return None
    delattr(obj, '_canonical_name')
    return None
|
||||
|
||||
class WgDraftFactory(BaseDocumentFactory):
|
||||
|
||||
type_id = 'draft'
|
||||
|
@ -186,6 +192,11 @@ class WgRfcFactory(WgDraftFactory):
|
|||
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
||||
obj.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
|
||||
|
||||
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove a ``_canonical_name`` attribute from the generated object, if present.

    Registered as a factory post-generation hook. ``create``, ``extracted``
    and ``kwargs`` are accepted to satisfy the hook signature but are not
    used. Always returns None.

    NOTE(review): ``_canonical_name`` looks like a memoised canonical name
    that has to be recomputed after the RFC state is set -- confirm.
    """
    if not hasattr(obj, '_canonical_name'):
        return None
    delattr(obj, '_canonical_name')
    return None
|
||||
|
||||
class RgDraftFactory(BaseDocumentFactory):
|
||||
|
||||
|
@ -230,6 +241,12 @@ class RgRfcFactory(RgDraftFactory):
|
|||
obj.set_state(State.objects.get(type_id='draft-stream-irtf', slug='pub'))
|
||||
obj.set_state(State.objects.get(type_id='draft-iesg',slug='idexists'))
|
||||
|
||||
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove a ``_canonical_name`` attribute from the generated object, if present.

    Registered as a factory post-generation hook. ``create``, ``extracted``
    and ``kwargs`` are accepted to satisfy the hook signature but are not
    used. Always returns None.

    NOTE(review): ``_canonical_name`` looks like a memoised canonical name
    that has to be recomputed after the RFC state is set -- confirm.
    """
    if not hasattr(obj, '_canonical_name'):
        return None
    delattr(obj, '_canonical_name')
    return None
|
||||
|
||||
|
||||
class CharterFactory(BaseDocumentFactory):
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import rfc2html
|
|||
import time
|
||||
|
||||
from typing import Optional, TYPE_CHECKING
|
||||
from weasyprint import HTML as wpHTML
|
||||
|
||||
from django.db import models
|
||||
from django.core import checks
|
||||
|
@ -565,6 +566,22 @@ class DocumentInfo(models.Model):
|
|||
cache.set(cache_key, html, settings.HTMLIZER_CACHE_TIME)
|
||||
return html
|
||||
|
||||
def pdfized(self):
    """Return this document rendered as PDF bytes, or None if none could be produced.

    The PDF is looked up in the ``pdfized`` cache under the document's base
    name with everything from the first ``.`` onwards removed. On a cache
    miss the document text is marked up to HTML with ``rfc2html`` (using
    ``settings.PDFIZER_URL_PREFIX`` as the link path), converted to PDF with
    WeasyPrint, and, when the result is non-empty, stored in the cache for
    ``settings.PDFIZER_CACHE_TIME`` seconds.
    """
    name = self.get_base_name()
    cache = caches['pdfized']
    cache_key = name.split('.')[0]
    try:
        pdf = cache.get(cache_key)
    except EOFError:
        # An EOFError from the cache backend is treated as a cache miss.
        pdf = None
    if not pdf:
        # Read the document text only when it is actually needed, so that a
        # cache hit does not touch the document file at all.
        text = self.text()
        if text is None:
            # NOTE(review): assumes text() returns None when the document
            # cannot be read -- confirm.  Without text there is nothing to
            # render, so report "no PDF" instead of failing inside rfc2html.
            return None
        html = rfc2html.markup(text, path=settings.PDFIZER_URL_PREFIX)
        # The extra stylesheet scales the font down slightly for the PDF.
        pdf = wpHTML(string=html).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
        if pdf:
            cache.set(cache_key, pdf, settings.PDFIZER_CACHE_TIME)
    return pdf
|
||||
|
||||
def references(self):
    """Return ``self.relations_that_doc(...)`` for the four reference relationship slugs."""
    reference_slugs = ('refnorm', 'refinfo', 'refunk', 'refold')
    return self.relations_that_doc(reference_slugs)
|
||||
|
||||
|
|
|
@ -2733,4 +2733,38 @@ class RawIdTests(TestCase):
|
|||
charter = CharterFactory()
|
||||
self.should_404(dict(name=charter.name))
|
||||
|
||||
class PdfizedTests(TestCase):
    """Exercise the ``document_pdfized`` view for RFC and draft names and revisions."""

    def __init__(self, *args, **kwargs):
        self.view = "ietf.doc.views_doc.document_pdfized"
        # Zero-argument super(): super(self.__class__, self) would recurse
        # forever if this test case were ever subclassed.
        super().__init__(*args, **kwargs)

    def should_succeed(self, argdict):
        """Assert that the view answers 200 with the PDF content type for ``argdict``."""
        url = urlreverse(self.view, kwargs=argdict)
        r = self.client.get(url)
        self.assertEqual(r.status_code, 200)
        self.assertEqual(r.get('Content-Type'), 'application/pdf;charset=utf-8')

    def should_404(self, argdict):
        """Assert that the view answers 404 for ``argdict``."""
        url = urlreverse(self.view, kwargs=argdict)
        r = self.client.get(url)
        self.assertEqual(r.status_code, 404)

    def test_pdfized(self):
        rfc = WgRfcFactory(create_revisions=range(0, 2))

        # Put text files on disk for the RFC itself and for each draft
        # revision, so the view finds something to render.
        rfc_dir = Path(settings.RFC_PATH)
        with (rfc_dir / f'{rfc.canonical_name()}.txt').open('w') as f:
            f.write('text content')
        draft_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
        for r in range(0, 2):
            with (draft_dir / f'{rfc.name}-{r:02d}.txt').open('w') as f:
                f.write('text content')

        self.should_succeed(dict(name=rfc.canonical_name()))
        self.should_succeed(dict(name=rfc.name))
        for r in range(0, 2):
            self.should_succeed(dict(name=rfc.name, rev=f'{r:02d}'))
            # Any lowercase extension is accepted by the URL pattern.
            for ext in ('pdf', 'txt', 'html', 'anythingatall'):
                self.should_succeed(dict(name=rfc.name, rev=f'{r:02d}', ext=ext))
        # Only revisions 00 and 01 were created.
        self.should_404(dict(name=rfc.name, rev='02'))
|
||||
|
|
|
@ -72,6 +72,7 @@ urlpatterns = [
|
|||
url(r'^html/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, views_doc.document_html),
|
||||
|
||||
url(r'^id/%(name)s(?:-%(rev)s)?(?:\.(?P<ext>(txt|html|xml)))?/?$' % settings.URL_REGEXPS, views_doc.document_raw_id),
|
||||
url(r'^pdf/%(name)s(?:-%(rev)s)?(?:\.(?P<ext>[a-z]+))?/?$' % settings.URL_REGEXPS, views_doc.document_pdfized),
|
||||
|
||||
# End of block that should be an idealized docs.ietf.org service instead
|
||||
|
||||
|
|
|
@ -769,8 +769,7 @@ def document_html(request, name, rev=None):
|
|||
return redirect('ietf.doc.views_doc.document_html', name=found.matched_name)
|
||||
|
||||
doc = found.documents.get()
|
||||
if not os.path.exists(doc.get_file_name()):
|
||||
raise Http404("File not found: %s" % doc.get_file_name())
|
||||
|
||||
|
||||
if found.matched_rev or found.matched_name.startswith('rfc'):
|
||||
rev = found.matched_rev
|
||||
|
@ -778,6 +777,10 @@ def document_html(request, name, rev=None):
|
|||
rev = doc.rev
|
||||
if rev:
|
||||
doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev)
|
||||
|
||||
if not os.path.exists(doc.get_file_name()):
|
||||
raise Http404("File not found: %s" % doc.get_file_name())
|
||||
|
||||
if doc.type_id in ['draft',]:
|
||||
doc.supermeta = build_doc_supermeta_block(doc)
|
||||
doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX)
|
||||
|
@ -803,6 +806,36 @@ def document_html(request, name, rev=None):
|
|||
|
||||
return render(request, "doc/document_html.html", {"doc":doc, "doccolor":doccolor })
|
||||
|
||||
def document_pdfized(request, name, rev=None, ext=None):
    """Serve the PDF rendering of the document matching ``name`` (and ``rev``).

    Raises Http404 when no document or more than one document matches, when
    the document's file does not exist on disk, or when no PDF could be
    produced.  A request whose name resolves to a different ``rfc...`` name
    is redirected to this view under the matched name.  ``ext`` is accepted
    from the URL but not used.
    """
    found = fuzzy_find_documents(name, rev)
    match_count = found.documents.count()
    if match_count == 0:
        raise Http404("Document not found: %s" % name)
    if match_count > 1:
        raise Http404("Multiple documents matched: %s" % name)

    matched_is_rfc = found.matched_name.startswith('rfc')
    if matched_is_rfc and name != found.matched_name:
        return redirect('ietf.doc.views_doc.document_pdfized', name=found.matched_name)

    doc = found.documents.get()

    # Use the matched revision when there is one (or for RFC names, whatever
    # was matched, possibly nothing); otherwise use the document's own rev.
    rev = found.matched_rev if (found.matched_rev or matched_is_rfc) else doc.rev
    if rev:
        # Prefer a stored history entry for that revision, else a stand-in.
        history_entry = doc.history_set.filter(rev=rev).first()
        doc = history_entry or doc.fake_history_obj(rev)

    if not os.path.exists(doc.get_file_name()):
        raise Http404("File not found: %s" % doc.get_file_name())

    pdf = doc.pdfized()
    if not pdf:
        raise Http404
    return HttpResponse(pdf, content_type='application/pdf;charset=utf-8')
|
||||
|
||||
def check_doc_email_aliases():
|
||||
pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$')
|
||||
good_count = 0
|
||||
|
|
|
@ -743,6 +743,13 @@ CACHES = {
|
|||
'MAX_ENTRIES': 100000, # 100,000
|
||||
},
|
||||
},
|
||||
'pdfized': {
|
||||
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||
'LOCATION': '/a/cache/datatracker/pdfized',
|
||||
'OPTIONS': {
|
||||
'MAX_ENTRIES': 100000, # 100,000
|
||||
},
|
||||
},
|
||||
'slowpages': {
|
||||
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||
'LOCATION': '/a/cache/datatracker/slowpages',
|
||||
|
@ -755,6 +762,8 @@ CACHES = {
|
|||
HTMLIZER_VERSION = 1
|
||||
HTMLIZER_URL_PREFIX = "/doc/html"
|
||||
HTMLIZER_CACHE_TIME = 60*60*24*14 # 14 days
|
||||
PDFIZER_CACHE_TIME = HTMLIZER_CACHE_TIME
|
||||
PDFIZER_URL_PREFIX = IDTRACKER_BASE_URL+"/doc/pdf"
|
||||
|
||||
# Email settings
|
||||
IPR_EMAIL_FROM = 'ietf-ipr@ietf.org'
|
||||
|
@ -1267,6 +1276,14 @@ if SERVER_MODE != 'production':
|
|||
'MAX_ENTRIES': 1000,
|
||||
},
|
||||
},
|
||||
'pdfized': {
|
||||
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
|
||||
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||
'LOCATION': '/var/cache/datatracker/pdfized',
|
||||
'OPTIONS': {
|
||||
'MAX_ENTRIES': 1000,
|
||||
},
|
||||
},
|
||||
'slowpages': {
|
||||
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
|
||||
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||
|
|
|
@ -70,6 +70,7 @@ tqdm>=3.7.0
|
|||
#Trac>=1.0.10,<1.2
|
||||
Unidecode>=0.4.18,<1.2.0
|
||||
#wsgiref>=0.1.2
|
||||
weasyprint>=53.4
|
||||
xml2rfc>=2.35.0
|
||||
xym>=0.4.4,!=0.4.7,<1.0
|
||||
#zxcvbn-python>=4.4.14 # Not needed until we do back-end password entropy validation
|
||||
|
|
Loading…
Reference in a new issue