Provide PDFs of htmlized (pdfized) documents at /doc/pdf, replacing tools.ietf.org/pdf/. Commit ready for merge.
- Legacy-Id: 19753
This commit is contained in:
parent
024cfc3f20
commit
adbf8acb81
|
@ -147,6 +147,12 @@ class IndividualRfcFactory(IndividualDraftFactory):
|
||||||
else:
|
else:
|
||||||
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
||||||
|
|
||||||
|
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove the ``_canonical_name`` attribute from *obj* if it is present.

    Runs as a factory post-generation hook; ``create``, ``extracted`` and
    ``kwargs`` are accepted for the hook signature but not used. Always
    returns None.
    """
    # NOTE(review): presumably `_canonical_name` is a memoized value computed
    # before the object reached its final state -- confirm against the model.
    if not hasattr(obj, '_canonical_name'):
        return None
    del obj._canonical_name
    return None
|
||||||
|
|
||||||
class WgDraftFactory(BaseDocumentFactory):
|
class WgDraftFactory(BaseDocumentFactory):
|
||||||
|
|
||||||
type_id = 'draft'
|
type_id = 'draft'
|
||||||
|
@ -186,6 +192,11 @@ class WgRfcFactory(WgDraftFactory):
|
||||||
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
obj.set_state(State.objects.get(type_id='draft',slug='rfc'))
|
||||||
obj.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
|
obj.set_state(State.objects.get(type_id='draft-iesg', slug='pub'))
|
||||||
|
|
||||||
|
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove the ``_canonical_name`` attribute from *obj* if it is present.

    Runs as a factory post-generation hook; ``create``, ``extracted`` and
    ``kwargs`` are accepted for the hook signature but not used. Always
    returns None.
    """
    # NOTE(review): presumably `_canonical_name` is a memoized value computed
    # before the object reached its final state -- confirm against the model.
    if not hasattr(obj, '_canonical_name'):
        return None
    del obj._canonical_name
    return None
|
||||||
|
|
||||||
class RgDraftFactory(BaseDocumentFactory):
|
class RgDraftFactory(BaseDocumentFactory):
|
||||||
|
|
||||||
|
@ -230,6 +241,12 @@ class RgRfcFactory(RgDraftFactory):
|
||||||
obj.set_state(State.objects.get(type_id='draft-stream-irtf', slug='pub'))
|
obj.set_state(State.objects.get(type_id='draft-stream-irtf', slug='pub'))
|
||||||
obj.set_state(State.objects.get(type_id='draft-iesg',slug='idexists'))
|
obj.set_state(State.objects.get(type_id='draft-iesg',slug='idexists'))
|
||||||
|
|
||||||
|
@factory.post_generation
def reset_canonical_name(obj, create, extracted, **kwargs):
    """Remove the ``_canonical_name`` attribute from *obj* if it is present.

    Runs as a factory post-generation hook; ``create``, ``extracted`` and
    ``kwargs`` are accepted for the hook signature but not used. Always
    returns None.
    """
    # NOTE(review): presumably `_canonical_name` is a memoized value computed
    # before the object reached its final state -- confirm against the model.
    if not hasattr(obj, '_canonical_name'):
        return None
    del obj._canonical_name
    return None
|
||||||
|
|
||||||
|
|
||||||
class CharterFactory(BaseDocumentFactory):
|
class CharterFactory(BaseDocumentFactory):
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import rfc2html
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from typing import Optional, TYPE_CHECKING
|
from typing import Optional, TYPE_CHECKING
|
||||||
|
from weasyprint import HTML as wpHTML
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.core import checks
|
from django.core import checks
|
||||||
|
@ -565,6 +566,22 @@ class DocumentInfo(models.Model):
|
||||||
cache.set(cache_key, html, settings.HTMLIZER_CACHE_TIME)
|
cache.set(cache_key, html, settings.HTMLIZER_CACHE_TIME)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
def pdfized(self):
    """Return this document rendered as PDF, using the 'pdfized' cache.

    The cache key is the document's base name up to its first '.'. On a
    cache miss the document text is marked up to HTML with rfc2html (links
    prefixed with settings.PDFIZER_URL_PREFIX), rendered to PDF with
    WeasyPrint, and a non-empty result is stored in the cache with
    settings.PDFIZER_CACHE_TIME as its timeout.

    Returns:
        The PDF content, or whatever falsy value the renderer produced.
    """
    name = self.get_base_name()
    cache = caches['pdfized']
    cache_key = name.split('.')[0]
    try:
        pdf = cache.get(cache_key)
    except EOFError:
        # Treated as a cache miss; the entry is regenerated below.
        # NOTE(review): presumably raised for a truncated cache file -- confirm.
        pdf = None
    if pdf:
        return pdf

    # Read the document text only on a miss; a cache hit never needs it.
    text = self.text()
    html = rfc2html.markup(text, path=settings.PDFIZER_URL_PREFIX)
    # The extra stylesheet slightly shrinks the base font for the PDF rendering.
    pdf = wpHTML(string=html).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
    if pdf:
        cache.set(cache_key, pdf, settings.PDFIZER_CACHE_TIME)
    return pdf
|
||||||
|
|
||||||
def references(self):
|
def references(self):
|
||||||
return self.relations_that_doc(('refnorm','refinfo','refunk','refold'))
|
return self.relations_that_doc(('refnorm','refinfo','refunk','refold'))
|
||||||
|
|
||||||
|
|
|
@ -2733,4 +2733,38 @@ class RawIdTests(TestCase):
|
||||||
charter = CharterFactory()
|
charter = CharterFactory()
|
||||||
self.should_404(dict(name=charter.name))
|
self.should_404(dict(name=charter.name))
|
||||||
|
|
||||||
|
class PdfizedTests(TestCase):
    """Tests for the view that serves pdfized documents."""

    def __init__(self, *args, **kwargs):
        self.view = "ietf.doc.views_doc.document_pdfized"
        # Zero-argument super(): the previous super(self.__class__, self)
        # form recurses without end as soon as this class is subclassed.
        super().__init__(*args, **kwargs)

    def should_succeed(self, argdict):
        """Assert that the view reversed with *argdict* answers 200 with the PDF content type."""
        url = urlreverse(self.view, kwargs=argdict)
        r = self.client.get(url)
        self.assertEqual(r.status_code,200)
        self.assertEqual(r.get('Content-Type'),'application/pdf;charset=utf-8')

    def should_404(self, argdict):
        """Assert that the view reversed with *argdict* answers 404."""
        url = urlreverse(self.view, kwargs=argdict)
        r = self.client.get(url)
        self.assertEqual(r.status_code, 404)

    def test_pdfized(self):
        """Fetch an RFC by canonical name, by draft name, and by each revision and extension."""
        rfc = WgRfcFactory(create_revisions=range(0,2))

        # Put text files on disk for the RFC itself and for both draft revisions.
        rfc_dir = Path(settings.RFC_PATH)
        with (rfc_dir / f'{rfc.canonical_name()}.txt').open('w') as f:
            f.write('text content')
        archive_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
        for rev_num in range(0,2):
            with (archive_dir / f'{rfc.name}-{rev_num:02d}.txt').open('w') as f:
                f.write('text content')

        self.should_succeed(dict(name=rfc.canonical_name()))
        self.should_succeed(dict(name=rfc.name))
        for rev_num in range(0,2):
            self.should_succeed(dict(name=rfc.name,rev=f'{rev_num:02d}'))
            # The URL pattern accepts any lowercase extension, not only 'pdf'.
            for ext in ('pdf','txt','html','anythingatall'):
                self.should_succeed(dict(name=rfc.name,rev=f'{rev_num:02d}',ext=ext))
        # Only revisions 00 and 01 were created, so 02 must not be found.
        self.should_404(dict(name=rfc.name,rev='02'))
|
||||||
|
|
|
@ -72,6 +72,7 @@ urlpatterns = [
|
||||||
url(r'^html/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, views_doc.document_html),
|
url(r'^html/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, views_doc.document_html),
|
||||||
|
|
||||||
url(r'^id/%(name)s(?:-%(rev)s)?(?:\.(?P<ext>(txt|html|xml)))?/?$' % settings.URL_REGEXPS, views_doc.document_raw_id),
|
url(r'^id/%(name)s(?:-%(rev)s)?(?:\.(?P<ext>(txt|html|xml)))?/?$' % settings.URL_REGEXPS, views_doc.document_raw_id),
|
||||||
|
url(r'^pdf/%(name)s(?:-%(rev)s)?(?:\.(?P<ext>[a-z]+))?/?$' % settings.URL_REGEXPS, views_doc.document_pdfized),
|
||||||
|
|
||||||
# End of block that should be an idealized docs.ietf.org service instead
|
# End of block that should be an idealized docs.ietf.org service instead
|
||||||
|
|
||||||
|
|
|
@ -769,8 +769,7 @@ def document_html(request, name, rev=None):
|
||||||
return redirect('ietf.doc.views_doc.document_html', name=found.matched_name)
|
return redirect('ietf.doc.views_doc.document_html', name=found.matched_name)
|
||||||
|
|
||||||
doc = found.documents.get()
|
doc = found.documents.get()
|
||||||
if not os.path.exists(doc.get_file_name()):
|
|
||||||
raise Http404("File not found: %s" % doc.get_file_name())
|
|
||||||
|
|
||||||
if found.matched_rev or found.matched_name.startswith('rfc'):
|
if found.matched_rev or found.matched_name.startswith('rfc'):
|
||||||
rev = found.matched_rev
|
rev = found.matched_rev
|
||||||
|
@ -778,6 +777,10 @@ def document_html(request, name, rev=None):
|
||||||
rev = doc.rev
|
rev = doc.rev
|
||||||
if rev:
|
if rev:
|
||||||
doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev)
|
doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev)
|
||||||
|
|
||||||
|
if not os.path.exists(doc.get_file_name()):
|
||||||
|
raise Http404("File not found: %s" % doc.get_file_name())
|
||||||
|
|
||||||
if doc.type_id in ['draft',]:
|
if doc.type_id in ['draft',]:
|
||||||
doc.supermeta = build_doc_supermeta_block(doc)
|
doc.supermeta = build_doc_supermeta_block(doc)
|
||||||
doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX)
|
doc.meta = build_doc_meta_block(doc, settings.HTMLIZER_URL_PREFIX)
|
||||||
|
@ -803,6 +806,36 @@ def document_html(request, name, rev=None):
|
||||||
|
|
||||||
return render(request, "doc/document_html.html", {"doc":doc, "doccolor":doccolor })
|
return render(request, "doc/document_html.html", {"doc":doc, "doccolor":doccolor })
|
||||||
|
|
||||||
|
def document_pdfized(request, name, rev=None, ext=None):
    """Serve the PDF rendering of the single document matching *name* and *rev*.

    *ext* is accepted from the URL but is not consulted here.

    Returns a redirect to the matched name when an rfc was matched under a
    different name; otherwise an HttpResponse carrying the PDF.

    Raises:
        Http404: if no document or more than one document matches, if the
            document's file does not exist, or if no PDF was produced.
    """
    found = fuzzy_find_documents(name, rev)
    match_count = found.documents.count()
    if match_count == 0:
        raise Http404("Document not found: %s" % name)
    if match_count > 1:
        raise Http404("Multiple documents matched: %s" % name)

    matched_rfc = found.matched_name.startswith('rfc')
    if matched_rfc and name != found.matched_name:
        return redirect('ietf.doc.views_doc.document_pdfized', name=found.matched_name)

    doc = found.documents.get()

    # Use the matched revision when there is one (or for rfcs); otherwise
    # fall back to the document's current revision.
    rev = found.matched_rev if (found.matched_rev or matched_rfc) else doc.rev
    if rev:
        doc = doc.history_set.filter(rev=rev).first() or doc.fake_history_obj(rev)

    if not os.path.exists(doc.get_file_name()):
        raise Http404("File not found: %s" % doc.get_file_name())

    pdf = doc.pdfized()
    if not pdf:
        raise Http404
    return HttpResponse(pdf,content_type='application/pdf;charset=utf-8')
|
||||||
|
|
||||||
def check_doc_email_aliases():
|
def check_doc_email_aliases():
|
||||||
pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$')
|
pattern = re.compile(r'^expand-(.*?)(\..*?)?@.*? +(.*)$')
|
||||||
good_count = 0
|
good_count = 0
|
||||||
|
|
|
@ -743,6 +743,13 @@ CACHES = {
|
||||||
'MAX_ENTRIES': 100000, # 100,000
|
'MAX_ENTRIES': 100000, # 100,000
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
'pdfized': {
|
||||||
|
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||||
|
'LOCATION': '/a/cache/datatracker/pdfized',
|
||||||
|
'OPTIONS': {
|
||||||
|
'MAX_ENTRIES': 100000, # 100,000
|
||||||
|
},
|
||||||
|
},
|
||||||
'slowpages': {
|
'slowpages': {
|
||||||
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||||
'LOCATION': '/a/cache/datatracker/slowpages',
|
'LOCATION': '/a/cache/datatracker/slowpages',
|
||||||
|
@ -755,6 +762,8 @@ CACHES = {
|
||||||
HTMLIZER_VERSION = 1
|
HTMLIZER_VERSION = 1
|
||||||
HTMLIZER_URL_PREFIX = "/doc/html"
|
HTMLIZER_URL_PREFIX = "/doc/html"
|
||||||
HTMLIZER_CACHE_TIME = 60*60*24*14 # 14 days
|
HTMLIZER_CACHE_TIME = 60*60*24*14 # 14 days
|
||||||
|
PDFIZER_CACHE_TIME = HTMLIZER_CACHE_TIME
|
||||||
|
PDFIZER_URL_PREFIX = IDTRACKER_BASE_URL+"/doc/pdf"
|
||||||
|
|
||||||
# Email settings
|
# Email settings
|
||||||
IPR_EMAIL_FROM = 'ietf-ipr@ietf.org'
|
IPR_EMAIL_FROM = 'ietf-ipr@ietf.org'
|
||||||
|
@ -1267,6 +1276,14 @@ if SERVER_MODE != 'production':
|
||||||
'MAX_ENTRIES': 1000,
|
'MAX_ENTRIES': 1000,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
'pdfized': {
|
||||||
|
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
|
||||||
|
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||||
|
'LOCATION': '/var/cache/datatracker/pdfized',
|
||||||
|
'OPTIONS': {
|
||||||
|
'MAX_ENTRIES': 1000,
|
||||||
|
},
|
||||||
|
},
|
||||||
'slowpages': {
|
'slowpages': {
|
||||||
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
|
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
|
||||||
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
#'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
|
||||||
|
|
|
@ -70,6 +70,7 @@ tqdm>=3.7.0
|
||||||
#Trac>=1.0.10,<1.2
|
#Trac>=1.0.10,<1.2
|
||||||
Unidecode>=0.4.18,<1.2.0
|
Unidecode>=0.4.18,<1.2.0
|
||||||
#wsgiref>=0.1.2
|
#wsgiref>=0.1.2
|
||||||
|
weasyprint>=53.4
|
||||||
xml2rfc>=2.35.0
|
xml2rfc>=2.35.0
|
||||||
xym>=0.4.4,!=0.4.7,<1.0
|
xym>=0.4.4,!=0.4.7,<1.0
|
||||||
#zxcvbn-python>=4.4.14 # Not needed until we do back-end password entropy validation
|
#zxcvbn-python>=4.4.14 # Not needed until we do back-end password entropy validation
|
||||||
|
|
Loading…
Reference in a new issue