From c233f07b5de74d542eea6c916953003001e157e0 Mon Sep 17 00:00:00 2001 From: Henrik Levkowetz Date: Tue, 5 Nov 2019 18:10:29 +0000 Subject: [PATCH] Added a management command to generate draft bibxml files, and also a trial version of datatracker draft bibxml pages. - Legacy-Id: 16962 --- ietf/doc/factories.py | 1 + .../commands/generate_draft_bibxml_files.py | 103 +++++++++++++----- ietf/doc/tests.py | 18 +++ ietf/doc/urls.py | 1 + ietf/doc/views_doc.py | 32 +++++- ietf/templates/doc/bibxml.xml | 29 +++-- ietf/utils/text.py | 2 + 7 files changed, 147 insertions(+), 39 deletions(-) diff --git a/ietf/doc/factories.py b/ietf/doc/factories.py index 162d7ca95..57e9ad610 100644 --- a/ietf/doc/factories.py +++ b/ietf/doc/factories.py @@ -30,6 +30,7 @@ class BaseDocumentFactory(factory.DjangoModelFactory): model = Document title = factory.Faker('sentence',nb_words=5) + abstract = factory.Faker('paragraph', nb_sentences=5) rev = '00' std_level_id = None # type: Optional[str] intended_std_level_id = None diff --git a/ietf/doc/management/commands/generate_draft_bibxml_files.py b/ietf/doc/management/commands/generate_draft_bibxml_files.py index 78a052927..47f6f0f54 100644 --- a/ietf/doc/management/commands/generate_draft_bibxml_files.py +++ b/ietf/doc/management/commands/generate_draft_bibxml_files.py @@ -1,44 +1,95 @@ -# Copyright The IETF Trust 2012-2019, All Rights Reserved +# Copyright The IETF Trust 2018-2019, All Rights Reserved # -*- coding: utf-8 -*- - from __future__ import absolute_import, print_function, unicode_literals +import datetime import io -import sys import os +import re +import sys -from django.core.management.base import BaseCommand from django.conf import settings +from django.core.management.base import BaseCommand from django.template.loader import render_to_string -from ietf.doc.models import Document +import debug # pyflakes:ignore -def write(fn, new): - try: - f = io.open(fn) - old = f.read().decode('utf-8') - f.close - except IOError: - old = "" - 
if old.strip() != new.strip(): - sys.stdout.write(os.path.basename(fn)+'\n') - f = io.open(fn, "wb") - f.write(new.encode('utf-8')) - f.close() +from ietf.doc.models import NewRevisionDocEvent + +DEFAULT_DAYS = 7 class Command(BaseCommand): - help = ('Generate draft bibxml files, for xml2rfc references') + help = ('Generate draft bibxml files for xml2rfc references, placing them in the ' + 'directory configured in settings.BIBXML_BASE_PATH: %s. ' + 'By default, generate files as needed for new draft revisions from the ' + 'last %s days.' % (settings.BIBXML_BASE_PATH, DEFAULT_DAYS)) + + def add_arguments(self, parser): + parser.add_argument('--all', action='store_true', default=False, help="Process all documents, not only recent submissions") + parser.add_argument('--days', type=int, default=DEFAULT_DAYS, help="Look submissions from the last DAYS days, instead of %s" % DEFAULT_DAYS) + + def say(self, msg): + if self.verbosity > 0: + sys.stdout.write(msg) + sys.stdout.write('\n') + + def note(self, msg): + if self.verbosity > 1: + sys.stdout.write(msg) + sys.stdout.write('\n') + + def mutter(self, msg): + if self.verbosity > 2: + sys.stdout.write(msg) + sys.stdout.write('\n') + + def write(self, fn, new): + # normalize new + new = re.sub(r'\r\n?', r'\n', new) + try: + with io.open(fn, encoding='utf-8') as f: + old = f.read() + except IOError: + old = "" + if old.strip() != new.strip(): + self.note('Writing %s' % os.path.basename(fn)) + with io.open(fn, "w", encoding='utf-8') as f: + f.write(new) def handle(self, *args, **options): - documents = Document.objects.filter(type__slug='draft') + self.verbosity = options.get("verbosity", 1) + process_all = options.get("all") + days = options.get("days") + # bibxmldir = os.path.join(settings.BIBXML_BASE_PATH, 'bibxml3') if not os.path.exists(bibxmldir): os.makedirs(bibxmldir) - for doc in documents: - ref_text = render_to_string('doc/bibxml.xml', {'doc': doc, 'doc_bibtype':'I-D'}) - ref_file_name = os.path.join(bibxmldir, 
'reference.I-D.%s.xml' % (doc.name, )) - ref_rev_file_name = os.path.join(bibxmldir, 'reference.I-D.%s-%s.xml' % (doc.name, doc.rev)) - write(ref_file_name, ref_text) - write(ref_rev_file_name, ref_text) - \ No newline at end of file + # + if process_all: + doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft') + else: + start = datetime.datetime.now() - datetime.timedelta(days=days) + doc_events = NewRevisionDocEvent.objects.filter(type='new_revision', doc__type_id='draft', time__gte=start) + doc_events = doc_events.order_by('time') + + for e in doc_events: + self.mutter('%s %s' % (e.time, e.doc.name)) + try: + e.doc.date = e.time.date() + doc = e.doc + if e.rev != doc.rev: + for h in doc.history_set.order_by("-time"): + if e.rev == h.rev: + doc = h + break + ref_text = '%s' % render_to_string('doc/bibxml.xml', {'doc': doc, 'doc_bibtype':'I-D'}) + if e.rev == e.doc.rev: + ref_file_name = os.path.join(bibxmldir, 'reference.I-D.%s.xml' % (doc.name[6:], )) + self.write(ref_file_name, ref_text) + else: + self.note("Skipping %s; outdated revision: %s" % (os.path.basename(ref_file_name), e.rev)) + ref_rev_file_name = os.path.join(bibxmldir, 'reference.I-D.%s-%s.xml' % (doc.name, doc.rev)) + self.write(ref_rev_file_name, ref_text) + except Exception as ee: + sys.stderr.write('\n%s-%s: %s\n' % (doc.name, doc.rev, ee)) diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index 9fa40aa98..63d986e59 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -8,6 +8,7 @@ import os import shutil import datetime import io +import lxml import sys import bibtexparser @@ -46,6 +47,7 @@ from ietf.person.factories import PersonFactory from ietf.utils.mail import outbox from ietf.utils.test_utils import login_testing_unauthorized, unicontent from ietf.utils.test_utils import TestCase +from ietf.utils.text import normalize_text class SearchTests(TestCase): def test_search(self): @@ -972,6 +974,22 @@ class DocTestCase(TestCase): # 
self.assertNotIn('doi', entry) + def test_document_bibxml(self): + + draft = IndividualDraftFactory.create() + docname = '%s-%s' % (draft.name, draft.rev) + url = urlreverse('ietf.doc.views_doc.document_bibxml', kwargs=dict(name=draft.name)) + r = self.client.get(url) + entry = lxml.etree.fromstring(r.content) + self.assertEqual(entry.find('./front/title').text, draft.title) + date = entry.find('./front/date') + self.assertEqual(date.get('year'), str(draft.pub_date().year)) + self.assertEqual(date.get('month'), draft.pub_date().strftime('%B')) + self.assertEqual(date.get('day'), str(draft.pub_date().day)) + self.assertEqual(normalize_text(entry.find('./front/abstract/t').text), normalize_text(draft.abstract)) + self.assertEqual(entry.find('./seriesInfo').get('value'), docname) + self.assertEqual(entry.find('./seriesInfo[@name="DOI"]'), None) + class AddCommentTestCase(TestCase): def test_add_comment(self): draft = WgDraftFactory(name='draft-ietf-mars-test',group__acronym='mars') diff --git a/ietf/doc/urls.py b/ietf/doc/urls.py index 83ab67a2d..5db310770 100644 --- a/ietf/doc/urls.py +++ b/ietf/doc/urls.py @@ -74,6 +74,7 @@ urlpatterns = [ url(r'^%(name)s(?:/%(rev)s)?/$' % settings.URL_REGEXPS, views_doc.document_main), url(r'^%(name)s(?:/%(rev)s)?/bibtex/$' % settings.URL_REGEXPS, views_doc.document_bibtex), + url(r'^bibxml3/%(name)s(?:-%(rev)s)?.xml$' % settings.URL_REGEXPS, views_doc.document_bibxml), url(r'^%(name)s/history/$' % settings.URL_REGEXPS, views_doc.document_history), url(r'^%(name)s/writeup/$' % settings.URL_REGEXPS, views_doc.document_writeup), url(r'^%(name)s/email/$' % settings.URL_REGEXPS, views_doc.document_email), diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index 336ec9826..933a85af1 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -824,7 +824,37 @@ def document_bibtex(request, name, rev=None): latest_revision=latest_revision, doi=doi, ), - content_type="text/plain; charset=utf-8", + 
content_type="application/xml; charset=utf-8", + ) + + +def document_bibxml(request, name, rev=None): + # This only deals with drafts, as bibxml entries for RFCs should come from + # the RFC-Editor. + doc = get_object_or_404(Document, docalias__name=name, name__startswith='draft-', type_id='draft') + + latest_revision = doc.latest_event(NewRevisionDocEvent, type="new_revision") + latest_rev = latest_revision.rev if latest_revision else None + + if rev != None and rev != doc.rev: + # find the entry in the history + for h in doc.history_set.order_by("-time"): + if rev == h.rev: + doc = h + break + + try: + doc_event = NewRevisionDocEvent.objects.get(doc__name=doc.name, rev=(rev or latest_rev)) + doc.date = doc_event.time.date() + except DocEvent.DoesNotExist: + doc.date = doc.time.date() # Even if this may be incorrect, what would be better? + + return render(request, "doc/bibxml.xml", + dict( + doc=doc, + doc_bibtype='I-D', + ), + content_type="application/xml; charset=utf-8", ) diff --git a/ietf/templates/doc/bibxml.xml b/ietf/templates/doc/bibxml.xml index fdb57c132..857ec3463 100644 --- a/ietf/templates/doc/bibxml.xml +++ b/ietf/templates/doc/bibxml.xml @@ -1,15 +1,20 @@ - - + + - {{doc.title}} - {% for author in doc.documentauthor_set.all %} - - {{ author.affiliation }} - - {% endfor %} - - {{doc.abstract}} + {{doc.title}}{% if doc.submission %}{% for author in doc.submission.authors %} + + {% if author.affiliation %}{{ author.affiliation }} + {% endif %}{% endfor %}{% else %}{% for author in doc.documentauthor_set.all %} + + {% if author.affiliation %}{{ author.affiliation }} + {% endif %}{% endfor %}{% endif %} + + + {{doc.abstract}} + + - - + + {% if doi %} + {% endif %} diff --git a/ietf/utils/text.py b/ietf/utils/text.py index 1c44e8a49..f65014cb3 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -194,3 +194,5 @@ def texescape(s): def unwrap(s): return s.replace('\n', ' ') +def normalize_text(s): + return s.replace(r'\s+', ' ').strip()