ci: merge main to release (#8487)
commit 3b8faf0369
@@ -1,4 +1,4 @@
-FROM ghcr.io/ietf-tools/datatracker-app-base:20250117T1516
+FROM ghcr.io/ietf-tools/datatracker-app-base:20250128T1728
 LABEL maintainer="IETF Tools Team <tools-discuss@ietf.org>"
 
 ENV DEBIAN_FRONTEND=noninteractive
@@ -1 +1 @@
-20250117T1516
+20250128T1728
@@ -26,10 +26,11 @@ import debug  # pyflakes:ignore
 from ietf.doc.models import BallotDocEvent, Document
 from ietf.doc.models import ConsensusDocEvent
 from ietf.ietfauth.utils import can_request_rfc_publication as utils_can_request_rfc_publication
-from ietf.utils.html import sanitize_fragment
 from ietf.utils import log
 from ietf.doc.utils import prettify_std_name
-from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker, bleach_cleaner, validate_url
+from ietf.utils.html import clean_html
+from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, linkify
+from ietf.utils.validators import validate_url
 
 register = template.Library()
 
@@ -98,7 +99,7 @@ def sanitize(value):
     attributes to those deemed acceptable. See ietf/utils/html.py
     for the details.
     """
-    return mark_safe(sanitize_fragment(value))
+    return mark_safe(clean_html(value))
 
 
 # For use with ballot view
@@ -446,16 +447,16 @@ def ad_area(user):
 @register.filter
 def format_history_text(text, trunc_words=25):
     """Run history text through some cleaning and add ellipsis if it's too long."""
-    full = mark_safe(bleach_cleaner.clean(text))
-    full = bleach_linker.linkify(urlize_ietf_docs(full))
+    full = mark_safe(clean_html(text))
+    full = linkify(urlize_ietf_docs(full))
 
     return format_snippet(full, trunc_words)
 
 @register.filter
 def format_snippet(text, trunc_words=25):
     # urlize if there aren't already links present
-    text = bleach_linker.linkify(text)
-    full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
+    text = linkify(text)
+    full = keep_spacing(collapsebr(linebreaksbr(mark_safe(clean_html(text)))))
     snippet = truncatewords_html(full, trunc_words)
     if snippet != full:
         return mark_safe('<div class="snippet">%s<button type="button" aria-label="Expand" class="btn btn-sm btn-primary show-all"><i class="bi bi-caret-down"></i></button></div><div class="d-none full">%s</div>' % (snippet, full))
@@ -5,6 +5,8 @@
 import os
 import datetime
 import io
+from hashlib import sha384
 
+from django.http import HttpRequest
 import lxml
 import bibtexparser
@@ -3280,6 +3282,41 @@ class InvestigateTests(TestCase):
             "draft-this-should-not-be-possible-00.txt",
         )
 
+    @mock.patch("ietf.doc.utils.caches")
+    def test_investigate_fragment_cache(self, mock_caches):
+        """investigate_fragment should cache its result"""
+        mock_default_cache = mock_caches["default"]
+        mock_default_cache.get.return_value = None  # disable cache
+        result = investigate_fragment("this-is-active")
+        self.assertEqual(len(result["can_verify"]), 1)
+        self.assertEqual(len(result["unverifiable_collections"]), 0)
+        self.assertEqual(len(result["unexpected"]), 0)
+        self.assertEqual(
+            list(result["can_verify"])[0].name, "draft-this-is-active-00.txt"
+        )
+        self.assertTrue(mock_default_cache.get.called)
+        self.assertTrue(mock_default_cache.set.called)
+        expected_key = f"investigate_fragment:{sha384(b'this-is-active').hexdigest()}"
+        self.assertEqual(mock_default_cache.set.call_args.kwargs["key"], expected_key)
+        cached_value = mock_default_cache.set.call_args.kwargs["value"]  # hang on to this
+        mock_default_cache.reset_mock()
+
+        # Check that a cached value is used
+        mock_default_cache.get.return_value = cached_value
+        with mock.patch("ietf.doc.utils.Path") as mock_path:
+            result = investigate_fragment("this-is-active")
+        # Check that we got the same results
+        self.assertEqual(len(result["can_verify"]), 1)
+        self.assertEqual(len(result["unverifiable_collections"]), 0)
+        self.assertEqual(len(result["unexpected"]), 0)
+        self.assertEqual(
+            list(result["can_verify"])[0].name, "draft-this-is-active-00.txt"
+        )
+        # And that we used the cache
+        self.assertFalse(mock_path.called)  # a proxy for "did the method do any real work"
+        self.assertTrue(mock_default_cache.get.called)
+        self.assertEqual(mock_default_cache.get.call_args, mock.call(expected_key))
+
     def test_investigate_get(self):
         """GET with no querystring should retrieve the investigate UI"""
         url = urlreverse("ietf.doc.views_doc.investigate")
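
The new test above drives investigate_fragment() twice against a mocked cache,
first forcing a miss and then replaying the stored value. A minimal,
self-contained sketch of that pattern; expensive_lookup() is a hypothetical
stand-in, not datatracker code:

    from unittest import mock

    def expensive_lookup(fragment, cache):
        result = cache.get(fragment)
        if result is None:
            result = fragment.upper()  # stand-in for the real filesystem work
            cache.set(key=fragment, value=result, timeout=3600)
        return result

    fake_cache = mock.MagicMock()
    fake_cache.get.return_value = None             # force a cache miss
    expensive_lookup("x", fake_cache)
    assert fake_cache.set.call_args.kwargs["value"] == "X"

    fake_cache.get.return_value = "X"              # now simulate a cache hit
    assert expensive_lookup("x", fake_cache) == "X"
    assert fake_cache.set.call_count == 1          # no second write happened
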
@@ -11,12 +11,14 @@ import textwrap
 
 from collections import defaultdict, namedtuple, Counter
 from dataclasses import dataclass
+from hashlib import sha384
 from pathlib import Path
 from typing import Iterator, Optional, Union
 from zoneinfo import ZoneInfo
 
 from django.conf import settings
 from django.contrib import messages
+from django.core.cache import caches
 from django.db.models import OuterRef
 from django.forms import ValidationError
 from django.http import Http404
@@ -1459,35 +1461,43 @@ def get_doc_email_aliases(name: Optional[str] = None):
     return sorted(aliases, key=lambda a: (a["doc_name"]))
 
 
-def investigate_fragment(name_fragment):
-    can_verify = set()
-    for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
-        can_verify.update(list(Path(root).glob(f"*{name_fragment}*")))
-    archive_verifiable_names = set([p.name for p in can_verify])
-    # Can also verify drafts in proceedings directories
-    can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*")))
-
-    # N.B. This reflects the assumption that the internet draft archive dir is in the
-    # a directory with other collections (at /a/ietfdata/draft/collections as this is written)
-    unverifiable_collections = set([
-        p for p in
-        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*")
-        if p.name not in archive_verifiable_names
-    ])
+def investigate_fragment(name_fragment: str):
+    cache = caches["default"]
+    # Ensure name_fragment does not interact badly with the cache key handling
+    name_digest = sha384(name_fragment.encode("utf8")).hexdigest()
+    cache_key = f"investigate_fragment:{name_digest}"
+    result = cache.get(cache_key)
+    if result is None:
+        can_verify = set()
+        for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
+            can_verify.update(list(Path(root).glob(f"*{name_fragment}*")))
+        archive_verifiable_names = set([p.name for p in can_verify])
+        # Can also verify drafts in proceedings directories
+        can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*")))
 
-    unverifiable_collections.difference_update(can_verify)
-
-    expected_names = set([p.name for p in can_verify.union(unverifiable_collections)])
-    maybe_unexpected = list(
-        Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*")
-    )
-    unexpected = [p for p in maybe_unexpected if p.name not in expected_names]
-
-    return dict(
-        can_verify=can_verify,
-        unverifiable_collections=unverifiable_collections,
-        unexpected=unexpected,
-    )
+        # N.B. This reflects the assumption that the internet draft archive dir is in the
+        # a directory with other collections (at /a/ietfdata/draft/collections as this is written)
+        unverifiable_collections = set([
+            p for p in
+            Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*")
+            if p.name not in archive_verifiable_names
+        ])
+
+        unverifiable_collections.difference_update(can_verify)
+
+        expected_names = set([p.name for p in can_verify.union(unverifiable_collections)])
+        maybe_unexpected = list(
+            Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*")
+        )
+        unexpected = [p for p in maybe_unexpected if p.name not in expected_names]
+        result = dict(
+            can_verify=can_verify,
+            unverifiable_collections=unverifiable_collections,
+            unexpected=unexpected,
+        )
+        # 1 hour caching
+        cache.set(key=cache_key, timeout=3600, value=result)
+    return result
 
 
 def update_or_create_draft_bibxml_file(doc, rev):
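
The refactored investigate_fragment() memoizes through Django's default cache
and hashes the user-supplied fragment first, so arbitrary input cannot produce
invalid or oversized cache keys (memcached, for one, rejects keys longer than
250 bytes or containing whitespace/control characters). A sketch of just the
key-derivation step:

    from hashlib import sha384

    def cache_key_for(fragment: str) -> str:
        return f"investigate_fragment:{sha384(fragment.encode('utf8')).hexdigest()}"

    # Stable, fixed-length, and safe regardless of what the user typed:
    assert cache_key_for("this-is-active") == cache_key_for("this-is-active")
    assert len(cache_key_for("x" * 10_000)) == len("investigate_fragment:") + 96
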
@@ -33,7 +33,7 @@ def telechat_page_count(date=None, docs=None, ad=None):
             ballot = draft.active_ballot()
             if ballot:
                 positions = ballot.active_balloter_positions()
-                ad_position = positions[ad]
+                ad_position = positions.get(ad, None)
                 if ad_position is None or ad_position.pos_id == "norecord":
                     ad_pages_left_to_ballot_on += draft.pages or 0
 
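
The one-line change above swaps an indexing lookup for dict.get(), so a
balloter missing from positions falls through to the existing "no record"
branch instead of raising KeyError:

    positions = {"some-ad": "yes"}
    assert positions.get("new-ad", None) is None   # handled by the is-None check
    # positions["new-ad"] would raise KeyError and abort the page count
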
@@ -6423,8 +6423,7 @@ class MaterialsTests(TestCase):
         text = doc.text()
         self.assertIn('Some text', text)
        self.assertNotIn('<section>', text)
-        self.assertIn('charset="utf-8"', text)
 
         # txt upload
         test_file = BytesIO(b'This is some text for a test, with the word\nvirtual at the beginning of a line.')
         test_file.name = "some.txt"
@@ -30,7 +30,7 @@ from ietf.group.utils import can_manage_materials
 from ietf.name.models import SessionStatusName, ConstraintName, DocTypeName
 from ietf.person.models import Person
 from ietf.stats.models import MeetingRegistration
-from ietf.utils.html import sanitize_document
+from ietf.utils.html import clean_html
 from ietf.utils.log import log
 from ietf.utils.timezone import date_today
 
@@ -738,25 +738,12 @@ def handle_upload_file(file, filename, meeting, subdir, request=None, encoding=None):
 
     This function takes a _binary mode_ file object, a filename and a meeting object and subdir as string.
     It saves the file to the appropriate directory, get_materials_path() + subdir.
     If the file is a zip file, it creates a new directory in 'slides', which is the basename of the
     zip file and unzips the file in the new directory.
     """
-    filename = Path(filename)
-    is_zipfile = filename.suffix == '.zip'
 
-    path = Path(meeting.get_materials_path()) / subdir
-    if is_zipfile:
-        path = path / filename.stem
-    path.mkdir(parents=True, exist_ok=True)
 
-    # agendas and minutes can only have one file instance so delete file if it already exists
-    if subdir in ('agenda', 'minutes'):
-        for f in path.glob(f'{filename.stem}.*'):
-            try:
-                f.unlink()
-            except FileNotFoundError:
-                pass  # if the file is already gone, so be it
 
     with (path / filename).open('wb+') as destination:
         # prep file for reading
         if hasattr(file, "chunks"):
@@ -786,8 +773,8 @@ def handle_upload_file(file, filename, meeting, subdir, request=None, encoding=None):
             return "Failure trying to save '%s'. Hint: Try to upload as UTF-8: %s..." % (filename, str(e)[:120])
         # Whole file sanitization; add back what's missing from a complete
        # document (sanitize will remove these).
-        clean = sanitize_document(text)
-        destination.write(clean.encode('utf8'))
+        clean = clean_html(text)
+        destination.write(clean.encode("utf8"))
         if request and clean != text:
             messages.warning(request,
                 (
@@ -799,10 +786,6 @@ def handle_upload_file(file, filename, meeting, subdir, request=None, encoding=None):
             for chunk in chunks:
                 destination.write(chunk)
 
-    # unzip zipfile
-    if is_zipfile:
-        subprocess.call(['unzip', filename], cwd=path)
-
     return None
 
 def new_doc_for_session(type_id, session):
@@ -24,6 +24,8 @@ $(document)
         .before(mailArchiveSearchTemplate);
 
     var mailArchiveSearch = form.find(".mail-archive-search");
+    const isReviewer = mailArchiveSearch.data('isReviewer');
+    const searchMailArchiveUrl = mailArchiveSearch.data('searchMailArchiveUrl');
 
     var retrievingData = null;
 
@@ -190,4 +192,4 @@ $(document)
             form.find("[name=review_submission][value=link]")
                 .trigger("click");
         }
-    });
\ No newline at end of file
+    });
@@ -18,13 +18,13 @@ document.addEventListener('DOMContentLoaded', () => {
       loadResultsFromTask('bogus-task-id') // bad task id will generate an error from Django
     }
     const taskId = (await response.json()).id
-    // Poll for completion of the investigation up to 18*10 = 180 seconds
-    waitForResults(taskId, 18)
+    // Poll for completion of the investigation up to 60*10 = 600 seconds
+    waitForResults(taskId, 60)
   }
 
   const waitForResults = async (taskId, retries) => {
     // indicate that investigation is in progress
-    document.getElementById('spinner').classList.remove('d-none')
+    document.querySelectorAll('.investigation-indicator').forEach(elt => elt.classList.remove('d-none'))
     document.getElementById('investigate-button').disabled = true
     investigateForm.elements['id_name_fragment'].disabled = true
 
@@ -13,13 +13,15 @@
         {% csrf_token %}
         {% bootstrap_form form %}
         <button class="btn btn-primary" type="submit" id="investigate-button">
-            <span id="spinner"
-                  class="spinner-border spinner-border-sm d-none"
-                  role="status"
-                  aria-hidden="true">
-            </span>
+            <span class="spinner-border spinner-border-sm investigation-indicator d-none"
+                  role="status"
+                  aria-hidden="true">
+            </span>
             Investigate
         </button>
+        <div class="alert alert-info mt-3 d-none investigation-indicator">
+            Please be patient, processing may take several minutes.
+        </div>
     </form>
 </div>
 {% if results %}
@@ -94,7 +94,13 @@
 {% endif %}
 <div class="template d-none">
     {% if mail_archive_query_urls %}
-        <div class="mail-archive-search">
+        <div class="mail-archive-search"
+             {% if assignment %}
+                 data-search-mail-archive-url="{% url "ietf.doc.views_review.search_mail_archive" name=doc.name assignment_id=assignment.pk %}"
+             {% else %}
+                 data-search-mail-archive-url="{% url "ietf.doc.views_review.search_mail_archive" name=doc.name acronym=team.acronym %}"
+             {% endif %}
+             data-is-reviewer="{{ is_reviewer|yesno:"true,false" }}">
         <div class="offset-md-2 col-md-10">
             <label for="mail-archive-subjects" class="form-label">Search {{team.list_email}} mail archive subjects for:</label>
             <div class="input-group mb-3">
@@ -144,13 +150,5 @@
 {% endblock %}
 {% block js %}
     <script src="{% static 'ietf/js/datepicker.js' %}"></script>
-    <script>
-        {% if assignment %}
-            var searchMailArchiveUrl = "{% url "ietf.doc.views_review.search_mail_archive" name=doc.name assignment_id=assignment.pk %}";
-        {% else %}
-            var searchMailArchiveUrl = "{% url "ietf.doc.views_review.search_mail_archive" name=doc.name acronym=team.acronym %}";
-        {% endif %}
-        var isReviewer = {{ is_reviewer|yesno:'true,false' }};
-    </script>
     <script src="{% static 'ietf/js/complete-review.js' %}"></script>
 {% endblock %}
@@ -5,11 +5,7 @@
 
 
 import bleach
-import copy
 import html2text
-import lxml.etree
-import lxml.html
-import lxml.html.clean
 
 import debug  # pyflakes:ignore
 
@@ -17,62 +13,66 @@ from django import forms
 from django.utils.functional import keep_lazy
 
 from ietf.utils.mime import get_mime_type
-from ietf.utils.text import bleach_cleaner, tags as acceptable_tags
 
-acceptable_protocols = ['http', 'https', 'mailto', 'xmpp', ]
-
-def unescape(text):
-    """
-    Returns the given text with ampersands, quotes and angle brackets decoded
-    for use in URLs.
+# Allow the protocols/tags/attributes we specifically want, plus anything that bleach declares
+# to be safe. As of 2025-01-27, the explicit lists for protocols and tags are a strict superset
+# of bleach's defaults.
+acceptable_protocols = bleach.sanitizer.ALLOWED_PROTOCOLS.union(
+    {"http", "https", "mailto", "ftp", "xmpp"}
+)
+acceptable_tags = bleach.sanitizer.ALLOWED_TAGS.union(
+    {
+        # fmt: off
+        "a", "abbr", "acronym", "address", "b", "big",
+        "blockquote", "body", "br", "caption", "center", "cite", "code", "col",
+        "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt", "em", "font",
+        "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i", "ins", "kbd",
+        "li", "ol", "p", "pre", "q", "s", "samp", "small", "span", "strike", "style",
+        "strong", "sub", "sup", "table", "title", "tbody", "td", "tfoot", "th", "thead",
+        "tr", "tt", "u", "ul", "var"
+        # fmt: on
+    }
+)
+acceptable_attributes = bleach.sanitizer.ALLOWED_ATTRIBUTES | {
+    "*": ["id"],
+    "ol": ["start"],
+}
+
+
+# Instantiate sanitizer classes
+_bleach_cleaner = bleach.sanitizer.Cleaner(
+    tags=acceptable_tags,
+    attributes=acceptable_attributes,
+    protocols=acceptable_protocols,
+    strip=True,
+)
+
+
+_liberal_bleach_cleaner = bleach.sanitizer.Cleaner(
+    tags=acceptable_tags.union({"img", "figure", "figcaption"}),
+    attributes=acceptable_attributes | {"img": ["src", "alt"]},
+    protocols=acceptable_protocols,
+    strip=True,
+)
+
+
+def clean_html(text: str):
+    """Clean the HTML in a string"""
+    return _bleach_cleaner.clean(text)
+
+
+def liberal_clean_html(text: str):
+    """More permissively clean the HTML in a string"""
+    return _liberal_bleach_cleaner.clean(text)
-
-    This function undoes what django.utils.html.escape() does
-    """
-    return text.replace('&amp;', '&').replace('&#39;', "'").replace('&quot;', '"').replace('&gt;', '>').replace('&lt;', '<' )
 
 @keep_lazy(str)
 def remove_tags(html, tags):
     """Returns the given HTML sanitized, and with the given tags removed."""
-    allowed = set(acceptable_tags) - set([ t.lower() for t in tags ])
+    allowed = acceptable_tags - set(t.lower() for t in tags)
     return bleach.clean(html, tags=allowed, strip=True)
 
-# ----------------------------------------------------------------------
-# Html fragment cleaning
-
-def sanitize_fragment(html):
-    return bleach_cleaner.clean(html)
-
-# ----------------------------------------------------------------------
-# Page cleaning
-
-
-class Cleaner(lxml.html.clean.Cleaner):
-    charset = 'utf-8'
-    def __init__(self, charset='utf-8', **kw):
-        self.charset = charset
-        super(Cleaner, self).__init__(**kw)
-
-    # Copied from lxml 4.2.0 and modified to insert charset meta:
-    def clean_html(self, html):
-        result_type = type(html)
-        if isinstance(html, (str, bytes)):
-            doc = lxml.html.fromstring(html)
-        else:
-            doc = copy.deepcopy(html)
-        self(doc)
-        head = doc.find('head')
-        if head != None:
-            meta = lxml.etree.Element('meta', charset=self.charset)
-            meta.tail = '\n'
-            head.insert(0, meta)
-        return lxml.html._transform_result(result_type, doc)
-
-# We will be saving as utf-8 later, so set that in the meta tag.
-lxml_cleaner = Cleaner(allow_tags=acceptable_tags, remove_unknown_tags=None, style=False, page_structure=False, charset='utf-8')
-
-def sanitize_document(html):
-    return lxml_cleaner.clean_html(html)
-
-
 # ----------------------------------------------------------------------
 # Text field cleaning
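
The consolidated cleaners above behave like bleach.clean() with the module's
allow-lists; a small freestanding illustration of the strip=True behaviour
(the allow-lists here are abbreviated, not the full sets the module defines):

    import bleach

    cleaned = bleach.clean(
        '<span id="x"><script>alert(1)</script>ok</span>',
        tags={"span"},
        attributes={"*": ["id"]},
        strip=True,  # drop disallowed tags rather than escaping them
    )
    assert cleaned == '<span id="x">alert(1)ok</span>'
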
@@ -86,4 +86,15 @@ def clean_text_field(text):
     else:
         raise forms.ValidationError("Unexpected text field mime type: %s" % mime_type)
     return text
+
+
+def unescape(text):
+    """
+    Returns the given text with ampersands, quotes and angle brackets decoded
+    for use in URLs.
+
+    This function undoes what django.utils.html.escape() does
+    """
+    return text.replace('&amp;', '&').replace('&#39;', "'").replace('&quot;', '"').replace('&gt;', '>').replace('&lt;', '<' )
+
 
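
The relocated unescape() simply reverses django.utils.html.escape() for the
entities it lists; a quick round trip (note that newer Django releases encode
apostrophes as &#x27;, which this helper does not cover):

    from django.utils.html import escape

    s = 'a < b & "c"'
    escaped = str(escape(s))          # 'a &lt; b &amp; &quot;c&quot;'
    assert unescape(escaped) == s     # unescape() as defined in the hunk above
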
@@ -12,13 +12,15 @@ from markdown.postprocessors import Postprocessor
 from django.utils.safestring import mark_safe
 
 from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs
-from ietf.utils.text import bleach_cleaner, liberal_bleach_cleaner, bleach_linker
+from .html import clean_html, liberal_clean_html
+from .text import linkify
 
 
 class LinkifyExtension(Extension):
     """
     Simple Markdown extension inspired by https://github.com/daGrevis/mdx_linkify,
-    but using our bleach_linker directly. Doing the linkification on the converted
+    but using our own linker directly. Doing the linkification on the converted
     Markdown output introduces artifacts.
     """
 
@@ -31,12 +33,12 @@ class LinkifyExtension(Extension):
 
 class LinkifyPostprocessor(Postprocessor):
     def run(self, text):
-        return urlize_ietf_docs(bleach_linker.linkify(text))
+        return urlize_ietf_docs(linkify(text))
 
 
 def markdown(text):
     return mark_safe(
-        bleach_cleaner.clean(
+        clean_html(
             python_markdown.markdown(
                 text,
                 extensions=[
|
|||
|
||||
def liberal_markdown(text):
|
||||
return mark_safe(
|
||||
liberal_bleach_cleaner.clean(
|
||||
liberal_clean_html(
|
||||
python_markdown.markdown(
|
||||
text,
|
||||
extensions=[
|
||||
|
|
|
@@ -11,7 +11,7 @@ from django.utils.safestring import mark_safe
 
 import debug  # pyflakes:ignore
 
-from ietf.utils.text import xslugify as _xslugify, texescape, bleach_linker
+from ietf.utils.text import linkify as _linkify, xslugify as _xslugify, texescape
 
 register = template.Library()
 
@@ -74,7 +74,7 @@ def texescape_filter(value):
 @register.filter
 @stringfilter
 def linkify(value):
-    text = mark_safe(bleach_linker.linkify(value))
+    text = mark_safe(_linkify(value))
     return text
 
 @register.filter
@@ -1,17 +1,15 @@
 # Copyright The IETF Trust 2016-2020, All Rights Reserved
 # -*- coding: utf-8 -*-
 
-
-import bleach  # type: ignore
-import copy
+import bleach
 import email
 import re
 import textwrap
 import tlds
 import unicodedata
 
-from django.core.validators import URLValidator
 from django.core.exceptions import ValidationError
+from django.core.validators import URLValidator
 from django.utils.functional import keep_lazy
 from django.utils.safestring import mark_safe
 
@@ -19,66 +17,52 @@ import debug  # pyflakes:ignore
 
 from .texescape import init as texescape_init, tex_escape_map
 
-tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True)
-protocols = set(bleach.sanitizer.ALLOWED_PROTOCOLS)
-protocols.add("ftp")  # we still have some ftp links
-protocols.add("xmpp")  # we still have some xmpp links
+# Sort in reverse so substrings are considered later - e.g., so ".co" comes after ".com".
+tlds_sorted = sorted(tlds.tld_set, reverse=True)
 
-tags = set(bleach.sanitizer.ALLOWED_TAGS).union(
-    {
-        # fmt: off
-        'a', 'abbr', 'acronym', 'address', 'b', 'big',
-        'blockquote', 'body', 'br', 'caption', 'center', 'cite', 'code', 'col',
-        'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'font',
-        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'ins', 'kbd',
-        'li', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'style',
-        'strong', 'sub', 'sup', 'table', 'title', 'tbody', 'td', 'tfoot', 'th', 'thead',
-        'tr', 'tt', 'u', 'ul', 'var'
-        # fmt: on
-    }
-)
+# Protocols we're interested in auto-linking. See also ietf.utils.html.acceptable_protocols,
+# which is protocols we allow people to include explicitly in sanitized html.
+linkable_protocols = ["http", "https", "mailto", "ftp", "xmpp"]
 
-attributes = copy.copy(bleach.sanitizer.ALLOWED_ATTRIBUTES)
-attributes["*"] = ["id"]
-attributes["ol"] = ["start"]
-
-bleach_cleaner = bleach.sanitizer.Cleaner(
-    tags=tags, attributes=attributes, protocols=protocols, strip=True
-)
-
-liberal_tags = copy.copy(tags)
-liberal_attributes = copy.copy(attributes)
-liberal_tags.update(["img","figure","figcaption"])
-liberal_attributes["img"] = ["src","alt"]
-
-liberal_bleach_cleaner = bleach.sanitizer.Cleaner(
-    tags=liberal_tags, attributes=liberal_attributes, protocols=protocols, strip=True
-)
 
-validate_url = URLValidator()
+_validate_url = URLValidator()
 
 
 def check_url_validity(attrs, new=False):
+    """Callback for bleach linkify
+
+    :param attrs: dict of attributes of the <a> tag
+    :param new: boolean - True if the link is new; False if <a> was found in text
+    :return: new dict of attributes for the link, or None to block link creation
+
+    Attributes are namespaced, so normally look like `(None, "SomeAttribute")`.
+    This includes as the keys in the `attrs` argument, so `attrs[(None, "href")]`
+    would be the value of the href attribute.
+    """
     if (None, "href") not in attrs:
         # rfc2html creates a tags without href
         return attrs
     url = attrs[(None, "href")]
     try:
         if url.startswith("http"):
-            validate_url(url)
+            _validate_url(url)
     except ValidationError:
         return None
     return attrs
 
 
-bleach_linker = bleach.Linker(
+_bleach_linker = bleach.Linker(
     callbacks=[check_url_validity],
-    url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted, protocols=protocols),
+    url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted, protocols=linkable_protocols),
     email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted),  # type: ignore
     parse_email=True,
 )
 
 
+def linkify(text):
+    return _bleach_linker.linkify(text)
+
+
 @keep_lazy(str)
 def xslugify(value):
     """
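
_bleach_linker's callback contract is spelled out in the new docstring: return
the (possibly modified) attrs dict to keep a link, or None to suppress it. A
compact sketch of the same mechanism, using a simpler rule than URL validation:

    import bleach

    def drop_plain_http(attrs, new=False):
        href = attrs.get((None, "href"))
        if href is not None and href.startswith("http://"):
            return None                  # returning None blocks the link
        return attrs

    linker = bleach.Linker(callbacks=[drop_plain_http])
    out = linker.linkify("see https://www.ietf.org and http://old.example.com")
    # only the https URL is wrapped in an <a> tag; the other stays plain text
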
@@ -43,8 +43,7 @@ jsonfield>=3.1.0  # for SubmissionCheck. This is https://github.com/bradjaspe
 jsonschema[format]>=4.2.1
 jwcrypto>=1.2  # for signed notifications - this is aspirational, and is not really used.
 logging_tree>=1.9  # Used only by the showloggers management command
-lxml>=5.3.0  # lxml[html_clean] fails on some architectures
-lxml_html_clean>=0.4.1
+lxml>=5.3.0
 markdown>=3.3.6
 types-markdown>=3.3.6
 mock>=4.0.3  # Used only by tests, of course