fix: Correctly linkify all current TLDs (#3868)
* fix: Correctly linkify all current TLDs
* Pass a list to the build_*_re functions, not a string
* Need to sort TLDs by length to force longer ones to match first
* chore: silence incorrect mypy complaint.

Co-authored-by: Robert Sparks <rjsparks@nostrum.com>
Co-authored-by: Nicolas Giard <github@ngpixel.com>
This commit is contained in:
parent
955d9ac489
commit
9db1d48258
|
@ -2,7 +2,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import bleach
|
||||
import datetime
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
@ -26,7 +25,7 @@ from ietf.doc.models import ConsensusDocEvent
|
|||
from ietf.utils.html import sanitize_fragment
|
||||
from ietf.utils import log
|
||||
from ietf.doc.utils import prettify_std_name
|
||||
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
|
||||
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker
|
||||
|
||||
register = template.Library()
|
||||
|
||||
|
@ -428,14 +427,14 @@ def format_history_text(text, trunc_words=25):
|
|||
full = mark_safe(text)
|
||||
if "</a>" not in full:
|
||||
full = urlize_ietf_docs(full)
|
||||
full = bleach.linkify(full, parse_email=True)
|
||||
full = bleach_linker.linkify(full)
|
||||
|
||||
return format_snippet(full, trunc_words)
|
||||
|
||||
@register.filter
|
||||
def format_snippet(text, trunc_words=25):
|
||||
# urlize if there aren't already links present
|
||||
text = bleach.linkify(text, parse_email=True)
|
||||
text = bleach_linker.linkify(text)
|
||||
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
|
||||
snippet = truncatewords_html(full, trunc_words)
|
||||
if snippet != full:
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
|
||||
import re
|
||||
import bleach
|
||||
|
||||
from django import template
|
||||
from django.conf import settings
|
||||
|
@ -12,7 +11,7 @@ from django.utils.safestring import mark_safe
|
|||
|
||||
import debug # pyflakes:ignore
|
||||
|
||||
from ietf.utils.text import xslugify as _xslugify, texescape
|
||||
from ietf.utils.text import xslugify as _xslugify, texescape, bleach_linker
|
||||
|
||||
register = template.Library()
|
||||
|
||||
|
@ -75,7 +74,7 @@ def texescape_filter(value):
|
|||
@register.filter
|
||||
@stringfilter
|
||||
def linkify(value):
|
||||
text = mark_safe(bleach.linkify(value, parse_email=True))
|
||||
text = mark_safe(bleach_linker.linkify(value))
|
||||
return text
|
||||
|
||||
@register.filter
|
||||
|
@ -92,4 +91,4 @@ def conference_url(value):
|
|||
return value if re.match(conf_re, value) else None
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2,9 +2,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import bleach
|
||||
import email
|
||||
import re
|
||||
import textwrap
|
||||
import tlds
|
||||
import unicodedata
|
||||
|
||||
from django.utils.functional import keep_lazy
|
||||
|
@ -14,6 +16,14 @@ import debug # pyflakes:ignore
|
|||
|
||||
from .texescape import init as texescape_init, tex_escape_map
|
||||
|
||||
tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True)
|
||||
bleach_linker = bleach.Linker(
|
||||
url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted),
|
||||
email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted), # type: ignore
|
||||
parse_email=True
|
||||
)
|
||||
|
||||
|
||||
@keep_lazy(str)
|
||||
def xslugify(value):
|
||||
"""
|
||||
|
@ -206,4 +216,4 @@ def parse_unicode(text):
|
|||
pass
|
||||
else:
|
||||
text = decoded_string
|
||||
return text
|
||||
return text
|
||||
|
|
|
@ -68,6 +68,7 @@ scout-apm>=2.23.0
|
|||
selenium>=3.141.0,<4.0
|
||||
six>=1.10.0
|
||||
tblib>=1.3.0
|
||||
tlds>=2022042100 # Used to teach bleach about which TLDs currently exist
|
||||
tqdm>=3.7.0
|
||||
Unidecode>=0.4.18,<1.2.0
|
||||
#wsgiref>=0.1.2
|
||||
|
|
Loading…
Reference in a new issue