fix: Correctly linkify all current TLDs (#3868)

* fix: Correctly linkify all current TLDs

* Pass a list to the build_*_re functions, not a string

* Sort the TLDs by length, longest first, so that longer TLDs are matched before shorter ones

* chore: silence incorrect mypy complaint.

Co-authored-by: Robert Sparks <rjsparks@nostrum.com>
Co-authored-by: Nicolas Giard <github@ngpixel.com>
This commit is contained in:
Lars Eggert 2022-04-26 20:25:18 +03:00 committed by GitHub
parent 955d9ac489
commit 9db1d48258
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 18 additions and 9 deletions

View file

@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
import bleach
import datetime
import re
from urllib.parse import urljoin
@ -26,7 +25,7 @@ from ietf.doc.models import ConsensusDocEvent
from ietf.utils.html import sanitize_fragment
from ietf.utils import log
from ietf.doc.utils import prettify_std_name
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker
register = template.Library()
@ -428,14 +427,14 @@ def format_history_text(text, trunc_words=25):
full = mark_safe(text)
if "</a>" not in full:
full = urlize_ietf_docs(full)
full = bleach.linkify(full, parse_email=True)
full = bleach_linker.linkify(full)
return format_snippet(full, trunc_words)
@register.filter
def format_snippet(text, trunc_words=25):
# urlize if there aren't already links present
text = bleach.linkify(text, parse_email=True)
text = bleach_linker.linkify(text)
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
snippet = truncatewords_html(full, trunc_words)
if snippet != full:

View file

@ -3,7 +3,6 @@
import re
import bleach
from django import template
from django.conf import settings
@ -12,7 +11,7 @@ from django.utils.safestring import mark_safe
import debug # pyflakes:ignore
from ietf.utils.text import xslugify as _xslugify, texescape
from ietf.utils.text import xslugify as _xslugify, texescape, bleach_linker
register = template.Library()
@ -75,7 +74,7 @@ def texescape_filter(value):
@register.filter
@stringfilter
def linkify(value):
text = mark_safe(bleach.linkify(value, parse_email=True))
text = mark_safe(bleach_linker.linkify(value))
return text
@register.filter
@ -92,4 +91,4 @@ def conference_url(value):
return value if re.match(conf_re, value) else None

View file

@ -2,9 +2,11 @@
# -*- coding: utf-8 -*-
import bleach
import email
import re
import textwrap
import tlds
import unicodedata
from django.utils.functional import keep_lazy
@ -14,6 +16,14 @@ import debug # pyflakes:ignore
from .texescape import init as texescape_init, tex_escape_map
# All TLDs known to the `tlds` package, ordered longest-first. The ordering
# matters: the list is handed to bleach's regex builders below, and longer
# TLDs need to be tried before shorter ones so that they match first.
tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True)
# Module-level linkifier shared by the template filters, used in place of
# calling bleach.linkify() directly. Both the URL and the email regexes are
# built from the sorted TLD list above (passed as a list, not a string).
# parse_email=True carries over the option the previous bleach.linkify()
# calls were given. The `type: ignore` on the email_re line silences a mypy
# complaint that is considered incorrect.
bleach_linker = bleach.Linker(
url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted),
email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted), # type: ignore
parse_email=True
)
@keep_lazy(str)
def xslugify(value):
"""
@ -206,4 +216,4 @@ def parse_unicode(text):
pass
else:
text = decoded_string
return text
return text

View file

@ -68,6 +68,7 @@ scout-apm>=2.23.0
selenium>=3.141.0,<4.0
six>=1.10.0
tblib>=1.3.0
tlds>=2022042100 # Used to teach bleach about which TLDs currently exist
tqdm>=3.7.0
Unidecode>=0.4.18,<1.2.0
#wsgiref>=0.1.2