fix: Correctly linkify all current TLDs (#3868)
* fix: Correctly linkify all current TLDs
* Pass a list to the build_*_re functions, not a string
* Need to sort TLDs by length to force longer ones to match first
* chore: silence incorrect mypy complaint.

Co-authored-by: Robert Sparks <rjsparks@nostrum.com>
Co-authored-by: Nicolas Giard <github@ngpixel.com>
This commit is contained in:
parent
955d9ac489
commit
9db1d48258
|
@ -2,7 +2,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
import bleach
|
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
@ -26,7 +25,7 @@ from ietf.doc.models import ConsensusDocEvent
|
||||||
from ietf.utils.html import sanitize_fragment
|
from ietf.utils.html import sanitize_fragment
|
||||||
from ietf.utils import log
|
from ietf.utils import log
|
||||||
from ietf.doc.utils import prettify_std_name
|
from ietf.doc.utils import prettify_std_name
|
||||||
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
|
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker
|
||||||
|
|
||||||
register = template.Library()
|
register = template.Library()
|
||||||
|
|
||||||
|
@ -428,14 +427,14 @@ def format_history_text(text, trunc_words=25):
|
||||||
full = mark_safe(text)
|
full = mark_safe(text)
|
||||||
if "</a>" not in full:
|
if "</a>" not in full:
|
||||||
full = urlize_ietf_docs(full)
|
full = urlize_ietf_docs(full)
|
||||||
full = bleach.linkify(full, parse_email=True)
|
full = bleach_linker.linkify(full)
|
||||||
|
|
||||||
return format_snippet(full, trunc_words)
|
return format_snippet(full, trunc_words)
|
||||||
|
|
||||||
@register.filter
|
@register.filter
|
||||||
def format_snippet(text, trunc_words=25):
|
def format_snippet(text, trunc_words=25):
|
||||||
# urlize if there aren't already links present
|
# urlize if there aren't already links present
|
||||||
text = bleach.linkify(text, parse_email=True)
|
text = bleach_linker.linkify(text)
|
||||||
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
|
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text)))))
|
||||||
snippet = truncatewords_html(full, trunc_words)
|
snippet = truncatewords_html(full, trunc_words)
|
||||||
if snippet != full:
|
if snippet != full:
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import bleach
|
|
||||||
|
|
||||||
from django import template
|
from django import template
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
@ -12,7 +11,7 @@ from django.utils.safestring import mark_safe
|
||||||
|
|
||||||
import debug # pyflakes:ignore
|
import debug # pyflakes:ignore
|
||||||
|
|
||||||
from ietf.utils.text import xslugify as _xslugify, texescape
|
from ietf.utils.text import xslugify as _xslugify, texescape, bleach_linker
|
||||||
|
|
||||||
register = template.Library()
|
register = template.Library()
|
||||||
|
|
||||||
|
@ -75,7 +74,7 @@ def texescape_filter(value):
|
||||||
@register.filter
|
@register.filter
|
||||||
@stringfilter
|
@stringfilter
|
||||||
def linkify(value):
|
def linkify(value):
|
||||||
text = mark_safe(bleach.linkify(value, parse_email=True))
|
text = mark_safe(bleach_linker.linkify(value))
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@register.filter
|
@register.filter
|
||||||
|
@ -92,4 +91,4 @@ def conference_url(value):
|
||||||
return value if re.match(conf_re, value) else None
|
return value if re.match(conf_re, value) else None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,11 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
|
import bleach
|
||||||
import email
|
import email
|
||||||
import re
|
import re
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import tlds
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from django.utils.functional import keep_lazy
|
from django.utils.functional import keep_lazy
|
||||||
|
@ -14,6 +16,14 @@ import debug # pyflakes:ignore
|
||||||
|
|
||||||
from .texescape import init as texescape_init, tex_escape_map
|
from .texescape import init as texescape_init, tex_escape_map
|
||||||
|
|
||||||
|
tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True)
|
||||||
|
bleach_linker = bleach.Linker(
|
||||||
|
url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted),
|
||||||
|
email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted), # type: ignore
|
||||||
|
parse_email=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@keep_lazy(str)
|
@keep_lazy(str)
|
||||||
def xslugify(value):
|
def xslugify(value):
|
||||||
"""
|
"""
|
||||||
|
@ -206,4 +216,4 @@ def parse_unicode(text):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
text = decoded_string
|
text = decoded_string
|
||||||
return text
|
return text
|
||||||
|
|
|
@ -68,6 +68,7 @@ scout-apm>=2.23.0
|
||||||
selenium>=3.141.0,<4.0
|
selenium>=3.141.0,<4.0
|
||||||
six>=1.10.0
|
six>=1.10.0
|
||||||
tblib>=1.3.0
|
tblib>=1.3.0
|
||||||
|
tlds>=2022042100 # Used to teach bleach about which TLDs currently exist
|
||||||
tqdm>=3.7.0
|
tqdm>=3.7.0
|
||||||
Unidecode>=0.4.18,<1.2.0
|
Unidecode>=0.4.18,<1.2.0
|
||||||
#wsgiref>=0.1.2
|
#wsgiref>=0.1.2
|
||||||
|
|
Loading…
Reference in a new issue