From 9db1d4825896f9fa7707355f11d2ed39bbefa014 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 26 Apr 2022 20:25:18 +0300 Subject: [PATCH] fix: Correctly linkify all current TLDs (#3868) * fix: Correctly linkify all current TLDs * Pass a list to the build_*_re functions, not a string * Need to sort TLDs by length to force longer ones to match first * chore: silence incorrect mypy complaint. Co-authored-by: Robert Sparks Co-authored-by: Nicolas Giard --- ietf/doc/templatetags/ietf_filters.py | 7 +++---- ietf/utils/templatetags/textfilters.py | 7 +++---- ietf/utils/text.py | 12 +++++++++++- requirements.txt | 1 + 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ietf/doc/templatetags/ietf_filters.py b/ietf/doc/templatetags/ietf_filters.py index bc2fe5e33..76f73eaba 100644 --- a/ietf/doc/templatetags/ietf_filters.py +++ b/ietf/doc/templatetags/ietf_filters.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- -import bleach import datetime import re from urllib.parse import urljoin @@ -26,7 +25,7 @@ from ietf.doc.models import ConsensusDocEvent from ietf.utils.html import sanitize_fragment from ietf.utils import log from ietf.doc.utils import prettify_std_name -from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped +from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker register = template.Library() @@ -428,14 +427,14 @@ def format_history_text(text, trunc_words=25): full = mark_safe(text) if "" not in full: full = urlize_ietf_docs(full) - full = bleach.linkify(full, parse_email=True) + full = bleach_linker.linkify(full) return format_snippet(full, trunc_words) @register.filter def format_snippet(text, trunc_words=25): # urlize if there aren't already links present - text = bleach.linkify(text, parse_email=True) + text = bleach_linker.linkify(text) full = keep_spacing(collapsebr(linebreaksbr(mark_safe(sanitize_fragment(text))))) snippet = truncatewords_html(full, trunc_words) if snippet != full: diff --git a/ietf/utils/templatetags/textfilters.py b/ietf/utils/templatetags/textfilters.py index 1180bf483..70b94cf67 100644 --- a/ietf/utils/templatetags/textfilters.py +++ b/ietf/utils/templatetags/textfilters.py @@ -3,7 +3,6 @@ import re -import bleach from django import template from django.conf import settings @@ -12,7 +11,7 @@ from django.utils.safestring import mark_safe import debug # pyflakes:ignore -from ietf.utils.text import xslugify as _xslugify, texescape +from ietf.utils.text import xslugify as _xslugify, texescape, bleach_linker register = template.Library() @@ -75,7 +74,7 @@ def texescape_filter(value): @register.filter @stringfilter def linkify(value): - text = mark_safe(bleach.linkify(value, parse_email=True)) + text = mark_safe(bleach_linker.linkify(value)) return text @register.filter @@ -92,4 +91,4 @@ def conference_url(value): return value if re.match(conf_re, value) else None - \ No newline at end of file + diff --git a/ietf/utils/text.py b/ietf/utils/text.py index b7c7f69ef..5239419e8 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -2,9 +2,11 @@ # -*- coding: utf-8 -*- +import bleach import email import re import textwrap +import tlds import unicodedata from django.utils.functional import keep_lazy @@ -14,6 +16,14 @@ import debug # pyflakes:ignore from .texescape import init as texescape_init, tex_escape_map +tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True) +bleach_linker = bleach.Linker( + url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted), + email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted), # type: ignore + parse_email=True +) + + @keep_lazy(str) def xslugify(value): """ @@ -206,4 +216,4 @@ def parse_unicode(text): pass else: text = decoded_string - return text \ No newline at end of file + return text diff --git a/requirements.txt b/requirements.txt index e274fddab..68c1e9cf1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,6 +68,7 @@ scout-apm>=2.23.0 selenium>=3.141.0,<4.0 six>=1.10.0 tblib>=1.3.0 +tlds>=2022042100 # Used to teach bleach about which TLDs currently exist tqdm>=3.7.0 Unidecode>=0.4.18,<1.2.0 #wsgiref>=0.1.2