diff --git a/bin/test-crawl b/bin/test-crawl
index 8ad475aea..20dc1f069 100755
--- a/bin/test-crawl
+++ b/bin/test-crawl
@@ -44,7 +44,6 @@ args = parser.parse_args()
# Import Django, call setup()
os.environ.setdefault("DJANGO_SETTINGS_MODULE", args.settings or "ietf.settings_testcrawl")
-os.environ["DJANGO_URLIZE_IETF_DOCS_PRODUCTION"] = "1"
import django
import django.test
@@ -175,7 +174,7 @@ def check_html_valid(url, response, args):
assert ret
for m in json.loads(ret)["messages"]:
if "lastLine" not in m:
- tag = m # just dump the raw JSON for now
+ tag = m["message"]
else:
tag = vnu_fmt_message(url, m, content.decode())
# disregard some HTML issues that are (usually) due to invalid
@@ -211,7 +210,7 @@ def skip_url(url):
r"^/wg/[a-z0-9-]+/deps/svg/",
# Skip other bad urls
r"^/dir/tsvdir/reviews/",
- r"^/ipr/\d{,3}/history/",
+ # No longer skipped, so /ipr/<id>/history/ pages are crawled again: r"^/ipr/\d{,3}/history/",
# Skip most html conversions, not worth the time
r"^/doc/html/draft-[0-9ac-z]",
r"^/doc/html/draft-b[0-9b-z]",
diff --git a/ietf/doc/templatetags/ietf_filters.py b/ietf/doc/templatetags/ietf_filters.py
index f162a6bbc..896d327e7 100644
--- a/ietf/doc/templatetags/ietf_filters.py
+++ b/ietf/doc/templatetags/ietf_filters.py
@@ -4,7 +4,6 @@
import datetime
import re
-import os
from urllib.parse import urljoin
from email.utils import parseaddr
@@ -19,7 +18,6 @@ from django.utils.encoding import force_text
from django.utils.encoding import force_str # pyflakes:ignore force_str is used in the doctests
from django.urls import reverse as urlreverse
from django.core.cache import cache
-from django.core.validators import URLValidator
from django.core.exceptions import ValidationError
import debug # pyflakes:ignore
@@ -29,7 +27,7 @@ from ietf.doc.models import ConsensusDocEvent
from ietf.utils.html import sanitize_fragment
from ietf.utils import log
from ietf.doc.utils import prettify_std_name
-from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker
+from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, bleach_linker, bleach_cleaner, validate_url
register = template.Library()
@@ -189,69 +187,82 @@ def rfceditor_info_url(rfcnum : str):
return urljoin(settings.RFC_EDITOR_INFO_BASE_URL, f'rfc{rfcnum}')
-def doc_exists(name):
- """Check whether a given document exists"""
+def doc_canonical_name(name):
+ """Return the canonical name of the document `name` refers to, or "" if no such document exists"""
+
def find_unique(n):
key = hash(n)
found = cache.get(key)
if not found:
exact = DocAlias.objects.filter(name=n).first()
found = exact.name if exact else "_"
- cache.set(key, found)
+ cache.set(key, found, timeout=60*60*24) # cache for one day
return None if found == "_" else found
- # all documents exist when tests are running
- if settings.SERVER_MODE == 'test':
- # unless we are running test-crawl, which would otherwise 404
- if "DJANGO_URLIZE_IETF_DOCS_PRODUCTION" not in os.environ:
- return True
-
# chop away extension
- extension_split = re.search(r"^(.+)\.(txt|ps|pdf)$", name)
+ extension_split = re.search(r"^(.+)\.(txt|ps|pdf|html)$", name)
if extension_split:
name = extension_split.group(1)
if find_unique(name):
- return True
+ return name
# check for embedded rev - this may be ambiguous, so don't
# chop it off if we don't find a match
- rev_split = re.search("^(.+)-([0-9]{2,})$", name)
+ rev_split = re.search(r"^(charter-.+)-(\d{2}-\d{2})$", name) or re.search(
+ r"^(.+)-(\d{2}|[1-9]\d{2,})$", name
+ )
if rev_split:
name = rev_split.group(1)
if find_unique(name):
- return True
+ return name
- return False
+ return ""
def link_charter_doc_match1(match):
- if not doc_exists(match[0]):
+ if not doc_canonical_name(match[0]):
return match[0]
return f'{match[0]}'
def link_charter_doc_match2(match):
- if not doc_exists(match[0]):
+ if not doc_canonical_name(match[0]):
return match[0]
return f'{match[0]}'
def link_non_charter_doc_match(match):
- if not doc_exists(match[0]):
+ name = match[0]
+ cname = doc_canonical_name(name)
+ if not cname:
return match[0]
- if len(match[3]) == 2 and match[3].isdigit():
- return f'{match[0]}'
+ if name == cname:
+ return f'{match[0]}'
+
+ # name differs from its canonical form, so it ends in a revision number and/or a file extension
+ rev_split = re.search(r"^(" + re.escape(cname) + r")-(\d{2,})", name)
+ if rev_split:
+ name = rev_split.group(1)
else:
- return f'{match[0]}'
+ return f'{match[0]}'
+
+ cname = doc_canonical_name(name)
+ if not cname:
+ return match[0]
+ if name == cname:
+ return f'{match[0]}'
+
+ # if we get here, we can't linkify
+ return match[0]
def link_other_doc_match(match):
- # there may be whitespace in the match
- doc = re.sub(r"\s+", "", match[0])
- if not doc_exists(doc):
+ doc = match[2].strip().lower()
+ rev = match[3]
+ if not doc_canonical_name(doc + rev):
return match[0]
- return f'{match[1]}'
+ return f'{match[1]}'
@register.filter(name="urlize_ietf_docs", is_safe=True, needs_autoescape=True)
@@ -264,8 +275,8 @@ def urlize_ietf_docs(string, autoescape=None):
string = escape(string)
else:
string = mark_safe(string)
- exp1 = r"\b(?" not in full:
- full = urlize_ietf_docs(full)
- full = bleach_linker.linkify(full)
+ full = mark_safe(bleach_cleaner.clean(text))
+ full = bleach_linker.linkify(urlize_ietf_docs(full))
return format_snippet(full, trunc_words)
@@ -840,7 +851,6 @@ def is_valid_url(url):
"""
Check if the given URL is syntactically valid
"""
- validate_url = URLValidator()
try:
validate_url(url)
except ValidationError:
diff --git a/ietf/doc/templatetags/tests_ietf_filters.py b/ietf/doc/templatetags/tests_ietf_filters.py
index 81fccc01d..9c9687015 100644
--- a/ietf/doc/templatetags/tests_ietf_filters.py
+++ b/ietf/doc/templatetags/tests_ietf_filters.py
@@ -1,56 +1,96 @@
# Copyright The IETF Trust 2022, All Rights Reserved
-from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs
+from django.conf import settings
+
+from ietf.doc.factories import (
+ WgDraftFactory,
+ IndividualDraftFactory,
+ CharterFactory,
+ NewRevisionDocEventFactory,
+)
+from ietf.doc.models import State, DocEvent, DocAlias
+from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs, is_valid_url
+from ietf.person.models import Person
from ietf.utils.test_utils import TestCase
-import debug # pyflakes: ignore
+import debug # pyflakes: ignore
+
# TODO: most other filters need test cases, too
class IetfFiltersTests(TestCase):
+ def test_is_valid_url(self):
+ cases = [(settings.IDTRACKER_BASE_URL, True), ("not valid", False)]
+ for url, result in cases:
+ self.assertEqual(is_valid_url(url), result)
+
def test_urlize_ietf_docs(self):
+ wg_id = WgDraftFactory()
+ wg_id.set_state(State.objects.get(type="draft", slug="rfc"))
+ wg_id.std_level_id = "bcp"
+ wg_id.save_with_history(
+ [
+ DocEvent.objects.create(
+ doc=wg_id,
+ rev=wg_id.rev,
+ type="published_rfc",
+ by=Person.objects.get(name="(System)"),
+ )
+ ]
+ )
+ DocAlias.objects.create(name="rfc123456").docs.add(wg_id)
+ DocAlias.objects.create(name="bcp123456").docs.add(wg_id)
+ DocAlias.objects.create(name="std123456").docs.add(wg_id)
+ DocAlias.objects.create(name="fyi123456").docs.add(wg_id)
+
+ id = IndividualDraftFactory(name="draft-me-rfc123456bis")
+ id_num = IndividualDraftFactory(name="draft-rosen-rfcefdp-update-2026")
+ id_num_two = IndividualDraftFactory(name="draft-spaghetti-idr-deprecate-8-9-10")
+ id_plus = IndividualDraftFactory(name="draft-odell-8+8")
+ id_plus_end = IndividualDraftFactory(name="draft-durand-gse+")
+ id_dot = IndividualDraftFactory(name="draft-ietf-pem-ansix9.17")
+ charter = CharterFactory()
+ e = NewRevisionDocEventFactory(doc=charter, rev="01")
+ charter.rev = e.rev
+ charter.save_with_history([e])
+ e = NewRevisionDocEventFactory(doc=charter, rev="01-00")
+ charter.rev = e.rev
+ charter.save_with_history([e])
+
cases = [
("no change", "no change"),
- ("bcp1", 'bcp1'),
- ("Std 003", 'Std 003'),
+ ("bCp123456", 'bCp123456'),
+ ("Std 00123456", 'Std 00123456'),
(
- "FYI02 changes Std 003",
- 'FYI02 changes Std 003',
+ "FyI 0123456 changes std 00123456",
+ 'FyI 0123456 changes std 00123456',
),
- ("rfc2119", 'rfc2119'),
- ("Rfc 02119", 'Rfc 02119'),
- ("draft-abc-123", 'draft-abc-123'),
+ ("rfc123456", 'rfc123456'),
+ ("Rfc 0123456", 'Rfc 0123456'),
+ (wg_id.name, f'{wg_id.name}'),
(
- "draft-ietf-rfc9999-bis-01.txt",
- 'draft-ietf-rfc9999-bis-01.txt',
+ f"{id.name}-{id.rev}.txt",
+ f'{id.name}-{id.rev}.txt',
),
(
- "foo RFC 9999 draft-ietf-rfc9999-bis-01 bar",
- 'foo RFC 9999 draft-ietf-rfc9999-bis-01 bar',
+ f"foo RFC 123456 {id.name}-{id.rev} bar",
+ f'foo RFC 123456 {id.name}-{id.rev} bar',
),
(
- "New version available: draft-bryan-sipping-p2p-03.txt",
- 'New version available: draft-bryan-sipping-p2p-03.txt',
+ f"New version available: {id.name}-{id.rev}.txt",
+ f'New version available: {id.name}-{id.rev}.txt',
),
(
- "New version available: charter-ietf-6man-04.txt",
- 'New version available: charter-ietf-6man-04.txt'
+ f"New version available: {charter.name}-{charter.rev}.txt",
+ f'New version available: {charter.name}-{charter.rev}.txt',
),
(
- "New version available: charter-ietf-6man-03-07.txt",
- 'New version available: charter-ietf-6man-03-07.txt'
+ f"New version available: {charter.name}-01-00.txt",
+ f'New version available: {charter.name}-01-00.txt',
),
(
"repository https://github.com/tlswg/draft-ietf-tls-ticketrequest",
- 'repository https://github.com/tlswg/draft-ietf-tls-ticketrequest'
- ),
- (
- "draft-madanapalli-nd-over-802.16-problems",
- 'draft-madanapalli-nd-over-802.16-problems'
- ),
- (
- "draft-madanapalli-nd-over-802.16-problems-02.txt",
- 'draft-madanapalli-nd-over-802.16-problems-02.txt'
+ "repository https://github.com/tlswg/draft-ietf-tls-ticketrequest",
),
(
'draft-ietf-some-names@ietf.org',
@@ -58,19 +98,50 @@ class IetfFiltersTests(TestCase):
),
(
"http://ieee802.org/1/files/public/docs2015/cn-thaler-Qcn-draft-PAR.pdf",
- "http://ieee802.org/1/files/public/docs2015/cn-thaler-Qcn-draft-PAR.pdf"
- )
+ "http://ieee802.org/1/files/public/docs2015/cn-thaler-Qcn-draft-PAR.pdf",
+ ),
+ (
+ f"{id_num.name}.pdf",
+ f'{id_num.name}.pdf',
+ ),
+ (
+ f"{id_num.name}-{id_num.rev}.txt",
+ f'{id_num.name}-{id_num.rev}.txt',
+ ),
+ (
+ f"{id_num_two.name}.pdf",
+ f'{id_num_two.name}.pdf',
+ ),
+ (
+ f"{id_num_two.name}-{id_num_two.rev}.txt",
+ f'{id_num_two.name}-{id_num_two.rev}.txt',
+ ),
+ (
+ f"{id_plus.name}",
+ f'{id_plus.name}',
+ ),
+ (
+ f"{id_plus.name}-{id_plus.rev}.txt",
+ f'{id_plus.name}-{id_plus.rev}.txt',
+ ),
+ (
+ f"{id_plus_end.name}",
+ f'{id_plus_end.name}',
+ ),
+ (
+ f"{id_plus_end.name}-{id_plus_end.rev}.txt",
+ f'{id_plus_end.name}-{id_plus_end.rev}.txt',
+ ),
+ (
+ f"{id_dot.name}",
+ f'{id_dot.name}',
+ ),
+ (
+ f"{id_dot.name}-{id_dot.rev}.txt",
+ f'{id_dot.name}-{id_dot.rev}.txt',
+ ),
]
- # Some edge cases scraped from existing old draft names
- for name in [
- # "draft-odell-8+8", # This fails since + matches the right side of \b
- # "draft-durand-gse+", # same failure
- "draft-kim-xcast+-few-2-few",
- #"draft-ietf-pem-ansix9.17", # Fails because of not being greedy with . before txt
- ]:
- cases.append((name,f'{name}'))
-
for input, output in cases:
#debug.show("(urlize_ietf_docs(input),output)")
self.assertEqual(urlize_ietf_docs(input), output)
diff --git a/ietf/static/js/ietf.js b/ietf/static/js/ietf.js
index 503a93b97..f8cf82d9b 100644
--- a/ietf/static/js/ietf.js
+++ b/ietf/static/js/ietf.js
@@ -199,12 +199,15 @@ $(function () {
.find("h1:visible, h2:visible, h3:visible, h4:visible, h5:visible, h6:visible, .nav-heading:visible")
.not(".navskip")
.each(function () {
- // Some headings have complex HTML in them - only use first part in that case.
- const text = $(this)
+ // Some headings have line breaks in them - only use first line in that case.
+ const frag = $(this)
.html()
- .split("<")
- .shift()
- .trim();
+ .split("
$(x)
+ .text())
+ .join(" ");
if (text === undefined || text === "") {
// Nothing to do for empty headings.
diff --git a/ietf/templates/doc/document_ballot_content.html b/ietf/templates/doc/document_ballot_content.html
index fb612f371..54f73d8d4 100644
--- a/ietf/templates/doc/document_ballot_content.html
+++ b/ietf/templates/doc/document_ballot_content.html
@@ -105,7 +105,7 @@
{{ section.text }}{% endif %} + {% if num == "1.4" %}
{{ section.text|urlize_ietf_docs|linkify }}{% endif %} {% if num >= "2" and num < "5" %} {% if num == "2" %}
diff --git a/ietf/templates/ipr/details_history.html b/ietf/templates/ipr/details_history.html index aa1ea8f83..b8fecf780 100644 --- a/ietf/templates/ipr/details_history.html +++ b/ietf/templates/ipr/details_history.html @@ -49,7 +49,7 @@ {% endif %}
, which is illegal. Need to rework the snippeting. #} diff --git a/ietf/templates/ipr/details_view.html b/ietf/templates/ipr/details_view.html index c98329b9f..bdda164a6 100644 --- a/ietf/templates/ipr/details_view.html +++ b/ietf/templates/ipr/details_view.html @@ -5,7 +5,7 @@ {% block title %}IPR Details - {{ ipr.title }}{% endblock %} {% block pagehead %} + content="IPR disclosure{% if ipr.ipr_id %} #{{ ipr.ipr_id }}{% endif %}: {{ ipr.title }} ({{ ipr.time|date:'Y' }})"> {% endblock %} {% block content %} {% origin %} @@ -170,7 +170,13 @@ {% endif %}- {% if prev %} @@ -207,7 +213,9 @@{{ prev.holder_legal_name }} {% endif %} + {% if prev %} ++ {% if prev.holder_legal_name %} + {{ prev.holder_legal_name }} + {% endif %} + + {% endif %}Holder legal name {{ ipr.holder_legal_name }} {% if prev %} - {{ prev.holder_contact_name }} + {% if prev.holder_contact_name %} + {{ prev.holder_contact_name }} + {% endif %} {% endif %}@@ -220,7 +228,9 @@ {% if prev %} - {{ prev.holder_contact_email|linkify }} + {% if prev.holder_contact_email %} + {{ prev.holder_contact_email|linkify }} + {% endif %} {% endif %}@@ -233,14 +243,16 @@ {% if prev %} @@ -282,7 +294,9 @@- {{ prev.holder_contact_info|linebreaks }} + {% if prev.holder_contact_info %} + {{ prev.holder_contact_info|linkify|linebreaks }} + {% endif %} {% endif %}Holder contact info - {% if ipr.holder_contact_info %}{{ ipr.holder_contact_info|linebreaks }}{% endif %} + {% if ipr.holder_contact_info %}{{ ipr.holder_contact_info|linkify|linebreaks }}{% endif %} {% if prev %} - {{ prev.ietfer_name }} + {% if prev.ietfer_name %} + {{ prev.ietfer_name }} + {% endif %} {% endif %}@@ -295,7 +309,9 @@ {% if prev %} - {{ prev.ietfer_contact_email|linkify }} + {% if prev.ietfer_contact_email %} + {{ prev.ietfer_contact_email|linkify }} + {% endif %} {% endif %}@@ -308,14 +324,16 @@ {% if prev %} @@ -474,7 +492,9 @@- {{ prev.ietfer_contact_info|linebreaks }} + {% if prev.ietfer_contact_info %} + {{ 
prev.ietfer_contact_info|linkify|linebreaks }} + {% endif %} {% endif %}Other info - {{ ipr.ietfer_contact_info|linebreaks }} + {{ ipr.ietfer_contact_info|linkify|linebreaks }} {% if prev %} @@ -523,7 +543,9 @@- {{ prev.patent_info|linebreaks }} + {% if prev.patent_info %} + {{ prev.patent_info|urlize_ietf_docs|linkify|linebreaks }} + {% endif %} {% endif %}@@ -483,7 +503,7 @@ or Application/File number(s) - {{ ipr.patent_info|linebreaks }} + {{ ipr.patent_info|urlize_ietf_docs|linkify|linebreaks }} {% if prev %} - {{ prev.has_patent_pending|yesno:"Yes,No" }} + {% if prev.has_patent_pending %} + {{ prev.has_patent_pending|yesno:"Yes,No" }} + {% endif %} {% endif %}@@ -592,10 +614,12 @@ {% if prev %} - {% if prev.licensing.slug == "provided-later" %} - {{ prev.licensing.desc|slice:"2:"|slice:":117" }}) - {% else %} - {{ prev.licensing.desc|slice:"2:" }} + {% if prev.licensing.slug %} + {% if prev.licensing.slug == "provided-later" %} + {{ prev.licensing.desc|slice:"2:"|slice:":117" }}) + {% else %} + {{ prev.licensing.desc|slice:"2:" }} + {% endif %} {% endif %} {% endif %} @@ -613,7 +637,9 @@{% if prev %} - {{ prev.licensing_comments|default:"(No information submitted)"|linebreaks }} + {% if prev.licensing_comments %} + {{ prev.licensing_comments|default:"(No information submitted)"|urlize_ietf_docs|linkify|linebreaks }} + {% endif %} {% endif %}@@ -666,14 +692,16 @@ {% if prev %} @@ -714,7 +742,9 @@- {{ prev.statement|linebreaks }} + {% if prev.statement %} + {{ prev.statement|urlize_ietf_docs|linkify|linebreaks }} + {% endif %} {% endif %}Statement - {{ ipr.statement|linebreaks }} + {{ ipr.statement|urlize_ietf_docs|linkify|linebreaks }} {% if prev %} - {{ prev.submitter_name }} + {% if prev.submitter_name %} + {{ prev.submitter_name }} + {% endif %} {% endif %}@@ -727,7 +757,9 @@ {% if prev %} - {{ prev.submitter_email|linkify }} + {% if prev.submitter_email %} + {{ prev.submitter_email|linkify }} + {% endif %} {% endif %}@@ -775,7 +807,9 @@ {% if prev %} - {{ 
prev.notes|urlize_ietf_docs|linkify|linebreaks }} + {% if prev.notes %} + {{ prev.notes|urlize_ietf_docs|linkify|linebreaks }} + {% endif %} {% endif %}@@ -791,4 +825,4 @@ Only those sections of the relevant entry form where the submitter provided information are displayed above.
-{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/ietf/templates/meeting/agenda.html b/ietf/templates/meeting/agenda.html index d4c9566b2..ea18c9579 100644 --- a/ietf/templates/meeting/agenda.html +++ b/ietf/templates/meeting/agenda.html @@ -140,13 +140,13 @@ {% if item.session_keyword %} @@ -268,7 +268,9 @@ {% endif %}- {{ item.session.historic_group.historic_parent.acronym }}+ {% if item.session.historic_group.historic_parent.acronym %} +{{ item.session.historic_group.historic_parent.acronym }}+ {% endif %}{% if item.session.historic_group %} diff --git a/ietf/templates/person/person_link.html b/ietf/templates/person/person_link.html index 7264bead9..a8e2da9d7 100644 --- a/ietf/templates/person/person_link.html +++ b/ietf/templates/person/person_link.html @@ -1,3 +1,6 @@ +{% if email and email == "system@datatracker.ietf.org" or name and name == "(System)" %} + (System) +{% else %} {% if email or name %} - {% endif %}{% else %}(None){% endif %} \ No newline at end of file + {% endif %}{% else %}(None){% endif %} +{% endif %} diff --git a/ietf/utils/test_runner.py b/ietf/utils/test_runner.py index eb380df6f..200ce047c 100644 --- a/ietf/utils/test_runner.py +++ b/ietf/utils/test_runner.py @@ -126,6 +126,11 @@ http = urllib3.PoolManager(retries=urllib3.Retry(99, redirect=False)) def vnu_validate(html, content_type="text/html", port=8888): "Pass the HTML to the vnu server running on the indicated port" + if "** No value found for " in html.decode(): + return json.dumps( + {"messages": [{"message": '"** No value found for" in source'}]} + ) + gzippeddata = gzip.compress(html) try: req = http.request( @@ -183,8 +188,7 @@ def vnu_filter_message(msg, filter_db_issues, filter_test_issues): if filter_test_issues and re.search( r"""Ceci\ n'est\ pas\ une\ URL| ^The\ '\w+'\ attribute\ on\ the\ '\w+'\ element\ is\ obsolete| - ^Section\ lacks\ heading| - is\ not\ in\ Unicode\ Normalization\ Form\ C""", + ^Section\ lacks\ heading""", msg["message"], 
flags=re.VERBOSE, ): @@ -193,7 +197,8 @@ def vnu_filter_message(msg, filter_db_issues, filter_test_issues): return re.search( r"""document\ is\ not\ mappable\ to\ XML\ 1| ^Attribute\ 'required'\ not\ allowed\ on\ element\ 'div'| - ^The\ 'type'\ attribute\ is\ unnecessary\ for\ JavaScript""", + ^The\ 'type'\ attribute\ is\ unnecessary\ for\ JavaScript| + is\ not\ in\ Unicode\ Normalization\ Form\ C""", msg["message"], flags=re.VERBOSE, ) diff --git a/ietf/utils/text.py b/ietf/utils/text.py index 1ef35cc31..9596d192d 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -2,13 +2,16 @@ # -*- coding: utf-8 -*- -import bleach +import bleach # type: ignore +import copy import email import re import textwrap import tlds import unicodedata +from django.core.validators import URLValidator +from django.core.exceptions import ValidationError from django.utils.functional import keep_lazy from django.utils.safestring import mark_safe @@ -17,14 +20,32 @@ import debug # pyflakes:ignore from .texescape import init as texescape_init, tex_escape_map tlds_sorted = sorted(tlds.tld_set, key=len, reverse=True) -protocols = bleach.sanitizer.ALLOWED_PROTOCOLS +protocols = copy.copy(bleach.sanitizer.ALLOWED_PROTOCOLS) protocols.append("ftp") # we still have some ftp links +validate_url = URLValidator() + + +def check_url_validity(attrs, new=False): + url = attrs[(None, 'href')] + try: + if url.startswith("http"): + validate_url(url) + except ValidationError: + return None + return attrs + + bleach_linker = bleach.Linker( + callbacks=[check_url_validity], url_re=bleach.linkifier.build_url_re(tlds=tlds_sorted, protocols=protocols), email_re=bleach.linkifier.build_email_re(tlds=tlds_sorted), # type: ignore parse_email=True ) +tags = copy.copy(bleach.sanitizer.ALLOWED_TAGS) +tags.remove("a") +bleach_cleaner = bleach.sanitizer.Cleaner(tags=tags, protocols=protocols) + @keep_lazy(str) def xslugify(value):