fix: improve ietf doc string urlization. Fixes #3828. (#3831)

* fix: improve ietf doc string urlization. Fixes #3828.

* fix: don't urlize names immediately after - or /. Avoids urlizing in the middle of URLs.

* fix: cover some additional corners when urlifying document names.

* chore: remove unnecessary code complication
This commit is contained in:
Robert Sparks 2022-04-15 15:43:32 -05:00 committed by GitHub
parent 23fdf61ed4
commit eef29cf35d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 73 additions and 3 deletions

View file

@ -185,6 +185,12 @@ def rfceditor_info_url(rfcnum : str):
"""Link to the RFC editor info page for an RFC"""
return urljoin(settings.RFC_EDITOR_INFO_BASE_URL, f'rfc{rfcnum}')
def link_non_charter_doc_match(match):
    """Return the ``<a>`` markup for one matched non-charter document name.

    ``match`` is indexed like a regex match object: item 0 is the complete
    matched text (reused verbatim as the link text), item 2 is the leading
    part of the name and item 3 is its final component.

    If the final component is exactly two digits it is given its own path
    segment, ``/doc/<item 2 minus its last character>/<item 3>/``.
    Otherwise items 2 and 3 are simply concatenated into one segment,
    ``/doc/<item 2><item 3>/``.
    """
    whole, stem, tail = match[0], match[2], match[3]
    two_digit_tail = len(tail) == 2 and tail.isdigit()
    if not two_digit_tail:
        return f'<a href="/doc/{stem}{tail}/">{whole}</a>'
    # Drop the stem's last character (presumably the "-" separating the name
    # from the two-digit suffix) so it does not end up in the URL path.
    return f'<a href="/doc/{stem[:-1]}/{tail}/">{whole}</a>'
@register.filter(name='urlize_ietf_docs', is_safe=True, needs_autoescape=True)
def urlize_ietf_docs(string, autoescape=None):
"""
@ -192,8 +198,31 @@ def urlize_ietf_docs(string, autoescape=None):
"""
if autoescape and not isinstance(string, SafeData):
string = escape(string)
exp1 = r"\b(charter-(?:[\d\w\.+]+-)*)(\d\d-\d\d)(\.txt)?\b"
exp2 = r"\b(charter-(?:[\d\w\.+]+-)*)(\d\d)(\.txt)?\b"
if re.search(exp1, string):
string = re.sub(
exp1,
lambda x: f'<a href="/doc/{x[1][:-1]}/{x[2]}/">{x[0]}</a>',
string,
flags=re.IGNORECASE | re.ASCII,
)
elif re.search(exp2, string):
string = re.sub(
exp2,
lambda x: f'<a href="/doc/{x[1][:-1]}/{x[2]}/">{x[0]}</a>',
string,
flags=re.IGNORECASE | re.ASCII,
)
string = re.sub(
r"\b((RFC|BCP|STD|FYI|(?:draft-|bofreq-|conflict-review-|status-change-|charter-)[-\d\w.+]+)\s*0*(\d+))\b",
r"\b(?<![/-])(((?:draft-|bofreq-|conflict-review-|status-change-)(?:[\d\w\.+]+-)*)([\d\w\.+]+?)(\.txt)?)\b",
link_non_charter_doc_match,
string,
flags=re.IGNORECASE | re.ASCII,
)
string = re.sub(
# r"\b((RFC|BCP|STD|FYI|(?:draft-|bofreq-|conflict-review-|status-change-|charter-)[-\d\w.+]+)\s*0*(\d+))\b",
r"\b(?<!-)((RFC|BCP|STD|FYI)\s*0*(\d+))\b",
lambda x: f'<a href="/doc/{x[2].strip().lower()}{x[3]}/">{x[1]}</a>',
string,
flags=re.IGNORECASE | re.ASCII,

View file

@ -1,6 +1,9 @@
# Copyright The IETF Trust 2022, All Rights Reserved
from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs
from ietf.utils.test_utils import TestCase
import debug # pyflakes: ignore
# TODO: most other filters need test cases, too
@ -18,10 +21,48 @@ class IetfFiltersTests(TestCase):
("Rfc 02119", '<a href="/doc/rfc2119/">Rfc 02119</a>'),
("draft-abc-123", '<a href="/doc/draft-abc-123/">draft-abc-123</a>'),
(
"draft-ietf-rfc9999-bis-01",
'<a href="/doc/draft-ietf-rfc9999-bis-01/">draft-ietf-rfc9999-bis-01</a>',
"draft-ietf-rfc9999-bis-01.txt",
'<a href="/doc/draft-ietf-rfc9999-bis/01/">draft-ietf-rfc9999-bis-01.txt</a>',
),
(
"foo RFC 9999 draft-ietf-rfc9999-bis-01 bar",
'foo <a href="/doc/rfc9999/">RFC 9999</a> <a href="/doc/draft-ietf-rfc9999-bis/01/">draft-ietf-rfc9999-bis-01</a> bar',
),
(
"New version available: <b>draft-bryan-sipping-p2p-03.txt</b>",
'New version available: <b><a href="/doc/draft-bryan-sipping-p2p/03/">draft-bryan-sipping-p2p-03.txt</a></b>',
),
(
"New version available: <b>charter-ietf-6man-04.txt</b>",
'New version available: <b><a href="/doc/charter-ietf-6man/04/">charter-ietf-6man-04.txt</a></b>'
),
(
"New version available: <b>charter-ietf-6man-03-07.txt</b>",
'New version available: <b><a href="/doc/charter-ietf-6man/03-07/">charter-ietf-6man-03-07.txt</a></b>'
),
(
"repository https://github.com/tlswg/draft-ietf-tls-ticketrequest",
'repository https://github.com/tlswg/draft-ietf-tls-ticketrequest'
),
(
"draft-madanapalli-nd-over-802.16-problems",
'<a href="/doc/draft-madanapalli-nd-over-802.16-problems/">draft-madanapalli-nd-over-802.16-problems</a>'
),
(
"draft-madanapalli-nd-over-802.16-problems-02.txt",
'<a href="/doc/draft-madanapalli-nd-over-802.16-problems/02/">draft-madanapalli-nd-over-802.16-problems-02.txt</a>'
)
]
# Some edge cases scraped from existing old draft names
for name in [
# "draft-odell-8+8", # This fails since + matches the right side of \b
# "draft-durand-gse+", # same failure
"draft-kim-xcast+-few-2-few",
#"draft-ietf-pem-ansix9.17", # Fails because of not being greedy with . before txt
]:
cases.append((name,f'<a href="/doc/{name}/">{name}</a>'))
for input, output in cases:
#debug.show("(urlize_ietf_docs(input),output)")
self.assertEqual(urlize_ietf_docs(input), output)