From 2b70735fd2a08dbf04e460fcd342416cd51344a6 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 17 Aug 2020 23:50:22 +0000 Subject: [PATCH] Improved the classification of some github related external resources. Tightened validation of new resource values. Commit ready to merge. - Legacy-Id: 18378 --- ietf/doc/migrations/0036_orgs_vs_repos.py | 34 +++++++++++++++++++++ ietf/doc/tests_draft.py | 4 ++- ietf/group/migrations/0036_orgs_vs_repos.py | 34 +++++++++++++++++++++ ietf/utils/validators.py | 7 +++-- 4 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 ietf/doc/migrations/0036_orgs_vs_repos.py create mode 100644 ietf/group/migrations/0036_orgs_vs_repos.py diff --git a/ietf/doc/migrations/0036_orgs_vs_repos.py b/ietf/doc/migrations/0036_orgs_vs_repos.py new file mode 100644 index 000000000..37a0db0d2 --- /dev/null +++ b/ietf/doc/migrations/0036_orgs_vs_repos.py @@ -0,0 +1,34 @@ +# Copyright The IETF Trust 2020, All Rights Reserved + +from urllib.parse import urlparse +from django.db import migrations + +def categorize(url): + # This will categorize a few urls pointing into files in a repo as a repo, but that's better than calling them an org + element_count = len(urlparse(url).path.strip('/').split('/')) + if element_count < 1: + print("Bad github resource:",url) + return 'github_org' if element_count == 1 else 'github_repo' + +def forward(apps, schema_editor): + DocExtResource = apps.get_model('doc','DocExtResource') + + for resource in DocExtResource.objects.filter(name__slug__in=('github_org','github_repo')): + category = categorize(resource.value) + if resource.name_id != category: + resource.name_id = category + resource.save() + +def reverse(apps, schema_editor): + # Intentionally don't try to return to former worse state + pass + +class Migration(migrations.Migration): + + dependencies = [ + ('doc', '0035_populate_docextresources'), + ] + + operations = [ + migrations.RunPython(forward, reverse), + ] diff --git a/ietf/doc/tests_draft.py b/ietf/doc/tests_draft.py index 945d0e9bd..6fca3e02d 100644 --- a/ietf/doc/tests_draft.py +++ b/ietf/doc/tests_draft.py @@ -1141,6 +1141,7 @@ class IndividualInfoFormsTests(TestCase): badlines = ( 'github_repo https://github3.com/some/repo', + 'github_org https://github.com/not/an_org', 'github_notify badaddr', 'website /not/a/good/url', 'notavalidtag blahblahblah', @@ -1154,6 +1155,7 @@ class IndividualInfoFormsTests(TestCase): goodlines = """ github_repo https://github.com/some/repo Some display text + github_org https://github.com/totally_some_org github_username githubuser webpage http://example.com/http/is/fine """ @@ -1163,7 +1165,7 @@ class IndividualInfoFormsTests(TestCase): doc = Document.objects.get(name=self.docname) self.assertEqual(doc.latest_event(DocEvent,type="changed_document").desc[:35], 'Changed document external resources') self.assertIn('github_username githubuser', doc.latest_event(DocEvent,type="changed_document").desc) - self.assertEqual(doc.docextresource_set.count(), 3) + self.assertEqual(doc.docextresource_set.count(), 4) self.assertEqual(doc.docextresource_set.get(name__slug='github_repo').display_name, 'Some display text') self.assertIn(doc.docextresource_set.first().name.slug,str(doc.docextresource_set.first())) diff --git a/ietf/group/migrations/0036_orgs_vs_repos.py b/ietf/group/migrations/0036_orgs_vs_repos.py new file mode 100644 index 000000000..b764b86f4 --- /dev/null +++ b/ietf/group/migrations/0036_orgs_vs_repos.py @@ -0,0 +1,34 @@ +# Copyright The IETF Trust 2020, All Rights Reserved + +from urllib.parse import urlparse +from django.db import migrations + +def categorize(url): + # This will categorize a few urls pointing into files in a repo as a repo, but that's better than calling them an org + element_count = len(urlparse(url).path.strip('/').split('/')) + if element_count < 1: + print("Bad github resource:",url) + return 'github_org' if element_count == 1 else 'github_repo' + +def forward(apps, schema_editor): + GroupExtResource = apps.get_model('group','GroupExtResource') + + for resource in GroupExtResource.objects.filter(name_id__in=('github_org','github_repo',)): + category = categorize(resource.value) + if resource.name_id != category: + resource.name_id = category + resource.save() + +def reverse(apps, schema_editor): + # Intentionally don't try to return to former worse state + pass + +class Migration(migrations.Migration): + + dependencies = [ + ('group', '0035_add_research_area_groups'), + ] + + operations = [ + migrations.RunPython(forward, reverse), + ] diff --git a/ietf/utils/validators.py b/ietf/utils/validators.py index 6bac6fb81..fdda6e5bf 100644 --- a/ietf/utils/validators.py +++ b/ietf/utils/validators.py @@ -170,13 +170,16 @@ validate_xmpp = XMPPURLValidator() def validate_external_resource_value(name, value): """ validate a resource value using its name's properties """ - if name.type.slug == 'url': + if name.type_id == 'url': if name.slug in ( 'github_org', 'github_repo' ): validate_http_url(value) - hostname = urlparse(value).netloc.lower() + parsed_url = urlparse(value) + hostname = parsed_url.netloc.lower() if not any([ hostname.endswith(x) for x in ('github.com','github.io' ) ]): raise ValidationError('URL must be a github url') + if name.slug == 'github_org' and len(parsed_url.path.strip('/').split('/'))!=1: + raise ValidationError('github path has too many components to be an organization URL') elif name.slug == 'jabber_room': validate_xmpp(value) else: