From f4fd4b1921b58c51160513520120fd88dfe12e66 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 19 Aug 2020 19:07:29 +0000 Subject: [PATCH 1/2] Gather actual repos to backup taking user and organizational owners into account, iterating through owner repos when necessary using the github api. - Legacy-Id: 18384 --- .../commands/find_github_backup_info.py | 78 ++++++++++++------- requirements.txt | 1 + 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/ietf/doc/management/commands/find_github_backup_info.py b/ietf/doc/management/commands/find_github_backup_info.py index 19d95e75a..2ce36cfd5 100644 --- a/ietf/doc/management/commands/find_github_backup_info.py +++ b/ietf/doc/management/commands/find_github_backup_info.py @@ -1,45 +1,67 @@ # Copyright The IETF Trust 2020, All Rights Reserved + +import github3 + +from collections import Counter +from urllib.parse import urlparse + +from django.conf import settings from django.core.management.base import BaseCommand -from django.db.models import F from ietf.doc.models import DocExtResource from ietf.group.models import GroupExtResource from ietf.person.models import PersonExtResource +# TODO: Think more about submodules. This currently will only take top level repos, with the assumption that the clone will include arguments to grab all the submodules. +# As a consequence, we might end up pulling more than we need (or that the org or user expected) +# Make sure this is what we want. 
+ class Command(BaseCommand): - help = ('Locate information about gihub repositories to backup') + help = ('Locate information about github repositories to backup') def handle(self, *args, **options): - info_dict = {} + if not settings.GITHUB_BACKUP_API_KEY: + # TODO: complain + return + github = github3.login(token = settings.GITHUB_BACKUP_API_KEY) + owners = dict() + repos = set() - for repo in DocExtResource.objects.filter(name__slug='github_repo'): - if not repo.value.endswith('/'): - repo.value += '/' - if repo not in info_dict: - info_dict[repo.value] = [] - for username in DocExtResource.objects.filter(name__slug='github_username', doc=F('doc')): - info_dict[repo.value].push(username.value) + for cls in (DocExtResource, GroupExtResource, PersonExtResource): + for res in cls.objects.filter(name_id__in=('github_repo','github_org')): + path_parts = urlparse(res.value).path.strip('/').split('/') + if not path_parts or not path_parts[0]: + continue - for repo in GroupExtResource.objects.filter(name__slug='github_repo'): - if not repo.value.endswith('/'): - repo.value += '/' - if repo not in info_dict: - info_dict[repo.value] = [] - for username in GroupExtResource.objects.filter(name__slug='github_username', group=F('group')): - info_dict[repo.value].push(username.value) + owner = path_parts[0] - for repo in PersonExtResource.objects.filter(name__slug='github_repo'): - if not repo.value.endswith('/'): - repo.value += '/' - if repo not in info_dict: - info_dict[repo.value] = [] - for username in PersonExtResource.objects.filter(name__slug='github_username', person=F('person')): - info_dict[repo.value].push(username.value) + if owner not in owners: + try: + gh_owner = github.user(username=owner) + owners[owner] = gh_owner + except github3.exceptions.NotFoundError: + continue - #print (json.dumps(info_dict)) - # For now, all we need are the repo names - for name in info_dict.keys(): - print(name) + if gh_owner.type in ('User', 'Organization'): + if len(path_parts) > 
1: + repo = path_parts[1] + if (owner, repo) not in repos: + try: + _ = github.repository(owner,repo) + repos.add( (owner, repo) ) + except github3.exceptions.NotFoundError: + continue + else: + for repo in github.repositories_by(owner): + repos.add( (owner, repo.name) ) + + owner_types = Counter([owners[owner].type for owner in owners]) + print ("Owners:") + for key in owner_types: + print(" ",key,':',owner_types[key]) + print ("Repositories:", len(repos)) + for repo in sorted(repos): + print(" https://github.com/%s/%s" % repo ) diff --git a/requirements.txt b/requirements.txt index eff3d6e61..e07f360d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ django-widget-tweaks>=1.4.2 docutils>=0.12,!=0.15 factory-boy>=2.9.0,<3 Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode. +github3.py>=1.2 hashids>=1.1.0 html2text>=2019.8.11 html5lib>=1.0.1 From ba7e1f3c6aa1b3bfc297d16c2f47637834a038d6 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 28 Aug 2020 15:28:22 +0000 Subject: [PATCH 2/2] changes to improve interfacing with the backup scripts - Legacy-Id: 18442 --- .../commands/find_github_backup_info.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/ietf/doc/management/commands/find_github_backup_info.py b/ietf/doc/management/commands/find_github_backup_info.py index 2ce36cfd5..dd371d088 100644 --- a/ietf/doc/management/commands/find_github_backup_info.py +++ b/ietf/doc/management/commands/find_github_backup_info.py @@ -7,7 +7,7 @@ from collections import Counter from urllib.parse import urlparse from django.conf import settings -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from ietf.doc.models import DocExtResource from ietf.group.models import GroupExtResource @@ -20,11 +20,13 @@ from ietf.person.models import PersonExtResource class Command(BaseCommand): help 
= ('Locate information about github repositories to backup') + def add_arguments(self, parser): + parser.add_argument('--verbose', dest='verbose', action='store_true', help='Show counts of types of repositories') + def handle(self, *args, **options): - if not settings.GITHUB_BACKUP_API_KEY: - # TODO: complain - return + if not (hasattr(settings,'GITHUB_BACKUP_API_KEY') and settings.GITHUB_BACKUP_API_KEY): + raise CommandError("ERROR: can't find GITHUB_BACKUP_API_KEY") # TODO: at >= py3.1, use returncode github = github3.login(token = settings.GITHUB_BACKUP_API_KEY) owners = dict() @@ -59,9 +61,14 @@ class Command(BaseCommand): repos.add( (owner, repo.name) ) owner_types = Counter([owners[owner].type for owner in owners]) - print ("Owners:") - for key in owner_types: - print(" ",key,':',owner_types[key]) - print ("Repositories:", len(repos)) - for repo in sorted(repos): - print(" https://github.com/%s/%s" % repo ) + if options['verbose']: + self.stdout.write("Owners:") + for key in owner_types: + self.stdout.write(" %s: %s"%(key,owner_types[key])) + self.stdout.write("Repositories: %d" % len(repos)) + for repo in sorted(repos): + self.stdout.write(" https://github.com/%s/%s" % repo ) + else: + for repo in sorted(repos): + self.stdout.write("%s/%s" % repo ) +