Gather the actual repos to back up, taking user and organizational owners into account, and iterating through an owner's repos when necessary using the GitHub API.

- Legacy-Id: 18384
This commit is contained in:
Robert Sparks 2020-08-19 19:07:29 +00:00
parent 2af4246225
commit f4fd4b1921
2 changed files with 51 additions and 28 deletions

View file

@ -1,45 +1,67 @@
# Copyright The IETF Trust 2020, All Rights Reserved # Copyright The IETF Trust 2020, All Rights Reserved
import github3
from collections import Counter
from urllib.parse import urlparse
from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models import F
from ietf.doc.models import DocExtResource from ietf.doc.models import DocExtResource
from ietf.group.models import GroupExtResource from ietf.group.models import GroupExtResource
from ietf.person.models import PersonExtResource from ietf.person.models import PersonExtResource
# TODO: Think more about submodules. This currently will only take top level repos, with the assumption that the clone will include arguments to grab all the submodules.
# As a consequence, we might end up pulling more than we need (or that the org or user expected)
# Make sure this is what we want.
class Command(BaseCommand): class Command(BaseCommand):
help = ('Locate information about gihub repositories to backup') help = ('Locate information about github repositories to backup')
def handle(self, *args, **options): def handle(self, *args, **options):
info_dict = {} if not settings.GITHUB_BACKUP_API_KEY:
# TODO: complain
return
github = github3.login(token = settings.GITHUB_BACKUP_API_KEY)
owners = dict()
repos = set()
for repo in DocExtResource.objects.filter(name__slug='github_repo'): for cls in (DocExtResource, GroupExtResource, PersonExtResource):
if not repo.value.endswith('/'): for res in cls.objects.filter(name_id__in=('github_repo','github_org')):
repo.value += '/' path_parts = urlparse(res.value).path.strip('/').split('/')
if repo not in info_dict: if not path_parts or not path_parts[0]:
info_dict[repo.value] = [] continue
for username in DocExtResource.objects.filter(name__slug='github_username', doc=F('doc')):
info_dict[repo.value].push(username.value)
for repo in GroupExtResource.objects.filter(name__slug='github_repo'): owner = path_parts[0]
if not repo.value.endswith('/'):
repo.value += '/'
if repo not in info_dict:
info_dict[repo.value] = []
for username in GroupExtResource.objects.filter(name__slug='github_username', group=F('group')):
info_dict[repo.value].push(username.value)
for repo in PersonExtResource.objects.filter(name__slug='github_repo'): if owner not in owners:
if not repo.value.endswith('/'): try:
repo.value += '/' gh_owner = github.user(username=owner)
if repo not in info_dict: owners[owner] = gh_owner
info_dict[repo.value] = [] except github3.exceptions.NotFoundError:
for username in PersonExtResource.objects.filter(name__slug='github_username', person=F('person')): continue
info_dict[repo.value].push(username.value)
#print (json.dumps(info_dict)) if gh_owner.type in ('User', 'Organization'):
# For now, all we need are the repo names if len(path_parts) > 1:
for name in info_dict.keys(): repo = path_parts[1]
print(name) if (owner, repo) not in repos:
try:
_ = github.repository(owner,repo)
repos.add( (owner, repo) )
except github3.exceptions.NotFoundError:
continue
else:
for repo in github.repositories_by(owner):
repos.add( (owner, repo.name) )
owner_types = Counter([owners[owner].type for owner in owners])
print ("Owners:")
for key in owner_types:
print(" ",key,':',owner_types[key])
print ("Repositories:", len(repos))
for repo in sorted(repos):
print(" https://github.com/%s/%s" % repo )

View file

@ -27,6 +27,7 @@ django-widget-tweaks>=1.4.2
docutils>=0.12,!=0.15 docutils>=0.12,!=0.15
factory-boy>=2.9.0,<3 factory-boy>=2.9.0,<3
Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode. Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode.
github3.py>=1.2
hashids>=1.1.0 hashids>=1.1.0
html2text>=2019.8.11 html2text>=2019.8.11
html5lib>=1.0.1 html5lib>=1.0.1