Gather the actual repos to back up, taking user and organizational owners into account, and iterating through an owner's repos when necessary using the GitHub API.

- Legacy-Id: 18384
This commit is contained in:
Robert Sparks 2020-08-19 19:07:29 +00:00
parent 2af4246225
commit f4fd4b1921
2 changed files with 51 additions and 28 deletions

View file

@ -1,45 +1,67 @@
# Copyright The IETF Trust 2020, All Rights Reserved
import github3
from collections import Counter
from urllib.parse import urlparse
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import F
from ietf.doc.models import DocExtResource
from ietf.group.models import GroupExtResource
from ietf.person.models import PersonExtResource
# TODO: Think more about submodules. This currently will only take top level repos, with the assumption that the clone will include arguments to grab all the submodules.
# As a consequence, we might end up pulling more than we need (or that the org or user expected)
# Make sure this is what we want.
# NOTE(review): this span looks like a rendered diff (see the "@ -1,45 +1,67 @" hunk header above):
# indentation has been stripped and lines from the old and the new revision appear interleaved,
# so it is not runnable exactly as shown. Comments below mark which revision each run of lines
# presumably belongs to -- confirm against the real file.
#
# Django management command that works out which GitHub repositories should be backed up, based on
# the external-resource rows attached to documents, groups and persons.
class Command(BaseCommand):
# Two help strings are shown; presumably the first is the old line (with the "gihub" typo) and the
# second is its replacement.
help = ('Locate information about gihub repositories to backup')
help = ('Locate information about github repositories to backup')
# Entry point of the command. Prints its findings to stdout and returns nothing.
def handle(self, *args, **options):
# (presumably old revision) maps a repo URL to a list of usernames.
info_dict = {}
# Without an API key the command returns silently without printing anything.
if not settings.GITHUB_BACKUP_API_KEY:
# TODO: complain
return
# Client authenticated with the configured token.
github = github3.login(token = settings.GITHUB_BACKUP_API_KEY)
# owners: owner name (first URL path segment) -> object returned by github.user().
owners = dict()
# repos: set of (owner, repository name) tuples.
repos = set()
# (presumably old revision) Document resources: make each repo URL end with '/' and gather the
# 'github_username' values under it.
# NOTE(review): `repo not in info_dict` tests the model object although the keys are repo.value
# strings, and Python lists have no .push() method (append() is the list method). The same
# pattern repeats in the Group and Person loops further down.
for repo in DocExtResource.objects.filter(name__slug='github_repo'):
if not repo.value.endswith('/'):
repo.value += '/'
if repo not in info_dict:
info_dict[repo.value] = []
for username in DocExtResource.objects.filter(name__slug='github_username', doc=F('doc')):
info_dict[repo.value].push(username.value)
# (presumably new revision) Walk all three resource models and pick rows tagged as a GitHub repo
# or a GitHub organization.
for cls in (DocExtResource, GroupExtResource, PersonExtResource):
for res in cls.objects.filter(name_id__in=('github_repo','github_org')):
# Split the URL path into segments; the code below treats segment 0 as the owner and
# segment 1 (if present) as the repository.
path_parts = urlparse(res.value).path.strip('/').split('/')
# Skip values whose path has no owner segment (splitting an empty path yields ['']).
if not path_parts or not path_parts[0]:
continue
# (presumably old revision) Same collection as above, for Group resources.
for repo in GroupExtResource.objects.filter(name__slug='github_repo'):
if not repo.value.endswith('/'):
repo.value += '/'
if repo not in info_dict:
info_dict[repo.value] = []
for username in GroupExtResource.objects.filter(name__slug='github_username', group=F('group')):
info_dict[repo.value].push(username.value)
# (presumably new revision) The first path segment is the owner.
owner = path_parts[0]
# (presumably old revision) Same collection as above, for Person resources.
for repo in PersonExtResource.objects.filter(name__slug='github_repo'):
if not repo.value.endswith('/'):
repo.value += '/'
if repo not in info_dict:
info_dict[repo.value] = []
for username in PersonExtResource.objects.filter(name__slug='github_username', person=F('person')):
info_dict[repo.value].push(username.value)
# (presumably new revision) Look each owner up once and cache the result in `owners`; a resource
# whose owner lookup raises NotFoundError is skipped.
if owner not in owners:
try:
gh_owner = github.user(username=owner)
owners[owner] = gh_owner
except github3.exceptions.NotFoundError:
continue
# (presumably old revision) Output step: print every collected repo URL.
#print (json.dumps(info_dict))
# For now, all we need are the repo names
for name in info_dict.keys():
print(name)
# NOTE(review): gh_owner is only assigned inside the `if owner not in owners` branch above. Assuming
# this check sits at the same nesting level as that branch, an owner that is already cached leaves
# gh_owner pointing at whichever owner was fetched most recently; `owners[owner]` looks like the
# intended value -- confirm.
if gh_owner.type in ('User', 'Organization'):
# The URL names a specific repository: confirm it exists on GitHub before recording it.
if len(path_parts) > 1:
repo = path_parts[1]
if (owner, repo) not in repos:
try:
_ = github.repository(owner,repo)
repos.add( (owner, repo) )
except github3.exceptions.NotFoundError:
continue
# The URL names only an owner: record every repository returned by repositories_by(owner).
else:
for repo in github.repositories_by(owner):
repos.add( (owner, repo.name) )
# Summary: number of cached owners per owner type, then the repository count and the sorted
# repository URLs.
owner_types = Counter([owners[owner].type for owner in owners])
print ("Owners:")
for key in owner_types:
print(" ",key,':',owner_types[key])
print ("Repositories:", len(repos))
for repo in sorted(repos):
print(" https://github.com/%s/%s" % repo )

View file

@ -27,6 +27,7 @@ django-widget-tweaks>=1.4.2
docutils>=0.12,!=0.15
factory-boy>=2.9.0,<3
Faker>=0.8.8,!=0.8.9,!=0.8.10 # from factory-boy # Faker 0.8.9,0.8.10 sometimes return string names instead of unicode.
github3.py>=1.2
hashids>=1.1.0
html2text>=2019.8.11
html5lib>=1.0.1