Skip to content

Instantly share code, notes, and snippets.

@stephenhouser
Forked from robwhess/gh_classroom_collect.py
Last active June 3, 2021 04:26
Show Gist options
  • Save stephenhouser/8096f09ce318ead52fe14220f978fed7 to your computer and use it in GitHub Desktop.
Save stephenhouser/8096f09ce318ead52fe14220f978fed7 to your computer and use it in GitHub Desktop.
Script to collect a set of assignments from GitHub Classroom.
#!/usr/bin/env python
#
# This is a simple Python script to clone all of the repositories for an
# assignment managed via GitHub Classroom. It has a dependency on the
# requests module, so to use it, you must:
#
# pip install requests
#
# You can run the script with the '-h' option to get info on its usage.
#
import os
import re
import json
import textwrap
import subprocess as SP
import requests
from argparse import ArgumentParser
# Tag name referenced by the (currently commented-out) tagging step in
# clone_repos().
GRADING_TAG = '__graded_commit'
# Branch name referenced by the (currently commented-out) checkout step in
# clone_repos().
GRADING_BRANCH = '__grading_branch'
def _run_git(git_args, cwd):
    """
    Run a git command and capture its output as text.
    :param git_args: List of arguments to pass after the 'git' executable.
    :param cwd: Directory in which to run the command.
    :return: A (returncode, stdout, stderr) tuple.
    """
    p = SP.Popen(['git'] + list(git_args),
                 stdout=SP.PIPE, stderr=SP.PIPE,
                 cwd=cwd, universal_newlines=True)
    out, err = p.communicate()
    return p.returncode, out, err


def clone_repos(repo_urls, output_dir, due_date=None):
    """
    Clone a set of GitHub repositories into a specified output directory.
    Repositories that already exist inside the output directory are updated
    with 'git pull' instead of being cloned again.
    :param repo_urls: A list of repository URLs to be cloned. The URLs should
        have any needed authentication info embedded into them.
    :param output_dir: The directory in which to save the cloned repos. It is
        created if it does not exist.
    :param due_date: If not None, the last commit before this timestamp (in
        ISO-8601 format) is resolved for each repo. The tagging/checkout of
        that commit is currently disabled (see the commented-out code at the
        end of the loop), so the working tree is left at HEAD.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    total = len(repo_urls)
    for i, repo_url in enumerate(repo_urls):
        # Directory name is the last URL component without its extension,
        # e.g. '.../hw1-student.git' -> 'hw1-student'.
        repo_dir = os.path.splitext(repo_url.rsplit('/', 1)[1])[0]
        # The clone lives inside output_dir, so that is where we must look to
        # decide between cloning and pulling (not the current directory).
        repo_path = os.path.join(output_dir, repo_dir)

        if not os.path.isdir(repo_path):
            action = 'clone'
            print("== cloning %4d of %4d: %s" % (i + 1, total, repo_dir))
            returncode, out, err = _run_git(['clone', repo_url, repo_dir],
                                            output_dir)
        else:
            action = 'pull'
            print("== pulling %4d of %4d: %s" % (i + 1, total, repo_dir))
            # Pull the branch's configured upstream rather than hard-coding
            # 'origin master', which fails when the default branch is 'main'.
            returncode, out, err = _run_git(['pull'], repo_path)

        if returncode != 0:
            print(" == %s failed with this output:" % action)
            print(" == stdout:", out)
            print(" == stderr:", err)
            # A failed clone may leave no directory at all, so do not try to
            # run further git commands for this repo.
            continue

        # If we have a due date specified, figure out the last commit before
        # the due date. Otherwise, use the HEAD commit. The result is an empty
        # string when no commit precedes the due date.
        if due_date:
            _, rev_out, _ = _run_git(
                ['rev-list', '-n', '1', '--before', due_date, 'HEAD'],
                repo_path)
            commit = rev_out.strip()
        else:
            commit = 'HEAD'

        # Tag the commit as being graded and check out a new grading branch.
        # NOTE: this step is currently disabled, so 'commit' is resolved but
        # not used yet.
        #_run_git(['tag', '-a', GRADING_TAG, '-m', "Grading this commit.", commit],
        #         repo_path)
        #_run_git(['checkout', '-b', GRADING_BRANCH, commit], repo_path)
def get_repo_urls(org, prefix, user, token):
    """
    Gets a list of all SSH clone URLs for GitHub repos within an organization
    whose names start with a given prefix.
    The URLs returned are the repos' 'ssh_url' values, so no username or
    token is embedded into them.
    :param org: The GitHub organization.
    :param prefix: The prefix by which to filter repo names.
    :param user: The GitHub username with which to authorize (may be None).
    :param token: The GitHub auth token/password with which to authorize (may
        be None).
    :return: A sorted list of unique SSH clone URLs.
    :raises requests.HTTPError: If the GitHub API answers with an error status
        (e.g. bad credentials or an unknown organization).
    """
    org_repos_url = 'https://api.github.com/orgs/%s/repos' % org
    params = {'per_page': 100}

    # Only attach basic-auth credentials when some were supplied; passing
    # (None, None) would make requests send bogus credentials.
    auth = (user, token) if (user or token) else None

    # GitHub's pagination is 1-based; starting at 0 would fetch the first page
    # twice.
    page = 1
    repo_urls = set()
    while True:
        print("== fetching repo list, page %d" % page)

        # Request the current page of repos.
        params['page'] = page
        resp = requests.get(org_repos_url, params=params, auth=auth,
                            timeout=30)
        # Fail loudly on API errors instead of iterating an error payload.
        resp.raise_for_status()
        repos = resp.json()

        # Stop if this page of repos was empty.
        if not repos:
            break

        # For each repo whose name starts with prefix, add that repo's SSH
        # clone URL into the set.
        for repo in repos:
            print("=== ", repo['name'], repo['ssh_url'])
            if repo['name'].startswith(prefix):
                repo_urls.add(repo['ssh_url'])

        page += 1

    # Sort so the clone order is deterministic from run to run.
    return sorted(repo_urls)
def main(args):
    """
    Collect the repositories for one assignment.
    Reads the auth token from args.token_file when one is given, looks up the
    matching repo URLs for args.ORG_NAME / args.ASSIGNMENT_PREFIX, and clones
    them into args.OUTPUT_DIR.
    :param args: Parsed command-line arguments (see the ArgumentParser setup
        at the bottom of this file).
    """
    if args.token_file:
        with open(args.token_file) as token_fh:
            token = token_fh.read().strip()
    else:
        token = None

    urls = get_repo_urls(
        args.ORG_NAME,
        args.ASSIGNMENT_PREFIX,
        args.user,
        token,
    )
    clone_repos(urls, args.OUTPUT_DIR, args.due_date)
if __name__ == '__main__':
    # Command-line entry point: declare the arguments in a table (positionals
    # first, then options), register them in order, parse, and run main().
    parser = ArgumentParser(
        description="Download GitHub Classroom repositories for a given assignment")
    argument_specs = (
        (('ORG_NAME',), "Organization name for GitHub Classroom"),
        (('ASSIGNMENT_PREFIX',), "Prefix string for the assignment."),
        (('OUTPUT_DIR',), "Directory in which to output cloned repos."),
        (('-d', '--due-date'),
         "Optional ISO-8601 timestamp corresponding to the assignment's due date."),
        (('-u', '--user'), "GitHub username."),
        (('-t', '--token-file'),
         "File containing GitHub authorization token/password."),
    )
    for flags, help_text in argument_specs:
        parser.add_argument(*flags, help=help_text)
    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment