Last active
March 20, 2021 06:57
-
-
Save hashlash/410399c096cfb7b07cdb47736c7886ad to your computer and use it in GitHub Desktop.
Rank the GitHub repositories that import a pkg.go.dev package by their stats (forks, stars, watchers)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import re | |
import requests | |
import uuid | |
# A personal access token is enough here; it does not need any OAuth scope.
# Generate one at https://github.com/settings/tokens/new
API_KEY = 'api key example'

# GraphQL fragment describing a single repository, meant to be filled in with
# str.format(key=..., name=..., owner=...). `key` becomes the alias of the
# fragment; doubled braces turn into literal braces after formatting.
QUERY_FORMAT = (
    '\n'
    '{key}: repository(name: "{name}", owner: "{owner}") {{\n'
    'url\n'
    'forkCount\n'
    'watchers {{\n'
    'totalCount\n'
    '}}\n'
    'stargazers {{\n'
    'totalCount\n'
    '}}\n'
    '}}\n'
)
# Request repositories in windows of 500 or fewer: a window of 1000 produced
# an error (possibly a timeout).
def get_dependents_stats(package, chunk_size=500, timeout=None):
    """Collect GitHub statistics for the repositories that import a Go package.

    The "imported by" tab of ``package`` on pkg.go.dev is scanned for
    ``github.com/<owner>/<name>`` references, and the GitHub GraphQL API is
    then queried for those repositories in batches of ``chunk_size``, using
    one QUERY_FORMAT fragment per repository.

    Args:
        package: Import path of the Go package, e.g. ``github.com/gorilla/mux``.
        chunk_size: Maximum number of repositories sent in one GraphQL request.
        timeout: Optional timeout (seconds) forwarded to every HTTP request.
            ``None`` (the default) keeps the previous wait-forever behaviour.

    Returns:
        A dict mapping each ``github.com/<owner>/<name>`` string to the data
        GitHub returned for it. Repositories that GitHub could not resolve
        (returned as null) are left out.

    Raises:
        requests.HTTPError: pkg.go.dev or the GitHub API answered with an
            error status; for the GitHub call the response headers are
            appended to the message.
        KeyError: The GraphQL response has no usable ``data`` member; the full
            response payload is appended to the message.
    """
    importedby_url = 'https://pkg.go.dev/{}?tab=importedby'.format(package)
    page = requests.get(importedby_url, timeout=timeout)
    # Without this check an error page would silently yield an empty result.
    page.raise_for_status()
    godoc = page.content.decode()

    # Raw string avoids invalid-escape warnings; the dot is escaped so that it
    # only matches a literal "github.com". Sorting makes batching deterministic.
    github_repos = sorted(set(re.findall(r'github\.com/[\w\-]+/[\w\-]+', godoc)))

    result = {}
    for i in range(0, len(github_repos), chunk_size):
        print('Processing data from index {} to {}'.format(i, i+chunk_size))

        # GraphQL aliases must match /[_A-Za-z][_0-9A-Za-z]*/
        # (http://facebook.github.io/graphql/#sec-Names), hence the "id_"
        # prefix in front of a dash-free UUID.
        chunk = {
            'id_' + uuid.uuid4().hex: repo
            for repo in github_repos[i:i + chunk_size]
        }

        fragments = []
        for key, repo in chunk.items():
            # The regex guarantees exactly "github.com/<owner>/<name>".
            _, owner, name = repo.split('/')
            fragments.append(QUERY_FORMAT.format(key=key, owner=owner, name=name))
        query = '{\n' + ''.join(fragments) + '\n}'

        response = requests.post(
            'https://api.github.com/graphql',
            json={'query': query},
            headers={'Authorization': 'Bearer {}'.format(API_KEY)},
            timeout=timeout,
        )
        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            # Headers are formatted explicitly: concatenating the header
            # mapping to a str would itself fail with TypeError.
            raise requests.HTTPError(
                '{}\n{}'.format(e, dict(response.headers)),
                response=response,
            ) from e

        payload = response.json()
        data = payload.get('data')
        if data is None:
            # Covers both a missing "data" key and an explicit null.
            raise KeyError("'data'\n{}".format(payload))
        result.update({chunk[k]: v for k, v in data.items()})

    # GitHub's GraphQL API returns null data for repositories it cannot resolve.
    return {k: v for k, v in result.items() if v}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
# Load the helper module straight from the gist. The URL is pinned to one
# specific revision of graphql.py.
url = 'https://gist.githubusercontent.com/hashlash/410399c096cfb7b07cdb47736c7886ad/raw/66165429996ed02f4b9c03ad49ec5f6e3fa3e3e7/graphql.py'

# Close the HTTP connection as soon as the source has been read.
with urllib.request.urlopen(url) as response:
    source = response.read()

# SECURITY NOTE(review): exec() runs downloaded code with full privileges.
# Only keep this if the pinned revision above has been reviewed and is
# trusted; prefer saving the file locally and importing it.
exec(source)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the stats for every GitHub repository importing gorilla/mux and keep
# a JSON copy on disk.
stats = get_dependents_stats('github.com/gorilla/mux')
with open('output.json', 'w') as f:
    f.write(json.dumps(stats))

# Re-key the entries by the repository URL reported by GitHub.
repos = {entry['url']: entry for entry in stats.values()}

# Each ranking is a list of (url, count) pairs, highest count first.
by_forks = [(repo_url, entry['forkCount']) for repo_url, entry in repos.items()]
by_forks.sort(key=lambda pair: pair[1], reverse=True)

by_stars = [
    (repo_url, entry['stargazers']['totalCount'])
    for repo_url, entry in repos.items()
]
by_stars.sort(key=lambda pair: pair[1], reverse=True)

by_watch = [
    (repo_url, entry['watchers']['totalCount'])
    for repo_url, entry in repos.items()
]
by_watch.sort(key=lambda pair: pair[1], reverse=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment