Skip to content

Instantly share code, notes, and snippets.

@Totktonada
Created March 8, 2023 00:59
Show Gist options
  • Save Totktonada/e84a4b2eb3ed70b6059d8fe9c010e4f2 to your computer and use it in GitHub Desktop.
Save Totktonada/e84a4b2eb3ed70b6059d8fe9c010e4f2 to your computer and use it in GitHub Desktop.
Dump GitHub issues into a CSV file

Overview

The script dumps the issue list of a GitHub repository into a CSV file.

Requirements

  • Python 3 (Python 2 should also work, but this has not been tested)
  • requests

How to use

Create a token on the GitHub Personal access tokens page, grant it the repo:public_repo scope, and copy the token into a file named token.txt in the directory you run the script from.

If you need to access private repositories, enable the full repo scope instead.

Run like so:

./issues_csv.py tarantool/doc

Or, if you need issue bodies:

./issues_csv.py tarantool/doc --body
#!/usr/bin/env python
from __future__ import print_function
import os
import re
import sys
import argparse
import requests
import csv
def status(pages, pages_all, issues, url):
    """Print a one-line download progress message to stderr.

    Progress goes to stderr so that it never mixes with anything written
    to stdout.

    :param pages: number of pages downloaded so far
    :param pages_all: total number of pages (or a placeholder such as
        ``'??'`` when the total is not known yet)
    :param issues: number of entries collected so far
    :param url: URL that is about to be requested
    """
    print('[pages {:2} / {:2}] [issues {:4} / ??] Downloading {}'.format(
        pages, pages_all, issues, url), file=sys.stderr)
# File the GitHub personal access token is read from (relative to the
# current working directory).
TOKEN_FILE = 'token.txt'

# Columns that are always written; 'body' is appended when requested.
CSV_HEADER = (
    'owner',
    'repo',
    'state',
    'id',
    'title',
    'labels',
    'created_at',
    'updated_at',
    'url',
)


def parse_args(argv=None):
    """Parse the command line.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``
    :returns: ``(owner, repo, store_body)`` tuple
    :raises ValueError: if the positional argument contains no ``/``
    """
    parser = argparse.ArgumentParser(
        description='Dump GitHub issues into a CSV file')
    parser.add_argument('repo_path', type=str, help='owner/repository')
    parser.add_argument('--body', action='store_true',
                        help='Whether to store issue body')
    args = parser.parse_args(argv)

    if '/' not in args.repo_path:
        raise ValueError('repo_path must be in the form owner/repository')
    owner, repo = args.repo_path.split('/', 1)
    return owner, repo, args.body


def read_token(token_file=TOKEN_FILE):
    """Return the access token stored in ``token_file``.

    Surrounding whitespace (including the trailing newline) is stripped.

    :raises RuntimeError: if the path is missing or is not a regular file
    """
    if not os.path.exists(token_file):
        raise RuntimeError('{file} is not exists'.format(file=token_file))
    if not os.path.isfile(token_file):
        raise RuntimeError(
            '{file} is not a regular file'.format(file=token_file))
    with open(token_file, 'r') as f:
        return f.read().strip()


def total_pages(links):
    """Extract the total page count from parsed ``Link`` header relations.

    :param links: mapping of relation name to a dict with a ``'url'`` key
        (the shape of ``requests.Response.links``)
    :returns: the page number of the ``last`` relation as a string, or
        ``'??'`` when there is no ``last`` relation or its URL carries no
        ``page`` parameter
    """
    last_url = links.get('last', {}).get('url', '')
    # '[^_]' skips the 'per_page=' parameter, which also ends in 'page='.
    match = re.search(r'[^_]page=(\d+)', last_url)
    if match:
        return match.group(1)
    return '??'


def fetch_issues(owner, repo, token):
    """Download every entry of the repository's issues endpoint.

    Requests issues in any state, in ascending order, 100 per page, and
    follows the ``next`` relation until the last page. Progress is
    reported through ``status()``.

    :returns: list of decoded JSON objects as returned by the API
        (entries carrying a ``pull_request`` key are not filtered here)
    :raises requests.HTTPError: on a non-success HTTP response
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': 'token ' + token,
    }
    params = {
        'state': 'all',
        'direction': 'asc',
        'per_page': 100,
    }
    url = 'https://api.github.com/repos/{}/{}/issues'.format(owner, repo)

    issues = []
    with requests.Session() as session:
        status(0, '??', 0, url)
        r = session.get(url, headers=headers, params=params)
        r.raise_for_status()
        issues.extend(r.json())

        # > The link header will be omitted if the endpoint does not support
        # > pagination or *if all results fit on a single page.*
        #
        # https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api?apiVersion=2022-11-28
        #
        # So r.links may be empty: total_pages() then yields '??' and the
        # loop below is skipped.
        pages_all_str = total_pages(r.links)

        pages = 1
        while 'next' in r.links:
            # The 'next' URL already carries the query parameters, so
            # params is not passed again.
            next_url = r.links['next']['url']
            status(pages, pages_all_str, len(issues), next_url)
            r = session.get(next_url, headers=headers)
            r.raise_for_status()
            issues.extend(r.json())
            pages += 1
    return issues


def issue_row(owner, repo, issue, store_body=False):
    """Build one CSV row (a list of cell values) for an issue object.

    Label names are joined with ``;``; duplicates are dropped while the
    original order is kept. The body is appended as the last cell only
    when ``store_body`` is true.
    """
    labels = []
    for label_def in issue['labels']:
        label_name = label_def['name']
        if label_name not in labels:
            labels.append(label_name)

    row = [
        owner,
        repo,
        issue['state'],
        '#{}'.format(issue['number']),
        issue['title'],
        ';'.join(labels),
        issue['created_at'],
        issue['updated_at'],
        issue['html_url'],
    ]
    if store_body:
        row.append(issue['body'])
    return row


def write_csv(output_file, owner, repo, issues, store_body=False):
    """Write the header and one row per issue into ``output_file``.

    Entries that carry a ``pull_request`` key are skipped, so only real
    issues end up in the file.
    """
    header = list(CSV_HEADER)
    if store_body:
        header.append('body')

    # The csv module does its own newline handling: on Python 3 the file
    # must be opened with newline='' (otherwise rows get '\r\r\n' endings
    # on Windows), and an explicit encoding keeps non-ASCII titles from
    # depending on the locale. Python 2's csv module wants a binary file.
    if sys.version_info[0] >= 3:
        f = open(output_file, 'w', newline='', encoding='utf-8')
    else:
        f = open(output_file, 'wb')

    with f:
        w = csv.writer(f)
        # Write the header through the same writer so that it gets the
        # same line terminator and quoting rules as the data rows.
        w.writerow(header)
        for issue in issues:
            if 'pull_request' in issue:
                continue
            w.writerow(issue_row(owner, repo, issue, store_body))


def main():
    """Script entry point: parse arguments, download issues, write CSV."""
    owner, repo, store_body = parse_args()
    token = read_token(TOKEN_FILE)
    issues = fetch_issues(owner, repo, token)

    if store_body:
        output_file = '{owner}_{repo}_with_bodies.csv'.format(owner=owner,
                                                              repo=repo)
    else:
        output_file = '{owner}_{repo}.csv'.format(owner=owner, repo=repo)

    write_csv(output_file, owner, repo, issues, store_body)
    print('Written {}'.format(output_file), file=sys.stderr)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment