Created
June 28, 2020 17:12
-
-
Save urbanecm/c45c05d7ac8605163b056a574d5718c9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import csv
import sys

import mwparserfromhell
import requests
import toolforge
# Export the content of every revision tagged "mentorship module question".
#
# Usage: <script> <dbname> <api_url>
#   dbname   database to read the change tags from, e.g. cswiki
#   api_url  action API endpoint used to fetch revision content,
#            e.g. https://cs.wikipedia.org/w/api.php
#
# Writes semicolon-separated rows (Diff;Username;Question) to stdout.
if len(sys.argv) < 3:
    sys.exit('Usage: %s <dbname> <api_url>' % sys.argv[0])

conn = toolforge.connect(sys.argv[1])  # cswiki
API_URL = sys.argv[2]  # https://cs.wikipedia.org/w/api.php

# Collect the tagged revision IDs up front and release the database
# connection before the (much slower) per-revision API crawl starts.
try:
    with conn.cursor() as cur:
        cur.execute('select ct_rev_id from change_tag where ct_tag_id=(select ctd_id from change_tag_def where ctd_name="mentorship module question")')
        edits = cur.fetchall()
finally:
    conn.close()

# csv.writer quotes any field that contains the delimiter or a quote
# character, so a ';' inside a username or question cannot shift columns.
# Rows without such characters are emitted exactly as before.
writer = csv.writer(sys.stdout, delimiter=';', lineterminator='\n')
writer.writerow(['Diff', 'Username', 'Question'])

# One session reuses the HTTP connection across all revision lookups.
with requests.Session() as session:
    for row in edits:
        rev_id = row[0]
        r = session.get(
            API_URL,
            params={
                "action": "query",
                "format": "json",
                "prop": "revisions",
                "revids": rev_id,
                "rvprop": "content|user",
            },
            timeout=60,  # do not hang forever on a stalled request
        )
        r.raise_for_status()

        payload = r.json()
        if 'error' in payload:
            # Fail with the API's own message instead of a bare KeyError.
            raise RuntimeError(
                'API error for revision %s: %s' % (rev_id, payload['error'])
            )

        pages = payload.get('query', {}).get('pages')
        if not pages:
            # No page came back for this revision ID; nothing to export.
            continue

        revisions = next(iter(pages.values())).get('revisions')
        if not revisions:
            continue
        revision = revisions[0]

        text = revision.get('*')
        if text is None:
            # Revision content is not available in the response.
            continue

        # The question is taken to be the body of the last section.
        code = mwparserfromhell.parse(text)
        last_section = code.get_sections(include_headings=False)[-1]
        question = str(last_section).replace('\n', ' ').strip()

        # The 'user' key may be missing (presumably when the username is
        # revision-deleted -- confirm against the API docs); export an empty
        # username rather than crashing mid-run.
        username = revision.get('user', '')

        # Alternative wikitable output, kept for reference:
        #   '|-\n| [[Special:Diff/%s]] || %s || %s' % (rev_id, username, question)
        writer.writerow([rev_id, username, question])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment