Last active
March 9, 2021 18:50
-
-
Save mfa/0daae813055b59672c7eb7878e1ec11d to your computer and use it in GitHub Desktop.
Download a CSV with UUIDs from myax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires: Python 3.6+, requests, tqdm
import asyncio | |
import csv | |
import requests | |
from concurrent.futures import ThreadPoolExecutor | |
from tqdm import tqdm | |
def get_token():
    """Exchange the configured refresh token for an access token.

    Posts the refresh token to the token-exchange endpoint and returns the
    ``id_token`` field of the JSON response.

    Returns:
        The value of ``id_token`` from the response body.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        KeyError: If a successful response carries no ``id_token`` field.
    """
    payload = {
        "refresh_token": "INSERT_REFRESH_TOKEN_HERE"
    }
    response = requests.post(
        "https://api.ax-semantics.com/v3/token-exchange/", json=payload
    )
    # Surface the real HTTP failure (e.g. a rejected refresh token) instead of
    # a misleading KeyError / JSON decode error on the error payload.
    response.raise_for_status()
    return response.json()["id_token"]
def fetch(session, row, headers):
    """Fetch one document and merge its fields into the given CSV row.

    Args:
        session: Object providing ``get(url, headers=...)``; ``run`` passes a
            ``requests.Session``.
        row: Mapping for one input CSV row; must contain ``document_id``.
            It is updated in place with the response's JSON fields.
        headers: HTTP headers sent with the request.

    Returns:
        The same ``row`` object, updated with the decoded JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (when
            ``session`` is a ``requests.Session``).
    """
    response = session.get(
        f"https://api.ax-semantics.com/v3/documents/{row['document_id']}/",
        headers=headers,
    )
    # Without this check an error payload would be merged into the row and
    # only blow up later, far away from the actual cause.
    response.raise_for_status()
    row.update(response.json())
    return row
async def run():
    """Look up every document listed in INPUT_CSV.csv and write OUTPUT_CSV.csv.

    Obtains a token via ``get_token``, reads the input rows (the file is read
    with explicit field names, so its first line is treated as data), fetches
    each document through ``fetch`` on a pool of 10 worker threads, and writes
    one output row per document containing the input columns plus ``uid``,
    ``name``, ``created`` and ``modified``. Rows are written in completion
    order, not input order.

    Raises:
        KeyError: If a fetched document lacks one of the output fields.
    """
    token = get_token()
    headers = {"authorization": f"JWT {token}", "content-type": "application/json"}
    # fields in source csv:
    reader_fieldnames = ["generated_at", "collection_id", "document_id"]
    writer_fieldnames = reader_fieldnames + ["uid", "name", "created", "modified"]

    with ThreadPoolExecutor(max_workers=10) as executor:
        with requests.Session() as session:
            loop = asyncio.get_event_loop()
            # newline="" is what the csv module requires for correct handling
            # of embedded newlines and line terminators.
            with open("INPUT_CSV.csv", newline="") as csvfile:
                reader = csv.DictReader(csvfile, fieldnames=reader_fieldnames)
                # Schedule every request up front; the executor caps concurrency.
                tasks = [
                    loop.run_in_executor(executor, fetch, session, row, headers)
                    for row in reader
                ]
            # Without newline="" the writer emits blank lines between rows on
            # platforms that translate "\n" to "\r\n".
            with open("OUTPUT_CSV.csv", "w", newline="") as fp:
                writer = csv.DictWriter(fp, fieldnames=writer_fieldnames)
                writer.writeheader()
                # Write each row as soon as its request finishes instead of
                # buffering all responses first.
                for pending in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
                    document = await pending
                    # Keep only the output columns; the API response may carry more.
                    writer.writerow({key: document[key] for key in writer_fieldnames})
def main():
    """Run the ``run`` coroutine to completion on a dedicated event loop."""
    # asyncio.get_event_loop() without a running loop is deprecated and fails
    # on newer Python versions; creating the loop explicitly works everywhere
    # from 3.6 on and lets us close it deterministically.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(run())
    finally:
        loop.close()


# Script entry point: the download starts as soon as the file is executed.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment