Skip to content

Instantly share code, notes, and snippets.

@kindly
Created September 15, 2021 09:18
Show Gist options
  • Save kindly/4f9dc74d35fa401b5bbced818448b90d to your computer and use it in GitHub Desktop.
Save kindly/4f9dc74d35fa401b5bbced818448b90d to your computer and use it in GitHub Desktop.
import jsonlines
import flatterer
from pathlib import Path
import subprocess
def flatten_iterator(statement_type, path='statements.2021-08-31T08 14 34Z.jsonl'):
    """Yield every record of one statement type from a JSON Lines file.

    Args:
        statement_type: Value compared against each record's
            ``"statementType"`` key, e.g. ``"personStatement"``.
        path: JSON Lines file to read. Defaults to the export file name this
            script was written against, so existing one-argument calls keep
            working.

    Yields:
        Each decoded record whose ``"statementType"`` equals
        *statement_type*, in file order. Records are assumed to be JSON
        objects (dicts).
    """
    with jsonlines.open(path) as reader:
        # Named ``statement`` rather than ``object`` so the builtin is not
        # shadowed inside the loop.
        for statement in reader:
            # .get(): a record without the key simply does not match,
            # instead of aborting the whole export with KeyError.
            if statement.get("statementType") == statement_type:
                yield statement
# Pipeline: flatten each statement type into its own output directory,
# prefix the generated CSV names with a short label so they stay unique
# across directories, then load every CSV into register.db.

# (statementType value, short label). The label names both the output
# directory ("output_<label>") and the CSV file prefix ("<label>_").
STATEMENT_OUTPUTS = (
    ("personStatement", "person"),
    ("ownershipOrControlStatement", "ooc"),
    ("entityStatement", "entity"),
)

p = Path('.')

for statement_type, label in STATEMENT_OUTPUTS:
    output_dir = f'output_{label}'
    flatterer.iterator_flatten(
        flatten_iterator(statement_type),
        output_dir,
        main_table_name='statement',
        force=True,
    )
    # Materialise the listing before renaming: the new names still match
    # "*.csv", so renaming while the lazy glob is being consumed could
    # present an already-prefixed file a second time.
    for path in list(p.glob(f'{output_dir}/csv/*.csv')):
        path.rename(path.with_name(f'{label}_{path.name}'))

for path in p.glob('*/csv/*.csv'):
    print(path)
    # Table name is the file name up to its first dot.
    name = path.name.split('.')[0]
    # Per the sqlite-utils CLI reference: -c reads CSV input, -d detects
    # column types, --silent hides the progress bar.
    # check=True: a failed import raises CalledProcessError instead of
    # being silently ignored.
    subprocess.run(
        ['sqlite-utils', 'insert', '--silent', '-d', '-c', 'register.db', name, str(path)],
        check=True,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment