-
-
Save austinjp/01902a9f6a3ebad002a3c572801855bb to your computer and use it in GitHub Desktop.
Get papers from Open Academic Graph by FOS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch papers from the Open Academic Graph PostgreSQL database, grouped by
# field of study (FOS). For each FOS the query returns at most 3000 rows whose
# `paper` JSON has lang == 'en' and contains both an 'abstract' and a
# 'keywords' key.
from collections import defaultdict

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sql_text

# Credentials file holding a single colon-separated record:
#   host:port:database:user:password
CONFIG_PATH = '/Users/jklinger/Nesta-AWS/AWS-RDS-config/open-academic-graph.config'

with open(CONFIG_PATH) as f:
    # strip() drops the trailing newline that would otherwise end up inside
    # `password` (the last field); maxsplit=4 lets the password itself
    # contain ':' characters without breaking the unpacking.
    host, port, database, user, password = f.read().strip().split(':', 4)

# NOTE(review): `port` and `database` from the config are not used here; the
# URI relies on the driver's default port and a hard-coded database name.
# Confirm whether that is intentional before wiring them in.
database_uri = 'postgresql://{}:{}@{}/{}'.format(
    user, password, host, "microsoft_academic_graph")
con = create_engine(database_uri)

# `:fos` is a bound parameter holding a JSON array literal such as
# '["Biology"]'; `@>` keeps rows whose 'fos' array contains that element.
query = ''' select paper from microsoft_academic_graph
where ((paper -> 'fos'::text)) @> :fos
and ((paper ->> 'lang'::text)) = 'en'
and paper::jsonb ? 'abstract'
and paper::jsonb ? 'keywords'
limit 3000'''

fosses = ['Biology', 'Medicine', 'Geology', 'Chemistry',
          'Psychology', 'Philosophy', 'Sociology', 'Engineering',
          'Economics', 'Computer Science', 'Art', 'Physics',
          'History', 'Political Science', 'Materials Science',
          'Mathematics', 'Geography', 'Business']  # ,'Environmental Science']

ids = []                     # every fetched paper id, duplicates included
papers = defaultdict(list)   # FOS name -> list of paper records

# One explicit connection for all queries: it is released when the block
# exits, and this calling style works on both SQLAlchemy 1.x and 2.x
# (engine-level execute() with keyword bind parameters was removed in 2.0).
with con.connect() as connection:
    for fos in fosses:
        print(fos)
        exec_result = connection.execute(
            sql_text(query), {'fos': '["{}"]'.format(fos)})
        # Each row is a 1-tuple holding the `paper` column.
        for paper, in exec_result:
            papers[fos].append(paper)
            ids.append(paper['id'])

# (unique ids, total ids): a gap between the two means some papers were
# returned under more than one FOS. As a bare expression this is only
# displayed in a notebook/REPL.
len(set(ids)), len(ids)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment