Created
November 11, 2019 07:00
-
-
Save ilyasst/e636f46f74f67ed8c1e9ffb58672b883 to your computer and use it in GitHub Desktop.
Relies on Arcas and Matplotlib to plot the evolution of the availability of open access publications through a few APIs using an arbitrary set of publications returned by each API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This script is heavily inspired by the example provided by the developers of Arcas:
https://github.com/ArcasProject/ArcasExamples/blob/initial_commits/scripts/scraping_num_articles.py
"""
import arcas | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# Search terms; each one is used for both the title and the abstract filter.
keywords = ["composite", "carbon", "fiber"]
# Number of records requested per API call (also the pagination step).
num_collect = 10
# Pagination stops once the start index reaches this value.
max_num = 201


def collect_articles(providers, search_terms, page_size, limit):
    """Query every provider for every search term and gather the results.

    Args:
        providers: iterable of classes that, called with no arguments,
            return an API object exposing ``parameters_fix``,
            ``create_url_search``, ``make_request``, ``get_root``,
            ``parse`` and ``to_dataframe``.
        search_terms: iterable of strings; each is passed as both the
            ``title`` and the ``abstract`` search parameter.
        page_size: number of records requested per call; the start index
            advances by this amount after every page.
        limit: pages are requested while the start index is below this.

    Returns:
        A list with the result of ``api.to_dataframe`` for every parsed
        article, in request order.
    """
    frames = []
    for provider in providers:
        print(provider)
        api = provider()
        for term in search_terms:
            start = 1
            while start < limit:
                parameters = api.parameters_fix(title=term, records=page_size,
                                                abstract=term, start=start)
                url = api.create_url_search(parameters)
                request = api.make_request(url)
                root = api.get_root(request)
                raw_articles = api.parse(root)
                try:
                    for art in raw_articles:
                        frames.append(api.to_dataframe(art))
                except Exception:
                    # Best-effort stop for this term: a page that cannot be
                    # iterated or converted ends the pagination (presumably
                    # this is how an exhausted result set shows up -- confirm
                    # against the Arcas API). Unlike a bare ``except``, this
                    # lets KeyboardInterrupt / SystemExit propagate.
                    break
                # Step by the page size so the window follows ``page_size``
                # instead of a separately hard-coded constant.
                start += page_size
    return frames


def count_open_access_by_date(df):
    """Count the rows flagged as open access for every distinct date.

    Args:
        df: DataFrame with a ``date`` column and an ``open_access`` column.

    Returns:
        A ``(dates, counts)`` pair of lists. ``dates`` holds the distinct
        values of ``date`` in ascending order and ``counts`` the number of
        rows per date whose ``open_access`` equals ``True``. Dates without
        any open-access row are kept with a count of 0; rows with a missing
        date are ignored.
    """
    if df.empty:
        return [], []
    # NOTE(review): this counts rows. If ``to_dataframe`` yields several rows
    # per article, articles are over-counted -- confirm and de-duplicate
    # upstream if needed.
    is_open = df["open_access"].eq(True)
    counts = is_open.groupby(df["date"]).sum()
    return counts.index.tolist(), [int(c) for c in counts]


def plot_open_access(x, y):
    """Show a bar chart of the open-access counts ``y`` over the dates ``x``."""
    fig, ax = plt.subplots()
    ax.bar(x, y)
    # Fixed display window; assumes the dates are numeric years -- TODO confirm.
    ax.set_xlim(1990, 2020)
    ax.set_ylabel('# publications')
    ax.set_title('Open Access publications per year')
    plt.show()


if __name__ == "__main__":
    dfs = collect_articles([arcas.Nature, arcas.Arxiv, arcas.Springer],
                           keywords, num_collect, max_num)
    if not dfs:
        # pd.concat raises an opaque ValueError on an empty list.
        raise SystemExit("No articles were collected; nothing to plot.")
    df = pd.concat(dfs, ignore_index=True)
    df = df.sort_values("date")
    x, y = count_open_access_by_date(df)
    plot_open_access(x, y)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment