Created
May 21, 2018 15:53
-
-
Save Randl/80efead5dd9dc4582f6ae356001cdb89 to your computer and use it in GitHub Desktop.
Parse ICML submissions, get some statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
from collections import Counter
from contextlib import closing
from timeit import default_timer as timer

from selenium.common.exceptions import TimeoutException
from selenium.webdriver import Firefox, FirefoxProfile
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
# Maps an affiliation string (already normalised with ``.lower().title()``) to the
# canonical institution name used for counting.  The lookup is applied exactly once
# per author, so every value must already be the final canonical spelling: a value
# that is itself a key (or a misspelling) would create a separate bucket.
unis = {
    'Google Deepmind': 'Deepmind',
    'Google Brain': 'Google',
    'Cmu': 'Carnegie Mellon University',
    'Cargenie Mellon University': 'Carnegie Mellon University',
    'Carnegie Mellen University': 'Carnegie Mellon University',
    'Carnegie Mellon University': 'Carnegie Mellon University',
    'Facebook Ai Research': 'Facebook',
    'University Of Virginia--> Ucla': 'University Of Virginia',
    'Openai / Uc Berkeley': 'Openai',
    'Mpi For Intelligent Systems Tübingen, Germany': 'Max Planck Institute For Intelligent Systems',
    'Mit Csail': 'Mit',
    'Fair': 'Facebook',
    'University Of Oxford': 'Oxford',
    'Harvard University': 'Harvard',
    'Uber Ai Labs': 'Uber',
    'Uber Ai Labs & University Of Central Florida': 'Uber',
    'Uber Atg / University Of Toronto': 'Uber',
    'Uber/Cmu': 'Uber',
    'University Of Cambridge & Uber': 'University Of Cambridge',
    'Cambridge/Mpi': 'University Of Cambridge',
    'University Of Cambridge And Mpi Tübingen': 'University Of Cambridge',
    'University Of Cambridge, Alan Turing Institute': 'University Of Cambridge',
    'Columbia University In The City Of New York': 'Columbia University',
    'Columbia University Medical Center': 'Columbia University',
    'Aalto University & Nvidia': 'Aalto University',
    'Alan Turing Institute & University Of Warwick': 'Alan Turing Institute',
    'Amazon / Ucsb': 'Amazon',
    'Amazon Ai & Caltech': 'Amazon',
    'Amazon Research': 'Amazon',
    'Amazon Research Tübingen': 'Amazon',
    'Amazon.Com': 'Amazon',
    'Ant Financial Services Group': 'Ant Financial',
    'Artificial Intelligence Department, Ant Financial': 'Ant Financial',
    'Apple Inc.': 'Apple',
    'Cornell University': 'Cornell',
    'Deepmind/University Of Alberta': 'Deepmind',
    'Google Inc': 'Google',
    'Google Inc.': 'Google',
    'Google Llc': 'Google',
    'Google Research': 'Google',
    'Google Research, Ny': 'Google',
    'Google Uk': 'Google',
    'Google, Inc.': 'Google',
    'Google, Usa': 'Google',
    'Facebook / Nyu': 'Facebook',
    'Facebook Ai Research And Tel Aviv University': 'Facebook',
    'Facebook Ai Research, Nyu': 'Facebook',
    'Facebook Artificial Intelligence Research': 'Facebook',
    'Facebook Research': 'Facebook',
    'Oxford And Deepmind': 'Oxford',
    'Oxford, Deepmind': 'Oxford',
    'Northwestern': 'Northwestern University',
    'Northwestern U': 'Northwestern University',
    'Ut Austin': 'University Of Texas At Austin',
    'Ut Austin & Amazon': 'University Of Texas At Austin',
    'Ut Austin - Sentient Technologies': 'University Of Texas At Austin',
    'Ut-Austin': 'University Of Texas At Austin',
    'Weizmanninstitute': 'Weizmann Institute Of Science',
    'Tu Darmstadt + Max Planck Institute For Intelligent Systems': 'Tu Darmstadt',
    'Eecs, Uc Berkeley': 'University Of California, Berkeley',
    'Eecs Department, University Of California, Berkeley': 'University Of California, Berkeley',
    'Berkeley': 'University Of California, Berkeley',
    'Uc Berkeley': 'University Of California, Berkeley',
    'University Of California At Berkeley': 'University Of California, Berkeley',
    'University Of California Berkeley': 'University Of California, Berkeley',
    'Google / U. Michigan': 'Google',
    'Google Ai': 'Google',
    'Google Brain / Cornell University': 'Google',
    'Google Brain And Princeton University': 'Google',
    'Google Brain Robotics': 'Google',
    'Google Deepmind And Inria': 'Deepmind',
    'Deep Mind': 'Deepmind',
    'Deepmind, University Of Oxford': 'Deepmind',
    'Yandex; Msu': 'Yandex',
    'University At Albany, State University Of New York': 'University At Albany',
    'Massachusetts Institute Of Technology': 'Mit',
    'Nyu': 'New York University',
    'Skoltech & Criteo': 'Skoltech',
    'Zhejiang University & Tencent Ai Lab': 'Zhejiang University',
    'Mcgill University / Facebook': 'Mcgill University',
    'U Oxford': 'Oxford',
    'University Of California At San Diego': 'University Of California San Diego',
    'Microsoft Research Ai': 'Microsoft',
    'Microsoft Ai & Research': 'Microsoft',
    'Microsoft Research': 'Microsoft',
    'Baidu Research, Usa': 'Baidu',
    'Baidu Research Usa': 'Baidu',
    'Baidu Research': 'Baidu',
    'Microsoft Maluuba': 'Microsoft',
    'Technion – Israel Institute Of Technology': 'Technion',
    'Technion Israeli Institute Of Technology': 'Technion',
    'Okinawa Institute Of Science And Technology Graduate University':
        'Okinawa Institute Of Science And Technology',
    'Nvidia Research': 'Nvidia',
    'Microsoft Research Cambridge': 'Microsoft',
    'Stanford': 'Stanford University',
    'Tel Aviv University, Google': 'Tel Aviv University',
    'Stanford University & Google': 'Stanford University',
    'Mit, Tau': 'Mit',
    'Magic Leap, Inc': 'Magic Leap',
    'Magic Leap Inc.': 'Magic Leap',
    'Magic Leap, Inc.': 'Magic Leap',
    'Princeton University And Google Brain': 'Princeton University',
    'Univ Of Toronto | Toronto': 'University Of Toronto',
    'Department Of Electrical And Computer Engineering, University Of Toronto': 'University Of Toronto',
    'Hebrew University Of Jerusalem, Israel': 'Hebrew University',
    'University Of Illinois Uc': 'University Of Illinois Urbana-Champaign',
    'Mcgill': 'Mcgill University',
    # Fixed: previously mapped to 'Stanford', which is not the canonical bucket
    # ('Stanford University') and is never re-resolved by the single-pass lookup.
    'Stanford University, California': 'Stanford University',
    'Microsoft Research Asia': 'Microsoft',
    'The University Of Oxford': 'Oxford',
    'Princeton University And Institute For Advanced Study': 'Princeton University',
    'Princeton': 'Princeton University',
    'Princeton Univerisity': 'Princeton University',
    'The University Of Texas At Austin': 'University Of Texas At Austin',
    'Epfl': 'École Polytechnique Fédérale De Lausanne',
    'École Polytechnique Fédérale D': 'École Polytechnique Fédérale De Lausanne',
    # Fixed: the value used to be the misspelling 'Univeristy Of Toronto', which
    # split these papers off from the real 'University Of Toronto' count.
    'University Of Toronto And Vector Institute': 'University Of Toronto',
    'Universita Di Pisa': 'University Of Pisa',
    'Cambridge': 'University Of Cambridge',
    'Columbia': 'Columbia University',
    'Department Of Statistics, Columbia University': 'Columbia University',
    'Eth Zurich And University Of Zurich': 'Eth Zurich',
    'Eth Zurich - Max-Planck-Institute': 'Eth Zurich',
    'Eth Zürich': 'Eth Zurich',
    'Ethz': 'Eth Zurich',
    'Openai / University Of Edinburgh': 'Openai',
    'Iiis, Tsinghua University': 'Tsinghua University',
    'California Institute Of Technology': 'Caltech',
    'Georgia Tech': 'Georgia Institute Of Technology',
    'Georgia Institute Of Technology / Facebook Ai Research': 'Georgia Institute Of Technology',
    'Tecent Ai Lab': 'Tencent Ai Lab',
    'Tencent': 'Tencent Ai Lab',
}
# Scrape the ICML 2018 accepted-papers page and store one tuple per paper:
#   (title, [(author, institution), ...], {author names}, {institutions},
#    first author name, last author name)
url = 'https://icml.cc/Conferences/2018/AcceptedPapersInitial'
driver_timeout = 15  # seconds WebDriverWait polls before giving up

with closing(Firefox()) as browser:
    browser.get(url)
    # Block until some element containing the marker text 'Successful Page Load' exists.
    WebDriverWait(browser, timeout=driver_timeout).until(
        lambda x: x.find_elements_by_xpath("//*[contains(text(), 'Successful Page Load')]"))

    text = 'ICML 2018 Accepted Papers'
    list_of_papers = browser.find_elements_by_xpath('//*[contains(text(), "' + text + '")]')

    papers = []
    # The entries are the <p> elements under the parent of the first matching
    # element; the first <p> is skipped.
    for el in list_of_papers[0].find_element_by_xpath('..').find_elements_by_tag_name('p')[1:]:
        paper_name = el.find_element_by_tag_name('b').text
        # The <i> element holds the authors, separated by '·', each written as
        # "Name (Institution)".
        paper_authors = el.find_element_by_tag_name('i').text.split('·')

        authors = []
        names = set()
        institutions = set()
        for author in paper_authors:
            if not author.strip():
                # Blank fragment (e.g. a stray separator): nothing to record.
                continue
            pieces = author.split('(')
            aut = pieces[0].strip().lower().title()
            if len(pieces) > 1:
                institution = pieces[1].split(')')[0].strip().lower().title()
            else:
                # No parenthesised affiliation; the original indexing raised IndexError here.
                institution = ''
            # Collapse known aliases/misspellings onto one canonical name.
            institution = unis.get(institution, institution)

            authors.append((aut, institution))
            names.add(aut)
            if institution:
                # Do not let a missing affiliation become an '' bucket in the counts.
                institutions.add(institution)

        if not authors:
            # Without any author there is no first/last author to index below.
            continue
        papers.append((paper_name, authors, names, institutions, authors[0][0], authors[-1][0]))

# Cache the parsed result on disk.
with open('papers.pickle', 'wb') as handle:
    pickle.dump(papers, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Tally per-paper appearances.  Each ``paper`` tuple is indexed positionally:
#   paper[2] = set of author names, paper[3] = set of institutions,
#   paper[4] = first author, paper[5] = last author.
# Because [2] and [3] are sets, an author/institution counts at most once per paper.
# Counter is a dict subclass, so ``.items()`` and key lookups keep working for
# any code that consumes these tables.
authors_count = Counter(name for paper in papers for name in paper[2])
institution_count = Counter(inst for paper in papers for inst in paper[3])
authors_first_count = Counter(paper[4] for paper in papers)
authors_last_count = Counter(paper[5] for paper in papers)
def _print_at_least(title, counts, minimum):
    """Print *title*, then each ``key count`` pair with count >= *minimum*, highest count first."""
    print(title)
    kept = [(key, count) for key, count in counts.items() if count >= minimum]
    # list.sort is stable, so entries with equal counts keep their dictionary order.
    kept.sort(key=lambda pair: pair[1], reverse=True)
    for key, count in kept:
        print(key, count)


# Sample output kept from the original script:
# Tong Zhang 8
# Lawrence Carin 7
# Jun Zhu 6
# Quanquan Gu 6
# Le Song 6
# Remi Munos 6
# Sergey Levine 6
# Pieter Abbeel 6
# Bernhard Schölkopf 5
# Amin Karbasi 5
# Shimon Whiteson 5
_print_at_least('Authors', authors_count, 5)

# Sample output kept from the original script:
# Google 48
# Carnegie Mellon University 32
# University Of California, Berkeley 31
# Deepmind 31
# Mit 28
# Stanford University 28
# Microsoft 27
# Princeton University 20
# Oxford 19
# Facebook 18
# Cornell 16
# University Of Texas At Austin 16
# École Polytechnique Fédérale De Lausanne 15
# University Of Toronto 15
# University Of Cambridge 14
# Eth Zurich 14
# Columbia University 13
# Tsinghua University 12
# Georgia Institute Of Technology 11
# University Of Southern California 10
# Duke University 10
_print_at_least('Institutions', institution_count, 10)

_print_at_least('First authors', authors_first_count, 2)

_print_at_least('Last authors', authors_last_count, 4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment