Last active
December 2, 2022 06:41
-
-
Save niyaton/0c6ea8e1b2c3be419c613c4e6461dbf5 to your computer and use it in GitHub Desktop.
Using these scripts, you can generate a Slack message for each session listed on researchr.org. The scripts are customized for APSEC 2022.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build Slack-ready session announcements from a saved researchr.org program
# page (customized for APSEC 2022).
#
# For every "Paper" session table in the HTML, the script collects each talk's
# time slot, title, authors and track, then prints one message per session.
from bs4 import BeautifulSoup

# Maps the full track name shown on the program page to its abbreviation.
track2track = {
    'EDU - Software Engineering Education': 'EDU',
    'ERA - Early Research Achievements': 'ERA',
    'SEIP - Software Engineering in Practice': 'SEIP',
    'Technical Track': 'Technical Track',
}

# This file includes appended modal data.
# To create this file, you need to use "show_click_all_papers_command.py".
# The encoding is passed explicitly so the result does not depend on the
# platform default (assumes the saved page is UTF-8 -- TODO confirm).
with open('apsec-program-expanded.html', encoding='utf-8') as f:
    program_text = f.read()

soup = BeautifulSoup(program_text, 'html.parser')

sessions = []
# loop for each session
for table in soup.find_all('table'):
    # ignore non-target tables
    if not table.has_attr('data-facet-date'):
        continue
    event_type = table.find('div', attrs={'class': 'event-type'})
    if event_type is None or event_type.text != 'Paper':
        continue

    skip_flag = True
    papers = []
    for tr in table.find_all('tr'):
        # <tr class="firefox-fix"> is very useful as the anchor:
        # each paper's information is in a <tr> placed after the "firefox-fix" row.
        # Tag.get() is used so that a <tr> without a class attribute does not
        # raise KeyError.
        row_classes = tr.get('class') or []
        if row_classes and row_classes[0] == 'firefox-fix':
            skip_flag = False
            continue
        if skip_flag:
            continue

        start_time = tr.find('div', attrs={'class': 'start-time'}).text

        title_link = tr.find('a', {"data-event-modal": True})
        # modal_id is the unique ID of the paper.
        modal_id = title_link.attrs['data-event-modal']
        title_str = title_link.text

        # Only the author names are kept; affiliations and the links to the
        # author pages are ignored.
        performers = tr.find('div', attrs={'class': 'performers'})
        authors = [author.text for author in performers.find_all('a')]

        # Convert the full track name to its abbreviation; a track that is not
        # listed in track2track is shown under its full name instead of
        # aborting the whole run.
        track_name = tr.find('div', attrs={'class': 'prog-track'}).text
        track = track2track.get(track_name, track_name)

        # A talk ends when the next talk in the same session starts.
        if papers:
            papers[-1]['end_time'] = start_time
        papers.append({
            'modal_id': modal_id,
            'start_time': start_time,
            'title': title_str,
            'authors': authors,
            'track': track,
        })

    # A session table without any paper row has nothing to announce, and
    # papers[-1] below would raise IndexError.
    if not papers:
        continue

    session_title = table.find('div', attrs={'class': 'session-info-in-table'}).contents[0]
    room = table.attrs['data-facet-room']

    # add the end time of the final presentation:
    # it is the text after the last '-' of the session's slot label.
    session_end_time = table.find('div', attrs={'class': 'slot-label'}).text.split('-')[-1].lstrip()
    papers[-1]['end_time'] = session_end_time

    sessions.append({
        'date': table.attrs['data-facet-date'],
        'title': session_title,
        'room': room,
        'papers': papers,
    })

# collect the link URL to each paper's detailed information page
modal2link = {}
for modal_span in soup.find_all('span', attrs={'class': 'appended'}):
    star_link = modal_span.find('a', {'data-event-star': True})
    anchors = modal_span.find_all('a')
    # Spans that do not look like an appended paper modal are skipped; if a
    # paper's link is really missing, the lookup in the print loop below
    # still fails loudly with KeyError.
    if star_link is None or len(anchors) < 2:
        continue
    # The second-to-last <a> of the span carries the detail-page URL.
    modal2link[star_link.attrs['data-event-star']] = anchors[-2].attrs['href']

# print all session information
for session in sessions:
    print(session['date'])
    print(f'This channel is for *{session["title"]}* session at *{session["room"]}*.')
    print('The titles and presenters are')
    for paper in session['papers']:
        link = modal2link[paper['modal_id']]
        title = f'[{paper["title"]}]({link}) ({paper["track"]})'
        print(f'* {paper["start_time"]} - {paper["end_time"]} {title}')
        print(' * ', end='')
        print(', '.join(paper['authors']))
    print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print JavaScript that opens, and then dismisses, the detail modal of every
# paper listed on the APSEC program page.
#
# Run this script and paste its output into the JavaScript console on the
# APSEC program page. After the JavaScript has run, save the modified HTML
# by using the browser's inspection feature.
from bs4 import BeautifulSoup

# This file is just the downloaded HTML.
# You can get this file by using curl or wget.
# The encoding is passed explicitly so the result does not depend on the
# platform default (assumes the downloaded page is UTF-8 -- TODO confirm).
with open('apsec-program.html', encoding='utf-8') as f:
    program_text = f.read()

soup = BeautifulSoup(program_text, 'html.parser')

for table in soup.find_all('table'):
    # Only tables carrying a date facet whose event type is "Paper" are used.
    if not table.has_attr('data-facet-date'):
        continue
    event_type = table.find('div', attrs={'class': 'event-type'})
    if event_type is None or event_type.text != 'Paper':
        continue

    skip_flag = True
    for tr in table.find_all('tr'):
        # Rows are ignored until the <tr class="firefox-fix"> anchor row has
        # been seen; every later row is handled as a paper row.
        # Tag.get() is used so that a <tr> without a class attribute does not
        # raise KeyError.
        row_classes = tr.get('class') or []
        if row_classes and row_classes[0] == 'firefox-fix':
            skip_flag = False
            continue
        if skip_flag:
            continue

        modal_id = tr.find('a', {"data-event-modal": True}).attrs['data-event-modal']
        # Click the paper's link, then click the last "dismiss" link on the page.
        # The attribute selector is closed with ']' so it is a well-formed CSS selector.
        print(f'document.querySelector("a[data-event-modal=\'{modal_id}\']").click()')
        print('modals = document.querySelectorAll("a[data-dismiss=\'modal\']")')
        print('modals[modals.length - 1].click()')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment