Last active
February 24, 2016 06:11
-
-
Save lackofdream/1457484a5a1889ba7ba0 to your computer and use it in GitHub Desktop.
学生个人信息系统
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
from datetime import datetime, timedelta | |
import re | |
def get_start_day():
    """Return the date that get_time() treats as day 1 of week 1.

    The value is fixed at 2016-02-29 (a Monday), at midnight.
    """
    return datetime(year=2016, month=2, day=29)
def date_to_str(dt):
    """Format *dt* as an iCalendar local date-time, e.g. ``20160229T080000``."""
    ics_datetime_format = '%Y%m%dT%H%M%S'
    return dt.strftime(ics_datetime_format)
# iCalendar BYDAY codes, indexed by the day number that precedes the dash in a
# course-time string (see get_time). 'SU' appears at both index 0 and index 7.
bydays = [
    'SU',  # 0
    'MO',  # 1
    'TU',  # 2
    'WE',  # 3
    'TH',  # 4
    'FR',  # 5
    'SA',  # 6
    'SU',  # 7
]
def ics_head() -> str:
    """Return the opening lines of the calendar file.

    The text opens the VCALENDAR object and declares a fixed-offset
    ``Asia/Shanghai`` (+0800) VTIMEZONE that the events reference through
    ``TZID``. The matching ``END:VCALENDAR`` is produced by ics_tail().

    VERSION is ``2.0``: that is the value RFC 5545 defines for iCalendar
    data, and consumers may reject other values.
    """
    return """BEGIN:VCALENDAR
PRODID:-//Ning Humbert//Schedule2ICS//ZH
VERSION:2.0
CALSCALE:GREGORIAN
METHOD:PUBLISH
BEGIN:VTIMEZONE
TZID:Asia/Shanghai
BEGIN:STANDARD
TZOFFSETFROM:+0800
TZOFFSETTO:+0800
TZNAME:CST
DTSTART:19700101T000000
END:STANDARD
END:VTIMEZONE
"""
def ics_tail() -> str:
    """Return the line that closes the VCALENDAR object."""
    closing_line = "END:VCALENDAR\n"
    return closing_line
def gen_ics(body: str):
    """Write a complete calendar to ``test.ics`` in the working directory.

    Args:
        body: Concatenated VEVENT text, placed between the output of
            ics_head() and ics_tail().

    The file is written as UTF-8 explicitly. Relying on the platform default
    encoding can raise UnicodeEncodeError or produce a mis-encoded file when
    the events contain non-ASCII text.
    """
    with open('test.ics', 'w', encoding='utf-8') as f:
        f.write(ics_head() + body + ics_tail())
def read_html_to_bs(path: str):
    """Load the HTML file at *path* and parse it with BeautifulSoup (lxml).

    The bytes are decoded with the default codec (UTF-8) first; if that
    raises UnicodeDecodeError they are decoded as GBK instead.
    """
    with open(path, 'rb') as html_file:
        raw_bytes = html_file.read()

    try:
        markup = raw_bytes.decode()
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to GBK.
        markup = raw_bytes.decode('gbk')

    return BeautifulSoup(markup, 'lxml')
def find_table(bs):
    """Return the last ``<table>`` whose ``bgcolor`` is ``#F2EDF8``.

    Tables are scanned from the end of the document backwards, so when
    several match the last one wins. Returns None when no table matches.
    """
    from_last = reversed(bs.findAll('table'))
    return next(
        (tbl for tbl in from_last if tbl.attrs.get('bgcolor') == '#F2EDF8'),
        None,
    )
def find_courses(table):
    """Return the rows of *table* that have exactly nine ``<td>`` cells.

    Nine cells is the shape handle_a_course() requires. Every row with a
    different cell count is skipped, not just the first one, so an extra
    header, separator or footer row cannot reach handle_a_course() and trip
    its cell-count check.

    Args:
        table: A parsed ``<table>`` element exposing ``find_all``.

    Returns:
        A list of row elements in document order; empty if none qualify.
    """
    # find_all is the current BeautifulSoup spelling of the findAll alias.
    return [
        row for row in table.find_all('tr')
        if len(row.find_all('td')) == 9
    ]
def vanish_dash(maybe_dash: str) -> list:
    """Expand a number or an inclusive ``start-end`` range into a list of ints.

    ``"7"`` gives ``[7]`` and ``"3-5"`` gives ``[3, 4, 5]``. A reversed range
    gives ``[]``, and so does any text whose parts are not valid integers.
    """
    # Split on the first dash only; anything after it must be one integer.
    first, dash, last = maybe_dash.partition('-')
    try:
        if not dash:
            return [int(first)]
        low = int(first)
        high = int(last)
    except ValueError:
        return []
    return list(range(low, high + 1))
def generate_dash(none_dash: list) -> list:
    """Collapse a list of ints into ``(start, end)`` runs of consecutive values.

    A run continues while each value is exactly one greater than the previous
    one; any other step closes the run and starts a new one. Input order is
    kept as given (no sorting or de-duplication).

    Example: ``[1, 2, 3, 5, 7, 8]`` -> ``[(1, 3), (5, 5), (7, 8)]``.

    Args:
        none_dash: The values to group. The list is not modified.

    Returns:
        A list of inclusive ``(start, end)`` tuples; empty for empty input.
    """
    if not none_dash:
        return []

    runs = []
    run_start = run_end = none_dash[0]
    for value in none_dash[1:]:
        if value - run_end == 1:
            run_end = value
        else:
            runs.append((run_start, run_end))
            run_start = run_end = value
    # Close the last run explicitly. Appending a sentinel to the input would
    # mutate the caller's list and swallow a run that ends at sentinel - 1.
    runs.append((run_start, run_end))
    return runs
def handle_date(date: str):
    """Parse a week specification into runs of consecutive week numbers.

    Every ``N`` or ``N-M`` token found in *date* is expanded with
    vanish_dash(), and the combined numbers are grouped by generate_dash().
    For example ``"1-8,10"`` yields ``[(1, 8), (10, 10)]``.

    Args:
        date: Free-form text containing week numbers and ranges.

    Returns:
        A list of inclusive ``(first_week, last_week)`` tuples.
    """
    # Raw string: '\d' and '\-' are invalid escapes in a normal literal and
    # trigger a SyntaxWarning on recent Python versions. The pattern itself
    # is unchanged.
    tokens = re.findall(r'(\d+\-\d+|\d+)', date)
    weeks = []
    for token in tokens:
        weeks.extend(vanish_dash(token))
    return generate_dash(weeks)
def get_time(start_week: int, course_time: str):
    """Work out the first occurrence of a class from its ``day-period`` code.

    Args:
        start_week: 1-based week number, counted from get_start_day().
        course_time: Text of the form ``"<day>-<period>"``. ``<day>`` indexes
            ``bydays`` and is also the 1-based day offset within the week.

    Returns:
        A dict with ``'byday'`` (the BYDAY code), ``'start_time'`` and
        ``'end_time'`` (naive datetimes on the computed day).

    Periods 1-5 use the table below; any other period number uses the
    20:35-22:20 slot.
    """
    # period -> ((start hour, start minute), (end hour, end minute))
    period_slots = {
        1: ((8, 0), (9, 45)),
        2: ((10, 5), (11, 50)),
        3: ((14, 0), (15, 45)),
        4: ((16, 5), (17, 50)),
        5: ((18, 40), (20, 25)),
    }
    other_slot = ((20, 35), (22, 20))

    sep = course_time.find('-')
    assert sep != -1
    day_number = int(course_time[:sep])
    course_day = bydays[day_number]
    period = int(course_time[sep + 1:])

    that_day = get_start_day() + timedelta(
        weeks=start_week - 1,
        days=day_number - 1,
    )
    assert isinstance(that_day, datetime)

    (begin_h, begin_m), (finish_h, finish_m) = period_slots.get(
        period, other_slot
    )
    year, month, day = that_day.year, that_day.month, that_day.day
    return {
        'byday': course_day,
        'start_time': datetime(year, month, day, begin_h, begin_m),
        'end_time': datetime(year, month, day, finish_h, finish_m),
    }
def _escape_ics_text(value: str) -> str:
    """Escape backslash, ';', ',' and newlines for an iCalendar TEXT value."""
    return (
        value.replace('\\', '\\\\')
        .replace(';', '\\;')
        .replace(',', '\\,')
        .replace('\n', '\\n')
    )


def handle_a_course(_course):
    """Build the VEVENT text for one timetable row.

    The row must contain nine ``<td>`` cells. Cells 0, 2, 3, 6, 7 and 8 are
    read as course name, course id, secondary id, location, ``day-period``
    code and week specification. The last character of each cell's text is
    dropped (presumably trailing whitespace -- TODO confirm against the page).

    One weekly-recurring VEVENT is emitted per run of consecutive weeks
    returned by handle_date(); COUNT is the number of weeks in that run.

    Args:
        _course: A parsed ``<tr>`` element exposing ``find_all``.

    Returns:
        The concatenated VEVENT blocks, or ``""`` if no weeks were parsed.
    """
    # find_all is the current BeautifulSoup spelling of the findAll alias.
    tds = _course.find_all('td')
    assert len(tds) == 9, "a course row must have exactly 9 cells"
    course_name = tds[0].get_text()[:-1]
    course_id = tds[2].get_text()[:-1]
    course_nid = tds[3].get_text()[:-1]
    # Cells 4 and 5 (required / exam flags) are not used.
    course_location = tds[6].get_text()[:-1]
    course_time = tds[7].get_text()[:-1]
    course_date = tds[8].get_text()[:-1]
    course_uid = course_id + course_nid + course_time

    event_template = """BEGIN:VEVENT
DTSTART;TZID=Asia/Shanghai:{start_time}
DTEND;TZID=Asia/Shanghai:{end_time}
RRULE:FREQ=WEEKLY;COUNT={count};BYDAY={byday}
UID:{course_uid}
DESCRIPTION:
LOCATION:{location}
SEQUENCE:1
STATUS:CONFIRMED
SUMMARY:{course_name}
TRANSP:OPAQUE
END:VEVENT
"""

    events = []
    for first_week, last_week in handle_date(course_date):
        time_info = get_time(first_week, course_time)
        events.append(event_template.format(
            start_time=date_to_str(time_info['start_time']),
            end_time=date_to_str(time_info['end_time']),
            byday=time_info['byday'],
            # The first week is appended so each run gets a distinct UID.
            course_uid=course_uid + str(first_week),
            # TEXT values must have ',', ';' and '\' escaped, otherwise a
            # comma in a room or course name corrupts the property.
            location=_escape_ics_text(course_location),
            course_name=_escape_ics_text(course_name),
            count=last_week - first_week + 1,
        ))
    return "".join(events)
if __name__ == '__main__':
    # Parse the saved timetable page 'b.html', convert every course row to
    # VEVENT text, and write the combined calendar via gen_ics().
    timetable = find_table(read_html_to_bs('b.html'))
    event_chunks = [handle_a_course(row) for row in find_courses(timetable)]
    gen_ics("".join(event_chunks))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment