Skip to content

Instantly share code, notes, and snippets.

@Sciroccogti
Created November 9, 2019 12:28
Show Gist options
  • Save Sciroccogti/8bbbcdeccc818db868abfadfa7f0cb48 to your computer and use it in GitHub Desktop.
Save Sciroccogti/8bbbcdeccc818db868abfadfa7f0cb48 to your computer and use it in GitHub Desktop.
爬取东南大学旧课程表的小脚本,作为自己第一个投入实战的小爬虫,留个纪念罢了
# coding:utf-8
import requests # 导入网页请求库
import re # 导入正则表达式库
import openpyxl
from bs4 import BeautifulSoup # 导入网页解析库
# Regex capturing the student's name from the header cell of the timetable page.
name_pattern = r'姓名:(.*?)</td>'

# Exact HTML fragments that mark each tracked course inside a timetable cell.
tongdian_pattern = '<font class="style8">通信电子线路</font>'
tongyuan_pattern = '<font class="style8">通信原理(双语)</font>'
dsp_pattern = '<font class="style8">数字信号处理</font>'
weibo_pattern = '<font class="style8">微波工程基础</font>'
coa_pattern = '<font class="style8">计算机组织与结构(双语)I</font>'

# Student IDs to query: two contiguous ranges (upper bounds are exclusive).
IDlist = list(range(4217701, 4217753)) + list(range(4218801, 4218834))

# Result rows, filled by the scraping loop: [name, one mark per tracked course].
name_list = []
def _find_student_name(soup):
    """Return the name captured by ``name_pattern`` from the first matching cell.

    Only ``<td width="20%" align="left">`` cells are searched. Returns ``None``
    when no such cell contains the name label.
    """
    for cell in soup.find_all('td', attrs={'width': '20%', 'align': 'left'}):
        match = re.search(name_pattern, str(cell))
        if match:
            return match.group(1)
    return None


def _course_marks(soup):
    """Return one mark per tracked course, in output-column order.

    A mark is '■' when the course's HTML fragment occurs in any timetable
    cell of the page, and '' otherwise.
    """
    cells = [
        str(cell)
        for cell in soup.find_all(
            'td',
            attrs={'height': '34', 'class': 'line_topleft',
                   'width': '35%', 'align': 'center'},
        )
    ]
    course_patterns = (tongdian_pattern, tongyuan_pattern, dsp_pattern,
                       weibo_pattern, coa_pattern)
    return [
        '■' if any(pattern in cell for cell in cells) else ''
        for pattern in course_patterns
    ]


for studentID in IDlist:
    # Build the query URL; the literal '0' prefix before the ID is part of the query value.
    url = ('http://xk.urp.seu.edu.cn/jw_service/service/stuCurriculum.action?queryStudentId=0'
           + str(studentID) + '&queryAcademicYear=19-20-2')
    try:
        # A timeout keeps one stalled request from hanging the whole run.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failure and move on to the next student.
        print('request failed for', studentID, exc)
        continue

    # Parse the returned page.
    soup = BeautifulSoup(r.text, 'html.parser')

    student_name = _find_student_name(soup)
    if student_name is None:
        # No name cell on the page: skip rather than crash or reuse a
        # name left over from a previous iteration.
        continue
    print(student_name, studentID)

    marks = _course_marks(soup)
    # Record only students enrolled in at least one tracked course.
    if any(marks):
        row = [student_name] + marks
        print(row)
        name_list.append(row)
print('opening xlsx...')
# Create a new workbook.
# NOTE(review): per the openpyxl docs a new Workbook already holds one default
# sheet; it is left in place here, after the 'list' sheet.
wb = openpyxl.Workbook()
# Insert a sheet named 'list' at index 0 and use the worksheet that
# create_sheet returns instead of looking it up again by position.
ws = wb.create_sheet('list', 0)
# Header row; column order matches the rows collected in name_list.
# The fourth header previously read '数组信号处理', a typo for the course
# '数字信号处理' that dsp_pattern matches.
ws.append(['姓名','通信电子线路','通信原理','数字信号处理','微波工程','计算机组织与结构'])
for row in name_list:
    ws.append(row)
# Save the workbook as an xlsx file in the current working directory.
wb.save('list.xlsx')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment