Skip to content

Instantly share code, notes, and snippets.

@neoplacer
Created November 7, 2017 09:42
Show Gist options
  • Save neoplacer/ff01f8a7e4a0587e8421562f6238c4c2 to your computer and use it in GitHub Desktop.
Save neoplacer/ff01f8a7e4a0587e8421562f6238c4c2 to your computer and use it in GitHub Desktop.
IT-SA Event extractor
import io
import re
import urllib2

from bs4 import BeautifulSoup
# Day-overview page that is scraped, and the text file the table is saved to.
SOURCE_URL = "https://www.it-sa.de/de/events/tagesuebersicht/2016-10-19"
OUTPUT_PATH = 'event-10-19.txt'

# Three ' ; '-separated columns, each left-aligned and padded to 10 characters.
# A unicode literal so that non-ASCII titles/locations can be formatted on
# Python 2 without raising UnicodeEncodeError.
ROW_TEMPLATE = u'{0:10} ; {1:10} ; {2:10}'


def format_row(time, topic, place):
    """Return one table line (without trailing newline) for the given cells.

    Cells shorter than 10 characters are padded; longer ones are kept whole.
    """
    return ROW_TEMPLATE.format(time, topic, place)


def extract_rows(soup):
    """Yield one formatted row per lecture entry found in *soup*.

    For every ``div.cdb-lecturelist__entry-inner`` the time is read from the
    ``<strong>`` inside the time div, the topic from the link inside the
    ``<h3>`` of the content div, and the location from the second ``<p>`` of
    the content div. Entries that lack any of these elements are skipped.
    """
    for entry in soup.find_all('div', class_='cdb-lecturelist__entry-inner'):
        try:
            time_tag = entry.find('div', class_='cdb-lecturelist__time').span.strong
            content = entry.find('div', class_='cdb-lecturelist__content')
            topic_tag = content.find('h3').span.a
            place_tag = content.find_all('p')[1]
            # get_text() yields plain text even when a tag has nested children
            # (``.contents`` is a list and ``.string`` may be None).
            cells = (
                time_tag.get_text(strip=True),
                topic_tag.get_text(strip=True),
                place_tag.get_text(strip=True),
            )
        except (AttributeError, IndexError):
            # A missing tag surfaces as attribute access on None; a missing
            # second <p> as an IndexError. Such entries are left out.
            continue
        yield format_row(*cells)


def main():
    """Fetch SOURCE_URL, print the event table and write it to OUTPUT_PATH."""
    response = urllib2.urlopen(SOURCE_URL)
    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(response, 'html.parser')
    finally:
        response.close()

    rows = [format_row('Time', 'Thema', 'Ort')]
    rows.extend(extract_rows(soup))

    # io.open gives an explicit UTF-8 text file on both Python 2 and 3, and
    # the with-block closes it even if printing or writing fails.
    with io.open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        for row in rows:
            print(row)
            out.write(row + u'\n')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment