Skip to content

Instantly share code, notes, and snippets.

@refaim
Created October 21, 2015 01:32
Show Gist options
  • Save refaim/3d315fc29bfc8dac72c9 to your computer and use it in GitHub Desktop.
Save refaim/3d315fc29bfc8dac72c9 to your computer and use it in GitHub Desktop.
import lxml.html
import re
import sys
import urllib.request
# Page fetched and scanned by main().
# NOTE(review): the path looks like a request for a random sample of up to
# 1000 questions -- confirm against the site before relying on that.
URL = 'http://pda.baza-voprosov.ru/random/types123/limit1000'
# Words that mark a question block as worth printing.
KEYWORDS = ('дуплет', 'блиц')

# Compiled once instead of rebuilding a pattern per keyword per question.
# Word boundaries are used rather than requiring a non-word character on
# both sides, so a keyword at the very start or end of the text also matches.
_KEYWORD_RE = re.compile(
    r'\b(?:{})\b'.format('|'.join(re.escape(word) for word in KEYWORDS)),
    re.IGNORECASE,
)


def mentions_keyword(text):
    """Return True if *text* contains one of KEYWORDS as a whole word.

    Matching is case-insensitive. A keyword embedded in a longer word
    does not count.
    """
    return _KEYWORD_RE.search(text) is not None


def _print_best_effort(text):
    """Print *text*, degrading gracefully on consoles that cannot encode it.

    Characters the stdout encoding cannot represent are replaced instead of
    dropping the whole text.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        encoding = sys.stdout.encoding or 'ascii'
        print(text.encode(encoding, errors='replace').decode(encoding))


def main():
    """Fetch URL and print every question block that mentions a keyword.

    The downloaded page is parsed with lxml. Each ``div.random_question``
    element whose text mentions one of KEYWORDS is printed, followed by a
    separator line of 80 dashes.

    Returns:
        int: always 1; the script entry point passes it to ``sys.exit``.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(URL) as response:
        html = response.read()

    tree = lxml.html.document_fromstring(html)
    for question in tree.cssselect('div.random_question'):
        text = question.text_content()
        if not mentions_keyword(text):
            continue
        _print_best_effort(text)
        print('-' * 80)

    # NOTE(review): a nonzero exit status conventionally signals failure.
    # Kept as-is to preserve the script's existing exit code -- confirm
    # whether 0 was intended.
    return 1
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment