Skip to content

Instantly share code, notes, and snippets.

@shwangdev
Created November 1, 2012 12:28
Show Gist options
  • Save shwangdev/3993371 to your computer and use it in GitHub Desktop.
Save shwangdev/3993371 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Time-stamp: <2012-11-01 22:36:50 Thursday by devil>
# @version 1.0
# @author Xiang Wang (xiang_wang@trendmicro.com.cn)
import urllib2,sys
import re
# quote/Request/urlopen live in different modules on Python 2 and Python 3;
# importing them here keeps this section self-contained on either layout.
try:
    from urllib import quote
    from urllib2 import Request, urlopen
except ImportError:
    from urllib.parse import quote
    from urllib.request import Request, urlopen

BASE_URI = 'http://dict.cn/'
USER_AGENT = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) '
              'Gecko/20091102 Firefox/3.5.5')

# One <li><span>...</strong></li> entry.  The match is lazy so that several
# entries sharing a single line are returned separately instead of being
# merged into one oversized match.
ENTRY_RE = re.compile(r'<li><span>.*?</strong></li>')
# Inside one entry: the <span> text and the <strong> text.
PAIR_RE = re.compile(r'<span>(.+?)</span>.*<strong>(.+?)</strong>')


def build_uri(words):
    """Return the lookup URL for *words*.

    The words are joined with single spaces and percent-encoded as one path
    segment, so spaces become ``%20`` and reserved or non-ASCII characters
    can no longer corrupt the request path.
    """
    return BASE_URI + quote(' '.join(words), safe='')


def fetch_page(uri):
    """Download *uri* with the script's User-agent header; return raw bytes.

    The response object is always closed, including when ``read()`` fails.
    """
    req = Request(uri)
    req.add_header('User-agent', USER_AGENT)
    resp = urlopen(req)
    try:
        return resp.read()
    finally:
        resp.close()


def parse_entries(html):
    """Extract ``(span_text, strong_text)`` pairs from *html*.

    *html* may be raw bytes or already-decoded text.  Bytes are decoded once
    as UTF-8 with undecodable bytes replaced, so one bad byte cannot abort
    the whole lookup.  Returns a list of 2-tuples in page order; the list is
    empty when nothing matches.
    """
    if isinstance(html, bytes):
        html = html.decode('utf8', 'replace')
    entries = []
    for item in ENTRY_RE.findall(html):
        found = PAIR_RE.search(item)
        if found:
            entries.append((found.group(1), found.group(2)))
    return entries


def main(argv=None):
    """Look up the words given on the command line and print the results.

    *argv* defaults to ``sys.argv``.  Exits with the usage message when no
    word is supplied, and with a short error message when the request
    fails.  Each result is printed as ``<span text>\\t<strong text>``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) == 1:
        sys.exit("Usage: " + argv[0] + " [#English words or Chinese words#]")
    try:
        html = fetch_page(build_uri(argv[1:]))
    except IOError as err:
        # URLError/HTTPError derive from IOError (OSError on Python 3).
        sys.exit('Lookup failed: %s' % err)
    for term, meaning in parse_entries(html):
        line = term + '\t' + meaning
        if not isinstance(line, str):
            # Python 2 unicode object: write UTF-8 bytes so that printing
            # does not depend on the terminal's default codec.
            line = line.encode('utf8')
        print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment