Skip to content

Instantly share code, notes, and snippets.

@yuiseki
Created June 8, 2013 23:39
Show Gist options
  • Save yuiseki/5737006 to your computer and use it in GitHub Desktop.
Save yuiseki/5737006 to your computer and use it in GitHub Desktop.
# -*- coding:utf-8 -*-
import sys
import traceback
import datetime
from dateutil.parser import parse
import pytz
import json
import tweepy
import pymongo
import MeCab
import re
# Term to track: the single command-line argument when exactly one is
# supplied, otherwise the default hashtag.
argvs = sys.argv
keyword = argvs[1] if len(argvs) == 2 else "#akiba"
print(" ".join(("tracking to ", keyword)))

# Documents go into a collection named after the tracked keyword, inside
# the "twitter_track" database on the local MongoDB server.
database = "twitter_track"
collection = keyword
# NOTE(review): pymongo.Connection is the legacy client class; newer
# PyMongo releases provide MongoClient instead -- confirm which version
# this script is expected to run against.
conn = pymongo.Connection("localhost")
col = conn[database][collection]

# MeCab tagger configured with the "-Owakati" output format (word
# segmentation); shared by the stream listener.
m_wakati = MeCab.Tagger("-Owakati")
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that archives every received message in MongoDB.

    Each raw payload is decoded from JSON, given a ``created_dt`` datetime
    field, tokenized with MeCab into ``text_wakati`` when it carries tweet
    text, inserted into the module-level ``col`` collection and echoed to
    stdout.
    """

    # Literal fragments that are always removed before tokenizing.
    _NOISE_WORDS = [":", "RT", "#", "@"]

    # Shift applied to ``created_dt`` for the console echo only
    # (presumably UTC -> JST; confirm against the deployment time zone).
    _DISPLAY_OFFSET = datetime.timedelta(hours=9)

    @staticmethod
    def _strip_entities(text, entities):
        """Return *text* without URLs, hashtags, mentions and noise words.

        Args:
            text: The tweet text.
            entities: The tweet's ``entities`` mapping; missing entity
                lists are treated as empty.

        The fragments are removed as literal substrings, not as regular
        expressions, so characters such as ``.`` or ``?`` inside a URL
        cannot be misread as pattern syntax.
        """
        urls = [u["url"] for u in entities.get("urls", [])]
        hashtags = [h["text"] for h in entities.get("hashtags", [])]
        usernames = [m["screen_name"] for m in entities.get("user_mentions", [])]
        # URLs go first: stripping a hashtag or user name that happens to
        # occur inside a URL would otherwise leave the URL unmatched.
        for delw in urls + hashtags + usernames + CustomStreamListener._NOISE_WORDS:
            text = text.replace(delw, "")
        return text

    def on_data(self, data):
        """Store one raw stream message and echo it when it is a tweet.

        Args:
            data: JSON-encoded message as delivered by the stream.

        Messages without a ``text`` field (for example limit or delete
        notices) are still inserted, but are neither tokenized nor echoed
        as a tweet. Returns None.
        """
        print("-----")
        status = json.loads(data)

        # Only the timestamp is converted to a real datetime so that the
        # stored documents can be sorted and queried by time.
        at = status.get("created_at")
        if at is None:
            status["created_dt"] = datetime.datetime.now(pytz.utc)
        else:
            status["created_dt"] = parse(at)

        text = status.get("text")
        if text is not None:
            # Word extraction with MeCab. URLs, hashtags and user names are
            # removed first: that information is available from other
            # fields and MeCab does not segment it well.
            try:
                cleaned = self._strip_entities(text, status.get("entities") or {})
                # split() drops the empty tokens produced by MeCab's
                # trailing whitespace.
                status["text_wakati"] = m_wakati.parse(cleaned.encode("utf-8")).split()
            except Exception:
                # Tokenizing is best effort: report the problem but still
                # store the message below.
                print(traceback.format_exc())

        col.insert(status)

        if text is None:
            # Not a tweet; there is nothing meaningful to echo.
            return

        shown_at = (status["created_dt"] + self._DISPLAY_OFFSET).strftime("%H:%M:%S")
        screen_name = (status.get("user") or {}).get("screen_name", "")
        print(" ".join([shown_at, screen_name, ":", text.replace('\n', '')]))

    def on_error(self, status_code):
        """Report an error status on stderr and return False."""
        sys.stderr.write("%s %s\n" % ('Error! :', status_code))
        return False

    def on_timeout(self):
        """Report a timeout on stderr and return False."""
        sys.stderr.write('Timeout...' + "\n")
        return False
# Twitter OAuth credentials (left blank here; presumably to be filled in
# before the script is run).
conf = dict(
    consumer_key="",
    consumer_secret="",
    access_key="",
    access_secret="",
)

auth = tweepy.OAuthHandler(conf["consumer_key"], conf["consumer_secret"])
auth.set_access_token(conf["access_key"], conf["access_secret"])

# Open the stream filtered on the tracked keyword; any exception raised
# while creating or running it is printed as a traceback on stdout.
try:
    stream = tweepy.streaming.Stream(auth, CustomStreamListener())
    stream.filter(track=[keyword])
except Exception:
    print(traceback.format_exc())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment