Skip to content

Instantly share code, notes, and snippets.

@matesio
Last active December 31, 2017 23:58
Show Gist options
  • Save matesio/47037a896b5b812cfa1210a62b6065fa to your computer and use it in GitHub Desktop.
Save matesio/47037a896b5b812cfa1210a62b6065fa to your computer and use it in GitHub Desktop.
extract json from a file and insert in to a cassandra table.
#---------------------------------
#requisites
#pip install cassandra-driver
#pip install json
#---------------------------------
#data,json
#---------------------------------
'''
{
"information" :[
{
"duration": "Approx. 2 hours",
"title": "五月天LIFE人生无限公司 巡回 新加坡站 MAYDAY 2017 LIFE TOUR",
"description": "五月天LIFE人生无限公司 巡回 新加坡站MAYDAY 2017 LIFE TOUR\n五月天LIFE 人生无限公司 新加坡站 三万张门票全数售出!主办单位 最后释出 限量门票 让更多社员入社!12月15-17日 一连三晚 五月天 LIFE 人生无限公司 新加坡站无限展开!\n[演唱会之王] 五月天再次打破自我记录,全新巡回LIFE人生无限公司新加坡站3万张门票开票当天既秒杀,为了让更多社员加入,主办单位经协调确定每场释出限量Side View Seats!\n五月天全新巡回LIFE人生无限公司3月18日从台湾高雄起跑,12月15,16 及17日一连三晚将在新加坡室内体育馆开唱,全场3万张门票在开票当日既秒杀,也刷新了五月天在新加坡的个人记录,足见五月天LIVE魅力无法挡!\n为了让更多社员能够打卡加入[人生无限公司],主办单位经协调,确定每场释出限量Side View Seats让大家抢购!限量门票将在10月2日早上10点透过SPORTSHUBTIX公开发售,每张演唱会门票将附送荧光棒一支,欲知更多详情请上新加坡滚石唱片官方脸书查询。\n五月天“LIFE人生无限公司”巡回演唱会新加坡站将在2017年12月15,16 &17日,连续三晚于新加坡室内体育馆无限展开! 人生中最好的一天,一生多活一场五月天,你绝对不能错过!\nMAYDAY sets a new personal record with 30,000 tickets for the Singapore leg of “MAYDAY 2017 LIFE TOUR” snapped up upon ticket launch. Due to the overwhelming response from fans, the organizer negotiated and will be releasing limited Side View Seats tickets to these sold-out shows! \nMAYDAY Life Tour kicked off on 18th March in Kaohsiung, Taiwan with the band’s smashing performances and spectacular concert production. This highly-anticipated and adrenaline-pumping new tour is coming to Singapore on 15,16 and 17 December and 30,000 tickets to the 3-night shows were snapped up upon launch. \n More fans can join MAYDAY Life tour in Singapore with the organizer releasing limited Side View Seats tickets for each show! Tickets will be on sale 2nd October from 10am onwards via SPORTSHUBTIX. Each ticket is entitled to a concert light stick, For more information, please check out Rock Records Singapore Facebook page.\n Don’t miss this last chance to grab your tickets to the year’s biggest year-end party. Come spend the best day of your life with MAYDAY. Fans be ready to scream and cheer at the MAYDAY Life Tour party!\n​​​​​​​​​",
"end date": "2017-12-17 22:00:00",
"start date": "2017-12-15 20:00:00",
"venue": "2;#Singapore Indoor Stadium"
}
]
}
'''
#----------------------------------
import json
import logging
import time
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement
#--------------------------
#suppressing logs
log = logging.getLogger("mylogger")
streamHandler = logging.StreamHandler()
streamHandler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %message)s')
streamHandler.setFormatter(formatter)
log.addHandler(streamHandler)#--------------------------
jsonFile = 'data.json'
cassandra_hosts = '127.0.0.1'
file = open (jsonFile,"r")
data = file.read()
jsonData = json.loads(data)
#print jsonData
#print jsonData['information'][0]["description"]
jsonIteration = jsonData ["information"]
#---------------------------------------
#start cassandra ingestion
#---------------------------------------
#cassandra connection initiation
session = None
def connect(nodes):
cluster = Cluster(nodes)
metadata = cluster.metadata
global session
session = cluster.connect()
log.info('Connected to cluster: ' + metadata.cluster_name)
def create_schema():
#drop keyspace
session.execute("""Drop KEYSPACE IF EXISTS mykeyspace;""")
#keyspace creation
session.execute("""CREATE KEYSPACE mykeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':1};""")
#table creation
session.execute("""
CREATE TABLE mykeyspace.events
(title text PRIMARY KEY,description text,
duration text,end_date text,start_date text,
venue text) ;
""")
log.info ("keyspace and schema created successfully")
def close():
session.cluster.shutdown()
log.info('Connection closed.')
def insert(title,description,duration,end_date,start_date,venue):
query = "insert INTO fiverr.events(title,description,duration,end_date,start_date,venue)VALUES (?,?,?,?,?,?)"
prepared = session.prepare(query)
session.execute(prepared,(title,description,duration,end_date,start_date,venue))
log.info("successfully inserted data")
def ext_insert():
for val in jsonIteration:
duration = val['duration']
print ">>> duration"
print duration
title = val['title']
print ">>> title"
print title
description = val['description']
print ">>> description"
print description
end_date = val['end date']
print ">>> end_date"
print end_date
start_date = val['start date']
print start_date
venue = val ['venue']
print ">>> venue"
print venue
log.info("extraction completed successfully")
insert(title,description,duration,end_date,start_date,venue)
connect([cassandra_hosts])
create_schema()
ext_insert()
#close connection
close()
#---------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment