Created
June 12, 2016 00:08
-
-
Save theho/ee20727c9e4d086ac0053cd8821d83e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Single Node Cassandra/Spark using pySpark
"""
import os
import sys

# Location of the local Spark installation, exported to the process
# environment under the SPARK_HOME variable.
SPARK_HOME = "/Users/jimmy/dev/spark"
os.environ['SPARK_HOME'] = SPARK_HOME
# PYSPARK_PYTHON names the Python executable PySpark should use.
os.environ["PYSPARK_PYTHON"] = "python3"

# Spark master URL, applied later as the 'spark.master' setting.
MASTER = 'spark://172.16.41.129:7077'
# Host name used later as 'spark.cassandra.connection.host'.
CASSANDRA = 'spark'  # Cassandra installed onto the same node as spark master/slave

# Make the PySpark sources and the Py4J archive under SPARK_HOME importable.
# NOTE(review): the Py4J file name is version-specific -- confirm it matches
# the archive actually present under SPARK_HOME/python/lib.
sys.path.append(os.path.join(SPARK_HOME, 'python'))
sys.path.append(os.path.join(SPARK_HOME, 'python/lib/py4j-0.9-src.zip'))
# Import the Spark bindings; if they cannot be imported, report the error and
# exit with status 1 instead of continuing to a later NameError.
try:
    import pyspark_cassandra
    from pyspark import SparkContext, SparkConf
except ImportError as e:
    print("Can not import Spark Modules", e)
    sys.exit(1)
else:
    # Only reached when both imports above succeeded.
    print("Successfully imported Spark Modules")
# Build the Spark configuration: application name, master URL, the
# pyspark-cassandra package coordinate, and the Cassandra contact host.
# Parentheses replace backslash continuations so the chain cannot be broken
# by trailing characters after a backslash.
conf = (
    SparkConf()
    .setAppName("PySpark Cassandra Test")
    .set('spark.master', MASTER)
    .set("spark.jars.packages", 'TargetHolding/pyspark-cassandra:0.3.5')
    .set("spark.cassandra.connection.host", CASSANDRA)
)
# Authentication keys noted by the original author (not set here; verify the
# exact key names against the connector documentation before use):
# spark.cassandra.username
# spark.cassandra.password
# A test table was created as test.kv, with two columns, k and v.
from pyspark.sql import SQLContext

sc = pyspark_cassandra.CassandraSparkContext(conf=conf)
try:
    # Row count of test.kv through the pyspark_cassandra context.
    print(sc.cassandraTable("test", "kv").count())

    # Same table through the DataFrame reader, filtered on k == 'hello'.
    sqlContext = SQLContext(sc)
    kv = (
        sqlContext.read.format("org.apache.spark.sql.cassandra")
        .load(keyspace="test", table="kv")
    )
    kv.where(kv.k == 'hello').show()
finally:
    # Stop the context even if a query above raises.
    sc.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment