Forked from mlivingston40/read_write_pyspark_redshift.py
Created
December 9, 2022 04:50
-
-
Save vaquarkhan/a4d8b40ddd509129e1b4b63e8cc2ce5f to your computer and use it in GitHub Desktop.
Basic setup for reading from and writing to Amazon Redshift in a PySpark environment.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Jar configuration required for the Redshift connector (JDBC driver + spark-redshift + spark-avro)
%%configure
{
  "conf": {
    "spark.jars": "https://s3.amazonaws.com/redshift-downloads/drivers/jdbc/1.2.36.1060/RedshiftJDBC42-no-awssdk-1.2.36.1060.jar",
    "spark.jars.packages": "org.apache.spark:spark-avro_2.11:2.4.2,io.github.spark-redshift-community:spark-redshift_2.11:4.0.1"
  }
}
# Redshift connection info.
# NOTE(review): placeholder credentials hard-coded here -- in real use, load
# these from a secrets manager / environment, never from source.
username = "UN"
passw = "PW"
url = "jdbc:redshift://CLUSTER_URL"
# JDBC URL with credentials appended as query parameters.
# Bug fix: the query string must be separated from the base URL by '?'
# (the original concatenated them with no separator, yielding
# "jdbc:redshift://CLUSTER_URLuser=UN&password=PW").
path = url + "?user=" + username + "&password=" + passw
# S3 location the connector uses to stage UNLOAD/COPY data.
tempdir = "TEMP_DIR"
# Read from Redshift by pushing a SQL SELECT down to the cluster.
query = " "  # placeholder -- put your SELECT statement here

# NOTE(review): the original created a deprecated, unused SQLContext
# ("sc = spark; sql_context = SQLContext(sc)"); removed, since spark.read
# is the modern entry point and the only one actually used below.
df = (
    spark.read
    .format("io.github.spark_redshift_community.spark.redshift")
    # Use the credentialed JDBC URL ("path"), consistent with the write
    # section of this script; the bare cluster URL carries no user/password
    # and cannot authenticate unless IAM auth is configured separately.
    .option("url", path)
    # .option("dbtable", "schema_table")  # alternative: read a whole table
    .option("query", query)
    # Forward Spark's S3 credentials so Redshift can write the staged UNLOAD.
    .option("forward_spark_s3_credentials", "true")
    .option("tempdir", tempdir)  # S3 staging location
    .load()
)
# Persist the DataFrame to the Redshift table "test.test".
schema_table = "test.test"

# mode("error") aborts if the target table already exists, so a rerun
# cannot silently overwrite or append to existing data.
(
    df.write
    .format("io.github.spark_redshift_community.spark.redshift")
    .option("url", path)
    .option("dbtable", schema_table)
    .option("forward_spark_s3_credentials", "true")
    .option("tempdir", tempdir)
    .mode("error")
    .save()
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment