Skip to content

Instantly share code, notes, and snippets.

View rssanders3's full-sized avatar
🏃‍♂️

Robert Sanders rssanders3

🏃‍♂️
View GitHub Profile
# Setup Test Data
#
# Load two CSV datasets from S3 and expose each to Spark SQL as a temp view
# ("input_data" and "input_data_updates").
# NOTE(review): assumes `spark` (a SparkSession) and AWS_BUCKET are defined
# earlier in the job — confirm against the surrounding script.
df = spark.read.format("csv").option("header", "true").load(f"s3://{AWS_BUCKET}/data/input_data.csv")
# createOrReplaceTempView replaces registerTempTable, deprecated since Spark 2.0.
df.createOrReplaceTempView("input_data")
df = spark.read.format("csv").option("header", "true").load(f"s3://{AWS_BUCKET}/data/input_data_updates.csv")
df.createOrReplaceTempView("input_data_updates")
## CREATE TABLE
# AWS Glue job preamble: bring in the Glue/Spark toolchain and resolve the
# arguments the Glue runtime passes on the command line.
import sys

from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
# SparkConf and SparkContext are canonically exported from the pyspark top-level
# package; importing them via pyspark.context only works incidentally.
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from awsglue.context import GlueContext
from awsglue.job import Job

## @params: [JOB_NAME]
# Parse sys.argv for the required --JOB_NAME argument supplied by Glue.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
[Confluent.dist]
name=Confluent repository (dist)
baseurl=https://packages.confluent.io/rpm/{CONFLUENT_VERSION}/7
gpgcheck=1
gpgkey=https://packages.confluent.io/rpm/{CONFLUENT_VERSION}/archive.key
enabled=1
[Confluent]
name=Confluent repository
baseurl=https://packages.confluent.io/rpm/{CONFLUENT_VERSION}
+--------------------+--------------+
| Confluent Platform | Apache Kafka |
+--------------------+--------------+
| 2.0.x | 0.9.0.x |
+--------------------+--------------+
| 3.0.x | 0.10.0.x |
+--------------------+--------------+
| 3.1.x | 0.10.1.x |
+--------------------+--------------+
| 3.2.x | 0.10.2.x |
{
"type": "record",
"name": "Order",
"fields": [
{
"name": "orderId",
"type": "long"
},
{
"name": "orderDate",
# Install MySQL client build dependencies plus a Python MySQL driver.
# RHEL/CentOS route: yum headers, then the legacy MySQL-python driver.
# NOTE(review): MySQL-python is the legacy package — presumably Python 2
# only; verify before using on a Python 3 host.
yum install -y mysql-devel python-devel python-setuptools
pip install MySQL-python
# Debian/Ubuntu route: equivalent headers, then the maintained mysqlclient fork.
apt-get install python-dev libmysqlclient-dev
pip install mysqlclient
cd /opt
sudo wget --no-check-certificate https://www.python.org/ftp/python/3.5.7/Python-3.5.7.tar.xz
tar xf Python-3.5.7.tar.xz
cd Python-3.5.7
./configure --prefix=/usr/local
make && make altinstall
ls -ltr /usr/local/bin/python*
vi ~/.bashrc
cd airflow-{AIRFLOW_VERSION}/scripts/systemd/
# Update the contents of the airflow file.
# Set the AIRFLOW_HOME if its anything other then the default
vi airflow
# Copy the airflow property file to the target location
cp airflow /etc/sysconfig/
# Update the contents of the airflow-*.service files
# Set the User and Group values to the user and group you want the airflow service to run as
vi airflow-*.service
# Copy the airflow services files to the target location
unzip {AIRFLOW_VERSION}.zip
# This will output extract the contents into: airflow-{AIRFLOW_VERSION}