# Created January 13, 2015 07:29
# EMR notes: launch a Spark cluster on AWS Elastic MapReduce, run a job on it, then terminate it.
# Step-by-step commands, meant to be run one at a time rather than as a single
# unattended script: the middle steps run on the cluster's master node after
# the SSH step. UPPER_CASE words (VERSION, CLUSTER_ID, MASTER_HOST, ...) are
# placeholders to replace with real values.

# --- On your workstation ----------------------------------------------------

# Install (or upgrade) the AWS CLI, then configure it interactively.
# Reference ~/.aws/config for the result.
pip install --upgrade awscli
aws configure

# Create a long-running cluster (--no-auto-terminate) named spark_cluster with
# one MASTER and one CORE m1.medium instance. The bootstrap action runs the
# install-spark-shark.rb script from S3; KeyName is the EC2 key pair whose
# .pem file is used for SSH below.
aws emr create-cluster \
  --ami-version VERSION \
  --instance-groups \
    InstanceGroupType=MASTER,InstanceCount=1,InstanceType=m1.medium \
    InstanceGroupType=CORE,InstanceCount=1,InstanceType=m1.medium \
  --no-auto-terminate \
  --name spark_cluster \
  --bootstrap-actions Path=s3://elasticmapreduce/samples/spark/1.0.0/install-spark-shark.rb,Name=install_spark \
  --ec2-attributes KeyName=AWS_IAM

# SSH to the master node. CLUSTER_ID is the cluster's "j-..." identifier.
aws emr ssh --cluster-id CLUSTER_ID --key-pair-file AWS_IAM.pem

# --- On the master node (inside the SSH session) ----------------------------

# Copy the jar to be executed from S3 to the master node, then move it into
# the spark directory so spark-submit can reference it by a relative path.
hadoop fs -get s3n://bucket/jar /tmp
cd spark
sudo mv /tmp/*.jar .

# Run the Spark job against the standalone master on port 7077.
# Note: the flag must be two ASCII hyphens ("--master"); an em dash pasted in
# from a rich-text source is not recognised as an option.
./bin/spark-submit --master spark://MASTER_HOST:7077 --class "PACKAGE.CLASS" YOUR_JAR.jar JAR_PARAMs

# --- Clean up ---------------------------------------------------------------

# Terminate the cluster (same CLUSTER_ID as used for the SSH step).
# NOTE(review): presumably run from the workstation after leaving the SSH
# session, since that is where the AWS CLI was configured above — confirm.
aws emr terminate-clusters --cluster-ids CLUSTER_ID
