Created
January 13, 2015 07:29
-
-
Save bigsnarfdude/713803847a7b6ba98890 to your computer and use it in GitHub Desktop.
emr stuff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# EMR + Spark command cheat sheet.
# These commands are meant to be run one step at a time, not as a single
# unattended script. VERSION, AWS_IAM, JOB_ID, MASTER_HOST, PACKAGE.CLASS,
# YOUR_JAR.jar, JAR_PARAMs, s3n://bucket/jar and j-jobid are placeholders
# to replace with real values.

# Install AWS cli, reference ~/.aws/config for the result
pip install --upgrade awscli
aws configure

# Create Cluster (one MASTER and one CORE instance group, kept alive after
# steps finish, with the Spark/Shark install script as a bootstrap action)
aws emr create-cluster \
  --ami-version VERSION \
  --instance-groups \
    InstanceGroupType=MASTER,InstanceCount=1,InstanceType=m1.medium \
    InstanceGroupType=CORE,InstanceCount=1,InstanceType=m1.medium \
  --no-auto-terminate \
  --name spark_cluster \
  --bootstrap-action Path=s3://elasticmapreduce/samples/spark/1.0.0/install-spark-shark.rb,Name=install_spark \
  --ec2-attributes KeyName=AWS_IAM

# SSH to master node
aws emr ssh --cluster-id JOB_ID --key-pair-file AWS_IAM.pem

# NOTE(review): the next two steps presumably run inside the SSH session on
# the master node, not on the local machine -- confirm before scripting them.

# Copy jar to be executed to master node
hadoop fs -get s3n://bucket/jar /tmp
cd spark
sudo mv /tmp/*.jar .

# Run spark job
# The option is spelled with two ASCII hyphens ("--master"); a typographic
# dash pasted from a web page is not recognised as an option.
./bin/spark-submit --master spark://MASTER_HOST:7077 --class "PACKAGE.CLASS" YOUR_JAR.jar JAR_PARAMs

# Terminate Cluster
aws emr terminate-clusters --cluster-ids j-jobid
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment