Skip to content

Instantly share code, notes, and snippets.

@warantesbr
Last active February 11, 2016 21:37
Show Gist options
  • Save warantesbr/088fa2d8139585df31bb to your computer and use it in GitHub Desktop.
Save warantesbr/088fa2d8139585df31bb to your computer and use it in GitHub Desktop.
Snowplow Storage Loader Files
#!/bin/bash
# Launch the Snowplow StorageLoader from the Vagrant-mounted tree,
# passing the config file path via the -c flag.
loader_bin=/vagrant/4-storage/storage-loader/deploy/snowplow-storage-loader
loader_config=/vagrant/0-config/snowplow-storage-loader.yml

"${loader_bin}" -c "${loader_config}"
# Snowplow EmrEtlRunner / StorageLoader configuration.
# Nesting matters: `s3` and `emr` belong to `aws`, and each bucket group
# (`raw`, `enriched`, `shredded`) has its own `archive`/`good`/`bad`/`errors`
# keys. Flattening them to one level creates duplicate keys.
aws:
  # Placeholder credentials - never commit real keys to version control.
  access_key_id: XXXXXXXXXXXXXXXXXXXX
  secret_access_key: YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
  s3:
    region: us-west-2
    buckets:
      jsonpath_assets: s3://snowplow-etl-runner-test/jsonpaths-file/
      assets: s3://snowplow-hosted-assets
      log: s3n://snowplow-etl-runner-test/etl/logs/
      raw:
        in:
          - s3n://snowplow-logs-test/
        processing: s3n://snowplow-etl-runner-test/raw/processing/
        archive: s3://snowplow-etl-runner-test/raw/archive
      enriched:
        good: s3://snowplow-etl-runner-test/enriched/good/
        bad: s3://snowplow-etl-runner-test/enriched/bad
        errors: s3://snowplow-etl-runner-test/enriched/errors
        archive: s3://snowplow-etl-runner-test/enriched/archive
      shredded:
        good: s3://snowplow-etl-runner-test/shredded/good/
        bad: s3://snowplow-etl-runner-test/shredded/bad
        errors: s3://snowplow-etl-runner-test/shredded/errors
        archive: s3://snowplow-etl-runner-test/shredded/archive
  emr:
    ami_version: 3.6.0
    region: us-west-2
    jobflow_role: EMR_EC2_DefaultRole  # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole  # Created using $ aws emr create-default-roles
    placement: us-west-2a  # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id:  # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: "my_key"
    bootstrap: []
    software:
      lingual: "1.1"
    jobflow:
      master_instance_type: m1.large
      core_instance_count: 3
      core_instance_type: m1.large
      task_instance_count: 5
      task_instance_type: m1.large
      task_instance_bid: 0.015
    bootstrap_failure_tries: 3
collectors:
  # Allowed values include 'clj-tomcat' for the Clojure Collector, 'thrift'
  # for Thrift records, or 'tsv/com.amazon.aws.cloudfront/wd_access_log' for
  # Cloudfront access logs.
  format: clj-tomcat
enrich:
  job_name: Snowplow ETL
  versions:
    hadoop_enrich: 1.5.1
    hadoop_shred: 0.7.0
    hadoop_elasticsearch: 0.1.0
  # Set to 'true' (and set the errors: buckets above) if you don't want any
  # exceptions thrown from ETL.
  continue_on_unexpected_error: true
  # Compression only supported with Redshift, set to NONE if you have
  # Postgres targets. Allowed formats: NONE, GZIP
  output_compression: NONE
storage:
  download:
    # Postgres-only config option. Where to store the downloaded files.
    # Leave blank for Redshift.
    folder: "tempdir"
  targets:
    - name: "snowplow-rds-postgres-test"
      type: postgres
      host: "snowplow-rds-postgres-test.dfajksdhkasj.us-west-2.rds.amazonaws.com"
      database: snowplow
      port: 5432
      table: atomic.events
      username: storageloader
      password: "password"
monitoring:
  tags: {}
  logging:
    level: DEBUG  # You can optionally switch to INFO for production
  snowplow:
    method: get
    app_id: "snowplow-storage-loader"
    collector: "snowplow-clojure-collector.elasticbeanstalk.com"
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
},
{
"name": "MyCompany",
"priority": 5,
"vendorPrefixes": [ "br.com.mycompany" ],
"connection": {
"http": {
"uri": "http://snowplow-iglu.s3.amazonaws.com"
}
}
}
]
}
}
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Context for events on mycompany_portal",
"self": {
"vendor": "br.com.mycompany",
"name": "portal_context",
"format": "jsonschema",
"version": "1-0-0"
},
"type": "object",
"properties": {
"foo": {
"type": "string"
},
"bar": {
"type": "string"
},
"baz": {
"type": "string"
}
},
"additionalProperties": false
}
{
"jsonpaths": [
"$.schema.vendor",
"$.schema.name",
"$.schema.format",
"$.schema.version",
"$.hierarchy.rootId",
"$.hierarchy.rootTstamp",
"$.hierarchy.refRoot",
"$.hierarchy.refTree",
"$.hierarchy.refParent",
"$.data.foo",
"$.data.bar",
"$.data.baz"
]
}
-- Creates the "atomic" schema (if missing) and the table that holds
-- br.com.mycompany portal_context records. Both statements use
-- IF NOT EXISTS, so an existing schema or table is left in place.
CREATE SCHEMA IF NOT EXISTS "atomic";
-- Column order mirrors the order of the entries in the JSON Paths file for
-- this type: schema.*, then hierarchy.*, then data.*.
CREATE TABLE IF NOT EXISTS "atomic"."br_com_mycompany_portal_context_1" (
-- Schema of this type
"schema_vendor" varchar(128) NOT NULL,
"schema_name" varchar(128) NOT NULL,
"schema_format" varchar(128) NOT NULL,
"schema_version" varchar(128) NOT NULL,
-- Parentage of this type
"root_id" char(36) NOT NULL,
"root_tstamp" timestamp NOT NULL,
"ref_root" varchar(255) NOT NULL,
"ref_tree" varchar(1500) NOT NULL,
"ref_parent" varchar(255) NOT NULL,
-- Properties of this type
-- All three are nullable: the JSON Schema declares no required properties.
"foo" varchar(255),
"bar" varchar(255),
"baz" varchar(255)
)
WITH (OIDS=FALSE);
-- NOTE(review): '0.1.0' is presumably the version of this table
-- definition, stored as the table comment - confirm.
COMMENT ON TABLE "atomic"."br_com_mycompany_portal_context_1" IS '0.1.0';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment