Dump an Avro file as JSON using the avro-tools utility:
avro-tools tojson filename.avro | head -n 10
Using Scala:
package org.sample.utils
import org.apache.spark.SparkConf
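import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

// The snippet above is truncated; the rest below is a sketch of how the same
// "first 10 records" preview could be done with Spark. It assumes the
// spark-avro package (com.databricks:spark-avro, version matching your Spark)
// is on the classpath; the object and file names are only illustrative.
object AvroPreview {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("AvroPreview"))
    val sqlContext = new SQLContext(sc)
    val df = sqlContext.read.format("com.databricks.spark.avro").load("filename.avro")
    df.show(10)
  }
}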
==> copy ssh keys between servers (edge <--> neo4j)
http://www.commandlinefu.com/commands/view/188/copy-your-ssh-public-key-to-a-server-from-a-machine-that-doesnt-have-ssh-copy-id
cat /home/user/id_neo4j_rsa.pub | ssh user@host.org 'cat >> ~/.ssh/authorized_keys'
cat /home/user/id_rsa.pub | ssh user@host.org 'cat >> ~/.ssh/authorized_keys'
==> generate keytab (if not available)
ktutil: addent -password -p user@host.org -k 1 -e rc4-hmac
Password for user@host.org:
ktutil: wkt /home/user/user.keytab
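Once the keytab exists it can also be used programmatically, e.g. to log in to Kerberos from Spark/Scala code before touching HDFS. A minimal sketch, assuming the Hadoop client libraries are on the classpath and reusing the principal and keytab path from above:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

val conf = new Configuration()
conf.set("hadoop.security.authentication", "kerberos")
UserGroupInformation.setConfiguration(conf)
// principal and keytab generated with ktutil above
UserGroupInformation.loginUserFromKeytab("user@host.org", "/home/user/user.keytab")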
import org.apache.tinkerpop.gremlin.structure.T
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
import org.apache.tinkerpop.gremlin.structure.Vertex
import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils

object TinkerpopGraphTest {
  def main(args: Array[String]): Unit = {
    val graph: org.apache.tinkerpop.gremlin.structure.Graph = TinkerGraph.open()
    val marko: Vertex = graph.addVertex(T.label, "person", T.id, "1", "name", "marko", "age", "29")
    // count the vertices added to the in-memory graph
    println(IteratorUtils.count(graph.vertices()))
    graph.close()
  }
}
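To query the graph with Gremlin rather than iterating vertices directly, open a traversal from the same graph instance. A small sketch meant to sit inside main above, before graph.close():

import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource

val g: GraphTraversalSource = graph.traversal()
val markoAge = g.V().has("name", "marko").values[String]("age").next()
println(markoAge) // "29", stored as a String above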
Preview, then kill, all processes for a user (excluding the login shell, the su session and the ps pipeline itself):
ps -eaf | grep user | grep -v bash | grep -v grep | grep -v "su - " | grep -v someTag | grep -v "ps \-eaf"
ps -eaf | grep user | grep -v bash | grep -v grep | grep -v "su - " | grep -v someTag | grep -v "ps \-eaf" | awk -F " " '{print $2}' | xargs kill -9
The following are the steps for running Jupyter on a Hadoop cluster and connecting to it from a local browser.
Assuming you have a secured Spark cluster on Linux.
Assuming you have Anaconda installed.
## Steps for setting up Python Virtual Environments
Add conda to the PATH (append the following to the ~/.bashrc file):
PATH=$PATH:/opt/anaconda/latest/bin/
export PATH
Test regular expressions interactively at regex101.com
Capture the fields of a Tomcat (log4j-format) log line, e.g.:
2013-12-05 21:39:15,813 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!
/^([0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9:]{1,},[0-9]{3})\s\[([a-zA-Z]+)\]\s([a-zA-Z]{1,})\s+(.*)$/g
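The same pattern can be used from Scala to pull the fields out of a line (a sketch; the four capture groups match the ones above):

val logPattern = """^([0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9:]{1,},[0-9]{3})\s\[([a-zA-Z]+)\]\s([a-zA-Z]{1,})\s+(.*)$""".r
val line = "2013-12-05 21:39:15,813 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!"
line match {
  case logPattern(timestamp, thread, level, message) => println(s"$timestamp / $thread / $level / $message")
  case _ => println("no match")
}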
Extracting JSON fields from a file:
grep -Po '"text":.*?[^\\]",' <file>
grep -Po '"TRANS_ID":.*?[^\\]",' <file>
package org.rsol.log.util | |
import java.util.zip.GZIPOutputStream | |
import java.io.ByteArrayOutputStream | |
import java.util.zip.GZIPInputStream | |
import java.io.ByteArrayInputStream | |
import org.apache.commons.io.IOUtils | |
object GZipUtil extends App {
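  // The original snippet is truncated here; below is a sketch of what the body
  // might look like given the imports above (the method names are illustrative).
  def compress(bytes: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(bos)
    gzip.write(bytes)
    gzip.close()
    bos.toByteArray
  }

  def decompress(bytes: Array[Byte]): Array[Byte] =
    IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes)))

  // round-trip check
  println(new String(decompress(compress("hello gzip".getBytes("UTF-8"))), "UTF-8"))
}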
Convert an RDD of key/value maps to a DataFrame:
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// loadAvroData, parseKV, kvPattern_quote, logPath and suffix are defined elsewhere in the project
val rdd = loadAvroData(sc, logPath, suffix).map(x => parseKV(x._2, kvPattern_quote).toMap)
convertToDF(sc, rdd)

def convertToDF(sc: SparkContext, rdd: RDD[Map[String, String]]): DataFrame = {
  val sqlContext = new SQLContext(sc)
  import sqlContext.implicits._
  val fields = Array("name", "emp", "dept", "nick")
  val schema = StructType(fields.map { x => StructField(x, StringType) })
  val result = rdd.map(x => (x.getOrElse(fields(0), ""), x.getOrElse(fields(1), ""), x.getOrElse(fields(2), ""), x.getOrElse(fields(3), "")))
  val df = result.toDF(fields: _*)
  df
}
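A quick way to sanity-check the result (a sketch, reusing the rdd built above):

val df = convertToDF(sc, rdd)
df.printSchema()
df.show(10)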