##Created November 9, 2017 19:00
##Tenny Susanto
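##Download messages from Kafka, partition by partition.
##Do not use this on a Kafka queue that is constantly receiving new messages: the offsets are captured once up front, so the per-partition message counts would already be stale by the time the download runs.
##This should only be used for a manual pull of historical data loaded by the platform team (they dump the data into Kafka once and don't write to it again).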
##KAFKA_OPTS is exported (as an empty string here) so every command started below inherits it.
##Original note: "This file contains the login/password to the Kafka server".
##NOTE(review): presumably the real value points at a credentials file and was blanked out here -- confirm and fill in before use.
KAFKA_OPTS=''
export KAFKA_OPTS
##get first offset for each partition
##kafka-run-class takes the class to run as its first argument; the
##--broker-list/--topic/--time options used here belong to kafka.tools.GetOffsetShell.
##--time -2 requests the earliest offset. awk keeps fields 2 and 3 of each
##colon-separated output line (partition and offset, assuming GetOffsetShell's
##topic:partition:offset format), tab-separated, and sort orders the lines
##numerically by partition before they are written to first_offset.
/usr/bin/kafka-run-class kafka.tools.GetOffsetShell \
  --broker-list broker1:10002 --topic yourtopic --time -2 \
  | awk -F: '{ print $2 "\t" $3}' \
  | sort -k1 -n > first_offset
##get last offset for each partition
##kafka-run-class takes the class to run as its first argument; the
##--broker-list/--topic/--time options used here belong to kafka.tools.GetOffsetShell.
##--time -1 requests the latest offset. awk keeps fields 2 and 3 of each
##colon-separated output line (partition and offset, assuming GetOffsetShell's
##topic:partition:offset format), tab-separated, and sort orders the lines
##numerically by partition before they are written to last_offset.
/usr/bin/kafka-run-class kafka.tools.GetOffsetShell \
  --broker-list broker1:10002 --topic yourtopic --time -1 \
  | awk -F: '{ print $2 "\t" $3}' \
  | sort -k1 -n > last_offset
##get the number of messages in each partition
##The two offset files are joined on the partition number (column 1) instead of
##being pasted side by side, so a missing or extra line in either file can never
##pair one partition with another partition's offset.
##Each line written to output is "partition count", where
##count = last offset - first offset; lines follow the order of last_offset.
##Partitions present only in last_offset are reported on stderr and skipped.
awk '
  FILENAME == ARGV[1] { first[$1] = $2; next }
  ($1 in first)       { print $1, $2 - first[$1]; next }
                      { print "no first offset for partition " $1 ", skipping" > "/dev/stderr" }
' first_offset last_offset > output
##Get all the messages for each partition
##Reads "partition rowcount" pairs from the file output and writes each
##partition's messages to partition_NNN (partition number zero-padded to 3 digits).
##The original loop was missing its "do" keyword, which is a shell syntax error.
while read -r partition rowcount; do
  ##Skip lines whose fields are not non-negative integers rather than passing garbage to printf or the consumer.
  if ! [ "$partition" -ge 0 ] 2>/dev/null || ! [ "$rowcount" -ge 0 ] 2>/dev/null; then
    echo "skipping malformed line in output: '$partition' '$rowcount'" >&2
    continue
  fi
  filename=$(printf '%03d' "$partition")
  echo "$partition" "$rowcount" "$filename"
  ##An empty partition has nothing to fetch: still create its (empty) file, but do not start a consumer.
  if [ "$rowcount" -eq 0 ]; then
    : > "partition_${filename}"
    continue
  fi
  ##stdin is redirected from /dev/null so the consumer can never swallow the remaining lines of output that the loop is reading.
  /usr/bin/kafka-console-consumer --new-consumer \
    --bootstrap-server broker1:9092,broker2:9092,broker3:9092 \
    --topic yourtopic --from-beginning \
    --max-messages "$rowcount" --partition "$partition" \
    --consumer.config /path/consumer.cfg \
    < /dev/null > "partition_${filename}"
done < output
