Skip to content

Instantly share code, notes, and snippets.

@chen206
Created September 13, 2016 03:35
Show Gist options
  • Save chen206/294043c2c2a556da27245a70aadfaf8f to your computer and use it in GitHub Desktop.
Save chen206/294043c2c2a556da27245a70aadfaf8f to your computer and use it in GitHub Desktop.
Shell scripts
0000ABC1111ABC2222,"id": "1001", "name": "Jack"
0000ABC1111ABC2222ABC3333,"id": "1002", "name": "Michael"
0000ABC1111ABC3333,"id": "1003", "name": "David"
0000ABC2222ABC3333,"id": "1001", "name": "Jack"
1111ABC2222ABC3333,"id": "1002", "name": "Michael"
0000ABC2222ABC3333,"id": "1003", "name": "David"
0000ABC1111,"id": "1003", "name": "David"
# Per key, total how many times a marker string occurs across all lines.
# The sample rows above use the marker "ABC" and the key "id"; this command
# is the variant for data whose marker is "profile_image_url" and whose key
# is "gsid". To adapt it, change the -F separator, the two match() patterns,
# and the offset 9 (the length of `"gsid": "`).
#   stage 1: print "<occurrences> <key>" per input line; with the marker as
#            field separator, NF-1 is the number of markers on the line.
#   stage 2: add up the occurrences for each key, print "<key> <total>".
#   stage 3: sort -nr orders those lines numerically, descending, by the
#            number at the start of each line.
# awk reads infile directly; piping it through a pager first is unnecessary.
awk -F"profile_image_url" '{first=match($0, "\"gsid\": \"");last=match($0, "\", \"s\"");s=substr($0, first+9, last-first-9)}{print NF-1,s}' infile \
  | awk '{Counter[$2]+=$1;} END {for(key in Counter)print key, Counter[key];}' \
  | sort -nr
# Sort INFILE numerically, largest first, keyed from the 2nd field onward
# (with -n the leading number of that field decides). Fields are separated
# by Ctrl-A (octal 001); after sorting, every Ctrl-A is replaced by a tab.
sort -t "$(printf '\001')" -n -r -k 2 INFILE | tr '\001' '\t'
# For every regular file in the current directory, print the sum of its
# 2nd column (output format: "filename sum").
for filename in *; do
  # Skip directories and other non-regular entries, including the literal
  # "*" that the glob leaves behind when the directory is empty.
  [ -f "$filename" ] || continue
  # Quoted so a name containing spaces or glob characters reaches awk as a
  # single operand. sum starts at 0 so an empty file still prints "name 0".
  awk 'BEGIN{sum=0}{sum+=$2}END{print ARGV[1],sum}' "$filename"
done
# Set the input and output field separators: read Ctrl-A (octal 001)
# separated fields from infile and print the first three, tab-separated.
awk -v FS='\001' -v OFS='\t' '{ print $1, $2, $3 }' infile
# Print file1 without the lines that also appear in file2.
#   -f file2  take one pattern per line from file2
#   -F        treat each pattern as a literal string, not a regular
#             expression (otherwise a line such as "a.c" in file2 would
#             also remove "abc" from file1)
#   -x        a pattern must match the whole line
#   -v        keep only the lines that do NOT match
grep -v -x -F -f file2 file1
# Group by the 2nd column and sum the 3rd column of each group. Fields in
# infile are separated by Ctrl-A (octal 001). Output is one "key, total"
# line per group; awk's for-in iteration order is unspecified.
awk -v FS='\001' '
  { total[$2] += $3 }
  END {
    for (k in total)
      print k ", " total[k]
  }
' infile
# Copy lines 100 through 1000 of infile into outfile.
# The trailing 1000q makes sed stop right after printing line 1000 instead
# of reading the rest of the file, which matters for large inputs.
sed -n '100,1000p;1000q' infile > outfile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment