Skip to content

Instantly share code, notes, and snippets.

@boronology
Last active September 16, 2016 10:23
Show Gist options
  • Save boronology/b15f0bffc4bbdd468c98c00c6251c13b to your computer and use it in GitHub Desktop.
Save boronology/b15f0bffc4bbdd468c98c00c6251c13b to your computer and use it in GitHub Desktop.
# 言語処理100本ノック (NLP 100 Exercises): http://www.cl.ecei.tohoku.ac.jp/nlp100/
# Preparation: download hightemp.txt, the data file used by every exercise below.
data_url='http://www.cl.ecei.tohoku.ac.jp/nlp100/data/hightemp.txt'
wget "${data_url}"
# 10. Count the number of lines.
# wc prints the count followed by the file name.
wc -l hightemp.txt
# Alternative: the empty pattern matches every line, so -c prints the bare
# line count.
grep -c -e '' hightemp.txt
# 11. Replace every tab with a single space.
# The file is redirected straight into tr; no extra cat process is needed.
tr '\t' ' ' < hightemp.txt
# Alternatives with sed (substitute command, then transliterate command).
# Options are placed before the file operand so the commands do not rely on
# GNU-style argument permutation.
# NOTE: the \t escape is a GNU sed feature; other sed implementations may
# need a literal tab character in the script instead.
sed -e 's/\t/ /g' hightemp.txt
sed -e 'y/\t/ /' hightemp.txt
# 12. Save column 1 to col1.txt and column 2 to col2.txt.
# cut reads the file directly, so a missing input file is reported as a
# failure of the command instead of being hidden behind a pipeline.
cut -f 1 hightemp.txt > col1.txt
cut -f 2 hightemp.txt > col2.txt
# Alternative with awk. -F '\t' makes awk split on tabs only, the same
# delimiter cut uses by default; awk's default splitting would also break a
# field at any space it contains.
awk -F '\t' '{ print $1 }' hightemp.txt > col1.txt
awk -F '\t' '{ print $2 }' hightemp.txt > col2.txt
# 13. Merge col1.txt and col2.txt line by line into merged.txt.
# The tab delimiter (paste's default) is spelled out explicitly.
paste -d '\t' col1.txt col2.txt > merged.txt
# 14. Print the first N lines; N is read from standard input.
# read -r keeps backslashes in the input literal, and every expansion of N is
# quoted so the value cannot be word-split or glob-expanded.
read -r N
head -n "$N" hightemp.txt
# Alternative: suppress sed's automatic printing (-n) and print only lines
# 1 through N.
sed -n "1,${N}p" hightemp.txt
# 15. Print the last N lines; N is read from standard input.
read -r N
tail -n "$N" hightemp.txt
# Alternative (rather forced): count the lines with grep, then have awk print
# every line whose number is greater than (total lines - N).
# N is read again for this variant. The threshold is handed to awk with -v
# instead of being spliced into the awk program text.
read -r N
awk -v skip="$(( $(grep -c '' hightemp.txt) - N ))" 'NR > skip' hightemp.txt
# 16. Split the file into N pieces; N is read from standard input.
# Computing a lines-per-file value with integer division (total / N) rounds
# down, which produces more than N files whenever the line count is not a
# multiple of N and an invalid "-l 0" when N exceeds the line count.
# split -n l/N (GNU coreutils) instead creates exactly N output files and
# never breaks a line across two files. Output goes to split's default
# file names in the current directory.
read -r N
split -n "l/${N}" hightemp.txt
# 17. Count the distinct strings in column 1.
# cut reads the file itself (no extra cat process); sort groups equal values
# together so uniq can drop the duplicates, and wc -l counts what is left.
cut -f 1 hightemp.txt \
  | sort \
  | uniq \
  | wc -l
# 18. Sort the lines by the numeric value of column 3, in descending order.
# -t pins the field separator to a tab, and the n and r modifiers on the key
# make the comparison numeric and reversed. A plain "-k3,3 -r" compares the
# field as text, which orders e.g. "9.5" above "40".
sort -t "$(printf '\t')" -k 3,3nr hightemp.txt
# 19. Count how often each column-1 string occurs and list the most frequent
# first.
# The final sort uses -n so the counts printed by uniq -c are compared as
# numbers; a plain "sort -r" compares them as text, which depends on padding
# and on the locale's handling of leading blanks.
cut -f 1 hightemp.txt | sort | uniq -c | sort -rn
# Alternative: extract column 1 with awk, splitting on tabs only so the field
# is the same one cut -f 1 selects.
awk -F '\t' '{ print $1 }' hightemp.txt | sort | uniq -c | sort -rn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment