Last active
September 16, 2016 10:23
-
-
Save boronology/b15f0bffc4bbdd468c98c00c6251c13b to your computer and use it in GitHub Desktop.
言語処理100本ノック http://www.cl.ecei.tohoku.ac.jp/nlp100/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup: download the sample data file that every exercise below reads.
wget http://www.cl.ecei.tohoku.ac.jp/nlp100/data/hightemp.txt
# 10. Count the number of lines.
wc -l hightemp.txt
# Alternative: the empty pattern matches every line, so -c prints the line
# count (unlike wc -l, this also counts a final line lacking a newline).
grep -c "" hightemp.txt
# 11. Replace every tab with a single space.
tr '\t' ' ' < hightemp.txt
# Alternatives using sed. The file operand goes after the -e script so the
# commands do not depend on GNU-style option permutation. Note that "\t" as a
# tab escape inside s/// and y/// is itself a GNU sed extension.
sed -e 's/\t/ /g' hightemp.txt
sed -e 'y/\t/ /' hightemp.txt
# 12. Save column 1 to col1.txt and column 2 to col2.txt.
cut -f 1 hightemp.txt > col1.txt
cut -f 2 hightemp.txt > col2.txt
# Alternative with awk. -F '\t' restricts field splitting to tabs so the
# result matches cut even when a field contains spaces (awk's default
# splits on any run of whitespace).
awk -F '\t' '{print $1}' hightemp.txt > col1.txt
awk -F '\t' '{print $2}' hightemp.txt > col2.txt
# 13. Merge col1.txt and col2.txt line by line into merged.txt
# (paste joins corresponding lines with a tab by default).
paste col1.txt col2.txt > merged.txt
# 14. Print the first N lines. N is read from standard input.
# read -r keeps backslashes literal; quoting "$N" prevents word splitting.
read -r N
head -n "$N" hightemp.txt
# Alternative: let sed print only lines 1..N (reuses the N read above).
sed -n "1,${N}p" hightemp.txt
# 15. Print the last N lines. N is read from standard input.
read -r N
tail -n "$N" hightemp.txt
# Alternative (admittedly rather forced): compute how many leading lines to
# skip (total line count minus N) and let awk print everything after them.
# The threshold is handed to awk with -v instead of being spliced into the
# program text.
read -r N
awk -v skip="$(( $(grep -c "" hightemp.txt) - N ))" 'NR > skip' hightemp.txt
# 16. Split the file, by lines, into N pieces. N (a positive integer) is read
# from standard input.
# The lines-per-piece value is rounded UP: with plain integer division a line
# count that is not a multiple of N yields more than N pieces (e.g. 24 lines,
# N=5 -> 4 lines each -> 6 files), and N larger than the line count yields an
# invalid chunk size of 0. Rounding up guarantees at most N output files.
read -r N
split -l "$(( ($(grep -c "" hightemp.txt) + N - 1) / N ))" hightemp.txt
# 17. Count the distinct strings appearing in column 1.
# sort -u sorts and de-duplicates in one step; wc -l counts what is left.
cut -f 1 hightemp.txt | sort -u | wc -l
# 18. Sort the rows by the numeric value in column 3, descending.
# The "n" key flag is required: without it the column is compared as text,
# so e.g. "9.5" would sort above "40.1". -t pins the field separator to a
# tab so columns line up with what cut -f sees.
sort -t "$(printf '\t')" -k 3,3nr hightemp.txt
# 19. Count how often each column-1 string occurs and list the most frequent
# first. The final sort is numeric (-n) so the order is determined by the
# count itself rather than by the textual padding uniq -c happens to emit.
cut -f 1 hightemp.txt | sort | uniq -c | sort -rn
# Alternative with awk; -F '\t' keeps field splitting tab-only, like cut.
awk -F '\t' '{print $1}' hightemp.txt | sort | uniq -c | sort -rn
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment