Last active
September 14, 2016 13:43
-
-
Save boronology/114601690a43ac9c3a96b36a3a36cd47 to your computer and use it in GitHub Desktop.
言語処理100本ノック http://www.cl.ecei.tohoku.ac.jp/nlp100/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 00. Reverse a string
# A slice with step -1 walks the string from its last character to its
# first, which yields the reversed text ("stressed" -> "desserts").
s = "stressed"[::-1]
print(s)
# 01. "パタトクカシーー"
# Concatenate the 1st, 3rd, 5th and 7th characters of the string.
f = "パタトクカシーー"
# The positions are 1-based, so subtract one to get the Python index.
l = [1, 3, 5, 7]
s = "".join(f[i - 1] for i in l)
print(s)
# 02. "パトカー" + "タクシー" = "パタトクカシーー"
# Interleave the two words character by character.
f1 = "パトカー"
f2 = "タクシー"
# zip pairs up the characters at the same position in both words.
s = "".join(c1 + c2 for c1, c2 in zip(f1, f2))
print(s)
# 03. Pi
# A plain split() would leave punctuation attached to the words, so first
# remove everything that is not an ASCII letter or a space.
import re
regex = r'[^a-zA-Z ]'
f = re.sub(regex, "", "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.")
# The word lengths, in order, spell out the leading digits of pi.
l = [len(word) for word in f.split()]
print(l)
# 04. Element symbols
# Map each element symbol taken from the sentence to the position of its
# word, counted from 1.
f = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.".split()
# 1-based positions of the words that contribute only their first letter;
# every other word contributes its first two letters.
l = [1, 5, 6, 7, 8, 9, 15, 16, 19]
a = {}
for pos, word in enumerate(f, start=1):
    width = 1 if pos in l else 2
    a[word[:width]] = pos
print(a)
# 05. n-gram
def ngram(seq, n):
    """Return every run of n consecutive items of seq, in order.

    seq may be any sliceable sequence: a string yields character n-grams,
    a list of words yields word n-grams. Each n-gram has the same type as a
    slice of seq. If seq holds fewer than n items the result is empty.
    """
    # A sequence of length L has L - n + 1 windows of width n; the "+ 1"
    # keeps the final window, which ends on the last item.
    return [seq[i:i + n] for i in range(len(seq) - n + 1)]
# Word bi-grams, then character bi-grams, of the same sentence.
print(ngram("I am an NLPer".split(), 2))
print(ngram("I am an NLPer", 2))
# 06. Sets
# Character bi-grams of the two words, as sets.
X = set(ngram("paraparaparadise", 2))
Y = set(ngram("paragraph", 2))
# Union
print(X | Y)
# Intersection
print(X & Y)
# Difference (in X but not in Y)
print(X - Y)
# Difference (in Y but not in X)
print(Y - X)
# 07. Sentence generation from a template
def template_string(x, y, z):
    """Return the sentence "<x>時の<y>は<z>" built from the three arguments.

    Each argument is rendered with its default string formatting.
    """
    return f"{x}時の{y}は{z}"


print(template_string(x=12, y="気温", z=22.4))
# 08. Cipher text
def cipher(s):
    """Return s with every ASCII lowercase letter replaced by chr(219 - code).

    Because ord('a') + ord('z') == 219, this mirrors the alphabet
    ('a' <-> 'z', 'b' <-> 'y', ...). All other characters are copied
    unchanged.
    """
    return "".join(
        chr(219 - ord(ch)) if "a" <= ch <= "z" else ch
        for ch in s
    )
def decipher(s):
    """Return the plain text for a string that was encoded by mirroring a-z.

    The encoding maps a lowercase ASCII letter with code c to 219 - c, and
    the encoded letters again fall in 'a'..'z'. Applying the same mapping a
    second time restores the original letter. Characters outside 'a'..'z'
    are copied unchanged.
    """
    low = ord("a")
    high = ord("z")
    decoded = []
    for ch in s:
        code = ord(ch)
        decoded.append(chr(219 - code) if low <= code <= high else ch)
    return "".join(decoded)
# 09. Typoglycemia
# Interpretation used here: for each word, keep the first and last
# characters in place and randomly reorder the characters in between.
import random


def shuffle(x):
    """Shuffle the mutable sequence x in place and return that same object.

    random.shuffle itself returns None; returning x lets the call be used
    inside a larger expression.
    """
    random.shuffle(x)
    return x
def list_to_str(x):
    """Concatenate the strings in the iterable x into a single string.

    An empty iterable gives the empty string.
    """
    return "".join(x)
def typoglycemia(s):
    """Return s with the inner letters of each long word randomly reordered.

    s is split on whitespace. Every word longer than four characters keeps
    its first and last characters while the characters in between are
    shuffled; shorter words are left as they are. The words are joined back
    with single spaces, with no leading or trailing space.
    """
    words = []
    for word in s.split():
        if len(word) > 4:
            # Shuffle a list copy of the interior; strings are immutable.
            middle = list(word[1:-1])
            random.shuffle(middle)
            word = word[0] + "".join(middle) + word[-1]
        words.append(word)
    return " ".join(words)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment