Skip to content

Instantly share code, notes, and snippets.

@boronology
Last active September 14, 2016 13:43
Show Gist options
  • Save boronology/114601690a43ac9c3a96b36a3a36cd47 to your computer and use it in GitHub Desktop.
Save boronology/114601690a43ac9c3a96b36a3a36cd47 to your computer and use it in GitHub Desktop.
言語処理100本ノック http://www.cl.ecei.tohoku.ac.jp/nlp100/
#00. Reverse a string
# A slice with step -1 walks the string from its last character to its
# first, so "stressed" becomes "desserts".  (Appending each character to
# the end of an accumulator would just rebuild the original string.)
s = "stressed"[::-1]
print(s)
#01. "パタトクカシーー"
# Extract the 1st, 3rd, 5th and 7th characters.  Those positions are
# 1-based, so the matching 0-based indices are 0, 2, 4 and 6.
f = "パタトクカシーー"
l = [0, 2, 4, 6]
s = "".join(f[i] for i in l)
print(s)
#02. "パトカー" + "タクシー" = "パタトクカシーー"
f1 = "パトカー"
f2 = "タクシー"
# zip pairs the characters found at the same position in both strings;
# joining each pair in order interleaves the two words.
s = "".join(c1 + c2 for c1, c2 in zip(f1, f2))
print(s)
#03. Pi
# A plain split() would leave punctuation attached to the words, so every
# character that is not an ASCII letter or a space is stripped first.
import re
regex = r'[^a-zA-Z ]'
f = re.compile(regex).sub("", "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.")
# The length of each remaining word, in order of appearance.
l = [len(word) for word in f.split()]
print(l)
#04. Element symbols
f = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.".split()
# 1-based positions of the words that contribute only their first letter;
# every other word contributes its first two letters.
l = [1, 5, 6, 7, 8, 9, 15, 16, 19]
# Maps each extracted symbol to the 1-based position of its word.
a = {}
for i, word in enumerate(f, start=1):
    if i in l:
        a[word[0]] = i
    else:
        a[word[0:2]] = i
print(a)
#05. n-gram
def ngram(seq, n):
    """Return every run of n consecutive items of seq, in order.

    seq is any sliceable sequence: a string yields character n-grams and a
    list of words yields word n-grams.  Each element of the returned list
    is a slice of seq.  The list is empty when seq has fewer than n items.
    """
    # A sequence of length L has L - n + 1 windows of width n; the "+ 1"
    # is what keeps the final window.
    return [seq[i:i + n] for i in range(len(seq) - n + 1)]
print(ngram("I am an NLPer".split(),2))
print(ngram("I am an NLPer",2))
#06. Sets
# Character bi-grams of the two words, de-duplicated into sets.
X = set(ngram("paraparaparadise", 2))
Y = set(ngram("paragraph", 2))
# Union
print(X | Y)
# Intersection
print(X & Y)
# Difference (in X but not in Y)
print(X - Y)
# Difference (in Y but not in X)
print(Y - X)
#07. Sentence generation from a template
def template_string(x, y, z):
    """Return the sentence "<x>時の<y>は<z>" with the arguments filled in.

    Each argument is rendered with its default string formatting, so any
    value that str.format accepts may be passed.
    """
    return "{}時の{}は{}".format(x, y, z)
print(template_string(x=12, y="気温", z=22.4))
#08. Cipher text
def cipher(s):
    """Return s with every ASCII lowercase letter replaced by chr(219 - code).

    Since ord('a') + ord('z') == 219, this mirrors the lowercase alphabet
    ('a' <-> 'z', 'b' <-> 'y', ...).  Every other character is copied
    through unchanged.
    """
    lo = ord('a')
    hi = ord('z')
    return "".join(
        chr(219 - ord(ch)) if lo <= ord(ch) <= hi else ch
        for ch in s
    )
def decipher(s):
a = 219 - ord('a')
z = 219 - ord('z')
r = ""
m = map (lambda x: chr(219 - ord(x)) if z <= ord(x) <= a else x,list(s))
for i in m:
r += i
return r
#09. Typoglycemia
#これって各単語に対して「先頭と末尾の文字は残し,それ以外の文字の順序をランダムに並び替える」でいいんだよね?
import random
def shuffle(x):
    """Shuffle the mutable sequence x in place and return that same object.

    random.shuffle itself returns None; returning x lets the call be used
    inside a larger expression.
    """
    random.shuffle(x)
    return x
def list_to_str(x):
    """Concatenate the strings in the iterable x into a single string.

    Returns "" for an empty iterable.  Raises TypeError if an item is not
    a string.
    """
    return "".join(x)
def typoglycemia(s):
    """Return s with the interior letters of each long word shuffled.

    s is split on whitespace.  A word longer than four characters keeps
    its first and last character while the characters in between are
    randomly reordered; shorter words are left untouched.  The words are
    joined back together with single spaces, with no trailing space.
    """
    words = []
    for word in s.split():
        if len(word) > 4:
            # Only the interior is shuffled; the first and last characters
            # stay where they are.
            middle = list(word[1:-1])
            random.shuffle(middle)
            word = word[0] + "".join(middle) + word[-1]
        words.append(word)
    return " ".join(words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment