Skip to content

Instantly share code, notes, and snippets.

@Witty-Kitty
Created February 18, 2019 16:38
Show Gist options
  • Save Witty-Kitty/c82a4213794a4328e765d182c698690e to your computer and use it in GitHub Desktop.
Save Witty-Kitty/c82a4213794a4328e765d182c698690e to your computer and use it in GitHub Desktop.
Read in word vectors and visualize them using t-SNE
import gensim
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Load the two embedding files (word2vec format) that the plot compares.
# "rw" / "lg" in the file names presumably stand for Kinyarwanda / Luganda
# -- TODO confirm.
_load_word2vec = gensim.models.KeyedVectors.load_word2vec_format
model = _load_word2vec(
    'MUSE/dumped/debug/8gywilp7r1/vectors-rw1.txt'
)
model2 = _load_word2vec(
    'MUSE/dumped/debug/8gywilp7r1/vectors-lg1.txt'
)
# English glosses used by tsne_plot to annotate the words of the second
# embedding (loaded from vectors-lg1.txt; presumably Luganda -- TODO confirm).
# Words that are not listed here are annotated as "null".
lg_dict = {
    "katonda": "god",
    "emu": "one",
    "kabaka": "king",
    "mutabani": "son",
    "mukazi": "son-in-law",
    "bantu": "people",
    "lukalu": "land",
    "ttaka": "land",
    "ettaka": "land",
    "nsi": "land",
    "olukalu": "land",
    "nnyumba": "house",
    "ennyumba": "house",
    "omulenzi": "son",
    "taata": "father",
    "ekintu": "thing",
    "engoye": "thing",
    "kintu": "thing",
    "mukono": "hand",
    "omukono": "hand",
    "kifo": "place",
    "ekifo": "place",
    "ekibuga": "city",
    "kibuga": "city",
    "ensi": "earth",
    "erinnya": "name",
    "lunaku": "day",
    "misana": "day",
    "emisana": "day",
    "olunaku": "day",
    "mutima": "heart",
    "ekigambo": "word",
    "kigambo": "word",
    "byombi": "two",
    "mannyo": "two",
    "ekikoola": "two",
    "erinnyo": "two",
    "liiso": "eye",
    "eriiso": "eye",
    "nnyindo": "nose",
    "ennyindo": "nose",
    "omukwano": "love",
    "mukwano": "love",
    "mwanyina": "brother",
    "kwagalana": "to-love-each-other",
    "omuntu": "man",
    "omusajja": "man",
    "musajja": "man",
}
# English glosses used by tsne_plot to annotate the words of the first
# embedding (loaded from vectors-rw1.txt; presumably Kinyarwanda -- TODO
# confirm). Words that are not listed here are annotated as "null".
rw_dict = {
    "imana": "god",
    "rimwe": "one",
    "umwami": "king",
    "mwene": "son",
    "umukwe": "son-in-law",
    "rubanda": "people",
    "isambu": "land",
    "igihugu": "land",
    "ubutaka": "land",
    "inzu": "house",
    "akazu": "house",
    "baba": "father",
    "ikintu": "thing",
    "akantu": "small-thing",
    "ikiganza": "hand",
    "umukono": "hand",
    "ahantu": "place",
    "igitaka": "earth",
    "isi": "earth",
    "izina": "name",
    "umunsi": "day",
    "umutima": "heart",
    "ijambo": "word",
    "ebyiri": "two",
    "kabiri": "two",
    "ijisho": "eye",
    "izuru": "nose",
    "ishyanga": "foreign-country",
    "urukundo": "love",
    "musaza": "brother-of-a-female",
    "umugambi": "plan",
    "umugambanyi": "traitor",
    "umusaza": "old-man",
    "umusore": "male-teenager",
    "data": "paternal-uncle",
    "sebukwe": "father-in-law-of-someone-else",
    "sobukwe": "father-in-law-of-person-being-spoken-to",
    "kurandata": "to-lead-by-the-hand",
    "gukunda": "to-like-love",
    "gukundana": "to-like-or-love-each-other",
    "gukundwa": "to-be-loved",
}
def _labels_and_vectors(model):
    """Return ``(words, vectors)`` for every vocabulary entry of *model*.

    Works with both gensim 3.x (``.vocab``) and gensim >= 4.0, where
    ``.vocab`` was removed in favour of ``.key_to_index``.
    """
    # Full models keep their vectors under ``.wv``; a bare KeyedVectors
    # object on gensim >= 4.0 has no ``.wv``, so fall back to the object.
    keyed = getattr(model, 'wv', model)
    vocab = getattr(keyed, 'key_to_index', None)
    if vocab is None:
        vocab = keyed.vocab
    labels = list(vocab)
    return labels, [keyed[word] for word in labels]


def _project_2d(vectors):
    """Project *vectors* to 2-D with t-SNE and return the coordinate array."""
    import inspect

    # scikit-learn 1.5 renamed ``n_iter`` to ``max_iter`` and later removed
    # the old name; pass whichever keyword the installed version accepts.
    accepted = inspect.signature(TSNE).parameters
    iter_kwarg = 'max_iter' if 'max_iter' in accepted else 'n_iter'
    tsne = TSNE(perplexity=50, n_components=2, init='pca',
                random_state=23, **{iter_kwarg: 2500})
    return tsne.fit_transform(vectors)


def _draw_points(labels, coords, translations, color):
    """Scatter *coords* in *color* and annotate each point as "word - gloss"."""
    xs = coords[:, 0]
    ys = coords[:, 1]
    # One scatter call for the whole set instead of one call per point.
    plt.scatter(xs, ys, c=color)
    for word, x, y in zip(labels, xs, ys):
        # Words without an entry in *translations* are labelled "null".
        plt.annotate(word + " - " + translations.get(word, "null"),
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')


def tsne_plot(model1, model2):
    """Create t-SNE projections of two embeddings and plot them together.

    Every vocabulary word of ``model1`` is drawn in blue and annotated with
    its gloss from ``rw_dict``; every vocabulary word of ``model2`` is drawn
    in green and annotated with its gloss from ``lg_dict``. The figure is
    saved to ``shared-filtered.png`` and then shown.

    Args:
        model1: gensim word vectors whose words are looked up in ``rw_dict``.
        model2: gensim word vectors whose words are looked up in ``lg_dict``.
    """
    labels1, vectors1 = _labels_and_vectors(model1)
    labels2, vectors2 = _labels_and_vectors(model2)

    # NOTE(review): each embedding gets its own independent t-SNE fit, so
    # the two point clouds do not share a coordinate system and distances
    # between blue and green points are not comparable. Confirm whether a
    # joint fit on the concatenated vectors was intended.
    coords1 = _project_2d(vectors1)
    coords2 = _project_2d(vectors2)

    plt.figure(figsize=(16, 16))
    _draw_points(labels1, coords1, rw_dict, 'b')
    _draw_points(labels2, coords2, lg_dict, 'g')
    plt.savefig('shared-filtered.png')
    plt.show()
if __name__ == "__main__":
    # Render the plot only when this file is run as a script, so that
    # importing it does not trigger the t-SNE fit and the plot window.
    tsne_plot(model, model2)
@EVANE-hub
Copy link

I love it. from Gabon

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment