This is a practice exercise with Word2Vec using gensim.
Gensim is an open-source library for unsupervised topic modeling and natural language processing, using modern statistical machine learning.
https://en.wikipedia.org/wiki/Gensim
I used this blog post as a reference; it is written in Korean.
from gensim.models import Word2Vec
import matplotlib.pyplot as plt
# Helper that draws a labelled two-dimensional scatter plot.
def plot_2d_graph(vocabs, xs, ys):
    """Scatter-plot the points ``(x, y)`` and annotate each one with its label.

    Args:
        vocabs: Iterable of labels, one per point, in the same order as
            ``xs`` and ``ys``.
        xs: Horizontal coordinates of the points.
        ys: Vertical coordinates of the points.

    Returns:
        None. A new 8x6 matplotlib figure is created and drawn on;
        ``plt.show()`` is not called here, so displaying the figure is left
        to the caller or to the notebook backend.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(xs, ys, marker='o')
    # Pair labels with coordinates positionally instead of indexing xs[i] /
    # ys[i], so any iterable of coordinates works. Labels beyond the number
    # of available points are ignored.
    for word, x, y in zip(vocabs, xs, ys):
        plt.annotate(word, xy=(x, y))
# Toy training corpus: every sentence is stored as a list of word tokens,
# obtained here by splitting each sentence on whitespace.
sentences = [
    text.split()
    for text in (
        'this is a good product',
        'it is a excellent product',
        'it is a bad product',
        'that is the worst product',
        'you are beautiful',
        'you are nice',
        'he is nice',
        'she is beautiful',
        'we are good team',
        'you are lovely',
        'you are pretty',
    )
]
# Train a Word2Vec model on the toy corpus above.
# min_count=1 is needed because most words occur only once in such a tiny
# corpus; size=300 matches the 300-dimensional pretrained vectors that are
# merged in later.
model = Word2Vec(
    sentences,
    size=300,
    window=3,
    min_count=1,
    workers=1,
)

# Pull out the trained vectors: the vocabulary, plus one vector per word
# listed in the same order as the vocabulary.
word_vectors = model.wv
vocabs = word_vectors.vocab.keys()
word_vectors_list = [word_vectors[word] for word in vocabs]

# Print the similarity between 'this' and 'that' as learned from the toy
# corpus alone; the value printed below is the baseline for later comparison.
print(
    word_vectors.similarity(w1='this', w2='that')
)
# Reduce the word vectors to their first two principal components so that
# they can be drawn on a plane.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xys = pca.fit_transform(word_vectors_list)
# Column 0 holds the x coordinates, column 1 the y coordinates.
xs, ys = xys[:, 0], xys[:, 1]
# Draw every word of the vocabulary at its projected position.
plot_2d_graph(vocabs, xs, ys)
# Merge the model with vectors that were already trained elsewhere (the
# GoogleNews word2vec binary). The path is relative to the current working
# directory.
file_name = 'Downloads/GoogleNews-vectors-negative300.bin'
model.intersect_word2vec_format(
    fname=file_name,
    binary=True,
)

# Rebuild the vocabulary and the per-word vector list after the merge.
word_vectors = model.wv
vocabs = word_vectors.vocab.keys()
word_vectors_list = [word_vectors[word] for word in vocabs]

# Print the similarity between 'this' and 'that' again; comparing it with
# the earlier value shows how the merge changed it.
print(
    word_vectors.similarity(w1='this', w2='that')
)
# Fit a fresh two-component PCA on the merged vectors.
pca = PCA(n_components=2)
xys = pca.fit_transform(word_vectors_list)
# Column 0 holds the x coordinates, column 1 the y coordinates.
xs, ys = xys[:, 0], xys[:, 1]
# Redraw the vocabulary with the merged vectors, for comparison with the
# plot made before the merge.
plot_2d_graph(vocabs, xs, ys)