An Open Source Gensim Word2Vec Plotting Library in Python
In [15]:
import pandas as pd
import os
import numpy as np
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
import pandas as pd
import os
import numpy as np
from gensim.models import Word2Vec
import matplotlib.pyplot as plt
%matplotlib inline
# Load a Word2Vec model from the file "word2vec.model" (path is relative to
# the notebook's working directory).
model = Word2Vec.load("word2vec.model")
In [32]:
# Wrap the model in the plotter; its constructor calls preprocess(), so the
# 2-D PCA projection is computed right here.
helper = GensimWord2vecPlotter(model=model)
In [33]:
# Scatter plot of the 2-D projection of every word in the vocabulary.
helper.plot_catter()
In [34]:
# Annotated scatter plot limited to the first 30 vocabulary words.
helper.plot_scatter_words(Size=30)
In [35]:
# Preview the first five rows of the words / x / y table.
helper.getPandasDF().head(5)
Out[35]:
In [29]:
from sklearn.decomposition import PCA
import pandas as pd
import os
import numpy as np
from gensim.models import Word2Vec
class GensimWord2vecPlotter(object):
    """Project a gensim Word2Vec vocabulary onto two PCA components and plot it.

    The projection is computed once, in the constructor, and cached on the
    instance:

    * ``words``  -- list of vocabulary words.
    * ``result`` -- array with one row per word and two columns (the PCA
      coordinates), in the same order as ``words``.

    ``PCA`` (scikit-learn) and ``plt`` (matplotlib.pyplot) are expected to be
    imported by the surrounding module/notebook.
    """

    __slots__ = ["model", "words", "result", "_tem"]

    def __init__(self, model):
        """Store *model* and immediately compute the 2-D projection.

        :param model: a gensim ``Word2Vec`` model (anything exposing ``.wv``),
            or a ``KeyedVectors``-like object itself.
        """
        self.model = model
        self.words = None
        self.result = None
        # preprocess() fills ``words``/``result`` and returns None; the
        # ``_tem`` slot is kept only so the attribute still exists.
        self._tem = self.preprocess()

    @staticmethod
    def _vocabulary(keyed_vectors):
        """Return the vocabulary of *keyed_vectors* as a list of words.

        gensim >= 4 exposes the vocabulary as ``key_to_index``; older
        releases expose it as ``vocab``.  Both are supported.
        """
        key_to_index = getattr(keyed_vectors, "key_to_index", None)
        if key_to_index is not None:
            return list(key_to_index)
        return list(keyed_vectors.vocab)

    def preprocess(self):
        """Compute the PCA projection and store it on the instance.

        Sets ``self.words`` and ``self.result``.

        :return: None
        """
        # Index the keyed vectors rather than the model itself:
        # ``Word2Vec.__getitem__`` no longer exists in gensim 4.
        keyed_vectors = getattr(self.model, "wv", self.model)
        words = self._vocabulary(keyed_vectors)
        vectors = keyed_vectors[words]
        self.result = PCA(n_components=2).fit_transform(vectors)
        self.words = words

    def getPandasDF(self):
        """Return the projection as a DataFrame with columns words, x, y.

        :return: ``pandas.DataFrame`` with one row per vocabulary word.
        """
        words = self.words
        coords = self.result[:len(words)]
        return pd.DataFrame(data={
            "words": words,
            "x": coords[:, 0],
            "y": coords[:, 1],
        })

    def plot_scatter(self):
        """Draw a scatter plot of every word's 2-D coordinates.

        :return: None
        """
        self.getPandasDF().plot.scatter("x", "y", s=10, figsize=(20, 12))

    # Original (misspelled) name, kept so existing callers keep working.
    plot_catter = plot_scatter

    def plot_scatter_words(self, Size=80, title="Skills of Candidates"):
        """Draw an annotated scatter plot of the first *Size* words.

        :param Size: maximum number of words to plot; values larger than the
            vocabulary are clamped, values below zero plot nothing.
        :param title: title placed above the plot.
        :return: None
        """
        words = self.words
        result = self.result
        # Clamp once so the annotations and the scatter cover the same points.
        count = max(0, min(Size, len(words)))
        for word, (x, y) in zip(words[:count], result[:count]):
            plt.annotate(word,
                         xy=(x, y),
                         horizontalalignment='left',
                         verticalalignment='bottom')
        plt.scatter(result[:count, 0], result[:count, 1], s=40)
        plt.title(title)
        plt.grid(True, alpha=1)
        # No plt.legend(): nothing is labelled, so it would only emit a warning.
        plt.show()
No comments:
Post a Comment