记录下:
%matplotlib inline
from jupyterthemes import jtplot
jtplot.style(theme='grade3') # choose a plotting theme
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import adjustText
from gensim.models.keyedvectors import KeyedVectors
# Location of the pre-trained embedding file. Despite the ".bin" extension it
# is loaded below in word2vec *text* format (binary=False).
VECTOR_FILE = 'C:/Users/yue/Desktop/1.bin'
# Upper bound on how many vectors are read from the file.
MAX_VECTORS = 1000000

word_vectors = KeyedVectors.load_word2vec_format(
    VECTOR_FILE, binary=False, limit=MAX_VECTORS)
def plot_2d_representation_of_words(
        word_list,
        word_vectors,
        flip_x_axis=False,
        flip_y_axis=False,
        label_x_axis="x",
        label_y_axis="y",
        label_label="fruit"):
    """Project word vectors onto two PCA components and plot them with labels.

    Each word in ``word_list`` is looked up as ``word_vectors[word]``, the
    resulting vectors are reduced to two dimensions with PCA, and the points
    are drawn as a seaborn scatter plot. Every point is annotated with its
    word, and ``adjustText`` then repositions the annotations.

    Args:
        word_list: Iterable of words to plot.
        word_vectors: Object supporting ``word_vectors[word]`` and returning
            an equally sized numeric vector for every word (for example a
            gensim ``KeyedVectors`` instance).
        flip_x_axis: If true, negate the first PCA coordinate.
        flip_y_axis: If true, negate the second PCA coordinate.
        label_x_axis: Column name / axis label for the first coordinate.
        label_y_axis: Column name / axis label for the second coordinate.
        label_label: Column name under which the words are stored.

    Returns:
        None. The plot is drawn on a newly created matplotlib figure.

    Raises:
        Whatever ``word_vectors[word]`` raises for an unknown word, and
        whatever PCA raises when the input is too small for two components.
    """
    words = list(word_list)

    # One row per word. The complete vector is used, so this works for any
    # embedding size (the former fixed ``1:300`` column slice silently
    # dropped the last component of 300-dimensional vectors).
    vectors = pd.DataFrame([word_vectors[word] for word in words])

    pca = PCA(n_components=2)
    coordinates_2d = pd.DataFrame(
        pca.fit_transform(vectors),
        columns=[label_x_axis, label_y_axis])
    coordinates_2d[label_label] = words

    # Mirroring an axis only changes the picture's orientation, not the
    # relative positions of the words.
    if flip_x_axis:
        coordinates_2d[label_x_axis] = coordinates_2d[label_x_axis] * (-1)
    if flip_y_axis:
        coordinates_2d[label_y_axis] = coordinates_2d[label_y_axis] * (-1)

    plt.figure(figsize=(5, 3))
    sns.scatterplot(
        data=coordinates_2d, x=label_x_axis, y=label_y_axis)

    # Annotate every point with its word, then let adjustText reposition
    # the annotations.
    texts = [
        plt.text(x, y, label)
        for x, y, label in zip(
            coordinates_2d[label_x_axis],
            coordinates_2d[label_y_axis],
            coordinates_2d[label_label])
    ]
    adjustText.adjust_text(texts)
from pylab import mpl

# Font configuration so that Chinese labels render correctly in plots.
mpl.rcParams.update({
    # Default sans-serif font; fixes Chinese text not being displayed.
    'font.sans-serif': ['STZhongsong'],
    # Fixes the minus sign '-' showing up as a box in saved images.
    'axes.unicode_minus': False,
})
# Alternative English word list (kept from the original, disabled):
#fruits = ['apple','orange','banana','lemon','car','tram','boat','bicycle','cherry','mango','grape','durian','watermelon','train','motorbike','ship', 'peach','pear','pomegranate','strawberry','bike','bus','truck','subway','airplane']

# Chinese words to visualise with the loaded embeddings.
fruits = ['苹果', '自行车', '香蕉', '汽车', '人']

# Draw the 2-D projection with the y axis mirrored.
plot_2d_representation_of_words(fruits, word_vectors, flip_y_axis=True)
这是在 Jupyter Notebook 中运行的,使用的是腾讯 AI Lab 的中文词向量:下载压缩包并解压,把最里面的 txt 文件重命名为 bin 文件(内容仍是文本格式,所以加载时使用 binary=False)。
可以看到词语语义之间的关系。