from gensim.models import KeyedVectors
We will load only the first 1,000 (top 1,000) vectors from the fastText model trained on Python source code (128-dimensional vectors)
model = KeyedVectors.load_word2vec_format("../model.vec", binary=False, limit=1000)
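A quick sanity check that the limit and dimensionality are what we expect (a minimal sketch; model.vocab and vector_size are the gensim 3.x attribute names):
print(len(model.vocab), model.vector_size)  # expect: 1000 128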
In case you are loading GloVe embeddings, you need to convert them to word2vec format first
from gensim.test.utils import get_tmpfile
from gensim.scripts.glove2word2vec import glove2word2vec
tmpfile = get_tmpfile("source2vec")
glove2word2vec("../glove_model.txt", tmpfile)  # rewrites the GloVe file in word2vec format
model = KeyedVectors.load_word2vec_format(tmpfile, binary=False, limit=1000)
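Note: on gensim 4.x the glove2word2vec script is deprecated; there the GloVe file can be loaded directly by telling the loader it has no header line, a sketch assuming the same file path:
model = KeyedVectors.load_word2vec_format("../glove_model.txt", binary=False, no_header=True, limit=1000)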
Now we can do fancy stuff
model.most_similar("for")
[('in', 0.8999497890472412), ('enumerate', 0.7851556539535522), ('and', 0.6600955724716187), ('set', 0.6515539884567261), ('range', 0.6467443704605103), ('the', 0.6438981294631958), ('are', 0.6360925436019897), ('len', 0.6327202916145325), ('or', 0.625091552734375), ('list', 0.6236062049865723)]
model.most_similar_cosmul("for")
[('in', 0.949974000453949), ('enumerate', 0.8925769925117493), ('and', 0.8300470113754272), ('set', 0.825776219367981), ('range', 0.823371410369873), ('the', 0.8219482898712158), ('are', 0.8180454969406128), ('len', 0.8163593411445618), ('or', 0.8125450015068054), ('list', 0.811802327632904)]
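Both methods also take positive and negative word lists, which is how analogy-style queries are written; a minimal sketch using tokens we know are in the loaded vocabulary:
model.most_similar(positive=["for", "list"], negative=["the"], topn=5)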
model.doesnt_match(["for", "i", "a"])
'a'
model.similarity("for", "i")
0.6013880741598091
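similarity() is just the cosine between the two raw vectors, so we can reproduce the number by hand with numpy (a minimal sketch):
import numpy as np

def cosine(a, b):
    # cosine similarity: dot product of the two L2-normalized vectors
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

cosine(model["for"], model["i"])  # should match model.similarity("for", "i")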
model.similar_by_word("for")  # convenience wrapper, equivalent to most_similar("for")
[('in', 0.8999497890472412), ('enumerate', 0.7851556539535522), ('and', 0.6600955724716187), ('set', 0.6515539884567261), ('range', 0.6467443704605103), ('the', 0.6438981294631958), ('are', 0.6360925436019897), ('len', 0.6327202916145325), ('or', 0.625091552734375), ('list', 0.6236062049865723)]
Raw vector values
for word in model.vocab:
    print(word, ":", model[word])
    break  # only the first one for now
</s> : [-0.0040119 -0.2772 0.39069 -0.11142 -0.064213 0.031526 0.072355 0.28111 0.098242 0.44947 0.0033427 0.059818 0.10572 0.033005 -0.16825 0.027287 -0.014041 -0.13167 0.20144 0.097083 0.13253 0.09556 -0.12805 0.10373 -0.12057 0.36752 -0.13177 -0.070997 -0.079466 0.29838 -0.066887 -0.069284 -0.26501 0.21408 0.020991 -0.34294 -0.3189 -0.1705 0.11337 -0.22872 -0.024095 0.069369 -0.31733 0.63158 0.084219 -0.23931 -0.17847 -0.38957 -0.038808 -0.046805 -0.20444 -0.15775 -0.12279 -0.014646 -0.10996 -0.060379 -0.16898 -0.0048211 -0.57151 0.18944 0.11457 -0.2425 -0.08871 -0.054677 -0.2549 -0.15642 0.12891 -0.27773 0.10004 -0.46064 0.25698 0.039099 0.24376 -0.14525 -0.27021 0.018427 0.046646 -0.090066 0.1492 0.0032186 -0.15175 -0.11093 0.35132 -0.068802 0.0021299 0.29755 -0.19092 0.0321 -0.086515 0.36746 -0.15456 -0.051887 0.63347 0.02882 0.3993 -0.20558 0.08532 0.10247 -0.056457 -0.12951 -0.28994 0.15222 0.16311 -0.22158 0.032566 -0.38924 -0.20935 -0.12184 0.064111 -0.11226 0.10365 0.065956 -0.064537 0.072354 -0.029869 0.016191 0.22993 0.03368 -0.074305 0.080369 0.062322 0.14384 0.059349 0.25721 -0.016504 0.034721 -0.35689 -0.20129 ]
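The raw vectors are also available as one numpy matrix, which is handy for batch operations; a sketch that redoes most_similar("for") by hand (model.vectors and index2word are the gensim 3.x names):
import numpy as np

normed = model.vectors / np.linalg.norm(model.vectors, axis=1, keepdims=True)  # L2-normalize rows
query = normed[model.vocab["for"].index]   # normalized vector for "for"
scores = normed @ query                    # cosine similarity against every word
top = np.argsort(-scores)[:5]              # indices of the 5 closest words
print([model.index2word[i] for i in top])  # "for" itself first, then its neighbors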