Notebook
%%time lsi = lsimodel.LsiModel(corpus=corpus_tfidf, id2word=dictionary, num_topics=50)topics = lsi.show_topics()for t in range(5): print('=====') print(topics[t][1])corpus_lsi = lsi[corpus] index = similarities.MatrixSimilarity(lsi[corpus]) sims = index[corpus_lsi] sims = (sims + 1)/2.quert = u"поисковый запрос" vec_bow = dictionary.doc2bow(doc.lower().split()) vec_lsi = lsi[vec_bow] # convert the query to LSI spacesims = index[vec_lsi] print(list(enumerate(sims))) # print (document_number, document_similarity) 2-tuples