Commit d472f3e1 authored by Kavindu

important word capture func commit

parent 704afb83
......@@ -12,4 +12,21 @@ with open('data/test1.txt') as file:
# Build the term-count matrix for the corpus: tokens are lowercased and the
# English stop-word list is excluded from the vocabulary.
# (`stopwords` is imported outside this view -- presumably nltk.corpus.)
english_stop_words = stopwords.words('english')
count_vect = CountVectorizer(lowercase=True, stop_words=english_stop_words)
x_counts = count_vect.fit_transform(corpus)
x_counts.todense()
\ No newline at end of file
# Vocabulary learned by the vectorizer, in the same order as the weight
# columns used below.
# NOTE: get_feature_names() was removed in scikit-learn 1.2; on newer
# releases call count_vect.get_feature_names_out() instead.
x = count_vect.get_feature_names()
print(x)

# Re-weight the raw term counts with TF-IDF before fitting the model.
tfidf_transformer = TfidfTransformer()
x_tfidf = tfidf_transformer.fit_transform(x_counts)

# Fit LDA with `dimension` components.
# NOTE(review): `LDA` is imported outside this view -- presumably an alias for
# sklearn.decomposition.LatentDirichletAllocation; confirm.
dimension = 1
lda = LDA(n_components=dimension)
lda_array = lda.fit_transform(x_tfidf)

# One weight vector per component, one weight per vocabulary entry.
components = list(lda.components_)
# Separate list so `x` and `features` stay independent objects.
features = list(x)

# For every component keep the three highest-weighted words. Pairing each
# word with its weight up front avoids a linear `features.index(...)` lookup
# inside the sort key; ties keep vocabulary order because sorted() is stable
# even with reverse=True.
important_words = [
    [
        word
        for word, _ in sorted(
            zip(features, weights), key=lambda pair: pair[1], reverse=True
        )[:3]
    ]
    for weights in components
]
print(important_words)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment