important word capture func commit

d472f3e1 · Kavindu · 704afb83 · d472f3e1
Commit d472f3e1 authored Oct 27, 2023 by Kavindu
Hide whitespace changes
Inline Side-by-side

Showing with 18 additions and 1 deletion

Objective_3/test.py Objective_3/test.py +18 -1

No files found.
--- a/Objective_3/test.py
+++ b/Objective_3/test.py
@@ -12,4 +12,21 @@ with open('data/test1.txt') as file:

 count_vect = CountVectorizer(stop_words=stopwords.words('english'), lowercase=True)
 x_counts = count_vect.fit_transform(corpus)
-x_counts.todense()
\ No newline at end of file
+x_counts.todense()
+
+# scikit-learn 1.2 removed get_feature_names(); fall back to it only on older releases.
+get_names = getattr(count_vect, 'get_feature_names_out', None) or count_vect.get_feature_names
+x = features = [str(name) for name in get_names()]  # plain str list, fetched once
+print(x)
+
+tfidf_transformer = TfidfTransformer()
+x_tfidf = tfidf_transformer.fit_transform(x_counts)
+dimension = 1  # number of LDA components to fit
+lda = LDA(n_components=dimension)
+lda_array = lda.fit_transform(x_tfidf)
+
+components = list(lda.components_)  # one weight row per component
+# Rank feature indices by weight (descending, stable) and keep the top three per component.
+important_words = [[features[i] for i in sorted(range(len(features)), key=row.__getitem__, reverse=True)[:3]]
+                   for row in components]
+print(important_words)
\ No newline at end of file