Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2022-231
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2022-231
2022-231
Commits
b5e6c052
Commit
b5e6c052
authored
Oct 08, 2022
by
Tharushika P.R
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Delete news_classification.py
parent
9d040799
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
57 deletions
+0
-57
news_classification.py
news_classification.py
+0
-57
No files found.
news_classification.py
deleted
100644 → 0
View file @
9d040799
import
pandas
as
pd
import
numpy
as
np
# import seaborn as sns
import
matplotlib.pyplot
as
plt
# for text pre-processing
import
re
,
string
import
nltk
from
nltk.tokenize
import
word_tokenize
from
nltk.corpus
import
stopwords
from
nltk.tokenize
import
word_tokenize
from
nltk.stem
import
SnowballStemmer
from
nltk.corpus
import
wordnet
from
nltk.stem
import
WordNetLemmatizer
#
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('wordnet')
# for model-building
from
sklearn.model_selection
import
train_test_split
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.linear_model
import
SGDClassifier
from
sklearn.naive_bayes
import
MultinomialNB
from
sklearn.metrics
import
classification_report
,
f1_score
,
accuracy_score
,
confusion_matrix
from
sklearn.metrics
import
roc_curve
,
auc
,
roc_auc_score
# bag of words
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.feature_extraction.text
import
CountVectorizer
# for word embedding
# import gensim
# from gensim.models import Word2Vec # Word2Vec is mostly used for huge datasets
import
os
import
csv
# Switch the working directory so the relative 'data/...' paths below
# resolve against '../impact_of_news'.
os.chdir('../impact_of_news')

# Load the training CSV into a DataFrame.
df_train = pd.read_csv('data/train.csv')

# Echo every raw CSV row with its fields joined by single spaces.
# newline="" is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly; encoding="utf-8" matches the default that
# pd.read_csv used above instead of depending on the platform locale.
with open("data/train.csv", newline="", encoding="utf-8") as f:
    for row in csv.reader(f):
        print(" ".join(row))

# Print the (rows, columns) shape of the loaded DataFrame.
print(df_train.shape)
# print(df_train)
#
# x = df_train['target'].value_counts()
# print(x)
# sns.barplot(x.index, x)
#
# df_train.isna().sum()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment