Commit 194cab30 authored by Tharushika P.R

Upload New File

parent d7cf97d3
import pandas as pd
import numpy as np
# import seaborn as sns
import matplotlib.pyplot as plt
# for text pre-processing
import re, string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
#
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('wordnet')
# for model-building
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.metrics import roc_curve, auc, roc_auc_score
# bag of words
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
# for word embedding
# import gensim
# from gensim.models import Word2Vec # Word2Vec is mostly used for huge datasets
import os
import csv
# Switch into the project directory so the relative data path below resolves.
# NOTE(review): '../impact_of_news' is relative to the current working
# directory, so this only works when the script is launched from a sibling
# directory of the project -- confirm this is the intended way to run it.
os.chdir('../impact_of_news')

# Single source of truth for the training-set location (used twice below).
train_csv_path = 'data/train.csv'

# Load the training set into a DataFrame for later processing.
df_train = pd.read_csv(train_csv_path)

# Echo the raw file row by row, with the fields of each row joined by a
# single space.
# - newline="" is what the csv module expects, so that line endings embedded
#   in quoted fields are parsed correctly.
# - encoding="utf-8" matches the default pd.read_csv used just above, so both
#   reads decode the file the same way regardless of the platform locale.
with open(train_csv_path, newline="", encoding="utf-8") as f:
    for row in csv.reader(f):
        print(" ".join(row))

# Report (n_rows, n_columns) of the loaded DataFrame.
print(df_train.shape)
# print(df_train)
#
# x = df_train['target'].value_counts()
# print(x)
# sns.barplot(x.index, x)
#
# df_train.isna().sum()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment