Update 4.Text_Preprocessing.py

parent 381b4c9d
......@@ -9,8 +9,10 @@ from collections import Counter
import re
import unidecode
res = open("ResumeStr.txt").read()
tweet = open('Tweet.txt').read()
#nltk.download('stopwords')
res = open("Experience.txt",'r', encoding='utf-8').read()
tweet = open('Tweet.txt','r', encoding='utf-8').read()
#Lower case conversion
def lower_case_convertion(text):
......@@ -87,7 +89,7 @@ def lemmatization(text):
# initialize lemmatizer object
lemma = WordNetLemmatizer()
res = lemmatization(res)
tweet = lemmatization(tweet)
# Implementation of removing punctuations using string library
def remove_punctuation(text):
......@@ -227,7 +229,7 @@ tweet = '\n'.join(k)
#print(res)
#print(tweet)
with open("ClearText.txt", "w") as text_file:
with open("Experience.txt", "w") as text_file:
print(res, file=text_file)
with open("Tweet.txt", "w") as text_file:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment