Commit fb778e1c authored by Amuthini Kulatheepan

Deleted Personality_prediction/.idea/inspectionProfiles/profiles_settings.xml,...

Deleted Personality_prediction/.idea/inspectionProfiles/profiles_settings.xml, Personality_prediction/.idea/.gitignore, Personality_prediction/.idea/Personality_prediction.iml, Personality_prediction/.idea/misc.xml, Personality_prediction/.idea/modules.xml, Personality_prediction/Personality_prediction/.idea/inspectionProfiles/profiles_settings.xml, Personality_prediction/sample_predictor.py, Personality_prediction/separate_clean_and_unclean.py, Personality_prediction/simple_rnn.py files
parent 0d80139b
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8 (2)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (2)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Personality_prediction.iml" filepath="$PROJECT_DIR$/.idea/Personality_prediction.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
import csv
import os
import pickle
import numpy as np
from keras.models import load_model
from keras.preprocessing import sequence
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Where the trained models/tokenizers and the sample input live.
MODELS_DIRECTORY = "models"
DATA_DIRECTORY = "data/sample_data"
SAMPLE_TWEETS_PATH = os.path.join(DATA_DIRECTORY, "0xnickrodriguez_tweets.csv")

# Two-letter code of each personality axis; one model file and one tokenizer
# file are looked up per code. DIMENSIONS_with_strings holds the matching
# human-readable labels, in the same order.
DIMENSIONS = ["IE", "NS", "FT", "PJ"]
DIMENSIONS_with_strings = [
    "Introversion Extroversion",
    "Intuition Sensing",
    "Feeling Thinking",
    "Perceiving Judging",
]

# Of these, only MAX_POST_LENGTH is read by the code in this script; the
# others presumably mirror the training configuration -- TODO confirm.
MODEL_BATCH_SIZE = 128
TOP_WORDS = 2500
MAX_POST_LENGTH = 40
EMBEDDING_VECTOR_LENGTH = 20

# Accumulates one letter per dimension to form the predicted type.
final = ""

# Every physical line of the file is one sample, kept verbatim (including its
# trailing newline). The file is deliberately NOT parsed with csv.reader:
# the downstream text clean-up expects plain strings, not lists of fields.
with open(SAMPLE_TWEETS_PATH, "r", encoding="ISO-8859-1") as f:
    x_test = list(f)
# The sixteen MBTI type codes, stored lower-case because posts are lower-cased
# before these codes are stripped out of them.
types = [
    code.lower()
    for code in (
        "INFJ",
        "ENTP",
        "INTP",
        "INTJ",
        "ENTJ",
        "ENFJ",
        "INFP",
        "ENFP",
        "ISFP",
        "ISTP",
        "ISFJ",
        "ISTJ",
        "ESTP",
        "ESFP",
        "ESTJ",
        "ESFJ",
    )
]

# Shared NLTK resources used by the text clean-up step.
stop_words = stopwords.words("english")
lemmatizer = WordNetLemmatizer()
def lemmatize(x):
    """Normalise raw posts before they are tokenized.

    Each post is lower-cased, every occurrence of a space followed by one of
    the codes in ``types`` is removed, words found in ``stop_words`` are
    dropped, and the remaining words are lemmatized.

    Args:
        x: Iterable of post strings.

    Returns:
        A NumPy array with one cleaned, space-joined string per input post,
        in input order.
    """
    # Set membership is O(1); testing against the stop-word list directly
    # would rescan it for every single word of every post.
    stop_word_set = set(stop_words)

    lemmatized = []
    for post in x:
        temp = post.lower()
        for type_ in types:
            # Only a code preceded by a space is removed, so a code at the
            # very start of a post is left in place.
            temp = temp.replace(" " + type_, "")
        temp = " ".join(
            lemmatizer.lemmatize(word)
            for word in temp.split(" ")
            if word not in stop_word_set
        )
        lemmatized.append(temp)
    return np.array(lemmatized)
# Fail loudly on empty input: averaging zero predictions below would otherwise
# produce NaN, which silently compares as "< 0.5" for every dimension.
if not x_test:
    raise ValueError("No tweets were loaded from {}".format(SAMPLE_TWEETS_PATH))

# One binary model per personality axis; each contributes one letter.
for dimension, dimension_name in zip(DIMENSIONS, DIMENSIONS_with_strings):
    model = load_model(
        os.path.join(MODELS_DIRECTORY, "rnn_model_{}.h5".format(dimension))
    )

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only load tokenizer pickles that come from a trusted source.
    with open(
        os.path.join(MODELS_DIRECTORY, "rnn_tokenizer_{}.pkl".format(dimension)),
        "rb",
    ) as f:
        tokenizer = pickle.load(f)

    def preprocess(x):
        """Clean, tokenize and pad posts for the current dimension's model.

        Uses the tokenizer loaded for the dimension being processed and pads
        or truncates every sequence to MAX_POST_LENGTH entries.
        """
        lemmatized = lemmatize(x)
        tokenized = tokenizer.texts_to_sequences(lemmatized)
        return sequence.pad_sequences(tokenized, maxlen=MAX_POST_LENGTH)

    predictions = model.predict(preprocess(x_test))

    # np.mean reduces to a true scalar. The previous float(sum(...) / len(...))
    # converted a 1-element array to float, which NumPy >= 1.25 deprecates.
    # Assumes the model emits one score per post -- TODO confirm.
    prediction = float(np.mean(predictions))

    print(dimension_name)
    print(prediction)

    # A mean score of 0.5 or more selects the second letter of the pair,
    # anything lower selects the first.
    letter = dimension[1] if prediction >= 0.5 else dimension[0]
    final += letter
    print("Personality type - ", letter)
    print("")

print("")
print("Personality Type of the Person : {} ".format(final))
import os
import collections
import pandas as pd
import csv
# Input dataset and the two files it is split into.
DATA_DIRECTORY = "data"
MBTI_RAW_CSV_PATH = os.path.join(DATA_DIRECTORY, "mbti_personality.csv")
MBTI_CLEAN_CSV_PATH = os.path.join(DATA_DIRECTORY, "mbti_personality_clean.csv")
MBTI_UNCLEAN_CSV_PATH = os.path.join(DATA_DIRECTORY, "mbti_personality_unclean.csv")

# Target share of each type in the "clean" file; the values sum to 1.0.
# Presumably real-world population frequencies -- TODO confirm the source.
# The key order also fixes the order in which types are written out.
MBTI_TO_FREQUENCY_DICT = {
    "ISTJ": 0.11,
    "ISFJ": 0.09,
    "INFJ": 0.04,
    "INTJ": 0.05,
    "ISTP": 0.05,
    "ISFP": 0.05,
    "INFP": 0.06,
    "INTP": 0.06,
    "ESTP": 0.04,
    "ESFP": 0.04,
    "ENFP": 0.08,
    "ENTP": 0.06,
    "ESTJ": 0.08,
    "ESFJ": 0.09,
    "ENFJ": 0.05,
    "ENTJ": 0.05,
}

df = pd.read_csv(MBTI_RAW_CSV_PATH)

# Number of rows available for each type label.
counts = collections.Counter(df["type"])

# A label missing from the frequency table would fail in the division below
# with a bare KeyError; report every offending label up front instead.
unknown_types = [mbti for mbti in counts if mbti not in MBTI_TO_FREQUENCY_DICT]
if unknown_types:
    raise KeyError(
        "Unexpected value(s) in the 'type' column of {}: {}".format(
            MBTI_RAW_CSV_PATH, unknown_types
        )
    )

# For each type, count / share is the largest total dataset size that type
# could support at its target share. The smallest such value (min_size) is the
# biggest total for which every type can still meet its share; limiting_type
# records which type imposes that bound.
limiting_type = None
min_size = float("infinity")
for mbti, count in counts.items():
    size = count / MBTI_TO_FREQUENCY_DICT[mbti]
    if size < min_size:
        min_size = size
        limiting_type = mbti

# Group the rows by type, preserving their order in the source file.
dic = collections.defaultdict(list)
for _, row in df.iterrows():
    dic[row["type"]].append(row)
# With no input rows min_size is still infinity; treat that as "nothing to
# write" instead of crashing on int(round(inf)).
has_rows = min_size != float("infinity")

# Rows that did not fit into a type's quota, one list per type.
unclean_list = []

# newline="" is required for files handed to csv.writer; without it, line
# endings are doubled on Windows.
with open(MBTI_CLEAN_CSV_PATH, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["type", "posts"])
    for mbti, frequency in MBTI_TO_FREQUENCY_DICT.items():
        list1 = dic[mbti]
        # Number of rows this type contributes to the "clean" file.
        quota = int(round(min_size * frequency)) if has_rows else 0
        # Slicing (rather than indexing) tolerates a type that has fewer rows
        # than its quota, e.g. one that is absent from the raw data.
        writer.writerows(list1[:quota])
        unclean_list.append(list1[quota:])

# Everything beyond the quotas goes to the "unclean" file.
with open(MBTI_UNCLEAN_CSV_PATH, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["type", "posts"])
    for leftover_rows in unclean_list:
        writer.writerows(leftover_rows)
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment