Delete sample_predictor.py

01d7244c · Amuthini Kulatheepan · fe509d64 · fe509d64
Commit 01d7244c authored Jul 09, 2021 by Amuthini Kulatheepan
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 102 deletions

Personality_prediction/Personality_prediction/sample_predictor.py +0 -102

No files found.
--- a/Personality_prediction/Personality_prediction/sample_predictor.py
+++ b/Personality_prediction/Personality_prediction/sample_predictor.py
-import csv
-import os
-import pickle
-
-import numpy as np
-from keras.models import load_model
-from keras.preprocessing import sequence
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
-
-# Directory holding the per-dimension model (.h5) and tokenizer (.pkl) files.
-MODELS_DIRECTORY = "models"
-DATA_DIRECTORY = "data/sample_data"
-SAMPLE_TWEETS_PATH = os.path.join(DATA_DIRECTORY, "0xnickrodriguez_tweets.csv")
-
-# Two-letter code of each scored dimension and, in the same order, the label
-# printed for it.
-DIMENSIONS = ["IE", "NS", "FT", "PJ"]
-DIMENSIONS_with_strings = ["Introversion Extroversion", "Intuition Sensing", "Feeling Thinking", "Perceiving Judging"]
-# NOTE(review): MODEL_BATCH_SIZE, TOP_WORDS and EMBEDDING_VECTOR_LENGTH are not
-# read anywhere in the lines shown; presumably they mirror the training
-# configuration -- confirm before relying on them.
-MODEL_BATCH_SIZE = 128
-TOP_WORDS = 2500
-# Token sequences are padded/truncated to this length before prediction.
-MAX_POST_LENGTH = 40
-EMBEDDING_VECTOR_LENGTH = 20
-
-# Collects one letter per dimension as each model is evaluated.
-final = ""
-
-# Each raw line of the sample file (trailing newline included) is one post.
-# The file is deliberately not parsed as CSV: the downstream code calls
-# str.lower() on every element, so it needs plain strings, and the previous
-# csv.reader object was created but never iterated.
-# NOTE(review): if the file has a header row or several columns, they end up
-# in the posts as-is -- confirm against the data file.
-with open(SAMPLE_TWEETS_PATH, "r", encoding="ISO-8859-1") as f:
-    x_test = list(f)
-
-# Lower-cased four-letter type codes; lemmatize() removes them from posts.
-types = [
-    code.lower()
-    for code in (
-        "INFJ",
-        "ENTP",
-        "INTP",
-        "INTJ",
-        "ENTJ",
-        "ENFJ",
-        "INFP",
-        "ENFP",
-        "ISFP",
-        "ISTP",
-        "ISFJ",
-        "ISTJ",
-        "ESTP",
-        "ESFP",
-        "ESTJ",
-        "ESFJ",
-    )
-]
-# Shared NLTK helpers used by lemmatize().
-lemmatizer = WordNetLemmatizer()
-stop_words = stopwords.words("english")
-
-
-def lemmatize(x):
-    """Lower-case each post, drop type codes and stop words, lemmatize the rest.
-
-    Args:
-        x: Iterable of post strings.
-
-    Returns:
-        A NumPy array holding one cleaned string per input post, in input
-        order.
-    """
-    # Build the set once: membership is tested for every word of every post,
-    # and scanning the stop-word list each time is needlessly linear.
-    stop_set = set(stop_words)
-    lemmatized = []
-    for post in x:
-        temp = post.lower()
-        # Only occurrences preceded by a space are removed, so a type code at
-        # the very start of a post is left in place.
-        for type_ in types:
-            temp = temp.replace(" " + type_, "")
-        # Split on single spaces (not arbitrary whitespace) so the tokens, and
-        # therefore the re-joined string, match the original preprocessing.
-        temp = " ".join(
-            lemmatizer.lemmatize(word)
-            for word in temp.split(" ")
-            if word not in stop_set
-        )
-        lemmatized.append(temp)
-    return np.array(lemmatized)
-
-
-# Score every dimension with its own model/tokenizer pair and append the
-# winning letter of each dimension to `final`.
-for dimension, label in zip(DIMENSIONS, DIMENSIONS_with_strings):
-    model = load_model(
-        os.path.join(MODELS_DIRECTORY, "rnn_model_{}.h5".format(dimension))
-    )
-    # NOTE: unpickling can execute arbitrary code; only load tokenizer files
-    # that come from a trusted source.
-    with open(
-            os.path.join(MODELS_DIRECTORY, "rnn_tokenizer_{}.pkl".format(dimension)), "rb"
-    ) as f:
-        tokenizer = pickle.load(f)
-
-    # Preprocess the posts: lemmatize, map words to token ids, then
-    # pad/truncate every sequence to MAX_POST_LENGTH.
-    tokenized = tokenizer.texts_to_sequences(lemmatize(x_test))
-    predictions = model.predict(
-        sequence.pad_sequences(tokenized, maxlen=MAX_POST_LENGTH)
-    )
-    # Fail with a clear message instead of a ZeroDivisionError (or a silent
-    # NaN) when there is nothing to average.
-    if len(predictions) == 0:
-        raise ValueError("No posts to score in {}".format(SAMPLE_TWEETS_PATH))
-
-    # Average the per-post scores into one value. np.mean replaces
-    # float(sum(...) / len(...)), which converted a one-element array to a
-    # scalar (deprecated since NumPy 1.25).
-    # NOTE(review): assumes the model emits one score per post -- confirm.
-    prediction = float(np.mean(predictions))
-    print(label)
-    print(prediction)
-    # A mean score of 0.5 or more selects the second letter of the pair,
-    # anything lower selects the first.
-    letter = dimension[1] if prediction >= 0.5 else dimension[0]
-    final += letter
-    print("Personality type - ", letter)
-
-    print("")
-
-print("")
-print("Personality Type of the Person : {}  ".format(final))