Commit c2ef0447 authored by Amuthini Kulatheepan

Delete make_test_set.py

parent 31724392
import os
import collections
import pandas as pd
import csv
import re
DATA_DIRECTORY = "data"
MBTI_CLEAN_CSV_PATH = os.path.join(DATA_DIRECTORY, "mbti_clean.csv")
DIMENSIONS = ("IE", "NS", "TF", "PJ")

# Delimiter between the individual posts packed into one "posts" cell.
POST_SEPARATOR = "|||"
# Compiled once because every candidate post is matched against it.
_LETTER_PATTERN = re.compile(r"[a-zA-Z]")


def _is_usable_post(post):
    """Return True if *post* should be kept in a test set.

    A post is rejected when it is empty, contains the substring "http",
    or does not contain a single ASCII letter.
    """
    return (
        bool(post)
        and "http" not in post
        and _LETTER_PATTERN.search(post) is not None
    )


def _usable_posts(posts_cell):
    """Split one "posts" cell on ``|||`` and return the usable posts, in order.

    A cell that is not a string (pandas yields a float NaN for an empty
    cell) contributes no posts instead of raising on ``.split``.
    """
    if not isinstance(posts_cell, str):
        return []
    return [
        post
        for post in posts_cell.split(POST_SEPARATOR)
        if _is_usable_post(post)
    ]


def _group_posts_by_letter(df, dimensions=DIMENSIONS):
    """Collect the usable posts of every row under each letter of its type.

    Args:
        df: DataFrame with a "type" column and a "posts" column.
        dimensions: Iterable of letter groups; every letter of every group
            gets its own bucket.

    Returns:
        A dict mapping each letter to the list of usable posts taken from
        the rows whose "type" value contains that letter. Keys follow the
        order of ``dimensions``; posts follow row order.
    """
    posts_by_letter = {
        letter: [] for dimension in dimensions for letter in dimension
    }
    # Single pass over the frame: each cell is split and filtered once and
    # the result is shared by every letter the row's type contains.
    for mbti_type, posts_cell in zip(df["type"], df["posts"]):
        if not isinstance(mbti_type, str):
            continue  # missing type: the row cannot match any letter
        usable = _usable_posts(posts_cell)
        if not usable:
            continue
        for letter, bucket in posts_by_letter.items():
            if letter in mbti_type:
                bucket.extend(usable)
    return posts_by_letter


def _write_posts(posts, csv_path):
    """Write *posts* to *csv_path* as a one-column CSV, one post per row."""
    # newline="" is required by the csv module; without it the writer's
    # "\r\n" row terminator is translated to "\r\r\n" on Windows.
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows([post] for post in posts)


def main(mbti_csv_path=MBTI_CLEAN_CSV_PATH, output_directory=DATA_DIRECTORY):
    """Build one ``test_<letter>.csv`` file per letter in ``DIMENSIONS``.

    Args:
        mbti_csv_path: CSV file to read; must have "type" and "posts"
            columns.
        output_directory: Existing directory that receives the
            ``test_<letter>.csv`` files.

    Raises:
        KeyError: If the input has no "type" or "posts" column.
        OSError: If the input cannot be read or an output cannot be written.
    """
    df = pd.read_csv(mbti_csv_path)
    for letter, posts in _group_posts_by_letter(df).items():
        _write_posts(posts, os.path.join(output_directory, f"test_{letter}.csv"))


# Run only when executed as a script, so importing this module has no
# file-system side effects.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment