Upload New File

parent 4855344e
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "changing-opening",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import make_classification\n",
"from matplotlib import pyplot as plt\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import GridSearchCV\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
"from sklearn.preprocessing import OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "floral-tours",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question no</th>\n",
" <th>answer</th>\n",
" <th>emotion</th>\n",
" <th>reason</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>9 hours</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>heavy</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>between 1-2 hours</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>friendly staff</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>satisfy</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" question no answer emotion reason\n",
"0 1 9 hours 6 5\n",
"1 2 heavy 1 5\n",
"2 3 between 1-2 hours 6 5\n",
"3 4 friendly staff 3 5\n",
"4 5 satisfy 6 5"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the survey-response table. The path is relative to the notebook's\n",
"# working directory.\n",
"df = pd.read_csv('data/reason_data_up.csv')\n",
"# Preview the first rows to check the columns were parsed as expected.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "asian-passenger",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [2]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
]
}
],
"source": [
"# Label-encode the three feature columns in place. Each column gets its own\n",
"# fitted encoder, kept in `encoders`, so raw values can later be encoded the\n",
"# same way (encoders[col].transform) or codes mapped back to the original\n",
"# values (encoders[col].inverse_transform).\n",
"# NOTE: run this once per load of df. Re-running it on already-encoded columns\n",
"# would turn the integer codes into strings and assign them new codes.\n",
"encoders = {}\n",
"for col in ['question no', 'answer', 'emotion']:\n",
"    encoders[col] = LabelEncoder()\n",
"    # Cast to str first so every value in the column is compared as text.\n",
"    df[col] = encoders[col].fit_transform(df[col].astype('str'))\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fallen-continuity",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question no</th>\n",
" <th>answer</th>\n",
" <th>emotion</th>\n",
" <th>reason</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>30</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>22</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>27</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>39</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1010</th>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1011</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1012</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1013</th>\n",
" <td>3</td>\n",
" <td>27</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1014</th>\n",
" <td>4</td>\n",
" <td>38</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1015 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" question no answer emotion reason\n",
"0 0 19 5 5\n",
"1 1 30 1 5\n",
"2 2 22 5 5\n",
"3 3 27 2 5\n",
"4 4 39 5 5\n",
"... ... ... ... ...\n",
"1010 0 19 5 5\n",
"1011 1 35 5 5\n",
"1012 2 4 5 5\n",
"1013 3 27 2 5\n",
"1014 4 38 5 5\n",
"\n",
"[1015 rows x 4 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the frame after label encoding (pandas elides the middle rows).\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "breeding-tuition",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question no</th>\n",
" <th>answer</th>\n",
" <th>emotion</th>\n",
" <th>reason</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1015.000000</td>\n",
" <td>1015.000000</td>\n",
" <td>1015.000000</td>\n",
" <td>1015.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2.000000</td>\n",
" <td>24.922167</td>\n",
" <td>3.901478</td>\n",
" <td>3.201970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.414911</td>\n",
" <td>11.073488</td>\n",
" <td>1.694680</td>\n",
" <td>1.561872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>17.000000</td>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000</td>\n",
" <td>27.000000</td>\n",
" <td>5.000000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3.000000</td>\n",
" <td>35.000000</td>\n",
" <td>5.000000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>4.000000</td>\n",
" <td>39.000000</td>\n",
" <td>5.000000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" question no answer emotion reason\n",
"count 1015.000000 1015.000000 1015.000000 1015.000000\n",
"mean 2.000000 24.922167 3.901478 3.201970\n",
"std 1.414911 11.073488 1.694680 1.561872\n",
"min 0.000000 0.000000 0.000000 1.000000\n",
"25% 1.000000 17.000000 2.000000 2.000000\n",
"50% 2.000000 27.000000 5.000000 4.000000\n",
"75% 3.000000 35.000000 5.000000 5.000000\n",
"max 4.000000 39.000000 5.000000 5.000000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-column summary statistics. NOTE: 'question no', 'answer' and 'emotion'\n",
"# hold LabelEncoder codes at this point, so their mean/std describe arbitrary\n",
"# category ids rather than measured quantities.\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "beautiful-explanation",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1015 entries, 0 to 1014\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 question no 1015 non-null int64\n",
" 1 answer 1015 non-null int64\n",
" 2 emotion 1015 non-null int64\n",
" 3 reason 1015 non-null int64\n",
"dtypes: int64(4)\n",
"memory usage: 31.8 KB\n"
]
}
],
"source": [
"# Check column dtypes and non-null counts before building the feature arrays.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "excited-ethernet",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcIklEQVR4nO3dfZRddX3v8feHMMBcHpxiZkGeZCpiqvIU7zFC42pzqZqEROGy7CpWpdCWFKW3eC9CiXp5sKVwS69Xllk1DWgRQRRLzOIKGLOqVEACTh4gYsytSGwekAykk4cyAgnf+8f+BU7OnDPnzMyZOZnffF5rnZVzfvu39/7uc37nc/bZZ0+2IgIzMxv7Dml1AWZm1hwOdDOzTDjQzcwy4UA3M8uEA93MLBMOdDOzTDjQxxFJ10q6Y5TWNUvSv0raI+nc0VhnoyQ9IOmPWl3HQCTNlrSl1XW0iqQuSSHp0CHO/2lJtza7roOdA70KSe+R9CNJOyXtkPSIpHcNc5kXSnq4ou02SX89vGr7rec2SS+nIN0haaWk3xrCcjZJeu8wSvkcsDgijoqI5cNYzrBU+xCLiHkR8dVW1WTNVe3DLyL+JiL+tFU1tYoDvYKkY4DvAF8EjgWmANcBL7WyrmoG2Hv524g4CpgKbAduG7WiXncC8FQL1jvuDHUvtpXGYs1jQkT4VnYDSkBvnT4XAxuA3cBPgXem9quAp8va/2tqfxvwa2AfsAfoBRYCrwAvp7b/m/pOBu4BeoBngL8oW++1wD8BdwC7gD+tUtttwF+XPZ4P7Cmb/46yaR+kCN1e4EHgban9a8CrQF+q7coBnoefAzuAe4HJqf3pivkPrzJvve38VtrO3cB64K3AIooPqM3A+yuWdW+q4+fAxal9bnp+X0l1PJHaH9z/3FHs1HwW+GVa9u3AG9K0LiCAPwL+DXge+MwA42I+sDa9NpuBa8umDbgsoD29dv9OMXauALYMsK4ALgX+FXgmtS0A1qXX80fAqWX9q47NNO0twL8AO1Nd3yyb9tvAj9O0HwO/XTbtQeCvgEfScr8HTKxR72xgC/CXwK8oxtghZXW9ANwNHFvxfB2aHl/E6++5XwB/ltqPpBhnr6bXeE8aD9fSwFhP0zYBnwKeTNv5TeCIVmfRkPKr1QUcbDfgmDS4vgrMA36jYvrvA1uBdwFKb4YTyqZNTgP1D4D/ACalaRcCD1cs6zYODN9DgNXA1cBhwJvT4J2Tpl9LEU7npr7tVep/bZnAUcDXgYfK5r8j3X9rqu99QBtwJUUYHpambwLeO8DzdBbFm/+dwOEU32h+WDa95vwNbuevgTnAoRQh+wzwmVTrxaQQS/3/Bfh74AjgdIoPid+r3Oay/g/yeqD/cdruN6fnaxnwtTStiyJUbqEI3NMovqm9rcZ2zQZOSdt3KvAccG4jywJuBB6i+FY4DfgJ9QN9Zerfnl6H7cC7gQkUHxybSB+mDDw270rP7SHpOXxPaj+W4gPmY+l1+HB6/May5/FpirHUnh7fOMBzsxf4XxTjpR34JLCK4pvk4cA/AHdVPF/7A30+cCLFe+53gRd5fUdqduVzxeDH+uPp+TmW4oPjklZn0ZDyq9UFHIw3ij3q2yj2KPZS7P0dl6atAC5rcDnrgHPS/QupH+jvBv6tos8i4B/T/WspC80a67yNIgx7KfaE7gVOLJt//yD/n8DdZfMdQvFBNTs93sTAgf5likM7+x8fRfFh01Vv/ga3c2XZtA9Q7HlNSI+PTm/2Dorw2wccXdb/BuC2ym0um/4grwf6PwOfKJs2PW3HobweKlPLpj8OnN/g6/8F4P+k+wMui+IDbW7ZtIXUD/Szyh5/Cfirij4bgd9tYGzeDiwtry21fwx4vKLtUeDCsufxs2XTPgF8t8b6ZlN8WzqirG0D6YM3PZ5U5bk/tMbylpPeh9QP9EbG+kfLpv8tsKSR1/hgu/kYehURsSEiLoyIqcDJFJ/cX0iTp1
HslfQj6QJJ6yT1SupN804cxKpPACbvnz8t49PAcWV9NjewnL+LiI6IOD4iPhgR1eqdTHGYAYCIeDUte0qDtVbOv4fim00j8zeync+V3e8Dno+IfWWPofgQmQzsiIjdZf1/OdTtSPcPrajlV2X3X0zr7UfSuyX9QFKPpJ3AJfR//WstazIHvrblNdVS3v8E4PKK53RaWm69sXklxZ7v45KekvTHZTVV1lH53Db03CQ9EfHripq/XVbTBooP5+MqZ5Q0T9Kq9EN/L3A2jb+3Ghnrg9mOg5YDvY6I+BnFXu/JqWkzxVe/A0g6geLr9J9TfCXtoPjarP2Lqrb4isebKQ4ldJTdjo6IsweYZ6i2Ubyh9tcvigDY2uB6Kuc/Enhj2fwDaWQ7G7UNOFbS0WVtb2KI25Hm3cuBHyiN+jrFN6JpEfEGYAmvv/71PEvx/JfXUU/5tm0Grq94Tv9TRNxVb2xGxK8i4uKImAz8GfD3kt5C/+dmf12NvMb16t1f87yKmo+IiAOWL+lwit9b/o7im3IHcD8Dv7fK1Rvr2XCgV5D0W5IulzQ1PZ5GcexwVepyK/ApSf9ZhbekN8yRFAOrJ813Ea9/CEAREFMlHVbR9uayx48DuyT9paR2SRMknTzcUyZruBuYL+n3JLUBl1Mc0/1RjdoqfR24SNLp6Q33N8BjEbGpgXU3bTsjYnOq+QZJR0g6FfgT4M6y7eiSVGus3wX8d0m/KemotB3fjIi9g62F4lDQjoj4taSZwB8OYt67gUWSfiONvf82yHXfAlySviVI0pGS5qcPugHHpqTf3z/eKY6RB8We8v3AWyX9oaRDJf0B8HaKs8CaYQlwfXr/IKlT0jlV+h1GcYy9B9graR7w/rLpzwFvlPSGGuupN9az4UDvbzfFMd7HJP0HRZD/hGIQEBHfAq6nCLTdFMfyjo2InwL/m+IY43MUP449Urbc71P8yv4rSc+nti8Db09fOZenQwofoPhh7xmKHx1vBWoN1CGLiI3ARyl+zHw+rfcDEfFy6nID8NlU26eqzP/PFMcm76HYuzwROL/BdTd7Oz9Mccx1G/Bt4JqIWJmmfSv9+4KkNVXm/QrFGRc/TLX8msGH6X6fAD4naTfFD753D2Le6ygOCzxDcbbI1waz4ojopvixeDFFKP+c4ncbGhib76IY73sovmFcFhHPRMQLFGfOXE5xOO1KYEFEPE9z3JzW9730nK2ieO9Vbttu4C8ons9/p/igvLds+s8oPph/kcbr5Ir56431bCj9CGBmZmOc99DNzDLhQDczy4QD3cwsEw50M7NMtOw/yJk4cWJ0dXW1avVmZmPS6tWrn4+IzmrTWhboXV1ddHd3t2r1ZmZjkqSaf0XsQy5mZplwoJuZZcKBbmaWCQe6mVkmHOhmZplo6CwXSZso/iOqfcDeiChVTBfFf7RzNsX/JXxhRFT7j5CG5SO3PMojT+947fGsE4/lzovPbPZqDipdV93Xr23TjfNbUMnoGY/bPB7H9ng00mN7MHvo/yUiTq8M82QecFK6LaS4ekpTVQ54gEee3sFHbnm02as6aFR78Qdqz8F43ObxOLbHo9EY28065HIOcHsUVgEdkiY1adkA/QZ8vXazscJj25ql0UAPiv+zeLWkhVWmT+HAy2FtocolwCQtlNQtqbunp2fw1ZqZWU2NBvqsiHgnxaGVSyX9TsX0apfZ6vcfrUfE0ogoRUSps7PqX66amdkQNRToEbEt/bud4oowMyu6bOHA6yFOpbh6TNPMOvHYQbWbjRUe29YsdQM9XZvw6P33Ka7l95OKbvcCF6RrGZ4B7IyIZ5tZ6J0Xn9lvgOd+JkCtX79zPuNjPG7zeBzb49FojO26l6CT9GaKvXIoTnP8ekRcL+kSgIhYkk5bXAzMpTht8aJ0jcOaSqVS+D/nMjMbHEmra5xtWP889Ij4BXBalfYlZfcDuHQ4RZqZ2fD4L0XNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQD
czy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMNBzokiZIWivpO1WmzZa0U9K6dLu6uWWamVk9da9YVOYyYANwTI3pD0XEguGXZGZmQ9HQHrqkqcB84NaRLcfMzIaq0UMuXwCuBF4doM+Zkp6Q9ICkd1TrIGmhpG5J3T09PYMs1czMBlI30CUtALZHxOoBuq0BToiI04AvAsurdYqIpRFRiohSZ2fnUOo1M7MaGtlDnwV8UNIm4BvAWZLuKO8QEbsiYk+6fz/QJmlis4s1M7Pa6gZ6RCyKiKkR0QWcD3w/Ij5a3kfS8ZKU7s9My31hBOo1M7MaBnOWywEkXQIQEUuADwEfl7QX6APOj4hoTolmZtYItSp3S6VSdHd3t2TdZmZjlaTVEVGqNs1/KWpmlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWWi4SsWSZoAdANbI2JBxTQBNwNnAy8CF0bEmmYWOl51XXVfv7ZNN85vQSWjZzxu83i0fO1WblqxkW29fUzuaOeKOdM5d8aUVpc1okZ6bA9mD/0yYEONafOAk9JtIfClYdZlVH/xB2rPwXjc5vFo+dqtLFq2nq29fQSwtbePRcvWs3zt1laXNmJGY2w3FOiSpgLzgVtrdDkHuD0Kq4AOSZOaVKOZZeamFRvpe2XfAW19r+zjphUbW1RRHhrdQ/8CcCXwao3pU4DNZY+3pLYDSFooqVtSd09Pz2DqNLOMbOvtG1S7NaZuoEtaAGyPiNUDdavS1u/q0xGxNCJKEVHq7OwcRJlmlpPJHe2DarfGNLKHPgv4oKRNwDeAsyTdUdFnCzCt7PFUYFtTKjSz7FwxZzrtbRMOaGtvm8AVc6a3qKI81A30iFgUEVMjogs4H/h+RHy0otu9wAUqnAHsjIhnm1/u+FLr1++cz/gYj9s8Hp07Ywo3nHcKUzraETClo50bzjsl67NcRmNsK6LfkZHanaXZwKciYoGkSwAiYkk6bXExMJfitMWLIqJ7oGWVSqXo7h6wi5mZVZC0OiJK1aY1fB46QEQ8CDyY7i8paw/g0qGXaGZmw+W/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMNHJN0SMkPS7pCUlPSbquSp/ZknZKWpduV49MuWZmVksjF7h4CTgrIvZIagMelvRARKyq6PdQRCxofolmZtaIuoGerka0Jz1sS7fGr1tnZmajoqFj6JImSFoHbAdWRsRjVbqdmQ7LPCDpHTWWs1BSt6Tunp6eoVdtZmb9NBToEbEvIk4HpgIzJZ1c0WUNcEJEnAZ8EVheYzlLI6IUEaXOzs6hV21mZv0M6iyXiOiluEj03Ir2XRGxJ92/H2iTNLFJNZqZWQMaOculU1JHut8OvBf4WUWf4yUp3Z+ZlvtC06s1M7OaGjnLZRLwVUkTKIL67oj4jqRLACJiCfAh4OOS9gJ9wPnpx1QzMxsljZzl8iQwo0r7krL7i4HFzS3NzMwGw38pamaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZaLuBS4kHQH8EDg89f+niLimoo+Am4GzgReBCyNiTbOL/cgtj/LI0zteezzrxGO58+Izm72ag0rXVff1a9t04/wWVDJ63n39Sp7b/fJrj487+jAe+8z7WljRyFu+dis3rdjItt4+Jne0c8Wc6Zw7Y0qry7ImG+n3cyN76C8BZ0XEacDpwFxJZ1T0mQeclG4LgS81rcKkMswBHnl6Bx+55dFmr+qgUe3FH6g9B5
VhDvDc7pd59/UrW1TRyFu+diuLlq1na28fAWzt7WPRsvUsX7u11aVZE43G+7luoEdhT3rYlm6V1ws9B7g99V0FdEia1LQqoV+Y12u3sakyzOu15+CmFRvpe2XfAW19r+zjphUbW1SRjVUNHUOXNEHSOmA7sDIiHqvoMgXYXPZ4S2qrXM5CSd2Sunt6eoZYslletvX2DardrJaGAj0i9kXE6cBUYKakkyu6qNpsVZazNCJKEVHq7OwcdLFmOZrc0T6odrNaBnWWS0T0Ag8CcysmbQGmlT2eCmwbTmGVZp147KDabWw67ujDBtWegyvmTKe9bcIBbe1tE7hizvQWVWRjVd1Al9QpqSPdbwfeC/ysotu9wAUqnAHsjIhnm1nonRef2S+8cz/Lpdav3zmf5fLYZ97XL7xzP8vl3BlTuOG8U5jS0Y6AKR3t3HDeKT7LJTOj8X5WRL8jIwd2kE4FvgpMoPgAuDsiPifpEoCIWJJOW1xMsef+InBRRHQPtNxSqRTd3QN2MTOzCpJWR0Sp2rS656FHxJPAjCrtS8ruB3DpcIo0M7Ph8V+KmpllwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmWjkEnTTJP1A0gZJT0m6rEqf2ZJ2SlqXblePTLlmZlZL3SsWAXuByyNijaSjgdWSVkbETyv6PRQRC5pfopmZNaLuHnpEPBsRa9L93cAGwFevNTM7yAzqGLqkLorriz5WZfKZkp6Q9ICkd9SYf6GkbkndPT09g6/WzMxqajjQJR0F3AN8MiJ2VUxeA5wQEacBXwSWV1tGRCyNiFJElDo7O4dYspmZVdNQoEtqowjzOyNiWeX0iNgVEXvS/fuBNkkTm1qpmZkNqJGzXAR8GdgQEZ+v0ef41A9JM9NyX2hmoWZmNrBGznKZBXwMWC9pXWr7NPAmgIhYAnwI+LikvUAfcH5ERPPLNTOzWuoGekQ8DKhOn8XA4mYVZWZmg+e/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NM1P3/0CVNA24HjgdeBZZGxM0VfQTcDJwNvAhcGBFrml/u+NN11X392jbdOL8FlYye8bjNNj6M9NhuZA99L3B5RLwNOAO4VNLbK/rMA05Kt4XAl5pW4ThW7cUfqD0H43GbbXwYjbFdN9Aj4tn9e9sRsRvYAEyp6HYOcHsUVgEdkiY1rUozM6trUMfQJXUBM4DHKiZNATaXPd5C/9BH0kJJ3ZK6e3p6BlmqmZkNpOFAl3QUcA/wyYjYVTm5yiz9LhIdEUsjohQRpc7OzsFVamZmA2oo0CW1UYT5nRGxrEqXLcC0ssdTgW3DL8/MzBpVN9DTGSxfBjZExOdrdLsXuECFM4CdEfFsE+scl2r9+p3zGR/jcZttfBiNsa2IfkdGDuwgvQd4CFhPcdoiwKeBNwFExJIU+ouBuRSnLV4UEd0DLbdUKkV394BdzMysgqTVEVGqNq3ueegR8TDVj5GX9wng0qGVZ2ZmzeC/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLRyCXoviJpu6Sf1Jg+W9JOSevS7erml2lmZvXUvWIRcBvF5eVuH6DPQxGxoCkVmZnZkNTdQ4+IHwI7RqEWMzMbhmYdQz9T0hOSHpD0jlqdJC2U1C2pu6enp0mrNjMzaE6grwFOiIjTgC8Cy2t1jIilEVGKiFJnZ2cTVm1mZvsNO9AjYldE7En37wfaJE0cdmVmZjYoww50ScdLUro/My3zheEu18zMBqfuWS6S7gJmAxMlbQGuAdoAImIJ8C
Hg45L2An3A+RERI1axmZlVVTfQI+LDdaYvpjit0czMWsh/KWpmlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWWikSsWfQVYAGyPiJOrTBdwM3A28CJwYUSsaXahAKde8112vbTvtcfHHD6BJ6+bOxKrOmh0XXVfv7ZNN85vQSWjZzxu8/K1W7lpxUa29fYxuaOdK+ZM59wZU1pdljXZSI/tRvbQbwMGSs15wEnpthD40vDL6q8yzAF2vbSPU6/57kis7qBQ7cUfqD0H43Gbl6/dyqJl69na20cAW3v7WLRsPcvXbm11adZEozG26wZ6RPwQ2DFAl3OA26OwCuiQNKlZBe5XGeb12s3GiptWbKTvlQPHcd8r+7hpxcYWVWRjVTOOoU8BNpc93pLa+pG0UFK3pO6enp4mrNps7NvW2zeodrNamhHoqtIW1TpGxNKIKEVEqbOzswmrNhv7Jne0D6rdrJZmBPoWYFrZ46nAtiYs9wDHHD5hUO1mY8UVc6bT3nbgOG5vm8AVc6a3qCIbq5oR6PcCF6hwBrAzIp5twnIP8OR1c/uFd+5nudT69TvnMz7G4zafO2MKN5x3ClM62hEwpaOdG847xWe5ZGY0xrYiqh4deb2DdBcwG5gIPAdcA7QBRMSSdNriYoozYV4ELoqI7norLpVK0d1dt5uZmZWRtDoiStWm1T0PPSI+XGd6AJcOsTYzM2sS/6WomVkmHOhmZplwoJuZZcKBbmaWibpnuYzYiqUe4JdDnH0i8HwTyxkLvM3jg7d5fBjONp8QEVX/MrNlgT4ckrprnbaTK2/z+OBtHh9Gapt9yMXMLBMOdDOzTIzVQF/a6gJawNs8Pnibx4cR2eYxeQzdzMz6G6t76GZmVsGBbmaWiTEX6JLmStoo6eeSrmp1PSNN0lckbZf0k1bXMlokTZP0A0kbJD0l6bJW1zTSJB0h6XFJT6Rtvq7VNY0GSRMkrZX0nVbXMhokbZK0XtI6SU3/72bH1DF0SROA/we8j+LCGj8GPhwRP21pYSNI0u8Aeyiu23pyq+sZDematJMiYo2ko4HVwLmZv84CjoyIPZLagIeBy9J1erMl6X8AJeCYiFjQ6npGmqRNQCkiRuQPqcbaHvpM4OcR8YuIeBn4BsVFqrPVwEW6sxMRz0bEmnR/N7CBGtepzUW6yPqe9LAt3cbO3tYQSJoKzAdubXUtuRhrgd7wBaktD5K6gBnAYy0uZcSlww/rgO3AyojIfZu/AFwJvNriOkZTAN+TtFrSwmYvfKwFesMXpLaxT9JRwD3AJyNiV6vrGWkRsS8iTqe4Lu9MSdkeYpO0ANgeEatbXcsomxUR7wTmAZemQ6pNM9YCfVQuSG2tl44j3wPcGRHLWl3PaIqIXuBBiss65moW8MF0TPkbwFmS7mhtSSMvIralf7cD36Y4jNw0Yy3QfwycJOk3JR0GnE9xkWrLSPqB8MvAhoj4fKvrGQ2SOiV1pPvtwHuBn7W0qBEUEYsiYmpEdFG8j78fER9tcVkjStKR6Ud+JB0JvB9o6tlrYyrQI2Iv8OfACoofyu6OiKdaW9XIShfpfhSYLmmLpD9pdU2jYBbwMYq9tnXpdnarixphk4AfSHqSYsdlZUSMi1P5xpHjgIclPQE8DtwXEd9t5grG1GmLZmZW25jaQzczs9oc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5ll4v8DK49bLe4ke1sAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Scatter of encoded emotion against reason. No `cmap` is passed because no\n",
"# `c` array is supplied: matplotlib ignores cmap without colour data and emits\n",
"# a warning. Both axes are discrete codes, so many points overlap.\n",
"plt.scatter(df['emotion'], df['reason'])\n",
"plt.xlabel('emotion (label-encoded)')\n",
"plt.ylabel('reason')\n",
"plt.title('Scatter Plot of emotion and reason relation')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "conventional-spectrum",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'np' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [12]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m X_var \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241m.\u001b[39masarray(df[[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m]])\n\u001b[0;32m 2\u001b[0m y_var \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreason\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mX_var samples : \u001b[39m\u001b[38;5;124m'\u001b[39m, X_var[:\u001b[38;5;241m5\u001b[39m])\n",
"\u001b[1;31mNameError\u001b[0m: name 'np' is not defined"
]
}
],
"source": [
"# Feature matrix: the three label-encoded columns, in this column order.\n",
"X_var = df[['question no', 'answer', 'emotion']].to_numpy()\n",
"# Target vector: the reason class of each row.\n",
"y_var = df['reason'].to_numpy()\n",
"\n",
"# Print the first five entries of each as a quick sanity check.\n",
"print('X_var samples : ', X_var[:5])\n",
"print('y_var samples : ', y_var[:5])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "physical-ghana",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 30% of the rows for testing. The fixed random_state keeps the\n",
"# split reproducible across runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_var,\n",
"    y_var,\n",
"    test_size=0.3,\n",
"    random_state=4,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "executed-swaziland",
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'LogisticRegression' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [10]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m lr \u001b[38;5;241m=\u001b[39m \u001b[43mLogisticRegression\u001b[49m(solver \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlbfgs\u001b[39m\u001b[38;5;124m'\u001b[39m, max_iter\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# lr = LogisticRegression()\u001b[39;00m\n\u001b[0;32m 3\u001b[0m lr\u001b[38;5;241m.\u001b[39mfit(X_train,y_train)\n",
"\u001b[1;31mNameError\u001b[0m: name 'LogisticRegression' is not defined"
]
}
],
"source": [
"# Baseline logistic-regression classifier. max_iter must be large enough for\n",
"# lbfgs to converge: with max_iter=1 the solver stops after a single iteration\n",
"# and the model ends up predicting one class for every sample.\n",
"lr = LogisticRegression(solver='lbfgs', max_iter=1000)\n",
"lr.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "distant-bulgaria",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 2, 2, 5],\n",
" [ 3, 27, 2],\n",
" [ 2, 2, 5],\n",
" [ 2, 9, 5],\n",
" [ 2, 5, 5],\n",
" [ 2, 22, 5],\n",
" [ 1, 30, 1],\n",
" [ 4, 38, 5],\n",
" [ 1, 35, 2],\n",
" [ 4, 37, 0],\n",
" [ 0, 19, 5],\n",
" [ 3, 27, 5],\n",
" [ 3, 27, 5],\n",
" [ 1, 30, 1],\n",
" [ 4, 38, 5],\n",
" [ 2, 9, 5],\n",
" [ 3, 27, 2],\n",
" [ 1, 35, 5],\n",
" [ 1, 36, 5],\n",
" [ 1, 35, 5],\n",
" [ 1, 35, 5],\n",
" [ 1, 35, 5],\n",
" [ 4, 38, 5],\n",
" [ 2, 9, 2],\n",
" [ 0, 16, 2],\n",
" [ 3, 27, 5],\n",
" [ 4, 37, 5],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 2],\n",
" [ 3, 27, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 30, 5],\n",
" [ 4, 38, 5],\n",
" [ 4, 37, 1],\n",
" [ 0, 19, 5],\n",
" [ 3, 33, 5],\n",
" [ 0, 19, 5],\n",
" [ 1, 35, 5],\n",
" [ 4, 38, 2],\n",
" [ 3, 27, 2],\n",
" [ 0, 25, 5],\n",
" [ 0, 25, 5],\n",
" [ 0, 16, 5],\n",
" [ 0, 18, 5],\n",
" [ 2, 9, 5],\n",
" [ 2, 10, 5],\n",
" [ 3, 27, 2],\n",
" [ 2, 9, 5],\n",
" [ 3, 27, 2],\n",
" [ 0, 13, 5],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 5],\n",
" [ 2, 4, 5],\n",
" [ 2, 22, 5],\n",
" [ 4, 38, 5],\n",
" [ 4, 38, 5],\n",
" [ 1, 31, 0],\n",
" [ 0, 13, 5],\n",
" [ 0, 19, 1],\n",
" [ 3, 27, 5],\n",
" [ 1, 35, 5],\n",
" [ 2, 7, 1],\n",
" [ 0, 16, 5],\n",
" [ 2, 4, 5],\n",
" [ 4, 38, 5],\n",
" [ 2, 22, 1],\n",
" [ 1, 35, 5],\n",
" [ 0, 20, 5],\n",
" [ 2, 8, 1],\n",
" [ 3, 27, 5],\n",
" [ 3, 27, 2],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 5],\n",
" [ 0, 15, 5],\n",
" [ 1, 30, 1],\n",
" [ 2, 9, 5],\n",
" [ 0, 15, 5],\n",
" [ 2, 9, 5],\n",
" [ 3, 34, 0],\n",
" [ 0, 19, 5],\n",
" [ 3, 27, 5],\n",
" [ 2, 22, 1],\n",
" [ 0, 15, 5],\n",
" [ 3, 33, 1],\n",
" [ 4, 37, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 30, 1],\n",
" [ 3, 27, 5],\n",
" [ 0, 19, 5],\n",
" [ 3, 27, 5],\n",
" [ 3, 27, 2],\n",
" [ 4, 39, 2],\n",
" [ 1, 35, 5],\n",
" [ 4, 39, 5],\n",
" [ 1, 35, 5],\n",
" [ 0, 14, 1],\n",
" [ 4, 39, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 2, 5],\n",
" [ 2, 2, 5],\n",
" [ 0, 14, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 35, 5],\n",
" [ 0, 19, 5],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 5],\n",
" [ 2, 8, 5],\n",
" [ 3, 27, 2],\n",
" [ 1, 35, 5],\n",
" [ 1, 30, 5],\n",
" [ 2, 9, 5],\n",
" [ 2, 2, 5],\n",
" [ 0, 20, 5],\n",
" [ 2, 9, 5],\n",
" [ 1, 30, 5],\n",
" [ 3, 27, 5],\n",
" [ 2, 22, 1],\n",
" [ 4, 39, 2],\n",
" [ 3, 27, 5],\n",
" [ 0, 20, 5],\n",
" [ 1, 35, 5],\n",
" [ 0, 17, 2],\n",
" [ 0, 19, 5],\n",
" [ 2, 4, 5],\n",
" [ 0, 13, 5],\n",
" [ 3, 27, 5],\n",
" [ 3, 27, 2],\n",
" [ 4, 38, 5],\n",
" [ 4, 38, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 30, 1],\n",
" [ 2, 7, 1],\n",
" [ 3, 27, 5],\n",
" [ 4, 37, 5],\n",
" [ 2, 4, 5],\n",
" [ 0, 15, 5],\n",
" [ 4, 37, 1],\n",
" [ 3, 27, 2],\n",
" [ 4, 37, 1],\n",
" [ 0, 19, 1],\n",
" [ 0, 19, 5],\n",
" [ 4, 37, 5],\n",
" [ 2, 7, 1],\n",
" [ 0, 19, 5],\n",
" [ 0, 17, 5],\n",
" [ 2, 10, 5],\n",
" [ 2, 9, 5],\n",
" [ 4, 38, 5],\n",
" [ 4, 39, 2],\n",
" [ 1, 35, 5],\n",
" [ 3, 33, 5],\n",
" [ 2, 4, 5],\n",
" [ 0, 19, 5],\n",
" [ 3, 27, 5],\n",
" [ 1, 35, 5],\n",
" [ 4, 38, 5],\n",
" [ 2, 4, 5],\n",
" [ 4, 37, 1],\n",
" [ 1, 30, 5],\n",
" [ 2, 4, 5],\n",
" [ 4, 38, 5],\n",
" [ 1, 35, 5],\n",
" [ 3, 27, 2],\n",
" [ 0, 15, 5],\n",
" [ 2, 9, 5],\n",
" [ 2, 4, 5],\n",
" [ 3, 33, 1],\n",
" [ 4, 38, 5],\n",
" [ 2, 22, 5],\n",
" [ 2, 9, 5],\n",
" [ 1, 35, 5],\n",
" [ 0, 15, 5],\n",
" [ 2, 4, 5],\n",
" [ 2, 4, 5],\n",
" [ 2, 10, 5],\n",
" [ 3, 33, 1],\n",
" [ 4, 38, 5],\n",
" [ 4, 38, 5],\n",
" [ 0, 20, 5],\n",
" [ 1, 35, 5],\n",
" [ 0, 19, 5],\n",
" [ 0, 15, 5],\n",
" [ 4, 37, 0],\n",
" [ 1, 35, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 9, 5],\n",
" [ 0, 16, 5],\n",
" [ 2, 10, 5],\n",
" [ 2, 8, 5],\n",
" [ 0, 19, 5],\n",
" [ 1, 35, 5],\n",
" [ 3, 27, 2],\n",
" [ 2, 2, 5],\n",
" [ 4, 37, 1],\n",
" [ 1, 35, 5],\n",
" [ 4, 38, 2],\n",
" [ 3, 27, 2],\n",
" [ 4, 37, 5],\n",
" [ 0, 19, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 21, 1],\n",
" [ 1, 30, 1],\n",
" [ 2, 10, 5],\n",
" [ 1, 35, 5],\n",
" [ 2, 4, 5],\n",
" [ 4, 38, 5],\n",
" [ 1, 35, 2],\n",
" [ 4, 38, 5],\n",
" [ 1, 35, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 10, 2],\n",
" [ 1, 30, 1],\n",
" [ 2, 8, 1],\n",
" [ 2, 7, 5],\n",
" [ 4, 38, 5],\n",
" [ 0, 25, 1],\n",
" [ 3, 27, 5],\n",
" [ 2, 21, 1],\n",
" [ 4, 38, 5],\n",
" [ 1, 35, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 35, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 2, 5],\n",
" [ 4, 39, 5],\n",
" [ 2, 7, 1],\n",
" [ 3, 27, 2],\n",
" [ 4, 37, 5],\n",
" [ 0, 16, 5],\n",
" [ 4, 37, 1],\n",
" [ 2, 22, 5],\n",
" [ 4, 37, 5],\n",
" [ 4, 37, 1],\n",
" [ 0, 25, 1],\n",
" [ 2, 2, 1],\n",
" [ 3, 27, 2],\n",
" [ 1, 30, 1],\n",
" [ 3, 27, 2],\n",
" [ 3, 28, 2],\n",
" [ 4, 38, 2],\n",
" [ 2, 7, 5],\n",
" [ 0, 25, 1],\n",
" [ 3, 32, 1],\n",
" [ 2, 2, 5],\n",
" [ 3, 27, 5],\n",
" [ 1, 30, 1],\n",
" [ 3, 27, 2],\n",
" [ 4, 38, 5],\n",
" [ 1, 30, 5],\n",
" [ 2, 9, 5],\n",
" [ 0, 19, 5],\n",
" [ 1, 30, 1],\n",
" [ 2, 4, 5],\n",
" [ 4, 39, 5],\n",
" [ 4, 38, 5],\n",
" [ 0, 16, 5],\n",
" [ 1, 35, 5],\n",
" [ 4, 39, 5],\n",
" [ 4, 39, 5],\n",
" [ 0, 25, 1],\n",
" [ 4, 38, 5],\n",
" [ 0, 16, 5],\n",
" [ 0, 25, 1],\n",
" [ 1, 30, 1],\n",
" [ 0, 19, 5],\n",
" [ 3, 27, 5],\n",
" [ 1, 0, 1],\n",
" [ 3, 33, 1],\n",
" [ 0, 19, 5],\n",
" [ 1, 30, 5],\n",
" [ 4, 37, 5],\n",
" [ 3, 27, 2],\n",
" [ 0, 19, 5],\n",
" [ 2, 2, 5],\n",
" [ 1, 30, 5],\n",
" [ 4, 37, 5],\n",
" [ 2, 9, 5],\n",
" [ 1, 30, 1],\n",
" [ 1, 35, 5],\n",
" [ 0, 13, 5],\n",
" [ 3, 27, 5],\n",
" [ 1, 30, 5],\n",
" [ 3, 27, 2],\n",
" [ 3, 27, 5],\n",
" [ 3, 33, 0],\n",
" [ 0, 16, 5],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 2],\n",
" [ 0, 16, 5],\n",
" [ 0, 19, 5],\n",
" [ 0, 19, 5],\n",
" [ 2, 23, 5],\n",
" [ 0, 13, 5],\n",
" [ 4, 37, 1],\n",
" [ 0, 20, 5],\n",
" [ 3, 27, 2],\n",
" [ 2, 7, 5],\n",
" [ 3, 27, 5],\n",
" [ 4, 38, 5],\n",
" [ 3, 27, 2],\n",
" [ 3, 27, 2],\n",
" [ 0, 20, 5],\n",
" [ 3, 27, 5],\n",
" [ 2, 9, 5],\n",
" [ 0, 19, 5]])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first few encoded test rows instead of dumping the whole array\n",
"# into the notebook.\n",
"X_test[:5]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "right-partition",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([3, 5, 5, 5, 3, 4, 2, 4, 4, 5, 5, 5, 1, 1, 4, 5, 2, 2, 4, 5, 3, 4,\n",
" 5, 2, 5, 1, 2, 1, 1, 4, 4, 2, 4, 2, 5, 4, 4, 4, 5, 5, 5, 2, 2, 1,\n",
" 5, 3, 4, 2, 5, 4, 1, 5, 3, 4, 5, 4, 1, 2, 1, 1, 4, 1, 1, 1, 5, 5,\n",
" 4, 1, 4, 1, 5, 1, 1, 5, 2, 5, 3, 5, 5, 2, 5, 5, 5, 1, 2, 1, 1, 5,\n",
" 5, 5, 1, 2, 4, 5, 5, 4, 1, 2, 1, 5, 4, 1, 5, 2, 5, 5, 5, 4, 2, 1,\n",
" 5, 1, 4, 4, 2, 4, 5, 5, 5, 3, 5, 5, 4, 5, 4, 2, 4, 4, 4, 1, 2, 5,\n",
" 5, 5, 4, 2, 1, 4, 4, 1, 1, 3, 4, 2, 3, 5, 2, 5, 4, 2, 5, 5, 5, 2,\n",
" 4, 5, 1, 2, 1, 5, 4, 5, 2, 5, 2, 5, 1, 5, 2, 2, 5, 4, 4, 2, 1, 3,\n",
" 1, 3, 5, 5, 4, 3, 1, 2, 1, 4, 2, 1, 5, 4, 5, 5, 1, 2, 3, 5, 4, 2,\n",
" 2, 1, 1, 5, 4, 5, 4, 4, 3, 5, 5, 1, 5, 1, 1, 2, 5, 3, 5, 2, 1, 5,\n",
" 2, 5, 3, 3, 5, 1, 1, 2, 4, 2, 5, 5, 2, 4, 1, 5, 1, 1, 4, 4, 4, 1,\n",
" 3, 5, 3, 2, 2, 1, 2, 1, 1, 2, 5, 5, 5, 3, 5, 4, 4, 1, 5, 5, 1, 1,\n",
" 2, 2, 1, 3, 4, 3, 2, 1, 4, 1, 2, 2, 4, 1, 5, 5, 5, 4, 5, 1, 3, 5,\n",
" 1, 4, 1, 4, 1, 3, 5, 5, 2, 1, 5, 4, 5, 1, 1, 5, 5, 1, 2])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first few test labels instead of dumping the whole array.\n",
"y_test[:5]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "relevant-tsunami",
"metadata": {},
"outputs": [],
"source": [
"y_pred = lr.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "welcome-reduction",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.3180327868852459"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of held-out rows whose predicted reason equals the true one.\n",
"score = accuracy_score(y_true=y_test, y_pred=y_pred)\n",
"score"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "executed-compatibility",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0, 0, 0, 0, 69],\n",
" [ 0, 0, 0, 0, 54],\n",
" [ 0, 0, 0, 0, 24],\n",
" [ 0, 0, 0, 0, 61],\n",
" [ 0, 0, 0, 0, 97]])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confusion matrix for the held-out rows. In scikit-learn's convention rows\n",
"# are the true classes and columns are the predicted classes.\n",
"confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "global-melbourne",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 4 38 5]]\n",
"Prediction: [5]\n"
]
}
],
"source": [
"# Single hand-built sample in the same column order as X_var\n",
"# (question no, answer, emotion). The values are label-encoded codes,\n",
"# not raw survey answers.\n",
"features = np.array([[ 4, 38, 5]])\n",
"print(features)\n",
"# predict() returns an array with one class label per input row.\n",
"prediction = lr.predict(features)\n",
"print(\"Prediction: {}\".format(prediction))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "selected-cattle",
"metadata": {},
"outputs": [],
"source": [
"param_grid = [ \n",
" {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],\n",
" 'C' : np.logspace(-1, 1, 2000),\n",
" 'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],\n",
" 'max_iter' : [1000]\n",
" }\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "appointed-controversy",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'lr' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [4]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m clf \u001b[38;5;241m=\u001b[39m GridSearchCV(\u001b[43mlr\u001b[49m, param_grid \u001b[38;5;241m=\u001b[39m param_grid, cv \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m3\u001b[39m, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'lr' is not defined"
]
}
],
"source": [
"clf = GridSearchCV(lr, param_grid = param_grid, cv = 3, verbose=True, n_jobs=-1)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "southwest-disco",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 40000 candidates, totalling 120000 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 2.3s\n",
"[Parallel(n_jobs=-1)]: Done 920 tasks | elapsed: 7.5s\n",
"[Parallel(n_jobs=-1)]: Done 2920 tasks | elapsed: 19.8s\n",
"[Parallel(n_jobs=-1)]: Done 5720 tasks | elapsed: 37.7s\n",
"[Parallel(n_jobs=-1)]: Done 9320 tasks | elapsed: 1.0min\n",
"[Parallel(n_jobs=-1)]: Done 13720 tasks | elapsed: 1.5min\n",
"[Parallel(n_jobs=-1)]: Done 18920 tasks | elapsed: 2.2min\n",
"[Parallel(n_jobs=-1)]: Done 24920 tasks | elapsed: 2.8min\n",
"[Parallel(n_jobs=-1)]: Done 31720 tasks | elapsed: 3.5min\n",
"[Parallel(n_jobs=-1)]: Done 39320 tasks | elapsed: 4.3min\n",
"[Parallel(n_jobs=-1)]: Done 47720 tasks | elapsed: 5.2min\n",
"[Parallel(n_jobs=-1)]: Done 56920 tasks | elapsed: 6.2min\n",
"[Parallel(n_jobs=-1)]: Done 66920 tasks | elapsed: 7.3min\n",
"[Parallel(n_jobs=-1)]: Done 77720 tasks | elapsed: 8.5min\n",
"[Parallel(n_jobs=-1)]: Done 87056 tasks | elapsed: 9.6min\n",
"[Parallel(n_jobs=-1)]: Done 93256 tasks | elapsed: 10.4min\n",
"[Parallel(n_jobs=-1)]: Done 99856 tasks | elapsed: 11.3min\n",
"[Parallel(n_jobs=-1)]: Done 106856 tasks | elapsed: 12.3min\n",
"[Parallel(n_jobs=-1)]: Done 114256 tasks | elapsed: 13.2min\n",
"[Parallel(n_jobs=-1)]: Done 120000 out of 120000 | elapsed: 14.0min finished\n"
]
}
],
"source": [
"best_clf = clf.fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "coastal-commitment",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression(C=0.1253282641400343, max_iter=1000, penalty='l1',\n",
" solver='liblinear')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_clf.best_estimator_"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "pointed-humanitarian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy - : 0.299\n"
]
}
],
"source": [
"print (f'Accuracy - : {best_clf.score(X_train,y_train):.3f}')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "stretch-tobacco",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.32786885245901637"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, l1_ratio=None, max_iter=1000,\n",
" multi_class='auto', n_jobs=None, penalty='l2',\n",
" random_state=5042, solver='lbfgs', tol=0.0001, verbose=0,\n",
" warm_start=False)\n",
"model.fit(X_train,y_train)\n",
"y_pred = model.predict(X_test)\n",
"score =accuracy_score(y_test,y_pred)\n",
"score"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "coordinated-monthly",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.32786885245901637"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred = model.predict(X_test)\n",
"score =accuracy_score(y_test,y_pred)\n",
"score"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "major-delaware",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[11, 0, 0, 0, 58],\n",
" [ 7, 0, 0, 0, 47],\n",
" [ 4, 0, 0, 0, 20],\n",
" [ 3, 0, 0, 0, 58],\n",
" [ 8, 0, 0, 0, 89]])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "italian-blake",
"metadata": {},
"outputs": [],
"source": [
"filename = 'trained_models/finalized_model_lr.sav'\n",
"pickle.dump(model, open(filename, 'wb'))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "miniature-clerk",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.32786885245901637\n"
]
}
],
"source": [
"loaded_model = pickle.load(open(filename, 'rb'))\n",
"result = loaded_model.score(X_test, y_test)\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "positive-tablet",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment