Upload New File

aa2a87e7 · Karagoda Gamage Pasan Malaka · 4855344e · aa2a87e7
Commit aa2a87e7 authored Oct 08, 2022 by Karagoda Gamage Pasan Malaka
Show whitespace changes
Inline Side-by-side

Showing with 1214 additions and 0 deletions

Logistic_regression_.ipynb Logistic_regression_.ipynb +1214 -0

No files found.
--- a/Logistic_regression_.ipynb
+++ b/Logistic_regression_.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "changing-opening",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import make_classification\n",
+    "from matplotlib import pyplot as plt\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import pickle\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
+    "from sklearn.preprocessing import OneHotEncoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "floral-tours",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>question no</th>\n",
+       "      <th>answer</th>\n",
+       "      <th>emotion</th>\n",
+       "      <th>reason</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9 hours</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>heavy</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>between 1-2 hours</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>friendly staff</td>\n",
+       "      <td>3</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>satisfy</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   question no             answer  emotion  reason\n",
+       "0            1            9 hours        6       5\n",
+       "1            2              heavy        1       5\n",
+       "2            3  between 1-2 hours        6       5\n",
+       "3            4     friendly staff        3       5\n",
+       "4            5           satisfy         6       5"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.read_csv('data/reason_data_up.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "asian-passenger",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'df' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[1;32mIn [2]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m      2\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m      3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "df['question no'] = df['question no'].astype('str')\n",
+    "df['answer'] = df['answer'].astype('str')\n",
+    "df['emotion'] = df['emotion'].astype('str')\n",
+    "df[['question no','answer','emotion']] =  df[['question no','answer','emotion']].apply(LabelEncoder().fit_transform)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "fallen-continuity",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>question no</th>\n",
+       "      <th>answer</th>\n",
+       "      <th>emotion</th>\n",
+       "      <th>reason</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>19</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>30</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>22</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>39</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1010</th>\n",
+       "      <td>0</td>\n",
+       "      <td>19</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1011</th>\n",
+       "      <td>1</td>\n",
+       "      <td>35</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1012</th>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1013</th>\n",
+       "      <td>3</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1014</th>\n",
+       "      <td>4</td>\n",
+       "      <td>38</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1015 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      question no  answer  emotion  reason\n",
+       "0               0      19        5       5\n",
+       "1               1      30        1       5\n",
+       "2               2      22        5       5\n",
+       "3               3      27        2       5\n",
+       "4               4      39        5       5\n",
+       "...           ...     ...      ...     ...\n",
+       "1010            0      19        5       5\n",
+       "1011            1      35        5       5\n",
+       "1012            2       4        5       5\n",
+       "1013            3      27        2       5\n",
+       "1014            4      38        5       5\n",
+       "\n",
+       "[1015 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "breeding-tuition",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>question no</th>\n",
+       "      <th>answer</th>\n",
+       "      <th>emotion</th>\n",
+       "      <th>reason</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>1015.000000</td>\n",
+       "      <td>1015.000000</td>\n",
+       "      <td>1015.000000</td>\n",
+       "      <td>1015.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>24.922167</td>\n",
+       "      <td>3.901478</td>\n",
+       "      <td>3.201970</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>1.414911</td>\n",
+       "      <td>11.073488</td>\n",
+       "      <td>1.694680</td>\n",
+       "      <td>1.561872</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>27.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>35.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>39.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       question no       answer      emotion       reason\n",
+       "count  1015.000000  1015.000000  1015.000000  1015.000000\n",
+       "mean      2.000000    24.922167     3.901478     3.201970\n",
+       "std       1.414911    11.073488     1.694680     1.561872\n",
+       "min       0.000000     0.000000     0.000000     1.000000\n",
+       "25%       1.000000    17.000000     2.000000     2.000000\n",
+       "50%       2.000000    27.000000     5.000000     4.000000\n",
+       "75%       3.000000    35.000000     5.000000     5.000000\n",
+       "max       4.000000    39.000000     5.000000     5.000000"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "beautiful-explanation",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 1015 entries, 0 to 1014\n",
+      "Data columns (total 4 columns):\n",
+      " #   Column       Non-Null Count  Dtype\n",
+      "---  ------       --------------  -----\n",
+      " 0   question no  1015 non-null   int64\n",
+      " 1   answer       1015 non-null   int64\n",
+      " 2   emotion      1015 non-null   int64\n",
+      " 3   reason       1015 non-null   int64\n",
+      "dtypes: int64(4)\n",
+      "memory usage: 31.8 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "excited-ethernet",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcIklEQVR4nO3dfZRddX3v8feHMMBcHpxiZkGeZCpiqvIU7zFC42pzqZqEROGy7CpWpdCWFKW3eC9CiXp5sKVwS69Xllk1DWgRQRRLzOIKGLOqVEACTh4gYsytSGwekAykk4cyAgnf+8f+BU7OnDPnzMyZOZnffF5rnZVzfvu39/7uc37nc/bZZ0+2IgIzMxv7Dml1AWZm1hwOdDOzTDjQzcwy4UA3M8uEA93MLBMOdDOzTDjQxxFJ10q6Y5TWNUvSv0raI+nc0VhnoyQ9IOmPWl3HQCTNlrSl1XW0iqQuSSHp0CHO/2lJtza7roOdA70KSe+R9CNJOyXtkPSIpHcNc5kXSnq4ou02SX89vGr7rec2SS+nIN0haaWk3xrCcjZJeu8wSvkcsDgijoqI5cNYzrBU+xCLiHkR8dVW1WTNVe3DLyL+JiL+tFU1tYoDvYKkY4DvAF8EjgWmANcBL7WyrmoG2Hv524g4CpgKbAduG7WiXncC8FQL1jvuDHUvtpXGYs1jQkT4VnYDSkBvnT4XAxuA3cBPgXem9quAp8va/2tqfxvwa2AfsAfoBRYCrwAvp7b/m/pOBu4BeoBngL8oW++1wD8BdwC7gD+tUtttwF+XPZ4P7Cmb/46yaR+kCN1e4EHgban9a8CrQF+q7coBnoefAzuAe4HJqf3pivkPrzJvve38VtrO3cB64K3AIooPqM3A+yuWdW+q4+fAxal9bnp+X0l1PJHaH9z/3FHs1HwW+GVa9u3AG9K0LiCAPwL+DXge+MwA42I+sDa9NpuBa8umDbgsoD29dv9OMXauALYMsK4ALgX+FXgmtS0A1qXX80fAqWX9q47NNO0twL8AO1Nd3yyb9tvAj9O0HwO/XTbtQeCvgEfScr8HTKxR72xgC/CXwK8oxtghZXW9ANwNHFvxfB2aHl/E6++5XwB/ltqPpBhnr6bXeE8aD9fSwFhP0zYBnwKeTNv5TeCIVmfRkPKr1QUcbDfgmDS4vgrMA36jYvrvA1uBdwFKb4YTyqZNTgP1D4D/ACalaRcCD1cs6zYODN9DgNXA1cBhwJvT4J2Tpl9LEU7npr7tVep/bZnAUcDXgYfK5r8j3X9rqu99QBtwJUUYHpambwLeO8DzdBbFm/+dwOEU32h+WDa95vwNbuevgTnAoRQh+wzwmVTrxaQQS/3/Bfh74AjgdIoPid+r3Oay/g/yeqD/cdruN6fnaxnwtTStiyJUbqEI3NMovqm9rcZ2zQZOSdt3KvAccG4jywJuBB6i+FY4DfgJ9QN9Zerfnl6H7cC7gQkUHxybSB+mDDw270rP7SHpOXxPaj+W4gPmY+l1+HB6/May5/FpirHUnh7fOMBzsxf4XxTjpR34JLCK4pvk4cA/AHdVPF/7A30+cCLFe+53gRd5fUdqduVzxeDH+uPp+TmW4oPjklZn0ZDyq9UFHIw3ij3q2yj2KPZS7P0dl6atAC5rcDnrgHPS/QupH+jvBv6tos8i4B/T/WspC80a67yNIgx7KfaE7gVOLJt//yD/n8DdZfMdQvFBNTs93sTAgf5likM7+x8fRfFh01Vv/ga3c2XZtA9Q7HlNSI+PTm/2Dorw2wccXdb/BuC2ym0um/4grwf6PwOfKJs2PW3HobweKlPLpj8OnN/g6/8F4P+k+wMui+IDbW7ZtIXUD/Szyh5/Cfirij4bgd9tYGzeDiwtry21fwx4vKLtUeDCsufxs2XTPgF8t8b6ZlN8WzqirG0D6YM3PZ5U5bk/tMbylpPeh9QP9EbG+kfLpv8tsKSR1/hgu/kYehURsSEiLoyIqcDJFJ/cX0iTp1HslfQj6QJJ6yT1SupN804cxKpPACbvnz8t49PAcWV9NjewnL+LiI6IOD4iPhgR1eqdTHGYAYCIeDUte0qDtVbOv4fim00j8zeync+V3e8Dno+IfWWPofgQmQzsiIjdZf1/OdTtSPcPrajlV2X3X0zr7UfSuyX9QFKPpJ3AJfR//WstazIHvrblNdVS3v8E4PKK53RaWm69sXklxZ7v45KekvTHZTVV1lH53Db03CQ9EfHripq/XVbTBooP5+MqZ5Q0T9Kq9EN/L3A2jb+3Ghnrg9mOg5YDvY6I+BnFXu/JqWkzxVe/A0g6geLr9J9TfCXtoPjarP2Lqrb4isebKQ4ldJTdjo6IsweYZ6i2Ubyh9tcvigDY2uB6Kuc/Enhj2fwDaWQ7G7UNOFbS0WVtb2KI25Hm3cuBHyiN+jrFN6JpEfEGYAmvv/71PEvx/JfXUU/5tm0Grq94Tv9TRNxVb2xGxK8i4uKImAz8GfD3kt5C/+dmf12NvMb16t1f87yKmo+IiAOWL+lwit9b/o7im3IHcD8Dv7fK1Rvr2XCgV5D0W5IulzQ1PZ5GcexwVepyK/ApSf9ZhbekN8yRFAOrJ813Ea9/CEAREFMlHVbR9uayx48DuyT9paR2SRMknTzcUyZruBuYL+n3JLUBl1Mc0/1RjdoqfR24SNLp6Q33N8BjEbGpgXU3bTsjYnOq+QZJR0g6FfgT4M6y7eiSVGus3wX8d0m/KemotB3fjIi9g62F4lDQjoj4taSZwB8OYt67gUWSfiONvf82yHXfAlySviVI0pGS5qcPugHHpqTf3z/eKY6RB8We8v3AWyX9oaRDJf0B8HaKs8CaYQlwfXr/IKlT0jlV+h1GcYy9B9graR7w/rLpzwFvlPSGGuupN9az4UDvbzfFMd7HJP0HRZD/hGIQEBHfAq6nCLTdFMfyjo2InwL/m+IY43MUP449Urbc71P8yv4rSc+nti8Db09fOZenQwofoPhh7xmKHx1vBWoN1CGLiI3ARyl+zHw+rfcDEfFy6nID8NlU26eqzP/PFMcm76HYuzwROL/BdTd7Oz9Mccx1G/Bt4JqIWJmmfSv9+4KkNVXm/QrFGRc/TLX8msGH6X6fAD4naTfFD753D2Le6ygOCzxDcbbI1waz4ojopvixeDFFKP+c4ncbGhib76IY73sovmFcFhHPRMQLFGfOXE5xOO1KYEFEPE9z3JzW9730nK2ieO9Vbttu4C8ons9/p/igvLds+s8oPph/kcbr5Ir56431bCj9CGBmZmOc99DNzDLhQDczy4QD3cwsEw50M7NMtOw/yJk4cWJ0dXW1avVmZmPS6tWrn4+IzmrTWhboXV1ddHd3t2r1ZmZjkqSaf0XsQy5mZplwoJuZZcKBbmaWCQe6mVkmHOhmZplo6CwXSZso/iOqfcDeiChVTBfFf7RzNsX/JXxhRFT7j5CG5SO3PMojT+947fGsE4/lzovPbPZqDipdV93Xr23TjfNbUMnoGY/bPB7H9ng00mN7MHvo/yUiTq8M82QecFK6LaS4ekpTVQ54gEee3sFHbnm02as6aFR78Qdqz8F43ObxOLbHo9EY28065HIOcHsUVgEdkiY1adkA/QZ8vXazscJj25ql0UAPiv+zeLWkhVWmT+HAy2FtocolwCQtlNQtqbunp2fw1ZqZWU2NBvqsiHgnxaGVSyX9TsX0apfZ6vcfrUfE0ogoRUSps7PqX66amdkQNRToEbEt/bud4oowMyu6bOHA6yFOpbh6TNPMOvHYQbWbjRUe29YsdQM9XZvw6P33Ka7l95OKbvcCF6RrGZ4B7IyIZ5tZ6J0Xn9lvgOd+JkCtX79zPuNjPG7zeBzb49FojO26l6CT9GaKvXIoTnP8ekRcL+kSgIhYkk5bXAzMpTht8aJ0jcOaSqVS+D/nMjMbHEmra5xtWP889Ij4BXBalfYlZfcDuHQ4RZqZ2fD4L0XNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMNBzokiZIWivpO1WmzZa0U9K6dLu6uWWamVk9da9YVOYyYANwTI3pD0XEguGXZGZmQ9HQHrqkqcB84NaRLcfMzIaq0UMuXwCuBF4doM+Zkp6Q9ICkd1TrIGmhpG5J3T09PYMs1czMBlI30CUtALZHxOoBuq0BToiI04AvAsurdYqIpRFRiohSZ2fnUOo1M7MaGtlDnwV8UNIm4BvAWZLuKO8QEbsiYk+6fz/QJmlis4s1M7Pa6gZ6RCyKiKkR0QWcD3w/Ij5a3kfS8ZKU7s9My31hBOo1M7MaBnOWywEkXQIQEUuADwEfl7QX6APOj4hoTolmZtYItSp3S6VSdHd3t2TdZmZjlaTVEVGqNs1/KWpmlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWWi4SsWSZoAdANbI2JBxTQBNwNnAy8CF0bEmmYWOl51XXVfv7ZNN85vQSWjZzxu83i0fO1WblqxkW29fUzuaOeKOdM5d8aUVpc1okZ6bA9mD/0yYEONafOAk9JtIfClYdZlVH/xB2rPwXjc5vFo+dqtLFq2nq29fQSwtbePRcvWs3zt1laXNmJGY2w3FOiSpgLzgVtrdDkHuD0Kq4AOSZOaVKOZZeamFRvpe2XfAW19r+zjphUbW1RRHhrdQ/8CcCXwao3pU4DNZY+3pLYDSFooqVtSd09Pz2DqNLOMbOvtG1S7NaZuoEtaAGyPiNUDdavS1u/q0xGxNCJKEVHq7OwcRJlmlpPJHe2DarfGNLKHPgv4oKRNwDeAsyTdUdFnCzCt7PFUYFtTKjSz7FwxZzrtbRMOaGtvm8AVc6a3qKI81A30iFgUEVMjogs4H/h+RHy0otu9wAUqnAHsjIhnm1/u+FLr1++cz/gYj9s8Hp07Ywo3nHcKUzraETClo50bzjsl67NcRmNsK6LfkZHanaXZwKciYoGkSwAiYkk6bXExMJfitMWLIqJ7oGWVSqXo7h6wi5mZVZC0OiJK1aY1fB46QEQ8CDyY7i8paw/g0qGXaGZmw+W/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMNHJN0SMkPS7pCUlPSbquSp/ZknZKWpduV49MuWZmVksjF7h4CTgrIvZIagMelvRARKyq6PdQRCxofolmZtaIuoGerka0Jz1sS7fGr1tnZmajoqFj6JImSFoHbAdWRsRjVbqdmQ7LPCDpHTWWs1BSt6Tunp6eoVdtZmb9NBToEbEvIk4HpgIzJZ1c0WUNcEJEnAZ8EVheYzlLI6IUEaXOzs6hV21mZv0M6iyXiOiluEj03Ir2XRGxJ92/H2iTNLFJNZqZWQMaOculU1JHut8OvBf4WUWf4yUp3Z+ZlvtC06s1M7OaGjnLZRLwVUkTKIL67oj4jqRLACJiCfAh4OOS9gJ9wPnpx1QzMxsljZzl8iQwo0r7krL7i4HFzS3NzMwGw38pamaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZcKBbmaWCQe6mVkmHOhmZplwoJuZZaLuBS4kHQH8EDg89f+niLimoo+Am4GzgReBCyNiTbOL/cgtj/LI0zteezzrxGO58+Izm72ag0rXVff1a9t04/wWVDJ63n39Sp7b/fJrj487+jAe+8z7WljRyFu+dis3rdjItt4+Jne0c8Wc6Zw7Y0qry7ImG+n3cyN76C8BZ0XEacDpwFxJZ1T0mQeclG4LgS81rcKkMswBHnl6Bx+55dFmr+qgUe3FH6g9B5VhDvDc7pd59/UrW1TRyFu+diuLlq1na28fAWzt7WPRsvUsX7u11aVZE43G+7luoEdhT3rYlm6V1ws9B7g99V0FdEia1LQqoV+Y12u3sakyzOu15+CmFRvpe2XfAW19r+zjphUbW1SRjVUNHUOXNEHSOmA7sDIiHqvoMgXYXPZ4S2qrXM5CSd2Sunt6eoZYslletvX2DardrJaGAj0i9kXE6cBUYKakkyu6qNpsVZazNCJKEVHq7OwcdLFmOZrc0T6odrNaBnWWS0T0Ag8CcysmbQGmlT2eCmwbTmGVZp147KDabWw67ujDBtWegyvmTKe9bcIBbe1tE7hizvQWVWRjVd1Al9QpqSPdbwfeC/ysotu9wAUqnAHsjIhnm1nonRef2S+8cz/Lpdav3zmf5fLYZ97XL7xzP8vl3BlTuOG8U5jS0Y6AKR3t3HDeKT7LJTOj8X5WRL8jIwd2kE4FvgpMoPgAuDsiPifpEoCIWJJOW1xMsef+InBRRHQPtNxSqRTd3QN2MTOzCpJWR0Sp2rS656FHxJPAjCrtS8ruB3DpcIo0M7Ph8V+KmpllwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmWjkEnTTJP1A0gZJT0m6rEqf2ZJ2SlqXblePTLlmZlZL3SsWAXuByyNijaSjgdWSVkbETyv6PRQRC5pfopmZNaLuHnpEPBsRa9L93cAGwFevNTM7yAzqGLqkLorriz5WZfKZkp6Q9ICkd9SYf6GkbkndPT09g6/WzMxqajjQJR0F3AN8MiJ2VUxeA5wQEacBXwSWV1tGRCyNiFJElDo7O4dYspmZVdNQoEtqowjzOyNiWeX0iNgVEXvS/fuBNkkTm1qpmZkNqJGzXAR8GdgQEZ+v0ef41A9JM9NyX2hmoWZmNrBGznKZBXwMWC9pXWr7NPAmgIhYAnwI+LikvUAfcH5ERPPLNTOzWuoGekQ8DKhOn8XA4mYVZWZmg+e/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NM1P3/0CVNA24HjgdeBZZGxM0VfQTcDJwNvAhcGBFrml/u+NN11X392jbdOL8FlYye8bjNNj6M9NhuZA99L3B5RLwNOAO4VNLbK/rMA05Kt4XAl5pW4ThW7cUfqD0H43GbbXwYjbFdN9Aj4tn9e9sRsRvYAEyp6HYOcHsUVgEdkiY1rUozM6trUMfQJXUBM4DHKiZNATaXPd5C/9BH0kJJ3ZK6e3p6BlmqmZkNpOFAl3QUcA/wyYjYVTm5yiz9LhIdEUsjohQRpc7OzsFVamZmA2oo0CW1UYT5nRGxrEqXLcC0ssdTgW3DL8/MzBpVN9DTGSxfBjZExOdrdLsXuECFM4CdEfFsE+scl2r9+p3zGR/jcZttfBiNsa2IfkdGDuwgvQd4CFhPcdoiwKeBNwFExJIU+ouBuRSnLV4UEd0DLbdUKkV394BdzMysgqTVEVGqNq3ueegR8TDVj5GX9wng0qGVZ2ZmzeC/FDUzy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLhQDczy4QD3cwsEw50M7NMONDNzDLRyCXoviJpu6Sf1Jg+W9JOSevS7erml2lmZvXUvWIRcBvF5eVuH6DPQxGxoCkVmZnZkNTdQ4+IHwI7RqEWMzMbhmYdQz9T0hOSHpD0jlqdJC2U1C2pu6enp0mrNjMzaE6grwFOiIjTgC8Cy2t1jIilEVGKiFJnZ2cTVm1mZvsNO9AjYldE7En37wfaJE0cdmVmZjYoww50ScdLUro/My3zheEu18zMBqfuWS6S7gJmAxMlbQGuAdoAImIJ8CHg45L2An3A+RERI1axmZlVVTfQI+LDdaYvpjit0czMWsh/KWpmlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWXCgW5mlgkHuplZJhzoZmaZcKCbmWWikSsWfQVYAGyPiJOrTBdwM3A28CJwYUSsaXahAKde8112vbTvtcfHHD6BJ6+bOxKrOmh0XXVfv7ZNN85vQSWjZzxu8/K1W7lpxUa29fYxuaOdK+ZM59wZU1pdljXZSI/tRvbQbwMGSs15wEnpthD40vDL6q8yzAF2vbSPU6/57kis7qBQ7cUfqD0H43Gbl6/dyqJl69na20cAW3v7WLRsPcvXbm11adZEozG26wZ6RPwQ2DFAl3OA26OwCuiQNKlZBe5XGeb12s3GiptWbKTvlQPHcd8r+7hpxcYWVWRjVTOOoU8BNpc93pLa+pG0UFK3pO6enp4mrNps7NvW2zeodrNamhHoqtIW1TpGxNKIKEVEqbOzswmrNhv7Jne0D6rdrJZmBPoWYFrZ46nAtiYs9wDHHD5hUO1mY8UVc6bT3nbgOG5vm8AVc6a3qCIbq5oR6PcCF6hwBrAzIp5twnIP8OR1c/uFd+5nudT69TvnMz7G4zafO2MKN5x3ClM62hEwpaOdG847xWe5ZGY0xrYiqh4deb2DdBcwG5gIPAdcA7QBRMSSdNriYoozYV4ELoqI7norLpVK0d1dt5uZmZWRtDoiStWm1T0PPSI+XGd6AJcOsTYzM2sS/6WomVkmHOhmZplwoJuZZcKBbmaWibpnuYzYiqUe4JdDnH0i8HwTyxkLvM3jg7d5fBjONp8QEVX/MrNlgT4ckrprnbaTK2/z+OBtHh9Gapt9yMXMLBMOdDOzTIzVQF/a6gJawNs8Pnibx4cR2eYxeQzdzMz6G6t76GZmVsGBbmaWiTEX6JLmStoo6eeSrmp1PSNN0lckbZf0k1bXMlokTZP0A0kbJD0l6bJW1zTSJB0h6XFJT6Rtvq7VNY0GSRMkrZX0nVbXMhokbZK0XtI6SU3/72bH1DF0SROA/we8j+LCGj8GPhwRP21pYSNI0u8Aeyiu23pyq+sZDematJMiYo2ko4HVwLmZv84CjoyIPZLagIeBy9J1erMl6X8AJeCYiFjQ6npGmqRNQCkiRuQPqcbaHvpM4OcR8YuIeBn4BsVFqrPVwEW6sxMRz0bEmnR/N7CBGtepzUW6yPqe9LAt3cbO3tYQSJoKzAdubXUtuRhrgd7wBaktD5K6gBnAYy0uZcSlww/rgO3AyojIfZu/AFwJvNriOkZTAN+TtFrSwmYvfKwFesMXpLaxT9JRwD3AJyNiV6vrGWkRsS8iTqe4Lu9MSdkeYpO0ANgeEatbXcsomxUR7wTmAZemQ6pNM9YCfVQuSG2tl44j3wPcGRHLWl3PaIqIXuBBiss65moW8MF0TPkbwFmS7mhtSSMvIralf7cD36Y4jNw0Yy3QfwycJOk3JR0GnE9xkWrLSPqB8MvAhoj4fKvrGQ2SOiV1pPvtwHuBn7W0qBEUEYsiYmpEdFG8j78fER9tcVkjStKR6Ud+JB0JvB9o6tlrYyrQI2Iv8OfACoofyu6OiKdaW9XIShfpfhSYLmmLpD9pdU2jYBbwMYq9tnXpdnarixphk4AfSHqSYsdlZUSMi1P5xpHjgIclPQE8DtwXEd9t5grG1GmLZmZW25jaQzczs9oc6GZmmXCgm5llwoFuZpYJB7qZWSYc6GZmmXCgm5ll4v8DK49bLe4ke1sAAAAASUVORK5CYII=\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.scatter(df['emotion'], df['reason'], cmap='rainbow')\n",
+    "plt.title('Scatter Plot of emotion and reason relation')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "conventional-spectrum",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'np' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[1;32mIn [12]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m X_var \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241m.\u001b[39masarray(df[[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquestion no\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124manswer\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124memotion\u001b[39m\u001b[38;5;124m'\u001b[39m]])\n\u001b[0;32m      2\u001b[0m y_var \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreason\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m      4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mX_var samples : \u001b[39m\u001b[38;5;124m'\u001b[39m, X_var[:\u001b[38;5;241m5\u001b[39m])\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'np' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "X_var = np.asarray(df[['question no','answer','emotion']])\n",
+    "y_var = np.asarray(df['reason'])\n",
+    "\n",
+    "print('X_var samples : ', X_var[:5])\n",
+    "print('y_var samples : ', y_var[:5])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "physical-ghana",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X_var, y_var, test_size = 0.3, random_state = 4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "executed-swaziland",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'LogisticRegression' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[1;32mIn [10]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m lr \u001b[38;5;241m=\u001b[39m \u001b[43mLogisticRegression\u001b[49m(solver \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlbfgs\u001b[39m\u001b[38;5;124m'\u001b[39m, max_iter\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m      2\u001b[0m \u001b[38;5;66;03m# lr = LogisticRegression()\u001b[39;00m\n\u001b[0;32m      3\u001b[0m lr\u001b[38;5;241m.\u001b[39mfit(X_train,y_train)\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'LogisticRegression' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "lr = LogisticRegression(solver = 'lbfgs', max_iter=1)\n",
+    "# lr = LogisticRegression()\n",
+    "lr.fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "distant-bulgaria",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 2,  2,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 2,  5,  5],\n",
+       "       [ 2, 22,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 35,  2],\n",
+       "       [ 4, 37,  0],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 1, 36,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 2,  9,  2],\n",
+       "       [ 0, 16,  2],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 33,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 38,  2],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 25,  5],\n",
+       "       [ 0, 25,  5],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 0, 18,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 2, 10,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 13,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 2, 22,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 31,  0],\n",
+       "       [ 0, 13,  5],\n",
+       "       [ 0, 19,  1],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 2,  7,  1],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 2, 22,  1],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 2,  8,  1],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 3, 34,  0],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2, 22,  1],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 3, 33,  1],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 39,  2],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 14,  1],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 0, 14,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2,  8,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2, 22,  1],\n",
+       "       [ 4, 39,  2],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 17,  2],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 0, 13,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 2,  7,  1],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 0, 19,  1],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 2,  7,  1],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 0, 17,  5],\n",
+       "       [ 2, 10,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 4, 39,  2],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 3, 33,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 3, 33,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 2, 22,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 2, 10,  5],\n",
+       "       [ 3, 33,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 0, 15,  5],\n",
+       "       [ 4, 37,  0],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 2, 10,  5],\n",
+       "       [ 2,  8,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 38,  2],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2, 21,  1],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 2, 10,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 35,  2],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2, 10,  2],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 2,  8,  1],\n",
+       "       [ 2,  7,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 0, 25,  1],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2, 21,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 2,  7,  1],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 2, 22,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 0, 25,  1],\n",
+       "       [ 2,  2,  1],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 3, 28,  2],\n",
+       "       [ 4, 38,  2],\n",
+       "       [ 2,  7,  5],\n",
+       "       [ 0, 25,  1],\n",
+       "       [ 3, 32,  1],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 2,  4,  5],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 4, 39,  5],\n",
+       "       [ 0, 25,  1],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 0, 25,  1],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1,  0,  1],\n",
+       "       [ 3, 33,  1],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2,  2,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 4, 37,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 1, 30,  1],\n",
+       "       [ 1, 35,  5],\n",
+       "       [ 0, 13,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 1, 30,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 3, 33,  0],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 16,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 0, 19,  5],\n",
+       "       [ 2, 23,  5],\n",
+       "       [ 0, 13,  5],\n",
+       "       [ 4, 37,  1],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 2,  7,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 4, 38,  5],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 3, 27,  2],\n",
+       "       [ 0, 20,  5],\n",
+       "       [ 3, 27,  5],\n",
+       "       [ 2,  9,  5],\n",
+       "       [ 0, 19,  5]])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "right-partition",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([3, 5, 5, 5, 3, 4, 2, 4, 4, 5, 5, 5, 1, 1, 4, 5, 2, 2, 4, 5, 3, 4,\n",
+       "       5, 2, 5, 1, 2, 1, 1, 4, 4, 2, 4, 2, 5, 4, 4, 4, 5, 5, 5, 2, 2, 1,\n",
+       "       5, 3, 4, 2, 5, 4, 1, 5, 3, 4, 5, 4, 1, 2, 1, 1, 4, 1, 1, 1, 5, 5,\n",
+       "       4, 1, 4, 1, 5, 1, 1, 5, 2, 5, 3, 5, 5, 2, 5, 5, 5, 1, 2, 1, 1, 5,\n",
+       "       5, 5, 1, 2, 4, 5, 5, 4, 1, 2, 1, 5, 4, 1, 5, 2, 5, 5, 5, 4, 2, 1,\n",
+       "       5, 1, 4, 4, 2, 4, 5, 5, 5, 3, 5, 5, 4, 5, 4, 2, 4, 4, 4, 1, 2, 5,\n",
+       "       5, 5, 4, 2, 1, 4, 4, 1, 1, 3, 4, 2, 3, 5, 2, 5, 4, 2, 5, 5, 5, 2,\n",
+       "       4, 5, 1, 2, 1, 5, 4, 5, 2, 5, 2, 5, 1, 5, 2, 2, 5, 4, 4, 2, 1, 3,\n",
+       "       1, 3, 5, 5, 4, 3, 1, 2, 1, 4, 2, 1, 5, 4, 5, 5, 1, 2, 3, 5, 4, 2,\n",
+       "       2, 1, 1, 5, 4, 5, 4, 4, 3, 5, 5, 1, 5, 1, 1, 2, 5, 3, 5, 2, 1, 5,\n",
+       "       2, 5, 3, 3, 5, 1, 1, 2, 4, 2, 5, 5, 2, 4, 1, 5, 1, 1, 4, 4, 4, 1,\n",
+       "       3, 5, 3, 2, 2, 1, 2, 1, 1, 2, 5, 5, 5, 3, 5, 4, 4, 1, 5, 5, 1, 1,\n",
+       "       2, 2, 1, 3, 4, 3, 2, 1, 4, 1, 2, 2, 4, 1, 5, 5, 5, 4, 5, 1, 3, 5,\n",
+       "       1, 4, 1, 4, 1, 3, 5, 5, 2, 1, 5, 4, 5, 1, 1, 5, 5, 1, 2])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "relevant-tsunami",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = lr.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "welcome-reduction",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.3180327868852459"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "score =accuracy_score(y_test,y_pred)\n",
+    "score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "executed-compatibility",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 0,  0,  0,  0, 69],\n",
+       "       [ 0,  0,  0,  0, 54],\n",
+       "       [ 0,  0,  0,  0, 24],\n",
+       "       [ 0,  0,  0,  0, 61],\n",
+       "       [ 0,  0,  0,  0, 97]])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "confusion_matrix(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "global-melbourne",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 4 38  5]]\n",
+      "Prediction: [5]\n"
+     ]
+    }
+   ],
+   "source": [
+    "features = np.array([[ 4, 38,  5]])\n",
+    "print(features)\n",
+    "prediction = lr.predict(features)\n",
+    "print(\"Prediction: {}\".format(prediction))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "selected-cattle",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "param_grid = [    \n",
+    "    {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],\n",
+    "    'C' : np.logspace(-1, 1, 2000),\n",
+    "    'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],\n",
+    "    'max_iter' : [1000]\n",
+    "    }\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "appointed-controversy",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'lr' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[1;32mIn [4]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m clf \u001b[38;5;241m=\u001b[39m GridSearchCV(\u001b[43mlr\u001b[49m, param_grid \u001b[38;5;241m=\u001b[39m param_grid, cv \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m3\u001b[39m, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'lr' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "clf = GridSearchCV(lr, param_grid = param_grid, cv = 3, verbose=True, n_jobs=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "southwest-disco",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 3 folds for each of 40000 candidates, totalling 120000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.3s\n",
+      "[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:    7.5s\n",
+      "[Parallel(n_jobs=-1)]: Done 2920 tasks      | elapsed:   19.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 5720 tasks      | elapsed:   37.7s\n",
+      "[Parallel(n_jobs=-1)]: Done 9320 tasks      | elapsed:  1.0min\n",
+      "[Parallel(n_jobs=-1)]: Done 13720 tasks      | elapsed:  1.5min\n",
+      "[Parallel(n_jobs=-1)]: Done 18920 tasks      | elapsed:  2.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 24920 tasks      | elapsed:  2.8min\n",
+      "[Parallel(n_jobs=-1)]: Done 31720 tasks      | elapsed:  3.5min\n",
+      "[Parallel(n_jobs=-1)]: Done 39320 tasks      | elapsed:  4.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 47720 tasks      | elapsed:  5.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 56920 tasks      | elapsed:  6.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 66920 tasks      | elapsed:  7.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 77720 tasks      | elapsed:  8.5min\n",
+      "[Parallel(n_jobs=-1)]: Done 87056 tasks      | elapsed:  9.6min\n",
+      "[Parallel(n_jobs=-1)]: Done 93256 tasks      | elapsed: 10.4min\n",
+      "[Parallel(n_jobs=-1)]: Done 99856 tasks      | elapsed: 11.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 106856 tasks      | elapsed: 12.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 114256 tasks      | elapsed: 13.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 120000 out of 120000 | elapsed: 14.0min finished\n"
+     ]
+    }
+   ],
+   "source": [
+    "best_clf = clf.fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "coastal-commitment",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=0.1253282641400343, max_iter=1000, penalty='l1',\n",
+       "                   solver='liblinear')"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "best_clf.best_estimator_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "pointed-humanitarian",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy - : 0.299\n"
+     ]
+    }
+   ],
+   "source": [
+    "print (f'Accuracy - : {best_clf.score(X_train,y_train):.3f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "stretch-tobacco",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.32786885245901637"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+    "                   intercept_scaling=1, l1_ratio=None, max_iter=1000,\n",
+    "                   multi_class='auto', n_jobs=None, penalty='l2',\n",
+    "                   random_state=5042, solver='lbfgs', tol=0.0001, verbose=0,\n",
+    "                   warm_start=False)\n",
+    "model.fit(X_train,y_train)\n",
+    "y_pred = model.predict(X_test)\n",
+    "score =accuracy_score(y_test,y_pred)\n",
+    "score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "coordinated-monthly",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.32786885245901637"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_pred = model.predict(X_test)\n",
+    "score =accuracy_score(y_test,y_pred)\n",
+    "score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "major-delaware",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[11,  0,  0,  0, 58],\n",
+       "       [ 7,  0,  0,  0, 47],\n",
+       "       [ 4,  0,  0,  0, 20],\n",
+       "       [ 3,  0,  0,  0, 58],\n",
+       "       [ 8,  0,  0,  0, 89]])"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "confusion_matrix(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "italian-blake",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filename = 'trained_models/finalized_model_lr.sav'\n",
+    "pickle.dump(model, open(filename, 'wb'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "miniature-clerk",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.32786885245901637\n"
+     ]
+    }
+   ],
+   "source": [
+    "loaded_model = pickle.load(open(filename, 'rb'))\n",
+    "result = loaded_model.score(X_test, y_test)\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "positive-tablet",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}