dataset loaded

68170fbf · Udara Rangika · d35ad8fc · 68170fbf
Commit 68170fbf authored Nov 07, 2023 by Udara Rangika
Hide whitespace changes
Inline Side-by-side

Showing with 84 additions and 0 deletions

proficiency-identification.ipynb proficiency-identification.ipynb +84 -0

No files found.
--- a/proficiency-identification.ipynb
+++ b/proficiency-identification.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle, os\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from collections import defaultdict \n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Target encoding: proficiency label -> integer class id.\n",
+    "class_dict = {'Basic': 0, 'Intermediate': 1, 'Advanced': 2}\n",
+    "\n",
+    "# Inverse lookup (integer class id -> proficiency label), built from\n",
+    "# class_dict so the two tables always agree.\n",
+    "class_dict_rev = {class_id: label for label, class_id in class_dict.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the CSV as-is and show its first five rows.\n",
+    "df = pd.read_csv('data/dataset.csv')\n",
+    "df.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load Data, Encode Labels, and Split into Train/Test Sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the CSV fresh so this cell gives the same result on every run.\n",
+    "# Student_ID is dropped and takes no part in X.\n",
+    "df = pd.read_csv('data/dataset.csv').drop(columns=['Student_ID'])\n",
+    "\n",
+    "# Detach the target column; from here on df holds only the feature columns.\n",
+    "proficiency = df.pop('Proficiency_Level')\n",
+    "encoded = proficiency.map(class_dict)\n",
+    "\n",
+    "# Series.map yields NaN for any value that is not a key of class_dict, so\n",
+    "# fail loudly here instead of passing NaN targets on to the split.\n",
+    "unmapped = proficiency[encoded.isna()].unique()\n",
+    "if len(unmapped) > 0:\n",
+    "    raise ValueError(f'Unexpected Proficiency_Level values: {list(unmapped)}')\n",
+    "\n",
+    "Y = encoded.values\n",
+    "X = df.values\n",
+    "\n",
+    "# Hold out 15% of the rows for testing; the fixed random_state keeps the\n",
+    "# split reproducible across runs.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, Y, test_size=0.15, random_state=42\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}