Commit c2337dc9 authored by IT20155070's avatar IT20155070

initial model

parent 10792cc6
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "mlYhNSx9a0wp"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n"
]
},
{
"cell_type": "code",
"source": [
"# Read the rehabilitation dataset CSV (path is relative to the notebook's working directory)\n",
"df = pd.read_csv(filepath_or_buffer='RPDataSetFinal.csv')"
],
"metadata": {
"id": "KtEi6giYEeGW"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Report the dataset size as (rows, columns)\n",
"print(\"Dataset dimensions:\", df.shape)\n",
"\n",
"# Show column names, non-null counts and dtypes.\n",
"# DataFrame.info() writes its report to stdout itself and returns None, so it is\n",
"# called directly: wrapping it in print() appends a stray \"None\" line to the output.\n",
"df.info()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sbGGoIVFcI1w",
"outputId": "0baf490a-21d0-44f0-f3d8-4531ccc53cfa"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Dataset dimensions: (1700, 17)\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1700 entries, 0 to 1699\n",
"Data columns (total 17 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 1700 non-null int64 \n",
" 1 age 1700 non-null int64 \n",
" 2 gender 1700 non-null object\n",
" 3 marital_status 1700 non-null object\n",
" 4 drug_type 1700 non-null object\n",
" 5 no_of_time_rehabilitated 1700 non-null int64 \n",
" 6 duration_of_last_rehabilitation_(Month) 1700 non-null int64 \n",
" 7 rehabilitation_center 1700 non-null object\n",
" 8 behavior_of_individual 1700 non-null object\n",
" 9 mental_health_status 1700 non-null object\n",
" 10 area_of_residence 1695 non-null object\n",
" 11 drug_use_in _that_area 1695 non-null object\n",
" 12 employed_before_rehabilitation 1700 non-null object\n",
" 13 employment_after_rehabilitation 1700 non-null object\n",
" 14 continue_receiving_drug_treatment 1700 non-null object\n",
" 15 drug_type_again _addicted 1700 non-null object\n",
" 16 status 1700 non-null object\n",
"dtypes: int64(4), object(13)\n",
"memory usage: 225.9+ KB\n",
"None\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Count rows that are exact duplicates of an earlier row (all columns compared);\n",
"# a result of 0 means there is nothing to de-duplicate.\n",
"df.duplicated().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3QBBeYM-Pjrr",
"outputId": "a7ad734a-ab60-428a-a1d7-cd9981de43c0"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"# Remove the columns marked as unwanted for this analysis.\n",
"# `df` is rebound to the returned frame instead of using inplace=True, so the frame\n",
"# object created by the loading cell is not mutated behind the reader's back.\n",
"useless_col = ['marital_status', 'area_of_residence']\n",
"df = df.drop(columns=useless_col)"
],
"metadata": {
"id": "M6Mso6W3RDMz"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print summary statistics (count, mean, std, min, quartiles, max).\n",
"# With default arguments describe() covers the numeric columns only.\n",
"print(df.describe())\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_2ucWfk4SFKT",
"outputId": "12c1ca92-8b44-44cf-c79e-f2d1888a310e"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" id age no_of_time_rehabilitated \\\n",
"count 1700.000000 1700.000000 1700.000000 \n",
"mean 850.500000 34.195294 4.681765 \n",
"std 490.892045 14.293550 2.664575 \n",
"min 1.000000 15.000000 1.000000 \n",
"25% 425.750000 23.000000 2.000000 \n",
"50% 850.500000 28.000000 4.000000 \n",
"75% 1275.250000 44.000000 6.000000 \n",
"max 1700.000000 70.000000 10.000000 \n",
"\n",
" duration_of_last_rehabilitation_(Month) \n",
"count 1700.000000 \n",
"mean 3.495294 \n",
"std 1.702915 \n",
"min 1.000000 \n",
"25% 2.000000 \n",
"50% 4.000000 \n",
"75% 5.000000 \n",
"max 6.000000 \n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Count missing (null) values in each column\n",
"print(df.isnull().sum())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kYAr65gXSPhA",
"outputId": "15310443-4774-4d3d-973e-fff3f5f94b9c"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"id 0\n",
"age 0\n",
"gender 0\n",
"drug_type 0\n",
"no_of_time_rehabilitated 0\n",
"duration_of_last_rehabilitation_(Month) 0\n",
"rehabilitation_center 0\n",
"behavior_of_individual 0\n",
"mental_health_status 0\n",
"drug_use_in _that_area 5\n",
"employed_before_rehabilitation 0\n",
"employment_after_rehabilitation 0\n",
"continue_receiving_drug_treatment 0\n",
"drug_type_again _addicted 0\n",
"status 0\n",
"dtype: int64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Drop every row that has a missing value in any remaining column and rebind df\n",
"# to the result (the original row index labels are kept, not renumbered)\n",
"df = df.dropna()"
],
"metadata": {
"id": "FvB2OXz19vnK"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Re-count missing values per column after dropna(); every count should now be 0\n",
"print(df.isnull().sum())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "reqmr-8b9xl2",
"outputId": "2800b71b-96d7-45d4-cc46-ca7691805c07"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"id 0\n",
"age 0\n",
"gender 0\n",
"drug_type 0\n",
"no_of_time_rehabilitated 0\n",
"duration_of_last_rehabilitation_(Month) 0\n",
"rehabilitation_center 0\n",
"behavior_of_individual 0\n",
"mental_health_status 0\n",
"drug_use_in _that_area 0\n",
"employed_before_rehabilitation 0\n",
"employment_after_rehabilitation 0\n",
"continue_receiving_drug_treatment 0\n",
"drug_type_again _addicted 0\n",
"status 0\n",
"dtype: int64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Display the cleaned DataFrame (the notebook renders a truncated first/last-rows preview)\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 644
},
"id": "Gv6xTawxfp88",
"outputId": "4793aea4-15b1-4ea6-9ce7-3bece07d6981"
},
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id age gender drug_type no_of_time_rehabilitated \\\n",
"0 1 45 Male Cannabis 6 \n",
"1 2 23 Male Cannabis 4 \n",
"2 3 40 Male Cannabis 5 \n",
"3 4 63 Male Cannabis 7 \n",
"4 5 23 Male Cannabis 6 \n",
"... ... ... ... ... ... \n",
"1695 1696 20 Male Psychotropic Substances 2 \n",
"1696 1697 21 Female Psychotropic Substances 2 \n",
"1697 1698 21 Female Psychotropic Substances 6 \n",
"1698 1699 24 Female Psychotropic Substances 4 \n",
"1699 1700 24 Female Psychotropic Substances 4 \n",
"\n",
" duration_of_last_rehabilitation_(Month) \\\n",
"0 2 \n",
"1 5 \n",
"2 4 \n",
"3 2 \n",
"4 6 \n",
"... ... \n",
"1695 1 \n",
"1696 1 \n",
"1697 2 \n",
"1698 2 \n",
"1699 2 \n",
"\n",
" rehabilitation_center \\\n",
"0 Galle Youth Prevention \n",
"1 Kandakadu Treatment and Rehabilitation Centre \n",
"2 Kandy Youth Prevention \n",
"3 Talangama Prevention \n",
"4 Treatment & Rehabilitation Centre \n",
"... ... \n",
"1695 Kandakadu Treatment and Rehabilitation Centre \n",
"1696 Kandakadu Treatment and Rehabilitation Centre \n",
"1697 Arunodadaya Residential and Rehabilitation Cen... \n",
"1698 Kandakadu Treatment and Rehabilitation Centre \n",
"1699 Sahanaya(National Council for Mental Health) K... \n",
"\n",
" behavior_of_individual mental_health_status drug_use_in _that_area \\\n",
"0 Fair Good High \n",
"1 Fair Poor High \n",
"2 Bad Only fair High \n",
"3 Bad Only fair High \n",
"4 Bad Only fair High \n",
"... ... ... ... \n",
"1695 Poor Good Medium \n",
"1696 Poor Poor Medium \n",
"1697 Good Good Extremely High \n",
"1698 Bad Good High \n",
"1699 Bad Good High \n",
"\n",
" employed_before_rehabilitation employment_after_rehabilitation \\\n",
"0 Yes No \n",
"1 Yes No \n",
"2 No No \n",
"3 No Yes \n",
"4 No No \n",
"... ... ... \n",
"1695 No No \n",
"1696 Yes Yes \n",
"1697 Yes No \n",
"1698 No Yes \n",
"1699 Yes No \n",
"\n",
" continue_receiving_drug_treatment drug_type_again _addicted status \n",
"0 Yes Cannabis Yes \n",
"1 Yes Cannabis Yes \n",
"2 No Cannabis Yes \n",
"3 No Cannabis Yes \n",
"4 No Cannabis Yes \n",
"... ... ... ... \n",
"1695 No Psychotropic Substances Yes \n",
"1696 No Psychotropic Substances Yes \n",
"1697 Yes Psychotropic Substances Yes \n",
"1698 No Psychotropic Substances Yes \n",
"1699 Yes Psychotropic Substances Yes \n",
"\n",
"[1695 rows x 15 columns]"
],
"text/html": [
"\n",
" <div id=\"df-e7646a01-e6e9-4e3e-870d-0d55330899ab\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>drug_type</th>\n",
" <th>no_of_time_rehabilitated</th>\n",
" <th>duration_of_last_rehabilitation_(Month)</th>\n",
" <th>rehabilitation_center</th>\n",
" <th>behavior_of_individual</th>\n",
" <th>mental_health_status</th>\n",
" <th>drug_use_in _that_area</th>\n",
" <th>employed_before_rehabilitation</th>\n",
" <th>employment_after_rehabilitation</th>\n",
" <th>continue_receiving_drug_treatment</th>\n",
" <th>drug_type_again _addicted</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>45</td>\n",
" <td>Male</td>\n",
" <td>Cannabis</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>Galle Youth Prevention</td>\n",
" <td>Fair</td>\n",
" <td>Good</td>\n",
" <td>High</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Cannabis</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>23</td>\n",
" <td>Male</td>\n",
" <td>Cannabis</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>Kandakadu Treatment and Rehabilitation Centre</td>\n",
" <td>Fair</td>\n",
" <td>Poor</td>\n",
" <td>High</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Cannabis</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>Male</td>\n",
" <td>Cannabis</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>Kandy Youth Prevention</td>\n",
" <td>Bad</td>\n",
" <td>Only fair</td>\n",
" <td>High</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Cannabis</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>63</td>\n",
" <td>Male</td>\n",
" <td>Cannabis</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>Talangama Prevention</td>\n",
" <td>Bad</td>\n",
" <td>Only fair</td>\n",
" <td>High</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Cannabis</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>23</td>\n",
" <td>Male</td>\n",
" <td>Cannabis</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Treatment &amp; Rehabilitation Centre</td>\n",
" <td>Bad</td>\n",
" <td>Only fair</td>\n",
" <td>High</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Cannabis</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1695</th>\n",
" <td>1696</td>\n",
" <td>20</td>\n",
" <td>Male</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Kandakadu Treatment and Rehabilitation Centre</td>\n",
" <td>Poor</td>\n",
" <td>Good</td>\n",
" <td>Medium</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1696</th>\n",
" <td>1697</td>\n",
" <td>21</td>\n",
" <td>Female</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Kandakadu Treatment and Rehabilitation Centre</td>\n",
" <td>Poor</td>\n",
" <td>Poor</td>\n",
" <td>Medium</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1697</th>\n",
" <td>1698</td>\n",
" <td>21</td>\n",
" <td>Female</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>Arunodadaya Residential and Rehabilitation Cen...</td>\n",
" <td>Good</td>\n",
" <td>Good</td>\n",
" <td>Extremely High</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1698</th>\n",
" <td>1699</td>\n",
" <td>24</td>\n",
" <td>Female</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>Kandakadu Treatment and Rehabilitation Centre</td>\n",
" <td>Bad</td>\n",
" <td>Good</td>\n",
" <td>High</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1699</th>\n",
" <td>1700</td>\n",
" <td>24</td>\n",
" <td>Female</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>Sahanaya(National Council for Mental Health) K...</td>\n",
" <td>Bad</td>\n",
" <td>Good</td>\n",
" <td>High</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Psychotropic Substances</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1695 rows × 15 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e7646a01-e6e9-4e3e-870d-0d55330899ab')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-e7646a01-e6e9-4e3e-870d-0d55330899ab button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-e7646a01-e6e9-4e3e-870d-0d55330899ab');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"source": [
"# Preprocessing categorical variables\n",
"\n",
"# Encode behaviour ratings as integer codes in the listed order\n",
"# ('Excellent' -> 1 ... 'Bad' -> 5); values not listed are left unchanged.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'behavior_of_individual': {\n",
"            label: code\n",
"            for code, label in enumerate(\n",
"                ['Excellent', 'Good', 'Fair', 'Poor', 'Bad'], start=1\n",
"            )\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "Hnc9pGiSoDY9"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode mental health status as integer codes in the listed order\n",
"# ('Good' -> 1, 'Only fair' -> 2, 'Poor' -> 3); values not listed are left unchanged.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'mental_health_status': {\n",
"            label: code\n",
"            for code, label in enumerate(['Good', 'Only fair', 'Poor'], start=1)\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "d6_p1HlJpONU"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode the area drug-use level as integer codes 1-5 in the listed order;\n",
"# values not listed are left unchanged.\n",
"# NOTE(review): the codes are not in severity order ('Low' -> 3 but 'Medium' -> 4), and\n",
"# 'Kaluthara' looks like a place name rather than a usage level - confirm against the data.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'drug_use_in _that_area': {\n",
"            label: code\n",
"            for code, label in enumerate(\n",
"                ['Extremely High', 'High', 'Low', 'Medium', 'Kaluthara'], start=1\n",
"            )\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "I_E_qJTme6VZ"
},
"execution_count": 16,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode gender as 'Male' -> 1, 'Female' -> 2; any other value is left unchanged\n",
"df = df.replace(to_replace={'gender': dict(Male=1, Female=2)})"
],
"metadata": {
"id": "dZbvkJLPgUaO"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode the drug type as integer codes 1-7 in the listed order\n",
"# ('Heroin' -> 1 ... 'Psychotropic Substances' -> 7); values not listed are left unchanged.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'drug_type': {\n",
"            label: code\n",
"            for code, label in enumerate(\n",
"                [\n",
"                    'Heroin',\n",
"                    'Cannabis',\n",
"                    'Opium',\n",
"                    'Hashish',\n",
"                    'Cocaine',\n",
"                    'Methamphetamine',\n",
"                    'Psychotropic Substances',\n",
"                ],\n",
"                start=1,\n",
"            )\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "heXW4QBLg8nY"
},
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode pre-rehabilitation employment as 'Yes' -> 1, 'No' -> 2; other values are left unchanged\n",
"df = df.replace(to_replace={'employed_before_rehabilitation': dict(Yes=1, No=2)})"
],
"metadata": {
"id": "j1LsUfRphOpg"
},
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode post-rehabilitation employment as 'Yes' -> 1, 'No' -> 2; other values are left unchanged\n",
"df = df.replace(to_replace={'employment_after_rehabilitation': dict(Yes=1, No=2)})"
],
"metadata": {
"id": "eBC8niFDjp1U"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode continued drug treatment as 'Yes' -> 1, 'No' -> 2; other values are left unchanged\n",
"df = df.replace(to_replace={'continue_receiving_drug_treatment': dict(Yes=1, No=2)})"
],
"metadata": {
"id": "U5W-ENoOkVNX"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode the re-addiction drug type as integer codes 1-7 in the listed order\n",
"# ('No' -> 5 is the code for the 'No' entry); values not listed are left unchanged.\n",
"# NOTE(review): these codes differ from the 'drug_type' encoding in this notebook\n",
"# (there 'Cocaine' -> 5 and 'Methamphetamine' -> 6; here 'Cocaine' -> 6 and\n",
"# 'Methamphetamine' has no code) - confirm this is intended.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'drug_type_again _addicted': {\n",
"            label: code\n",
"            for code, label in enumerate(\n",
"                [\n",
"                    'Heroin',\n",
"                    'Cannabis',\n",
"                    'Opium',\n",
"                    'Hashish',\n",
"                    'No',\n",
"                    'Cocaine',\n",
"                    'Psychotropic Substances',\n",
"                ],\n",
"                start=1,\n",
"            )\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "Vu1p6A0VqNpt"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode the status target as 'Yes' -> 1, 'No' -> 2; other values are left unchanged\n",
"df = df.replace(to_replace={'status': dict(Yes=1, No=2)})"
],
"metadata": {
"id": "3PoHN-5yqsw2"
},
"execution_count": 23,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode each rehabilitation centre name as an integer code 1-10 in the listed order;\n",
"# names not listed are left unchanged. The codes are arbitrary identifiers.\n",
"df = df.replace(\n",
"    to_replace={\n",
"        'rehabilitation_center': {\n",
"            label: code\n",
"            for code, label in enumerate(\n",
"                [\n",
"                    'Navajeevana Rehabilitation Centre Hambanthota',\n",
"                    'Arunodadaya Residential and Rehabilitation Centre Kaluthara',\n",
"                    'Galle Youth Prevention',\n",
"                    'Kandakadu Treatment and Rehabilitation Centre',\n",
"                    'Kandy Youth Prevention',\n",
"                    'Nawadiganthaya Treatment and Rehabilitation Center',\n",
"                    'Sahanaya(National Council for Mental Health) Kaluthara',\n",
"                    'Suwa Sewana (Institute of Psychological counselling and psychotherapy) Anuradapura',\n",
"                    'Talangama Prevention',\n",
"                    'Treatment & Rehabilitation Centre',\n",
"                ],\n",
"                start=1,\n",
"            )\n",
"        }\n",
"    }\n",
")"
],
"metadata": {
"id": "ux6t31qOq50X"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Display the DataFrame after the replace() encoding cells to check the integer codes\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 666
},
"id": "rUydH7H3udXW",
"outputId": "b46bebdf-bbe5-48b8-b9d3-c970ad46f06a"
},
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id age gender drug_type no_of_time_rehabilitated \\\n",
"0 1 45 1 2 6 \n",
"1 2 23 1 2 4 \n",
"2 3 40 1 2 5 \n",
"3 4 63 1 2 7 \n",
"4 5 23 1 2 6 \n",
"... ... ... ... ... ... \n",
"1695 1696 20 1 7 2 \n",
"1696 1697 21 2 7 2 \n",
"1697 1698 21 2 7 6 \n",
"1698 1699 24 2 7 4 \n",
"1699 1700 24 2 7 4 \n",
"\n",
" duration_of_last_rehabilitation_(Month) rehabilitation_center \\\n",
"0 2 3 \n",
"1 5 4 \n",
"2 4 5 \n",
"3 2 9 \n",
"4 6 10 \n",
"... ... ... \n",
"1695 1 4 \n",
"1696 1 4 \n",
"1697 2 2 \n",
"1698 2 4 \n",
"1699 2 7 \n",
"\n",
" behavior_of_individual mental_health_status drug_use_in _that_area \\\n",
"0 3 1 2 \n",
"1 3 3 2 \n",
"2 5 2 2 \n",
"3 5 2 2 \n",
"4 5 2 2 \n",
"... ... ... ... \n",
"1695 4 1 4 \n",
"1696 4 3 4 \n",
"1697 2 1 1 \n",
"1698 5 1 2 \n",
"1699 5 1 2 \n",
"\n",
" employed_before_rehabilitation employment_after_rehabilitation \\\n",
"0 1 2 \n",
"1 1 2 \n",
"2 2 2 \n",
"3 2 1 \n",
"4 2 2 \n",
"... ... ... \n",
"1695 2 2 \n",
"1696 1 1 \n",
"1697 1 2 \n",
"1698 2 1 \n",
"1699 1 2 \n",
"\n",
" continue_receiving_drug_treatment drug_type_again _addicted status \n",
"0 1 2 1 \n",
"1 1 2 1 \n",
"2 2 2 1 \n",
"3 2 2 1 \n",
"4 2 2 1 \n",
"... ... ... ... \n",
"1695 2 7 1 \n",
"1696 2 7 1 \n",
"1697 1 7 1 \n",
"1698 2 7 1 \n",
"1699 1 7 1 \n",
"\n",
"[1695 rows x 15 columns]"
],
"text/html": [
"\n",
" <div id=\"df-0f911b90-3689-4012-bf50-9c72a24f1bb7\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>drug_type</th>\n",
" <th>no_of_time_rehabilitated</th>\n",
" <th>duration_of_last_rehabilitation_(Month)</th>\n",
" <th>rehabilitation_center</th>\n",
" <th>behavior_of_individual</th>\n",
" <th>mental_health_status</th>\n",
" <th>drug_use_in _that_area</th>\n",
" <th>employed_before_rehabilitation</th>\n",
" <th>employment_after_rehabilitation</th>\n",
" <th>continue_receiving_drug_treatment</th>\n",
" <th>drug_type_again _addicted</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>45</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>10</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1695</th>\n",
" <td>1696</td>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1696</th>\n",
" <td>1697</td>\n",
" <td>21</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1697</th>\n",
" <td>1698</td>\n",
" <td>21</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1698</th>\n",
" <td>1699</td>\n",
" <td>24</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1699</th>\n",
" <td>1700</td>\n",
" <td>24</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1695 rows × 15 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0f911b90-3689-4012-bf50-9c72a24f1bb7')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-0f911b90-3689-4012-bf50-9c72a24f1bb7 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-0f911b90-3689-4012-bf50-9c72a24f1bb7');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"# train_test_split is already imported in the notebook's first (imports) cell,\n",
"# so it is not re-imported here.\n",
"\n",
"# Predictor columns shared by both prediction tasks\n",
"features = df[[\n",
"    'age',\n",
"    'gender',\n",
"    'drug_type',\n",
"    'no_of_time_rehabilitated',\n",
"    'duration_of_last_rehabilitation_(Month)',\n",
"    'rehabilitation_center',\n",
"    'behavior_of_individual',\n",
"    'mental_health_status',\n",
"    'drug_use_in _that_area',\n",
"    'employed_before_rehabilitation',\n",
"    'employment_after_rehabilitation',\n",
"    'continue_receiving_drug_treatment',\n",
"]]\n",
"\n",
"# Target for predicting the drug addiction status after the rehabilitation process\n",
"target_status = df['status']\n",
"\n",
"# Target for predicting the drug type\n",
"target_drug_type = df['drug_type_again _addicted']\n",
"\n",
"# 80/20 train/test split for the status task (fixed random_state for repeatability)\n",
"X_train_status, X_test_status, y_train_status, y_test_target_status = train_test_split(\n",
"    features, target_status, test_size=0.2, random_state=42\n",
")\n",
"\n",
"# 80/20 train/test split for the drug type task, using the same test_size and random_state\n",
"X_train_drug_type, X_test_drug_type, y_train_drug_type, y_test_drug_type = train_test_split(\n",
"    features, target_drug_type, test_size=0.2, random_state=42\n",
")\n",
"\n",
"\n",
"# Print the (rows, columns) shapes of the training and testing sets\n",
"print(\"Training set shape for status prediction:\", X_train_status.shape, y_train_status.shape)\n",
"print(\"Testing set shape for status prediction:\", X_test_status.shape, y_test_target_status.shape)\n",
"print(\"Training set shape for drug_type prediction:\", X_train_drug_type.shape, y_train_drug_type.shape)\n",
"print(\"Testing set shape for drug_type prediction:\", X_test_drug_type.shape, y_test_drug_type.shape)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uhe9P7p3nW4B",
"outputId": "4c6ebcff-5594-4438-c6b8-1aeb776c805e"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Training set shape for status prediction: (1356, 12) (1356,)\n",
"Testing set shape for status prediction: (339, 12) (339,)\n",
"Training set shape for drug_type prediction: (1356, 12) (1356,)\n",
"Testing set shape for drug_type prediction: (339, 12) (339,)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Feature matrix used to train the models (one column per predictor)\n",
"X = df[[\n",
"    'age',\n",
"    'gender',\n",
"    'drug_type',\n",
"    'no_of_time_rehabilitated',\n",
"    'duration_of_last_rehabilitation_(Month)',\n",
"    'rehabilitation_center',\n",
"    'behavior_of_individual',\n",
"    'mental_health_status',\n",
"    'drug_use_in _that_area',\n",
"    'employed_before_rehabilitation',\n",
"    'employment_after_rehabilitation',\n",
"    'continue_receiving_drug_treatment',\n",
"]]\n",
"\n",
"# Target column for the status prediction task\n",
"y_status = df['status']\n",
"\n",
"# Target column for the drug type prediction task\n",
"y_drug_type = df['drug_type_again _addicted']\n"
],
"metadata": {
"id": "b8qvt35T2lWe"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Random forest models for the two targets (status and drug type).\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"# Split once for both targets: handing both label series to a single\n",
"# train_test_split call keeps X_train/X_test row-aligned with every y_* series\n",
"# instead of overwriting X_train/X_test with a second, separate split.\n",
"(X_train, X_test,\n",
" y_train_status, y_test_status,\n",
" y_train_drug_type, y_test_drug_type) = train_test_split(\n",
"    X, y_status, y_drug_type, test_size=0.2, random_state=42)\n",
"\n",
"# Seed the forests so the scores printed below are reproducible between runs\n",
"status_model_RC = RandomForestClassifier(random_state=42)\n",
"drug_type_model_RC = RandomForestClassifier(random_state=42)\n",
"\n",
"# Fit on the training split\n",
"status_model_RC.fit(X_train, y_train_status)\n",
"drug_type_model_RC.fit(X_train, y_train_drug_type)\n",
"\n",
"# Predict the held-out split\n",
"status_predictions_RC = status_model_RC.predict(X_test)\n",
"drug_type_predictions_RC = drug_type_model_RC.predict(X_test)\n",
"\n",
"# Macro-averaged precision/recall: with average='micro' on single-label\n",
"# predictions both values are always identical to accuracy, so they carried no\n",
"# extra information. zero_division=0 scores a never-predicted class as 0\n",
"# without emitting a warning.\n",
"status_accuracy = accuracy_score(y_test_status, status_predictions_RC)\n",
"status_precision = precision_score(y_test_status, status_predictions_RC, average='macro', zero_division=0)\n",
"status_recall = recall_score(y_test_status, status_predictions_RC, average='macro', zero_division=0)\n",
"\n",
"drug_type_accuracy = accuracy_score(y_test_drug_type, drug_type_predictions_RC)\n",
"drug_type_precision = precision_score(y_test_drug_type, drug_type_predictions_RC, average='macro', zero_division=0)\n",
"drug_type_recall = recall_score(y_test_drug_type, drug_type_predictions_RC, average='macro', zero_division=0)\n",
"\n",
"# printing the evaluation results\n",
"print(\"Status Accuracy:\", status_accuracy)\n",
"print(\"Status Precision:\", status_precision)\n",
"print(\"Status Recall:\", status_recall)\n",
"\n",
"print(\"Drug Type Accuracy:\", drug_type_accuracy)\n",
"print(\"Drug Type Precision:\", drug_type_precision)\n",
"print(\"Drug Type Recall:\", drug_type_recall)\n",
"\n",
"# Refit the random forest models on the full dataset now that they have been\n",
"# evaluated. This previously called status_model / drug_type_model, names this\n",
"# cell never defines: a NameError on a fresh kernel, and otherwise a refit of\n",
"# whatever unrelated model another cell had left bound to those names.\n",
"drug_type_model_RC.fit(X, y_drug_type)\n",
"status_model_RC.fit(X, y_status)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 179
},
"id": "tE-BgyGXJsDl",
"outputId": "7bd82a67-7ba6-4142-ca25-27b41e55e5c9"
},
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Status Accuracy: 0.9616519174041298\n",
"Status Precision: 0.9616519174041298\n",
"Status Recall: 0.9616519174041298\n",
"Drug Type Accuracy: 0.9321533923303835\n",
"Drug Type Precision: 0.9321533923303835\n",
"Drug Type Recall: 0.9321533923303835\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DecisionTreeClassifier()"
],
"text/html": [
"<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 34
}
]
},
{
"cell_type": "code",
"source": [
"# Logistic regression models for the two targets, trained on standardized features.\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Split once for both targets so X_train/X_test stay row-aligned with every\n",
"# y_* series (two separate calls only overwrote X_train/X_test).\n",
"(X_train, X_test,\n",
" y_train_status, y_test_status,\n",
" y_train_drug_type, y_test_drug_type) = train_test_split(\n",
"    X, y_status, y_drug_type, test_size=0.2, random_state=42)\n",
"\n",
"# 'sag' is a stochastic solver; seeding it makes the fitted coefficients and\n",
"# the scores below reproducible between runs.\n",
"status_model = LogisticRegression(max_iter=1000, solver='sag', random_state=42)\n",
"drug_type_model = LogisticRegression(max_iter=1000, solver='sag', random_state=42)\n",
"\n",
"# Fit the scaler on the training split only, then apply it to both splits so\n",
"# no statistics of the test rows leak into training.\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)\n",
"\n",
"# fitting the status model and the drug type model on the scaled training data\n",
"status_model.fit(X_train_scaled, y_train_status)\n",
"drug_type_model.fit(X_train_scaled, y_train_drug_type)\n",
"\n",
"# making predictions on the scaled test data\n",
"status_predictions = status_model.predict(X_test_scaled)\n",
"drug_type_predictions = drug_type_model.predict(X_test_scaled)\n",
"\n",
"# Macro-averaged precision/recall: with average='micro' on single-label\n",
"# predictions both values are always identical to accuracy. zero_division=0\n",
"# scores a never-predicted class as 0 without emitting a warning.\n",
"status_accuracy = accuracy_score(y_test_status, status_predictions)\n",
"status_precision = precision_score(y_test_status, status_predictions, average='macro', zero_division=0)\n",
"status_recall = recall_score(y_test_status, status_predictions, average='macro', zero_division=0)\n",
"\n",
"drug_type_accuracy = accuracy_score(y_test_drug_type, drug_type_predictions)\n",
"drug_type_precision = precision_score(y_test_drug_type, drug_type_predictions, average='macro', zero_division=0)\n",
"drug_type_recall = recall_score(y_test_drug_type, drug_type_predictions, average='macro', zero_division=0)\n",
"\n",
"# printing the evaluation results\n",
"print(\"Status Accuracy:\", status_accuracy)\n",
"print(\"Status Precision:\", status_precision)\n",
"print(\"Status Recall:\", status_recall)\n",
"\n",
"print(\"Drug Type Accuracy:\", drug_type_accuracy)\n",
"print(\"Drug Type Precision:\", drug_type_precision)\n",
"print(\"Drug Type Recall:\", drug_type_recall)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MECR-nBZJ2UD",
"outputId": "08b338b6-62b2-40f0-dff7-6e4ae5c8f583"
},
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Status Accuracy: 0.9203539823008849\n",
"Status Precision: 0.9203539823008849\n",
"Status Recall: 0.9203539823008849\n",
"Drug Type Accuracy: 0.8053097345132744\n",
"Drug Type Precision: 0.8053097345132744\n",
"Drug Type Recall: 0.8053097345132744\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Decision tree models for the two targets.\n",
"# Uses the X_train/X_test and y_* split variables left by the preceding cells.\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"# Seeded so tie-breaking between equally good splits is reproducible\n",
"status_model = DecisionTreeClassifier(random_state=42)\n",
"drug_type_model = DecisionTreeClassifier(random_state=42)\n",
"\n",
"# Each model is fitted and evaluated exactly once (the duplicate fit/predict\n",
"# passes and the unused dt_classifier instance were dropped).\n",
"\n",
"# For status prediction\n",
"status_model.fit(X_train, y_train_status)\n",
"status_predictions = status_model.predict(X_test)\n",
"\n",
"# Macro-averaged precision/recall: with average='micro' on single-label\n",
"# predictions both values are always identical to accuracy. zero_division=0\n",
"# scores a never-predicted class as 0 without emitting a warning.\n",
"status_accuracy = accuracy_score(y_test_status, status_predictions)\n",
"status_precision = precision_score(y_test_status, status_predictions, average='macro', zero_division=0)\n",
"status_recall = recall_score(y_test_status, status_predictions, average='macro', zero_division=0)\n",
"\n",
"print(\"status Accuracy:\", status_accuracy)\n",
"print(\"Status Type Precision:\", status_precision)\n",
"print(\"Status Type Recall:\", status_recall)\n",
"\n",
"# For drug type prediction\n",
"drug_type_model.fit(X_train, y_train_drug_type)\n",
"drug_type_predictions = drug_type_model.predict(X_test)\n",
"\n",
"drug_type_accuracy = accuracy_score(y_test_drug_type, drug_type_predictions)\n",
"drug_type_precision = precision_score(y_test_drug_type, drug_type_predictions, average='macro', zero_division=0)\n",
"drug_type_recall = recall_score(y_test_drug_type, drug_type_predictions, average='macro', zero_division=0)\n",
"\n",
"print(\"Drug Type Accuracy:\", drug_type_accuracy)\n",
"print(\"Drug Type Precision:\", drug_type_precision)\n",
"print(\"Drug Type Recall:\", drug_type_recall)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uTcIru0-_EPj",
"outputId": "c2ae5d59-a8d7-4a97-ff56-0f81b11174cb"
},
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"status Accuracy: 0.9616519174041298\n",
"Status Type Precision: 0.9616519174041298\n",
"Status Type Recall: 0.9616519174041298\n",
"Drug Type Accuracy: 0.9498525073746312\n",
"Drug Type Precision: 0.9498525073746312\n",
"Drug Type Recall: 0.9498525073746312\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Persist the random forest status model, reload it, and predict one sample.\n",
"import pickle\n",
"\n",
"# The status model gets its own file. The drug-type cell of this notebook\n",
"# writes 'trained_model.pkl'; sharing that name meant the status model saved\n",
"# here was overwritten on disk as soon as that cell ran.\n",
"STATUS_MODEL_PATH = 'status_model.pkl'\n",
"\n",
"# Save the trained model\n",
"with open(STATUS_MODEL_PATH, 'wb') as model_file:\n",
"    pickle.dump(status_model_RC, model_file)\n",
"\n",
"# One sample to score; keys are the feature columns the model was fitted on,\n",
"# in the same order.\n",
"new_data = pd.DataFrame({\n",
"    'age': [20],\n",
"    'gender': [1],\n",
"    'drug_type': [7],\n",
"    'no_of_time_rehabilitated': [2],\n",
"    'duration_of_last_rehabilitation_(Month)': [1],\n",
"    'rehabilitation_center': [4],\n",
"    'behavior_of_individual': [4],\n",
"    'mental_health_status': [1],\n",
"    'drug_use_in _that_area': [4],\n",
"    'employed_before_rehabilitation': [2],\n",
"    'employment_after_rehabilitation': [2],\n",
"    'continue_receiving_drug_treatment': [2]\n",
"})\n",
"\n",
"# Load the model back from disk. NOTE: pickle.load executes code embedded in\n",
"# the file, so only load pickles this notebook produced itself.\n",
"with open(STATUS_MODEL_PATH, 'rb') as model_file:\n",
"    status_model_RC = pickle.load(model_file)\n",
"\n",
"# Make predictions for the new data\n",
"status_prediction_RC = status_model_RC.predict(new_data)\n",
"\n",
"# Print the predicted status\n",
"print('Predicted Status:', status_prediction_RC)\n",
"\n",
"# A predicted label of 1 is reported as 'Yes', any other label as 'No'\n",
"if status_prediction_RC[0] == 1:\n",
"    print('Yes')\n",
"else:\n",
"    print('No')\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ka7tF7LcwehT",
"outputId": "ec811f5d-2c02-49e2-98fc-4b61cf31df14"
},
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Predicted Status: [1]\n",
"Yes\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Persist the random forest drug-type model, reload it, and predict one sample.\n",
"import pickle\n",
"\n",
"# Save the trained model\n",
"with open('trained_model.pkl', 'wb') as model_file:\n",
"    pickle.dump(drug_type_model_RC, model_file)\n",
"\n",
"# One sample to score; keys are the feature columns the model was fitted on,\n",
"# in the same order.\n",
"new_data = pd.DataFrame({\n",
"    'age': [20],\n",
"    'gender': [1],\n",
"    'drug_type': [7],\n",
"    'no_of_time_rehabilitated': [2],\n",
"    'duration_of_last_rehabilitation_(Month)': [1],\n",
"    'rehabilitation_center': [4],\n",
"    'behavior_of_individual': [4],\n",
"    'mental_health_status': [1],\n",
"    'drug_use_in _that_area': [4],\n",
"    'employed_before_rehabilitation': [2],\n",
"    'employment_after_rehabilitation': [2],\n",
"    'continue_receiving_drug_treatment': [2]\n",
"})\n",
"\n",
"# Load the model back from disk. NOTE: pickle.load executes code embedded in\n",
"# the file, so only load pickles this notebook produced itself.\n",
"with open('trained_model.pkl', 'rb') as model_file:\n",
"    drug_type_model_RC = pickle.load(model_file)\n",
"\n",
"# Make predictions for the new data\n",
"drug_type_prediction_RC = drug_type_model_RC.predict(new_data)\n",
"\n",
"# Display names for the predicted label codes. Any code not listed here falls\n",
"# back to 'Psychotropic Substances', matching the previous if/elif chain.\n",
"DRUG_TYPE_NAMES = {\n",
"    1: 'Heroin',\n",
"    2: 'Cannabis',\n",
"    3: 'Opium',\n",
"    4: 'Hashish',\n",
"    5: 'No',\n",
"    6: 'Cocaine',\n",
"}\n",
"\n",
"# predict() returns one label per input row. Take the single row's label\n",
"# explicitly: comparing the whole array to a scalar inside an if-statement only\n",
"# works for a one-row input and raises for anything longer.\n",
"predicted_code = drug_type_prediction_RC[0]\n",
"drug_type = DRUG_TYPE_NAMES.get(predicted_code, 'Psychotropic Substances')\n",
"\n",
"# Print the predicted drug type\n",
"print('Predicted Drug Type:', drug_type_prediction_RC)\n",
"print(drug_type)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "19AOghaNwc2k",
"outputId": "6a5f9574-57ba-4dbe-9c20-848fe5fc3ee7"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Predicted Drug Type: [7]\n",
"Psychotropic Substances\n"
]
}
]
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment