Commit 189402b3 authored by Chathura IT19243986's avatar Chathura IT19243986

Upload New File

parent 6d1d1ead
{
"cells": [
{
"cell_type": "markdown",
"id": "bfe87d02",
"metadata": {},
"source": [
"# Import required packages"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "742cb123",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"%matplotlib inline\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn import metrics\n",
"from sklearn.metrics import confusion_matrix,accuracy_score"
]
},
{
"cell_type": "markdown",
"id": "519acb7a",
"metadata": {},
"source": [
"# Reading & Assigning data from dataset to a variable"
]
},
{
"cell_type": "code",
"execution_count": 190,
"id": "18378cff",
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"COVID-19 DATA SET.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 191,
"id": "8f8fa097",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 8)"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "03839491",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PNo</th>\n",
" <th>Age</th>\n",
" <th>DIABETIC</th>\n",
" <th>HEART D.</th>\n",
" <th>LUNG D.</th>\n",
" <th>HBP</th>\n",
" <th>M/F</th>\n",
" <th>D/R</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>68</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>78</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>69</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>F</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>78</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>M</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>79</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>M</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PNo Age DIABETIC HEART D. LUNG D. HBP M/F D/R\n",
"0 1 68 0 0 0 0 F 0\n",
"1 2 78 0 0 0 0 F 0\n",
"2 3 69 1 1 0 1 F 1\n",
"3 4 78 0 1 0 1 M 1\n",
"4 5 79 1 0 0 0 M 0"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "markdown",
"id": "4bca021b",
"metadata": {},
"source": [
"## # Check if any null value is present"
]
},
{
"cell_type": "code",
"execution_count": 193,
"id": "44384634",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 193,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().values.any()"
]
},
{
"cell_type": "code",
"execution_count": 194,
"id": "ac6b4d49",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PNo</th>\n",
" <th>Age</th>\n",
" <th>DIABETIC</th>\n",
" <th>HEART D.</th>\n",
" <th>LUNG D.</th>\n",
" <th>HBP</th>\n",
" <th>D/R</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>PNo</th>\n",
" <td>1.000000</td>\n",
" <td>-0.399201</td>\n",
" <td>-0.202974</td>\n",
" <td>-0.219943</td>\n",
" <td>-4.959658e-02</td>\n",
" <td>-0.331164</td>\n",
" <td>-7.302691e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Age</th>\n",
" <td>-0.399201</td>\n",
" <td>1.000000</td>\n",
" <td>0.105824</td>\n",
" <td>0.165694</td>\n",
" <td>-1.516316e-02</td>\n",
" <td>0.373607</td>\n",
" <td>4.310349e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DIABETIC</th>\n",
" <td>-0.202974</td>\n",
" <td>0.105824</td>\n",
" <td>1.000000</td>\n",
" <td>-0.013348</td>\n",
" <td>2.704413e-01</td>\n",
" <td>0.283042</td>\n",
" <td>2.339367e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HEART D.</th>\n",
" <td>-0.219943</td>\n",
" <td>0.165694</td>\n",
" <td>-0.013348</td>\n",
" <td>1.000000</td>\n",
" <td>6.912111e-02</td>\n",
" <td>0.181032</td>\n",
" <td>1.931218e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LUNG D.</th>\n",
" <td>-0.049597</td>\n",
" <td>-0.015163</td>\n",
" <td>0.270441</td>\n",
" <td>0.069121</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.062460</td>\n",
" <td>-9.934125e-18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HBP</th>\n",
" <td>-0.331164</td>\n",
" <td>0.373607</td>\n",
" <td>0.283042</td>\n",
" <td>0.181032</td>\n",
" <td>6.246048e-02</td>\n",
" <td>1.000000</td>\n",
" <td>4.532471e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D/R</th>\n",
" <td>-0.730269</td>\n",
" <td>0.431035</td>\n",
" <td>0.233937</td>\n",
" <td>0.193122</td>\n",
" <td>-9.934125e-18</td>\n",
" <td>0.453247</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PNo Age DIABETIC HEART D. LUNG D. HBP \\\n",
"PNo 1.000000 -0.399201 -0.202974 -0.219943 -4.959658e-02 -0.331164 \n",
"Age -0.399201 1.000000 0.105824 0.165694 -1.516316e-02 0.373607 \n",
"DIABETIC -0.202974 0.105824 1.000000 -0.013348 2.704413e-01 0.283042 \n",
"HEART D. -0.219943 0.165694 -0.013348 1.000000 6.912111e-02 0.181032 \n",
"LUNG D. -0.049597 -0.015163 0.270441 0.069121 1.000000e+00 0.062460 \n",
"HBP -0.331164 0.373607 0.283042 0.181032 6.246048e-02 1.000000 \n",
"D/R -0.730269 0.431035 0.233937 0.193122 -9.934125e-18 0.453247 \n",
"\n",
" D/R \n",
"PNo -7.302691e-01 \n",
"Age 4.310349e-01 \n",
"DIABETIC 2.339367e-01 \n",
"HEART D. 1.931218e-01 \n",
"LUNG D. -9.934125e-18 \n",
"HBP 4.532471e-01 \n",
"D/R 1.000000e+00 "
]
},
"execution_count": 194,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.corr()"
]
},
{
"cell_type": "code",
"execution_count": 195,
"id": "05f646f1",
"metadata": {},
"outputs": [],
"source": [
"dr_true_count = len(data.loc[data[\"D/R\"] == 1])\n",
"dr_false_count = len(data.loc[data[\"D/R\"] == 0])"
]
},
{
"cell_type": "code",
"execution_count": 196,
"id": "49d36f7f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50, 50)"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(dr_true_count,dr_false_count)"
]
},
{
"cell_type": "markdown",
"id": "db54a599",
"metadata": {},
"source": [
"## Train Test Split"
]
},
{
"cell_type": "code",
"execution_count": 197,
"id": "88d87b88",
"metadata": {},
"outputs": [],
"source": [
"\n",
"feature_columns = ['Age','DIABETIC','HEART D.','LUNG D.','HBP']\n",
"predicted_class = ['D/R']"
]
},
{
"cell_type": "code",
"execution_count": 198,
"id": "5c0ed8cd",
"metadata": {},
"outputs": [],
"source": [
"x = data[feature_columns].values\n",
"y = data[predicted_class].values\n",
"\n",
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state=10)"
]
},
{
"cell_type": "code",
"execution_count": 199,
"id": "73ac20c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[73, 1, 0, 0, 1],\n",
" [90, 1, 0, 0, 1],\n",
" [63, 1, 0, 0, 1],\n",
" [87, 1, 0, 0, 1],\n",
" [62, 0, 0, 0, 0],\n",
" [78, 0, 1, 0, 1],\n",
" [72, 0, 0, 0, 1],\n",
" [63, 1, 0, 0, 1],\n",
" [82, 0, 0, 0, 1],\n",
" [76, 0, 0, 0, 0],\n",
" [69, 1, 1, 0, 1],\n",
" [78, 0, 0, 0, 0],\n",
" [19, 0, 0, 0, 0],\n",
" [74, 0, 0, 0, 0],\n",
" [10, 0, 0, 0, 0],\n",
" [31, 0, 0, 1, 0],\n",
" [15, 0, 0, 0, 0],\n",
" [72, 1, 0, 1, 0],\n",
" [82, 0, 0, 0, 0],\n",
" [79, 0, 0, 0, 0]], dtype=int64)"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_test"
]
},
{
"cell_type": "markdown",
"id": "e7dafd44",
"metadata": {},
"source": [
"# Model training"
]
},
{
"cell_type": "code",
"execution_count": 200,
"id": "2d370d35",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestClassifier(random_state=10)"
]
},
"execution_count": 200,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random_forest_model = RandomForestClassifier(random_state=10)\n",
"\n",
"random_forest_model.fit(x_train, y_train.ravel())"
]
},
{
"cell_type": "markdown",
"id": "c86c265d",
"metadata": {},
"source": [
"# Check model accuracy "
]
},
{
"cell_type": "code",
"execution_count": 201,
"id": "17baad43",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy = 0.750\n"
]
}
],
"source": [
"predict_train_data = random_forest_model.predict(x_test)\n",
"\n",
"print(\"Accuracy = {0:.3f}\".format(metrics.accuracy_score(y_test, predict_train_data)))"
]
},
{
"cell_type": "markdown",
"id": "7c8d94be",
"metadata": {},
"source": [
"# Test Model accuracy with confusion matrix"
]
},
{
"cell_type": "code",
"execution_count": 202,
"id": "e068d77f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[8 1]\n",
" [4 7]]\n",
"0.75\n"
]
}
],
"source": [
"cm=confusion_matrix(y_test,predict_train_data)\n",
"score=accuracy_score(y_test,predict_train_data)\n",
"\n",
"print(cm)\n",
"print(score)"
]
},
{
"cell_type": "markdown",
"id": "6383ed5e",
"metadata": {},
"source": [
"# Model Saving"
]
},
{
"cell_type": "code",
"execution_count": 203,
"id": "19ba2607",
"metadata": {},
"outputs": [],
"source": [
"#symthomchecker Model saving\n",
"import pickle\n",
"with open('covid_critical_ilness.pickle','wb') as file:\n",
" pickle.dump(random_forest_model,file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment