Initial commit

parents
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WXK1h7IS5ua0"
},
"outputs": [],
"source": [
"#Import Libraries\n",
"\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"%matplotlib inline\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.layers import Flatten\n",
"from sklearn.model_selection import train_test_split\n",
"import time\n",
"\n",
"\n",
"#Data Balancing libraries\n",
"from imblearn.under_sampling import NearMiss\n",
"from imblearn.over_sampling import ADASYN\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN #Hybrid method\n",
"\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, auc\n",
"\n",
"\n",
"#apply standardization\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"#Visual libraries\n",
"import seaborn as sns\n",
"\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import classification_report"
]
},
{
"cell_type": "code",
"source": [
"data = pd.read_csv('/content/disease_preprocess4.csv')"
],
"metadata": {
"id": "fMhfrc507QwL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data.head()"
],
"metadata": {
"id": "JryGjddVC8xL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data.columns"
],
"metadata": {
"id": "F_PjX618F5l6"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Separate the predictors (X) from the binary target (y)\n",
"X = data.drop('HeartDisease', axis=1)  # Features\n",
"y = data['HeartDisease']  # Target variable\n",
"\n",
"# Hold out 20% for testing. stratify=y keeps the HeartDisease class ratio identical in\n",
"# both splits, which matters because the classes are rebalanced further down.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"\n",
"# A bare tuple expression in the middle of a cell is evaluated and then discarded,\n",
"# so print the shapes explicitly to make them visible.\n",
"print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)\n",
"\n",
"print(X_train.head())\n",
"print(y_train.head())"
],
"metadata": {
"id": "Jkmi2N0aC8nZ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Perform Scaling"
],
"metadata": {
"id": "E2WufmJdz8g_"
}
},
{
"cell_type": "code",
"source": [
"# Standardize the features: learn mean/std on the training split only,\n",
"# then apply those same statistics to both splits.\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"scaler.fit(X_train)\n",
"\n",
"X_train_scaled = scaler.transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)"
],
"metadata": {
"id": "Y3FJTGSrUJIh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the count of each class in the before resample data\n",
"print(y_train.value_counts())"
],
"metadata": {
"id": "HZThz9by_GfH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### NearMiss"
],
"metadata": {
"id": "DkcD5UvyWld6"
}
},
{
"cell_type": "code",
"source": [
"from imblearn.under_sampling import NearMiss\n",
"\n",
"# Under-sample the majority class with NearMiss.\n",
"# Only the scaled training split is resampled; the test split is left as-is.\n",
"nearmiss = NearMiss(sampling_strategy='auto')\n",
"X_train_nearmiss, y_train_nearmiss = nearmiss.fit_resample(X=X_train_scaled, y=y_train)\n"
],
"metadata": {
"id": "w0wWq8wtWoZ3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the count of each class in the resampled data\n",
"print(y_train_nearmiss.value_counts())"
],
"metadata": {
"id": "VvkPNmPaXHKq"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"# Each sample is fed as a length-n_features sequence with a single channel\n",
"input_shape = (X_train_nearmiss.shape[1], 1)\n",
"\n",
"# Feature extractor: two Conv1D -> BatchNorm -> MaxPool stages, then flatten\n",
"conv_layers = [\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"]\n",
"\n",
"# Classifier head: Dense -> BatchNorm -> Dropout(0.5) at each width, widest first\n",
"dense_layers = []\n",
"for units in (512, 256, 128, 64):\n",
"    dense_layers += [Dense(units=units, activation='relu'), BatchNormalization(), Dropout(0.5)]\n",
"\n",
"# A single sigmoid unit produces the binary-class probability\n",
"model = Sequential(conv_layers + dense_layers + [Dense(units=1, activation='sigmoid')])\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"#model.summary()"
],
"metadata": {
"id": "mgOGqHJwXHBr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keras carves the validation_split fraction off the END of the arrays before it shuffles,\n",
"# and imbalanced-learn does not return shuffled rows (under-samplers emit one class after\n",
"# the other, over-samplers append the synthetic rows last). Shuffle once, reproducibly,\n",
"# so the validation slice contains both classes.\n",
"shuffle_idx = np.random.default_rng(42).permutation(len(y_train_nearmiss))\n",
"X_fit_nearmiss = np.asarray(X_train_nearmiss)[shuffle_idx]\n",
"y_fit_nearmiss = np.asarray(y_train_nearmiss)[shuffle_idx]\n",
"\n",
"# Time the training run\n",
"start_time = time.time()\n",
"history = model.fit(X_fit_nearmiss, y_fit_nearmiss, epochs=50, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
],
"metadata": {
"id": "XqcE7QX_XUfL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "r2iwyhwYXVQx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "9lt13HjQXliz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### ADASYN"
],
"metadata": {
"id": "FicbvbTzXsHp"
}
},
{
"cell_type": "code",
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"# Over-sample the minority class with ADASYN (synthetic samples).\n",
"# Only the scaled training split is resampled; the test split is left as-is.\n",
"adasyn = ADASYN(sampling_strategy='auto', random_state=23)\n",
"X_train_adasyn, y_train_adasyn = adasyn.fit_resample(X=X_train_scaled, y=y_train)\n"
],
"metadata": {
"id": "z_vRKICpX1IB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"# Each sample is fed as a length-n_features sequence with a single channel\n",
"input_shape = (X_train_adasyn.shape[1], 1)\n",
"\n",
"# Feature extractor: two Conv1D -> BatchNorm -> MaxPool stages, then flatten\n",
"conv_layers = [\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"]\n",
"\n",
"# Classifier head: Dense -> BatchNorm -> Dropout(0.5) at each width, widest first\n",
"dense_layers = []\n",
"for units in (512, 256, 128, 64):\n",
"    dense_layers += [Dense(units=units, activation='relu'), BatchNormalization(), Dropout(0.5)]\n",
"\n",
"# A single sigmoid unit produces the binary-class probability\n",
"model = Sequential(conv_layers + dense_layers + [Dense(units=1, activation='sigmoid')])\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"#model.summary()"
],
"metadata": {
"id": "VSFetfoqZA3g"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keras carves the validation_split fraction off the END of the arrays before it shuffles,\n",
"# and imbalanced-learn over-samplers append their synthetic minority rows after the\n",
"# original rows. Shuffle once, reproducibly, so the validation slice is not made up\n",
"# of synthetic samples from a single class.\n",
"shuffle_idx = np.random.default_rng(42).permutation(len(y_train_adasyn))\n",
"X_fit_adasyn = np.asarray(X_train_adasyn)[shuffle_idx]\n",
"y_fit_adasyn = np.asarray(y_train_adasyn)[shuffle_idx]\n",
"\n",
"# Time the training run\n",
"start_time = time.time()\n",
"history = model.fit(X_fit_adasyn, y_fit_adasyn, epochs=50, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
],
"metadata": {
"id": "89tJFpG_Xq6I"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "X_pXi9WqZIRz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "R4KHz7iEZI97"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### SMOTE"
],
"metadata": {
"id": "yo5CABz5ZMKk"
}
},
{
"cell_type": "code",
"source": [
"from imblearn.over_sampling import SMOTE\n",
"\n",
"# Over-sample the minority class with SMOTE (synthetic samples).\n",
"# Only the scaled training split is resampled; the test split is left as-is.\n",
"smote = SMOTE(sampling_strategy='auto', random_state=23)\n",
"X_train_smote, y_train_smote = smote.fit_resample(X=X_train_scaled, y=y_train)"
],
"metadata": {
"id": "besSIFVZZl3T"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"# Each sample is fed as a length-n_features sequence with a single channel\n",
"input_shape = (X_train_smote.shape[1], 1)\n",
"\n",
"# Feature extractor: two Conv1D -> BatchNorm -> MaxPool stages, then flatten\n",
"conv_layers = [\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"]\n",
"\n",
"# Classifier head: Dense -> BatchNorm -> Dropout(0.5) at each width, widest first\n",
"dense_layers = []\n",
"for units in (512, 256, 128, 64):\n",
"    dense_layers += [Dense(units=units, activation='relu'), BatchNormalization(), Dropout(0.5)]\n",
"\n",
"# A single sigmoid unit produces the binary-class probability\n",
"model = Sequential(conv_layers + dense_layers + [Dense(units=1, activation='sigmoid')])\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"#model.summary()"
],
"metadata": {
"id": "fTSo2sIdZehk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keras carves the validation_split fraction off the END of the arrays before it shuffles,\n",
"# and imbalanced-learn over-samplers append their synthetic minority rows after the\n",
"# original rows. Shuffle once, reproducibly, so the validation slice is not made up\n",
"# of synthetic samples from a single class.\n",
"shuffle_idx = np.random.default_rng(42).permutation(len(y_train_smote))\n",
"X_fit_smote = np.asarray(X_train_smote)[shuffle_idx]\n",
"y_fit_smote = np.asarray(y_train_smote)[shuffle_idx]\n",
"\n",
"# Time the training run\n",
"start_time = time.time()\n",
"history = model.fit(X_fit_smote, y_fit_smote, epochs=50, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
],
"metadata": {
"id": "cLfhhYJqZPJL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "6CYHP6CXZPD9"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "Zj_sDeLvZO7E"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Random Under Sampler"
],
"metadata": {
"id": "SemurYg5Z8p0"
}
},
{
"cell_type": "code",
"source": [
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"# Under-sample the majority class by randomly dropping rows.\n",
"# Only the scaled training split is resampled; the test split is left as-is.\n",
"random_under_sampler = RandomUnderSampler(sampling_strategy='auto', random_state=23)\n",
"X_train_random, y_train_random = random_under_sampler.fit_resample(X=X_train_scaled, y=y_train)"
],
"metadata": {
"id": "hx80eO5-aBqr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"# Each sample is fed as a length-n_features sequence with a single channel\n",
"input_shape = (X_train_random.shape[1], 1)\n",
"\n",
"# Feature extractor: two Conv1D -> BatchNorm -> MaxPool stages, then flatten\n",
"conv_layers = [\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"]\n",
"\n",
"# Classifier head: Dense -> BatchNorm -> Dropout(0.5) at each width, widest first\n",
"dense_layers = []\n",
"for units in (512, 256, 128, 64):\n",
"    dense_layers += [Dense(units=units, activation='relu'), BatchNormalization(), Dropout(0.5)]\n",
"\n",
"# A single sigmoid unit produces the binary-class probability\n",
"model = Sequential(conv_layers + dense_layers + [Dense(units=1, activation='sigmoid')])\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"#model.summary()"
],
"metadata": {
"id": "6DK0YDZrvofA"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keras carves the validation_split fraction off the END of the arrays before it shuffles,\n",
"# and imbalanced-learn under-samplers return the kept rows one class after the other.\n",
"# Shuffle once, reproducibly, so the validation slice contains both classes.\n",
"shuffle_idx = np.random.default_rng(42).permutation(len(y_train_random))\n",
"X_fit_random = np.asarray(X_train_random)[shuffle_idx]\n",
"y_fit_random = np.asarray(y_train_random)[shuffle_idx]\n",
"\n",
"# Time the training run\n",
"start_time = time.time()\n",
"history = model.fit(X_fit_random, y_fit_random, epochs=50, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
],
"metadata": {
"id": "oO4pTApTvuXw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "TiuKYZuovzaI"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "xkE0Sy1Lv2Kw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## SMOTE and Random Under-Sampling Combined"
],
"metadata": {
"id": "zJtzN8ctUoIo"
}
},
{
"cell_type": "code",
"source": [
"# Resample the training data with a two-step hybrid: partial SMOTE, then random under-sampling\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN # A hybrid method\n",
"\n",
"\n",
"# Step 1: SMOTE grows the minority class only part of the way (to 50% of the majority count).\n",
"# With sampling_strategy='auto' SMOTE would already equalise the classes and the\n",
"# under-sampling step below would have nothing left to remove.\n",
"# NOTE(review): a float strategy needs a binary target whose minority/majority ratio is\n",
"# currently below 0.5 -- confirm against the y_train.value_counts() output above.\n",
"# Separate names are used so the SMOTE-only section's variables are not overwritten.\n",
"smote_partial = SMOTE(sampling_strategy=0.5, random_state=23)\n",
"X_train_over, y_train_over = smote_partial.fit_resample(X_train_scaled, y_train)\n",
"\n",
"# Step 2: randomly drop majority rows until both classes have the same count\n",
"under_sampler = RandomUnderSampler(sampling_strategy='auto', random_state=23)\n",
"X_train_combined, y_train_combined = under_sampler.fit_resample(X_train_over, y_train_over)\n",
"\n",
"# Train and evaluate your machine learning model using X_train_combined and y_train_combined\n",
"# Evaluate the model on X_test_scaled and y_test\n"
],
"metadata": {
"id": "DX0w3ww2UUFw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the count of each class in the resampled data\n",
"# (y_train_combined is the resampled target; y_train is the untouched training split)\n",
"print(y_train_combined.value_counts())"
],
"metadata": {
"id": "9YO5Ql4l_Bl0"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Visualization of target variable after resampling\n",
"\n",
"# The resampled target is passed directly as a vector; the original `data` frame is not\n",
"# supplied because it is the un-resampled dataset and is unrelated to these rows.\n",
"g = sns.countplot(x=y_train_combined, palette=\"muted\")\n",
"g.set_ylabel(\"Patients\", fontsize=14)\n",
"g.set_xlabel(\"Heart Disease\", fontsize=14)"
],
"metadata": {
"id": "v9-BQ1bpiN3i"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Model Training"
],
"metadata": {
"id": "71z03akwbnSu"
}
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"# Each sample is fed as a length-n_features sequence with a single channel\n",
"input_shape = (X_train_combined.shape[1], 1)\n",
"\n",
"# Feature extractor: two Conv1D -> BatchNorm -> MaxPool stages, then flatten\n",
"conv_layers = [\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"]\n",
"\n",
"# Classifier head: Dense -> BatchNorm -> Dropout(0.5) at each width, widest first\n",
"dense_layers = []\n",
"for units in (512, 256, 128, 64):\n",
"    dense_layers += [Dense(units=units, activation='relu'), BatchNormalization(), Dropout(0.5)]\n",
"\n",
"# A single sigmoid unit produces the binary-class probability\n",
"model = Sequential(conv_layers + dense_layers + [Dense(units=1, activation='sigmoid')])\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"#model.summary()\n"
],
"metadata": {
"id": "0rkXxtX4bqOH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keras carves the validation_split fraction off the END of the arrays before it shuffles,\n",
"# and imbalanced-learn under-samplers return the kept rows one class after the other.\n",
"# Shuffle once, reproducibly, so the validation slice contains both classes.\n",
"shuffle_idx = np.random.default_rng(42).permutation(len(y_train_combined))\n",
"X_fit_combined = np.asarray(X_train_combined)[shuffle_idx]\n",
"y_fit_combined = np.asarray(y_train_combined)[shuffle_idx]\n",
"\n",
"# Time the training run\n",
"start_time = time.time()\n",
"history = model.fit(X_fit_combined, y_fit_combined, epochs=50, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
],
"metadata": {
"id": "qcpsJFNQbyEw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "ZuYctwmBb0Rw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "FDlrN-l0b1JI"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Import the necessary libraries\n",
"import numpy as np\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Predicted probabilities for the held-out test set\n",
"y_pred = model.predict(X_test_scaled)\n",
"\n",
"# Convert predicted probabilities to binary labels (0 or 1)\n",
"y_pred_binary = (y_pred > 0.5).astype(int)\n",
"\n",
"# Compute the confusion matrix.\n",
"# scikit-learn convention: rows are the true labels, columns are the predicted labels.\n",
"cm = confusion_matrix(y_test, y_pred_binary)\n",
"\n",
"# Plot the confusion matrix. The heatmap draws rows on the y-axis and columns on the\n",
"# x-axis, so y is 'Actual' and x is 'Prediction'.\n",
"sns.heatmap(cm,\n",
"            annot=True,\n",
"            fmt='g',\n",
"            xticklabels=['Class 0','Class 1'],\n",
"            yticklabels=['Class 0','Class 1'])\n",
"plt.ylabel('Actual',fontsize=13)\n",
"plt.xlabel('Prediction',fontsize=13)\n",
"plt.title('Confusion Matrix',fontsize=17)\n",
"plt.show()"
],
"metadata": {
"id": "EEdrLiFpvdWz"
},
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "WXK1h7IS5ua0"
},
"outputs": [],
"source": [
"#Import Libraries\n",
"\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"%matplotlib inline\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.layers import Flatten\n",
"from sklearn.model_selection import train_test_split\n",
"import time\n",
"\n",
"\n",
"#Data Balancing libraries\n",
"from imblearn.under_sampling import NearMiss\n",
"from imblearn.over_sampling import ADASYN\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN #Hybrid method\n",
"\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, auc\n",
"\n",
"\n",
"#apply standardization\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"#Visual libraries\n",
"import seaborn as sns\n",
"\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import classification_report"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "MihfTY1_l4j0",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b804c129-a5f1-4902-e999-ff1efdcb41ba"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data = pd.read_csv('/content/drive/MyDrive/ML Model Attack/disease_preprocess4.csv')\n",
"data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 443
},
"id": "ZCZ6F-vSF187",
"outputId": "8d872c4c-5d4c-4f95-f427-eabedc8a70a2"
},
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" GeneralHealth Checkup Exercise HeartDisease Depression Diabetes \\\n",
"0 1 2 0 0 0 0 \n",
"1 5 1 0 1 0 1 \n",
"2 5 1 1 0 0 1 \n",
"3 1 1 1 1 0 1 \n",
"4 4 1 0 0 0 0 \n",
"... ... ... ... ... ... ... \n",
"308849 5 1 1 0 0 0 \n",
"308850 2 3 1 0 0 1 \n",
"308851 5 4 1 0 1 1 \n",
"308852 5 1 1 0 0 0 \n",
"308853 3 1 1 0 0 0 \n",
"\n",
" Arthritis Gender AgeCategory BMI SmokingHistory \\\n",
"0 1 1 10 -2.159696 1 \n",
"1 0 1 10 -0.051548 0 \n",
"2 0 1 8 0.742649 0 \n",
"3 0 0 11 0.015913 0 \n",
"4 0 0 12 -0.652562 1 \n",
"... ... ... ... ... ... \n",
"308849 0 0 1 0.064975 0 \n",
"308850 0 0 9 -1.095656 0 \n",
"308851 0 1 2 -0.603499 1 \n",
"308852 0 0 9 -0.750686 0 \n",
"308853 0 1 5 0.472806 0 \n",
"\n",
" AlcoholConsumption FriedPotatoConsumption \n",
"0 -0.621527 0.664502 \n",
"1 -0.621527 -0.267579 \n",
"2 -0.133707 1.130543 \n",
"3 -0.621527 0.198462 \n",
"4 -0.621527 -0.733620 \n",
"... ... ... \n",
"308849 -0.133707 -0.733620 \n",
"308850 0.354113 -0.267579 \n",
"308851 -0.133707 -0.267579 \n",
"308852 -0.255662 -0.733620 \n",
"308853 -0.499572 -0.617110 \n",
"\n",
"[308854 rows x 13 columns]"
],
"text/html": [
"\n",
" <div id=\"df-a1d33194-1690-4787-be5c-74cd87680864\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GeneralHealth</th>\n",
" <th>Checkup</th>\n",
" <th>Exercise</th>\n",
" <th>HeartDisease</th>\n",
" <th>Depression</th>\n",
" <th>Diabetes</th>\n",
" <th>Arthritis</th>\n",
" <th>Gender</th>\n",
" <th>AgeCategory</th>\n",
" <th>BMI</th>\n",
" <th>SmokingHistory</th>\n",
" <th>AlcoholConsumption</th>\n",
" <th>FriedPotatoConsumption</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-2.159696</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>0.664502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-0.051548</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.267579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>0.742649</td>\n",
" <td>0</td>\n",
" <td>-0.133707</td>\n",
" <td>1.130543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" <td>0.015913</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>0.198462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>12</td>\n",
" <td>-0.652562</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.733620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308849</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.064975</td>\n",
" <td>0</td>\n",
" <td>-0.133707</td>\n",
" <td>-0.733620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308850</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>-1.095656</td>\n",
" <td>0</td>\n",
" <td>0.354113</td>\n",
" <td>-0.267579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308851</th>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>-0.603499</td>\n",
" <td>1</td>\n",
" <td>-0.133707</td>\n",
" <td>-0.267579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308852</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>-0.750686</td>\n",
" <td>0</td>\n",
" <td>-0.255662</td>\n",
" <td>-0.733620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>308853</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0.472806</td>\n",
" <td>0</td>\n",
" <td>-0.499572</td>\n",
" <td>-0.617110</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>308854 rows × 13 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a1d33194-1690-4787-be5c-74cd87680864')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-a1d33194-1690-4787-be5c-74cd87680864 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-a1d33194-1690-4787-be5c-74cd87680864');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-9210d2ca-c514-4600-92ee-77a8cbe3ab65\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9210d2ca-c514-4600-92ee-77a8cbe3ab65')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-9210d2ca-c514-4600-92ee-77a8cbe3ab65 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" <div id=\"id_12c3ad94-eec6-462f-a894-ad4022869e86\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('data')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_12c3ad94-eec6-462f-a894-ad4022869e86 button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('data');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "data"
}
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fMhfrc507QwL"
},
"outputs": [],
"source": [
"#data = pd.read_csv('/content/disease_preprocess4.csv')"
]
},
{
"cell_type": "code",
"source": [
"data.head()"
],
"metadata": {
"id": "kY7U0J9yV4C_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 226
},
"outputId": "061675d3-8f50-4960-ff42-522b6a3f0591"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" GeneralHealth Checkup Exercise HeartDisease Depression Diabetes \\\n",
"0 1 2 0 0 0 0 \n",
"1 5 1 0 1 0 1 \n",
"2 5 1 1 0 0 1 \n",
"3 1 1 1 1 0 1 \n",
"4 4 1 0 0 0 0 \n",
"\n",
" Arthritis Gender AgeCategory BMI SmokingHistory \\\n",
"0 1 1 10 -2.159696 1 \n",
"1 0 1 10 -0.051548 0 \n",
"2 0 1 8 0.742649 0 \n",
"3 0 0 11 0.015913 0 \n",
"4 0 0 12 -0.652562 1 \n",
"\n",
" AlcoholConsumption FriedPotatoConsumption \n",
"0 -0.621527 0.664502 \n",
"1 -0.621527 -0.267579 \n",
"2 -0.133707 1.130543 \n",
"3 -0.621527 0.198462 \n",
"4 -0.621527 -0.733620 "
],
"text/html": [
"\n",
" <div id=\"df-75a792c9-a0f4-4a9f-bcf1-1922a4f72640\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GeneralHealth</th>\n",
" <th>Checkup</th>\n",
" <th>Exercise</th>\n",
" <th>HeartDisease</th>\n",
" <th>Depression</th>\n",
" <th>Diabetes</th>\n",
" <th>Arthritis</th>\n",
" <th>Gender</th>\n",
" <th>AgeCategory</th>\n",
" <th>BMI</th>\n",
" <th>SmokingHistory</th>\n",
" <th>AlcoholConsumption</th>\n",
" <th>FriedPotatoConsumption</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-2.159696</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>0.664502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-0.051548</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.267579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>0.742649</td>\n",
" <td>0</td>\n",
" <td>-0.133707</td>\n",
" <td>1.130543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" <td>0.015913</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>0.198462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>12</td>\n",
" <td>-0.652562</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.733620</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-75a792c9-a0f4-4a9f-bcf1-1922a4f72640')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-75a792c9-a0f4-4a9f-bcf1-1922a4f72640 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-75a792c9-a0f4-4a9f-bcf1-1922a4f72640');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-7759a549-9c9f-45b3-9086-1f56c2420319\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-7759a549-9c9f-45b3-9086-1f56c2420319')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-7759a549-9c9f-45b3-9086-1f56c2420319 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "data"
}
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"data.columns"
],
"metadata": {
"id": "PyUC2rneV_CL",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "05b924de-8276-4a83-cff9-d38fdf06c185"
},
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['GeneralHealth', 'Checkup', 'Exercise', 'HeartDisease', 'Depression',\n",
" 'Diabetes', 'Arthritis', 'Gender', 'AgeCategory', 'BMI',\n",
" 'SmokingHistory', 'AlcoholConsumption', 'FriedPotatoConsumption'],\n",
" dtype='object')"
]
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "F_PjX618F5l6",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "18446a4e-9119-4a39-a1cc-42a26b8caae6"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" GeneralHealth Checkup Exercise Depression Diabetes Arthritis \\\n",
"192601 5 1 1 0 0 0 \n",
"196337 5 1 0 0 0 0 \n",
"99888 5 1 1 0 0 0 \n",
"282228 4 3 1 0 0 0 \n",
"216188 3 1 1 0 0 0 \n",
"\n",
" Gender AgeCategory BMI SmokingHistory AlcoholConsumption \\\n",
"192601 1 5 -0.368920 1 0.841932 \n",
"196337 1 11 -0.333656 1 -0.499572 \n",
"99888 1 8 1.898681 0 -0.377617 \n",
"282228 0 11 0.728850 0 2.427347 \n",
"216188 0 7 0.314887 0 -0.621527 \n",
"\n",
" FriedPotatoConsumption \n",
"192601 1.596584 \n",
"196337 0.198462 \n",
"99888 0.198462 \n",
"282228 0.198462 \n",
"216188 -0.267579 \n",
"192601 0\n",
"196337 0\n",
"99888 0\n",
"282228 0\n",
"216188 0\n",
"Name: HeartDisease, dtype: int64\n"
]
}
],
"source": [
"# define target variable and features\n",
"\n",
"# Defining the features (X) and the target (y)\n",
"\n",
"X = data.drop('HeartDisease', axis=1) # Features\n",
"y = data['HeartDisease'] # Target variable\n",
"\n",
"# Performing the train-test split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape\n",
"\n",
"print(X_train.head())\n",
"print(y_train.head())"
]
},
{
"cell_type": "markdown",
"source": [
"### Perform Scaling"
],
"metadata": {
"id": "W-F1vvzXWN2B"
}
},
{
"cell_type": "code",
"source": [
"#apply standardization\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Create a StandardScaler instance\n",
"scaler = StandardScaler()\n",
"\n",
"\n",
"# Fit the scaler on the training data and transform it\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"\n",
"# Use the same scaler to transform the test data\n",
"X_test_scaled = scaler.transform(X_test)"
],
"metadata": {
"id": "Odb4wSf-WBJo"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the count of each class in the before resample data\n",
"print(y_train.value_counts())"
],
"metadata": {
"id": "FnYSi2n4WWLy",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6310d4c2-af03-46f5-fd00-9a6401ab42c0"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 227109\n",
"1 19974\n",
"Name: HeartDisease, dtype: int64\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### SMOTE and Random Combined"
],
"metadata": {
"id": "UQ2gbEX8Wn_K"
}
},
{
"cell_type": "code",
"source": [
"# Resample the training data\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"\n",
"\n",
"# Apply SMOTE to oversample the minority class\n",
"smote=SMOTE(sampling_strategy='auto', random_state=23)\n",
"X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)\n",
"\n",
"# Apply Random undersampling to the majority class\n",
"under_sampler = RandomUnderSampler(sampling_strategy='auto', random_state=23)\n",
"X_train_combined, y_train_combined = under_sampler.fit_resample(X_train_smote, y_train_smote)\n",
"\n",
"# Train and evaluate your machine learning model using X_train_combined and y_train_combined\n",
"# Evaluate the model on X_test_scaled and y_test"
],
"metadata": {
"id": "L6y3LLIWWY3R"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the count of each class in the resampled data\n",
"print(y_train_combined.value_counts())"
],
"metadata": {
"id": "JHUux-UyW4HS",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2a0b84f6-9d55-412f-eea7-d998d65d1954"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 227109\n",
"1 227109\n",
"Name: HeartDisease, dtype: int64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Visualization of target variable after resampling\n",
"\n",
"g = sns.countplot(x= y_train_combined,data=data, palette=\"muted\")\n",
"g.set_ylabel(\"Patients\", fontsize=14)\n",
"g.set_xlabel(\"Heart Disease\", fontsize=14)"
],
"metadata": {
"id": "APcWMC2SW-Vs",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 578
},
"outputId": "455400da-ae25-4322-c8ae-35ef2934a4d6"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"<ipython-input-12-42433e52f457>:3: FutureWarning: \n",
"\n",
"Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.\n",
"\n",
" g = sns.countplot(x= y_train_combined,data=data, palette=\"muted\")\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 0, 'Heart Disease')"
]
},
"metadata": {},
"execution_count": 12
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAloAAAG1CAYAAAAhoVogAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvSElEQVR4nO3de1RVdf7/8dcB5KIGaCZIotBVTcNCJbpYKl8PZs046YxZTXjJRpdaSurkVEDZjOWl1K+WM80Y+S1LrclSizIMLMUbZaWJk2ZCowevgKCCwv790WL/PIHXw0dAno+1zlru/Xnvz35zWsRr7b3P5zgsy7IEAACAGudV2w0AAABcqghaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCf2m6goauoqNCePXt02WWXyeFw1HY7AADgHFiWpSNHjigsLExeXqe/bkXQqmV79uxReHh4bbcBAAAuQF5enlq3bn3acYJWLbvssssk/fIfKjAwsJa7AQAA56KoqEjh4eH23/HTIWjVssrbhYGBgQQtAADqmbM99sPD8AAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIT613QAujgeSMmq7BaDOWfjcXbXdQo34z/TBtd0CUOdcNz61tluQxBUtAAAAYwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCkzgWtKVOmqGvXrrrsssvUsmVL9evXT9u3b3erOX78uEaNGqXLL79cTZs2Vf/+/ZWfn+9Wk5ubq759+6px48Zq2bKlJkyYoJMnT7rVZGRk6Oabb5afn5+uueYapaamVuln7ty5ioiIkL+/v2JiYrRhw4bz7gUAADRMdS5oZWZmatSoUVq3bp1WrlypEydOqHfv3iopKbFrxo0bp2XLlmnJkiXKzMzUnj17dN9999nj5eXl6tu3r8rKyrR27Vq98cYbSk1NVVJSkl2za9cu9e3bVz169NDmzZs1duxYPfLII/rkk0/smkWLFikxMVHJycn66quvFBUVJafTqX379p1zLwAAoOFyWJZl1XYTZ7J//361bNlSmZmZ6t69uwoLC3XFFVdo4cKFGjBggCQpJydH7du3V1ZWlm655RZ9/PHHuueee7Rnzx6FhIRIkubNm6c///nP2r9/v3x9ffXnP/9ZK1as0JYtW+xz3X///SooKFBaWpokKSYmRl27dtWcOXMkSRUVFQoPD9eYMWP05JNPnlMvZ1NUVKSgoCAVFhYqMDCwRt+7Uz2QlGFsbqC+WvjcXbXdQo34z/TBtd0CUOdcNz7V6Pzn+ve7zl3R+rXCwkJJUvPmzSVJ2dnZOnHihOLi4uyadu3aqU2bNsrKypIkZWVlqVOnTnbIkiSn06mioiJt3brVrjl1jsqayjnKysqUnZ3tVuPl5aW4uDi75lx6+bXS0lIVFRW5vQAAwKWpTgetiooKjR07Vrfddps6duwoSXK5XPL19VVwcLBbbUhIiFwul11zasiqHK8cO1NNUVGRjh07pgMHDqi8vLzamlPnOFsvvzZlyhQFBQXZr/Dw8HN8NwAAQH1Tp4PWqFGjtGXLFr3zzju13UqNmTRpkgoLC+1XXl5ebbcEAAAM8antBk5n9OjRWr58uVavXq3WrVvb+0NDQ1VWVq
aCggK3K0n5+fkKDQ21a3796cDKTwKeWvPrTwfm5+crMDBQAQEB8vb2lre3d7U1p85xtl5+zc/PT35+fufxTgAAgPqqzl3RsixLo0eP1vvvv69Vq1YpMjLSbTw6OlqNGjVSenq6vW/79u3Kzc1VbGysJCk2Nlbfffed26cDV65cqcDAQHXo0MGuOXWOyprKOXx9fRUdHe1WU1FRofT0dLvmXHoBAAANV527ojVq1CgtXLhQH3zwgS677DL7WaegoCAFBAQoKChIw4YNU2Jiopo3b67AwECNGTNGsbGx9qf8evfurQ4dOuiPf/yjpk6dKpfLpaefflqjRo2yryaNGDFCc+bM0cSJEzV06FCtWrVKixcv1ooVK+xeEhMTlZCQoC5duqhbt26aOXOmSkpKNGTIELuns/UCAAAarjoXtF599VVJ0l133eW2//XXX9fgwYMlSS+//LK8vLzUv39/lZaWyul06pVXXrFrvb29tXz5co0cOVKxsbFq0qSJEhIS9Nxzz9k1kZGRWrFihcaNG6dZs2apdevW+uc//ymn02nXDBw4UPv371dSUpJcLpc6d+6stLQ0twfkz9YLAABouOr8OlqXOtbRAmoP62gBly7W0QIAALjEEbQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhS54LW6tWrde+99yosLEwOh0NLly51Gx88eLAcDofbKz4+3q3m0KFDevDBBxUYGKjg4GANGzZMxcXFbjXffvut7rjjDvn7+ys8PFxTp06t0suSJUvUrl07+fv7q1OnTvroo4/cxi3LUlJSklq1aqWAgADFxcXphx9+qJk3AgAA1Ht1LmiVlJQoKipKc+fOPW1NfHy89u7da7/efvttt/EHH3xQW7du1cqVK7V8+XKtXr1ajz76qD1eVFSk3r17q23btsrOzta0adOUkpKif/zjH3bN2rVrNWjQIA0bNkxff/21+vXrp379+mnLli12zdSpUzV79mzNmzdP69evV5MmTeR0OnX8+PEafEcAAEB95VPbDfxanz591KdPnzPW+Pn5KTQ0tNqxbdu2KS0tTRs3blSXLl0kSf/7v/+ru+++W9OnT1dYWJjeeustlZWVaf78+fL19dUNN9ygzZs366WXXrID2axZsxQfH68JEyZIkiZPnqyVK1dqzpw5mjdvnizL0syZM/X000/rt7/9rSRpwYIFCgkJ0dKlS3X//fdX219paalKS0vt7aKiovN7gwAAQL1R565onYuMjAy1bNlS119/vUaOHKmDBw/aY1lZWQoODrZDliTFxcXJy8tL69evt2u6d+8uX19fu8bpdGr79u06fPiwXRMXF+d2XqfTqaysLEnSrl275HK53GqCgoIUExNj11RnypQpCgoKsl/h4eEevBMAAKAuq3dBKz4+XgsWLFB6erpefPFFZWZmqk+fPiovL5ckuVwutWzZ0u0YHx8fNW/eXC6Xy64JCQlxq6ncPlvNqeOnHlddTXUmTZqkws
JC+5WXl3dePz8AAKg/6tytw7M59ZZcp06ddOONN+rqq69WRkaGevXqVYudnRs/Pz/5+fnVdhsAAOAiqHdXtH7tqquuUosWLbRjxw5JUmhoqPbt2+dWc/LkSR06dMh+ris0NFT5+fluNZXbZ6s5dfzU46qrAQAADVu9D1o///yzDh48qFatWkmSYmNjVVBQoOzsbLtm1apVqqioUExMjF2zevVqnThxwq5ZuXKlrr/+ejVr1syuSU9PdzvXypUrFRsbK0mKjIxUaGioW01RUZHWr19v1wAAgIatzgWt4uJibd68WZs3b5b0y0PnmzdvVm5uroqLizVhwgStW7dOP/30k9LT0/Xb3/5W11xzjZxOpySpffv2io+P1/Dhw7VhwwatWbNGo0eP1v3336+wsDBJ0gMPPCBfX18NGzZMW7du1aJFizRr1iwlJibafTz++ONKS0vTjBkzlJOTo5SUFG3atEmjR4+WJDkcDo0dO1bPP/+8PvzwQ3333Xd6+OGHFRYWpn79+l3U9wwAANRNde4ZrU2bNqlHjx72dmX4SUhI0Kuvvqpvv/1Wb7zxhgoKChQWFqbevXtr8uTJbs89vfXWWxo9erR69eolLy8v9e/fX7Nnz7bHg4KC9Omnn2rUqFGKjo5WixYtlJSU5LbW1q233qqFCxfq6aef1l/+8hdde+21Wrp0qTp27GjXTJw4USUlJXr00UdVUFCg22+/XWlpafL39zf5FgEAgHrCYVmWVdtNNGRFRUUKCgpSYWGhAgMDjZ3ngaQMY3MD9dXC5+6q7RZqxH+mD67tFoA657rxqUbnP9e/33Xu1iEAAMClgqAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhngUtPLy8rRq1SodPXrU3ldRUaEXX3xRt912m+Li4rRixQqPmwQAAKiPPFqw9JlnntGyZcvkcrnsfX/961+VnJxsb2dmZmrt2rXq2rWrJ6cCAACodzy6orVmzRrFxcWpUaNGkiTLsjRnzhy1a9dOubm52rBhg5o0aaJp06bVSLMAAAD1iUdBa9++fWrbtq29vXnzZu3fv19jxoxR69at1aVLF/Xr108bN270uFEAAID6xqOgVVFRoYqKCns7IyNDDodDPXv2tPddeeWVbrcWAQAAGgqPglabNm20YcMGe3vp0qVq1aqVrr/+enufy+VScHCwJ6cBAAColzwKWv3799eaNWs0YMAAPfTQQ/ryyy/Vv39/t5rvv/9eV111lUdNAgAA1Ecefepw/Pjx+vTTT/Xvf/9bknTjjTcqJSXFHt+9e7c2bNigJ5980qMmAQAA6iOPglZgYKDWrVunLVu2SJLat28vb29vt5p///vf6tKliyenAQAAqJc8Clq5ubkKDg5Wx44dqx1v27atmjdvrsOHD3tyGgAAgHrJo2e0IiMjNXPmzDPWzJ49W5GRkZ6cBgAAoF7yKGhZllUjNQAAAJci418q/fPPP+uyyy4zfRoAAIA657yf0XruuefctjMyMqqtKy8vV15ent555x3dcsstF9QcAABAfXbeQevU5RscDocyMjJOG7YkKSwsTC+++OKF9AYAAFCvnXfQ+vzzzyX98uxVz549NXjwYCUkJFSp8/b2VvPmzdWuXTt5eRm/QwkAAFDnnHfQuvPOO+1/Jycnq0ePHurevXuNNgUAAHAp8GgdreTk5JrqAwAA4JLjUdCq5HK5lJ2drYKCApWXl1db8/DDD9fEqQAAAOoNj4LW8ePHNXz4cL3zzjuqqKiotsayLDkcDoIWAABocDwKWk8++aTeeustXXfddRo0aJBat24tH58auUgGAABQ73mUihYvXqwOHTooOztbfn5+NdUTAADAJcGjdRcKCgoUHx9PyAIAAKiGR0Hr+uuvV35+fk31AgAAcEnxKGhNmDBBH3zwgXbs2FFT/QAAAFwyPHpGq3Xr1nI6nerWrZvGjh2rm2++WYGBgdXWsqgpAABoaDwKWnfddZccDocsy1JKSo
ocDsdpa0+3vhYAAMClyqOglZSUdMZwBQAA0JB5FLRSUlJqqA0AAIBLj0cPwwMAAOD0amQZ96+//lpvv/22cnJydPToUX322WeSpN27d2v9+vWKi4tT8+bNa+JUAAAA9YbHQWvixImaMWOGLMuSJLdntizL0gMPPKAZM2bo8ccf9/RUAAAA9YpHtw5ff/11TZ8+Xffcc4++/fZbTZo0yW08IiJC3bp104cffuhRkwAAAPWRR1e0XnnlFbVv317vvfeefHx85OvrW6WmXbt29q1EAACAhsSjK1rff/+9/ud//kc+PqfPayEhIdq3b58npwEAAKiXPApaPj4+KisrO2PNnj171LRpU09OAwAAUC95FLQ6deqkVatWnXbV98pPIEZHR3tyGgAAgHrJo6A1dOhQ/ec//9GIESNUWlrqNlZUVKTBgwfL5XJp+PDhHjUJAABQH3n0MPzQoUP12Wef6V//+pcWLVqk4OBgSVK3bt20bds2lZSUaPDgwRowYEBN9AoAAFCveLwy/MKFC/X3v/9dkZGR+u9//yvLsrRp0ya1adNGr776qubPn18TfQIAANQ7NbIy/PDhwzV8+HAdO3ZMhw8fVmBgIA/AAwCABq9GglalgIAABQQE1OSUAAAA9RZfKg0AAGDIeV3Ruuqqq+RwOPTZZ58pMjJSV1111Tkd53A4tHPnzgtqEAAAoL46r6BVUVHh9qXRv94+ncovnAYAAGhIzito/fTTT2fcBgAAwP/HM1oAAACGeBS0evbsqQULFpyx5s0331TPnj09OQ0AAEC95FHQysjIOOvtw927dyszM9OT0wAAANRLxm8dlpSUqFGjRqZPAwAAUOec94Klubm5btsFBQVV9klSeXm58vLy9N577ykiIuKCGwQAAKivzjtoRURE2Es6OBwOzZo1S7NmzTptvWVZmjZt2oV3CAAAUE+dd9B6+OGH5XA4ZFmWFixYoKioKHXu3LlKnbe3t5o3b66ePXsqPj6+JnoFAACoV847aKWmptr/zszM1JAhQ/TYY4/VZE8AAACXBI++VHrXrl011QcAAMAlhwVLAQAADPHoipYkHTlyRHPmzNFnn32mPXv2qLS0tEoNXyoNAAAaIo+C1v79+3Xrrbdq586dCgwMVFFRkYKCglRWVqZjx45JksLCwlhHCwAANEge3TpMSUnRzp07tWDBAh0+fFiSNG7cOJWUlGj9+vXq1q2bIiIitHXr1hppFgAAoD7xKGh99NFH6tWrlx566CF7ba1KXbt21ccff6yffvpJzz77rEdNAgAA1EceBa29e/fqpptusre9vb3tW4aS1KxZM/Xp00eLFy8+5zlXr16te++9V2FhYXI4HFq6dKnbuGVZSkpKUqtWrRQQEKC4uDj98MMPbjWHDh3Sgw8+qMDAQAUHB2vYsGEqLi52q/n22291xx13yN/fX+Hh4Zo6dWqVXpYsWaJ27drJ399fnTp10kcffXTevQAAgIbLo6AVFBSkEydO2NvNmjXTzz//7FYTGBio/Pz8c56zpKREUVFRmjt3brXjU6dO1ezZszVv3jytX79eTZo0kdPp1PHjx+2aBx98UFu3btXKlSu1fPlyrV69Wo8++qg9XlRUpN69e6tt27bKzs7WtGnTlJKSon/84x92zdq1azVo0CANGzZMX3/9tfr166d+/fppy5Yt59ULAABouByWZVkXenBsbKxCQ0P1/vvvS5KcTqe++eYbbd26VZdffrmOHTumqKgoeXl5KScn5/ybczj0/vvvq1+/fpJ+uYIUFhamJ554QuPHj5ckFRYWKiQkRKmpqbr//vu1bds2dejQQRs3blSXLl0kSWlpabr77rv1888/KywsTK+++qqeeuopuVwu+fr6SpKefPJJLV261O5z4MCBKikp0fLly+1+brnlFnXu3Fnz5s07p17OReUHCAoLCxUYGHje79G5eiApw9jcQH218Lm7aruFGvGf6YNruwWgzrlufKrR+c/177dHV7R69+
6t9PR0HT16VJL0pz/9Sfv27VNUVJR+//vfq2PHjtq5c6cGDx7syWlsu3btksvlUlxcnL0vKChIMTExysrKkiRlZWUpODjYDlmSFBcXJy8vL61fv96u6d69ux2ypF9C4vbt2+2H+rOystzOU1lTeZ5z6aU6paWlKioqcnsBAIBLk0dBa+TIkXrttdfsoHXfffdp2rRpKikp0XvvvSeXy6XExERNmDChRpp1uVySpJCQELf9ISEh9pjL5VLLli3dxn18fNS8eXO3murmOPUcp6s5dfxsvVRnypQpCgoKsl/h4eFn+akBAEB9dUFBKysrSz179tS1116r4cOHa9CgQdqwYYMk6YknntCBAwe0d+9eFRcXa9q0afL29q7RpuuzSZMmqbCw0H7l5eXVdksAAMCQ8w5a3333nXr16qWMjAyVlJSouLhY6enp6tGjh71elre3t0JCQqos+eCp0NBQSarycH1+fr49Fhoaqn379rmNnzx5UocOHXKrqW6OU89xuppTx8/WS3X8/PwUGBjo9gIAAJem8w5aL7zwgo4fP24/TO5yufTMM8/o2LFjevHFF030aIuMjFRoaKjS09PtfUVFRVq/fr1iY2Ml/fKAfkFBgbKzs+2aVatWqaKiQjExMXbN6tWr3T4xuXLlSl1//fVq1qyZXXPqeSprKs9zLr0AAICG7byD1hdffKHbb79dkydPVsuWLdWyZUs9++yzuuOOO5SZmelxQ8XFxdq8ebM2b94s6ZeHzjdv3qzc3Fw5HA6NHTtWzz//vD788EN99913evjhhxUWFmZ/MrF9+/aKj4/X8OHDtWHDBq1Zs0ajR4/W/fffr7CwMEnSAw88IF9fXw0bNkxbt27VokWLNGvWLCUmJtp9PP7440pLS9OMGTOUk5OjlJQUbdq0SaNHj5akc+oFAAA0bOf9XYf5+fnVLl0QExNjf6rPE5s2bVKPHj3s7crwk5CQoNTUVE2cOFElJSV69NFHVVBQoNtvv11paWny9/e3j3nrrbc0evRo9erVS15eXurfv79mz55tjwcFBenTTz/VqFGjFB0drRYtWigpKcltra1bb71VCxcu1NNPP62//OUvuvbaa7V06VJ17NjRrjmXXgAAQMN13utoeXl5KSUlRUlJSW77n332WT333HMqLy+v0QYvdayjBdQe1tECLl2XxDpaAAAAOL3zvnUoSW+++abWrVvntm/Hjh2SpLvvvrtKvcPh0IoVKy7kVAAAAPXWBQWtHTt22MHq19LS0qrsq+llHgAAAOqD8w5au3btMtEHAADAJee8g1bbtm1N9AEAAHDJ4WF4AAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGBIvQtaKSkpcjgcbq927drZ48ePH9eoUaN0+eWXq2nTpurfv7/y8/Pd5sjNzVXfvn3VuHFjtWzZUhMmTNDJkyfdajIyMnTzzTfLz89P11xzjVJTU6v0MnfuXEVERMjf318xMTHasGGDkZ8ZAADUT/UuaEnSDTfcoL1799qvL7/80h4bN26cli
1bpiVLligzM1N79uzRfffdZ4+Xl5erb9++Kisr09q1a/XGG28oNTVVSUlJds2uXbvUt29f9ejRQ5s3b9bYsWP1yCOP6JNPPrFrFi1apMTERCUnJ+urr75SVFSUnE6n9u3bd3HeBAAAUOfVy6Dl4+Oj0NBQ+9WiRQtJUmFhof71r3/ppZdeUs+ePRUdHa3XX39da9eu1bp16yRJn376qb7//nu9+eab6ty5s/r06aPJkydr7ty5KisrkyTNmzdPkZGRmjFjhtq3b6/Ro0drwIABevnll+0eXnrpJQ0fPlxDhgxRhw4dNG/ePDVu3Fjz588/Y++lpaUqKipyewEAgEtTvQxaP/zwg8LCwnTVVVfpwQcfVG5uriQpOztbJ06cUFxcnF3brl07tWnTRllZWZKkrKwsderUSSEhIXaN0+lUUVGRtm7datecOkdlTeUcZWVlys7Odqvx8vJSXFycXXM6U6ZMUVBQkP0KDw/34J0AAAB1Wb0LWjExMUpNTVVaWppeffVV7dq1S3fccYeOHDkil8slX19fBQcHux0TEhIil8slSXK5XG4hq3K8cuxMNUVFRTp27JgOHDig8vLyamsq5zidSZMmqbCw0H7l5eWd93sAAADqB5/abuB89enTx/73jTfeqJiYGLVt21aLFy9WQEBALXZ2bvz8/OTn51fbbQAAgIug3l3R+rXg4GBdd9112rFjh0JDQ1VWVqaCggK3mvz8fIWGhkqSQkNDq3wKsXL7bDWBgYEKCAhQixYt5O3tXW1N5RwAAAD1PmgVFxdr586datWqlaKjo9WoUSOlp6fb49u3b1dubq5iY2MlSbGxsfruu+/cPh24cuVKBQYGqkOHDnbNqXNU1lTO4evrq+joaLeaiooKpaen2zUAAAD1LmiNHz9emZmZ+umnn7R27Vr97ne/k7e3twYNGqSgoCANGzZMiYmJ+vzzz5Wdna0hQ4YoNjZWt9xyiySpd+/e6tChg/74xz/qm2++0SeffKKnn35ao0aNsm/pjRgxQj/++KMmTpyonJwcvfLKK1q8eLHGjRtn95GYmKjXXntNb7zxhrZt26aRI0eqpKREQ4YMqZX3BQAA1D317hmtn3/+WYMGDdLBgwd1xRVX6Pbbb9e6det0xRVXSJJefvlleXl5qX///iotLZXT6dQrr7xiH+/t7a3ly5dr5MiRio2NVZMmTZSQkKDnnnvOromMjNSKFSs0btw4zZo1S61bt9Y///lPOZ1Ou2bgwIHav3+/kpKS5HK51LlzZ6WlpVV5QB4AADRcDsuyrNpuoiErKipSUFCQCgsLFRgYaOw8DyRlGJsbqK8WPndXbbdQI/4zfXBttwDUOdeNTzU6/7n+/a53tw4BAADqC4IWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADCFoAQAAGELQAgAAMISgBQAAYAhBCwAAwBCCFgAAgCEELQAAAEMIWgAAAIYQtAAAAAwhaAEAABhC0AIAADCEoAUAAGAIQQsAAMAQghYAAIAhBK0aMHfuXEVERMjf318xMTHasGFDbbcEAADqAIKWhxYtWqTExEQlJyfrq6++UlRUlJxOp/bt21fbrQEAgFpG0PLQSy+9pO
HDh2vIkCHq0KGD5s2bp8aNG2v+/Pm13RoAAKhlPrXdQH1WVlam7OxsTZo0yd7n5eWluLg4ZWVlVXtMaWmpSktL7e3CwkJJUlFRkdFeT5SWGJ0fqI9M/95dLMXHy2q7BaDOMf37XTm/ZVlnrCNoeeDAgQMqLy9XSEiI2/6QkBDl5ORUe8yUKVP07LPPVtkfHh5upEcAp/fu1NruAIAxz7x9UU5z5MgRBQUFnXacoHWRTZo0SYmJifZ2RUWFDh06pMsvv1wOh6MWO8PFUFRUpPDwcOXl5SkwMLC22wFQg/j9blgsy9KRI0cUFhZ2xjqClgdatGghb29v5efnu+3Pz89XaGhotcf4+fnJz8/PbV9wcLCpFlFHBQYG8j9i4BLF73fDcaYrWZV4GN4Dvr6+io6OVnp6ur2voqJC6enpio2NrcXOAABAXcAVLQ8lJiYqISFBXbp0Ubdu3TRz5kyVlJRoyJAhtd0aAACoZQQtDw0cOFD79+9XUlKSXC6XOnfurLS0tCoPyAPSL7eOk5OTq9w+BlD/8fuN6jiss30uEQAAABeEZ7QAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELuEjmzp2riIgI+fv7KyYmRhs2bKjtlgDUgNWrV+vee+9VWFiYHA6Hli5dWtstoQ4haAEXwaJFi5SYmKjk5GR99dVXioqKktPp1L59+2q7NQAeKikpUVRUlObOnVvbraAOYnkH4CKIiYlR165dNWfOHEm/fINAeHi4xowZoyeffLKWuwNQUxwOh95//33169evtltBHcEVLcCwsrIyZWdnKy4uzt7n5eWluLg4ZWVl1WJnAADTCFqAYQcOHFB5eXmVbwsICQmRy+Wqpa4AABcDQQsAAMAQghZgWIsWLeTt7a38/Hy3/fn5+QoNDa2lrgAAFwNBCzDM19dX0dHRSk9Pt/dVVFQoPT1dsbGxtdgZAMA0n9puAGgIEhMTlZCQoC5duqhbt26aOXOmSkpKNGTIkNpuDYCHiouLtWPHDnt7165d2rx5s5o3b642bdrUYmeoC1jeAbhI5syZo2nTpsnlcqlz586aPXu2YmJiarstAB7KyMhQjx49quxPSEhQamrqxW8IdQpBCwAAwBCe0QIAADCEoAUAAGAIQQsAAMAQghYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACgFoUERGhiIiI2m4DgCEELQA16qeffpLD4VB8fPxpazIyMuRwODRixIiL2NnZ3XXXXXI4HBd8XOWrUaNGuvzyy9W5c2cNGzZMaWlpqqioMNAxgLqOL5UGgBryxBNPqGnTpqqoqFBBQYG2bdumt956S/Pnz9ett96qt99+u8qXDKenp9dStwAuBoIWANSQ8ePHKzQ01G3fgQMH9Nhjj+ntt9+W0+nUpk2b1KRJE3v86quvvthtAriIuHUIoE45cuSIkpOTdcMNNyggIEDBwcFyOp368ssvq9RmZ2dr9OjR6tixo4KCghQQEKBOnTrphRde0IkTJ6rUVz4PVVBQoNGjRys8PFw+Pj5KTU2Vw+FQZmamJLndBhw8eLBHP0+LFi305ptvqmfPnsrJydHcuXOr7elUx48f14wZMxQVFaWgoCA1adJEERER+sMf/qBvvvmmyjk++OAD9erVS82aNZO/v786duyo6dOnq7y83K2usLBQL774ou68806FhYXJ19dXYWFhevjhh7Vz584q85rqA2hIuKIFoM44dOiQunfvrq1bt+q2227TiBEjVFRUpA8++EA9evTQkiVL1K9fP7v+tdde07Jly9S9e3fdfffdOnr0qDIyMjRp0iRt3LhR7733XpVzlJaWqmfPniouLtZvfvMb+fj4KCQkRMnJyUpNTdXu3buVnJxs13fu3Nnjn8vLy0tPPfWUVq1apUWLFmnixIlnrE9ISNDixYt14403asiQIfLz81NeXp4+//xzbdy4UVFRUXbtpEmT9MILL+jKK6/Ufffdp6CgIH3xxReaMG
GC1q9fryVLlti127ZtU1JSknr06KHf/e53atKkiXJycrRw4UKtWLFCX331ldq2bWu8D6BBsQCgBu3atcuSZF199dVWcnJyta+EhARLkvWnP/3J7dgHHnjAkmS99tprbvvz8/Ot8PBw64orrrCOHTtm79+9e7d18uRJt9qKigpr6NChliTryy+/dBtr27atJclyOp3W0aNHq/R+5513Whfyv8XK4/bu3XvamuPHj1s+Pj6Wl5eXdeLECbee2rZta28XFBRYDofDio6OrvKznTx50jp8+LC9/emnn9o/T3Fxsb2/oqLCGjFihCXJevfdd93mPnjwYJXeVq1aZXl5eVmPPPLIRekDaEgIWgBqVGXQOpfXqUFr//79lre3t9WzZ89q5509e7YlyVq2bNlZe8jOzrYkWSkpKW77K4PWN998U+1xJoOWZVlWSEiIJcnKz8936+nUoFVYWGhJsm677TaroqLijPP95je/sSRZu3fvrjJWGZT69+9/Tj9Dp06drIiIiFrvA7jUcOsQgBFOp1NpaWnVjmVkZKhHjx5u+zZu3Kjy8nKVlpYqJSWlyjE//PCDJCknJ0f33HOPJKmsrExz5szRO++8o5ycHBUXF8uyLPuYPXv2VJnH399fnTp1utAfy7jAwEDdfffd+uijj3TzzTfr97//ve666y517dpVjRo1cqtdt26dmjRpovnz51c7V0BAgHJyctz2ZWRkaObMmVq/fr0OHDigkydP2mO+vr4XrQ+goSBoAagTDh06JElas2aN1qxZc9q6kpIS+98DBgzQsmXLdN1112ngwIFq2bKlGjVqpIKCAs2aNUulpaVVjm/ZsuUFrZXlqdLSUh08eFDe3t5q3rz5GWuXLFmiv/3tb1q4cKGeeuopSb8EnyFDhuhvf/ubGjduLOmX9+zkyZN69tlnTzvXqe/XkiVLNHDgQDVt2lROp1MRERFq3LixHA6H/XzaxegDaEgIWgDqhMDAQEm/rEU1ffr0s9Zv3LhRy5Ytk9Pp1IoVK+Tt7W2PrVu3TrNmzar2uNoIWdIvAfLkyZOKjo6Wj8+Z/9fbuHFjPf/883r++ee1a9cuff7555o3b55mzZqlY8eO6e9//7ukX94zh8OhAwcOnFMPKSkp8vf3V3Z2tq699lq3sXfeeeei9QE0JCzvAKBO6Nq1qxwOh7Kyss6pvnI5gr59+7qFLEn64osvLqiHynlqejmCiooK/fWvf5UkDRo06LyOjYyM1NChQ5WZmammTZvqww8/tMdiYmJ08OBB+7bq2ezcuVPt27evErL27t2rH3/88aL1ATQkBC0AdUJoaKj+8Ic/aO3atZo2bZrbs1aV1q9fr6NHj0qSvQzBr9fX2rp1q6ZMmXJBPVTe0svLy7ug46tz4MABPfTQQ1q1apU6dOigkSNHnrF+//792rJlS5X9hw8fVmlpqfz9/e19jz32mCRp6NChOnjwYJVjXC6Xtm3bZm+3bdtWO3bsUH5+vr3v+PHjGjlyZJV1x0z2ATQk3DoEUGe88sor2r59uyZOnKj/+7//U2xsrIKDg5WXl6dNmzbphx9+0N69e9W4cWN169ZN3bp10+LFi7V3717dcsstys3N1Ycffqi+ffvq3XffPe/z9+zZU++++6769++vPn36yN/fX1FRUbr33nvP6fjp06fbX8FTVFSk77//Xl988YWOHz+u2267TW+//bb9XNPp/Pe//9VNN92kqKgo3Xjjjbryyit18OBBffDBBzpx4oTGjx9v18bHx+uZZ57R5MmTdc011yg+Pl5t27bVwYMHtWPHDn3xxRd6/vnn1b59e0nSmDFjNGbMGN10000aMGCATp48qZUrV8qyLEVFRbktQmqyD6BBqeVPPQK4xFQu7+B0Ok9b8/nnn1e7jpZlWdbRo0etqVOnWtHR0VaTJk2sgIAAKzIy0urXr5+1YMECtzWo9u3bZw0dOtQKCwuz/P39rU6dOllz5861fvzxR0uSlZCQ4Db3r5dS+L
UTJ05YEydOtNq0aWP5+PhUO0d1Kpd3qHz5+PhYzZo1s6KioqyhQ4daaWlpVnl5ebXH/rqnw4cPWykpKVb37t2tVq1aWb6+vlZYWJgVHx9vffzxx9XOsXLlSuvee++1rrjiCqtRo0ZWaGioFRsba02ePNnKzc216yoqKqx58+ZZN9xwg+Xv72+FhoZaw4YNs/bt21dlaQuTfQANicOyqrk+DwAAAI/xjBYAAIAhBC0AAABDCFoAAACGELQAAAAMIWgBAAAYQtACAAAwhKAFAABgCEELAADAEIIWAACAIQQtAAAAQwhaAAAAhhC0AAAADPl/2iXrrdB670YAAAAASUVORK5CYII=\n"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"### 1D CNN Model Training"
],
"metadata": {
"id": "JtTol09RXES7"
}
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "0rkXxtX4bqOH"
},
"outputs": [],
"source": [
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization\n",
"from tensorflow.keras.optimizers import Adam\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"\n",
"# Every sample is fed to the network as a sequence of features with one channel\n",
"input_shape = (X_train_combined.shape[1], 1)\n",
"\n",
"# Declare the whole layer stack in a single list\n",
"model = Sequential([\n",
"    # Convolutional feature extractor: Conv1D -> BatchNorm -> MaxPool, twice\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"    # Fully connected head: Dense -> BatchNorm -> Dropout with shrinking widths\n",
"    Dense(units=512, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=256, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=128, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=64, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    # Single sigmoid unit for the binary target\n",
"    Dense(units=1, activation='sigmoid'),\n",
"])\n",
"\n",
"# Compile the model\n",
"model.compile(\n",
"    optimizer=Adam(learning_rate=0.0001),\n",
"    loss='binary_crossentropy',\n",
"    metrics=['accuracy'],\n",
")\n",
"#model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "qcpsJFNQbyEw",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "cebd6cc9-4046-4ac6-d2b6-27923f93b188"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/12\n",
"11356/11356 - 218s - loss: 0.5716 - accuracy: 0.7132 - val_loss: 0.6844 - val_accuracy: 0.6402 - 218s/epoch - 19ms/step\n",
"Epoch 2/12\n",
"11356/11356 - 214s - loss: 0.4923 - accuracy: 0.7541 - val_loss: 0.6181 - val_accuracy: 0.7370 - 214s/epoch - 19ms/step\n",
"Epoch 3/12\n",
"11356/11356 - 220s - loss: 0.4848 - accuracy: 0.7584 - val_loss: 0.6424 - val_accuracy: 0.7148 - 220s/epoch - 19ms/step\n",
"Epoch 4/12\n",
"11356/11356 - 206s - loss: 0.4808 - accuracy: 0.7612 - val_loss: 0.6594 - val_accuracy: 0.7013 - 206s/epoch - 18ms/step\n",
"Epoch 5/12\n",
"11356/11356 - 212s - loss: 0.4790 - accuracy: 0.7616 - val_loss: 0.6522 - val_accuracy: 0.6773 - 212s/epoch - 19ms/step\n",
"Epoch 6/12\n",
"11356/11356 - 207s - loss: 0.4767 - accuracy: 0.7633 - val_loss: 0.6203 - val_accuracy: 0.7317 - 207s/epoch - 18ms/step\n",
"Epoch 7/12\n",
"11356/11356 - 212s - loss: 0.4747 - accuracy: 0.7649 - val_loss: 0.6222 - val_accuracy: 0.7131 - 212s/epoch - 19ms/step\n",
"Epoch 8/12\n",
"11356/11356 - 203s - loss: 0.4734 - accuracy: 0.7660 - val_loss: 0.6149 - val_accuracy: 0.7224 - 203s/epoch - 18ms/step\n",
"Epoch 9/12\n",
"11356/11356 - 212s - loss: 0.4706 - accuracy: 0.7672 - val_loss: 0.6341 - val_accuracy: 0.6976 - 212s/epoch - 19ms/step\n",
"Epoch 10/12\n",
"11356/11356 - 214s - loss: 0.4687 - accuracy: 0.7686 - val_loss: 0.6373 - val_accuracy: 0.6918 - 214s/epoch - 19ms/step\n",
"Epoch 11/12\n",
"11356/11356 - 213s - loss: 0.4670 - accuracy: 0.7692 - val_loss: 0.5775 - val_accuracy: 0.7357 - 213s/epoch - 19ms/step\n",
"Epoch 12/12\n",
"11356/11356 - 204s - loss: 0.4662 - accuracy: 0.7695 - val_loss: 0.5884 - val_accuracy: 0.7209 - 204s/epoch - 18ms/step\n",
"Execution time: 2537.3050854206085 seconds\n"
]
}
],
"source": [
"# Keras carves `validation_split` off the END of the arrays, before any shuffling, and the\n",
"# resampled arrays are not guaranteed to arrive shuffled. Hold out a shuffled, stratified\n",
"# 20% instead so the validation metrics cover both classes.\n",
"X_fit, X_val, y_fit, y_val = train_test_split(\n",
"    X_train_combined, y_train_combined,\n",
"    test_size=0.2, stratify=y_train_combined, random_state=42,\n",
")\n",
"\n",
"start_time = time.time()\n",
"history = model.fit(X_fit, y_fit, epochs=12, validation_data=(X_val, y_val), verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
]
},
{
"cell_type": "code",
"source": [
"original_model_accuracy = model.evaluate(X_test_scaled, y_test)[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
],
"metadata": {
"id": "khGgZyQKXDcl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7c56e378-7928-44ff-94c9-9688236408f7"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 18s 9ms/step - loss: 0.3928 - accuracy: 0.8003\n",
"Original Model Accuracy: 0.8003432154655457\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_scaled) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "4d3HH7iKXR4i",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b88873fa-e99f-445e-e516-0ffcd23f1b72"
},
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 17s 9ms/step\n",
"[[46087 10687]\n",
" [ 1646 3351]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.97 0.81 0.88 56774\n",
" 1 0.24 0.67 0.35 4997\n",
"\n",
" accuracy 0.80 61771\n",
" macro avg 0.60 0.74 0.62 61771\n",
"weighted avg 0.91 0.80 0.84 61771\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Apply Boundary Attack"
],
"metadata": {
"id": "Q45IY3Gogh-J"
}
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"def identify_binary_features(data, threshold=0.05):\n",
"    \"\"\"Flag low-cardinality feature columns.\n",
"\n",
"    A column is flagged when (number of distinct values / number of rows) <= threshold.\n",
"    Despite the name, this marks ANY low-cardinality column (e.g. an ordinal category),\n",
"    not only strictly two-valued ones.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : array-like of shape (n_samples, n_features)\n",
"        NumPy array, or anything np.asarray can convert (e.g. a DataFrame).\n",
"    threshold : float, default 0.05\n",
"        Largest distinct-value ratio for which a column is still flagged.\n",
"\n",
"    Returns\n",
"    -------\n",
"    np.ndarray of bool, shape (n_features,)\n",
"        True for every flagged column.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If data is not 2-D.\n",
"    \"\"\"\n",
"    data = np.asarray(data)\n",
"    if data.ndim != 2:\n",
"        raise ValueError('data must be 2-D: (n_samples, n_features)')\n",
"\n",
"    num_of_samples, num_of_features = data.shape\n",
"    if num_of_samples == 0:\n",
"        # No rows: the ratio below would divide by zero, and nothing can be inferred\n",
"        return np.zeros(num_of_features, dtype=bool)\n",
"\n",
"    unique_counts = np.array(\n",
"        [len(np.unique(data[:, feature_idx])) for feature_idx in range(num_of_features)]\n",
"    )\n",
"    return (unique_counts / num_of_samples) <= threshold"
],
"metadata": {
"id": "f1vY41X8ghWJ"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"def compute_norm(x, x_adv):\n",
"    \"\"\"Return the L2 norm (Frobenius norm for 2-D input) of ``x_adv - x``.\"\"\"\n",
"    return np.linalg.norm(x_adv - x)\n",
"\n",
"def sel_direction(x, x_adv, x_adv_p):\n",
"    \"\"\"Pick the sign of the next step.\n",
"\n",
"    Returns 1 when the probe ``x_adv_p`` is closer to ``x`` than ``x_adv`` is,\n",
"    -1 when it is farther away, and 0 when both are equally distant.\n",
"    \"\"\"\n",
"    norm1 = compute_norm(x, x_adv)\n",
"    norm2 = compute_norm(x, x_adv_p)\n",
"    if norm2 > norm1:\n",
"        direction = -1\n",
"    elif norm2 < norm1:\n",
"        direction = 1\n",
"    else:\n",
"        direction = 0\n",
"    return direction\n",
"\n",
"def _hard_labels(scores, threshold=0.5):\n",
"    \"\"\"Turn raw model outputs into a 1-D array of predicted class labels.\"\"\"\n",
"    scores = np.asarray(scores)\n",
"    if scores.ndim > 1 and scores.shape[-1] > 1:\n",
"        # One score per class: take the most likely class\n",
"        return np.argmax(scores, axis=-1)\n",
"    # Single sigmoid column: threshold the probability\n",
"    return (scores.reshape(-1) > threshold).astype(int)\n",
"\n",
"def boundary_attack_tabular(model, x, y, max_iterations=50, step_size=0.1, epsilon=0.01, seed=None):\n",
"    \"\"\"Perturb tabular samples until the model misclassifies them.\n",
"\n",
"    Every row starts as ``x`` plus Gaussian noise and is then moved along that noise\n",
"    direction in steps scaled by ``epsilon``. After each step the rows are clipped to the\n",
"    per-feature range observed in ``x`` and re-scored; a row stops moving as soon as its\n",
"    predicted label differs from ``y``. The step sign and length are computed from norms\n",
"    over the whole batch, so they are shared by all rows.\n",
"\n",
"    NOTE(review): this starts from a noisy copy of ``x`` rather than from an already\n",
"    adversarial point - confirm this simplification of the boundary attack is intended.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : object with ``predict(array)`` returning one sigmoid column or per-class scores\n",
"    x : array-like of shape (n_samples, n_features)\n",
"    y : array-like of shape (n_samples,) with class labels, or one-hot (n_samples, n_classes)\n",
"    max_iterations : int\n",
"        Upper bound on the number of update / re-score rounds.\n",
"    step_size : float\n",
"        Standard deviation of the initial Gaussian perturbation.\n",
"    epsilon : float\n",
"        Scale of each update step.\n",
"    seed : int or None\n",
"        Seed for the perturbation; None draws fresh entropy on every call.\n",
"\n",
"    Returns\n",
"    -------\n",
"    np.ndarray with the same shape as ``x``\n",
"        The adversarial candidates; rows that never flipped hold their last position.\n",
"    \"\"\"\n",
"    x = np.asarray(x, dtype=float)\n",
"    y_true = np.asarray(y)\n",
"    if y_true.ndim > 1 and y_true.shape[-1] > 1:\n",
"        y_true = np.argmax(y_true, axis=-1)\n",
"    else:\n",
"        y_true = y_true.reshape(-1)\n",
"    rng = np.random.default_rng(seed)\n",
"\n",
"    binary_mask = identify_binary_features(x)\n",
"\n",
"    # Draw the initial perturbation separately for flagged and unflagged columns\n",
"    binary_perturbations = rng.normal(0, step_size, size=x.shape) * binary_mask\n",
"    continuous_perturbations = rng.normal(0, step_size, size=x.shape) * np.logical_not(binary_mask)\n",
"    total_perturbations = binary_perturbations + continuous_perturbations\n",
"\n",
"    perturbation_norm = np.linalg.norm(total_perturbations)\n",
"    if perturbation_norm == 0:\n",
"        # Empty input or step_size == 0: there is no direction to move in\n",
"        return x.copy()\n",
"    p_normalized = total_perturbations / perturbation_norm  # constant across iterations\n",
"\n",
"    # The features are not guaranteed to live in [0, 1] (e.g. standardized data), so clip\n",
"    # each column to the range observed in x; this bounds two-valued columns as well.\n",
"    feature_min = x.min(axis=0)\n",
"    feature_max = x.max(axis=0)\n",
"\n",
"    x_adv = x + total_perturbations  # Initialize x_adv with the perturbed version of x\n",
"    active = np.ones(x.shape[0], dtype=bool)  # rows the model still classifies correctly\n",
"\n",
"    for _ in range(max_iterations):\n",
"        distance = compute_norm(x, x_adv)\n",
"        magnitude = distance * p_normalized\n",
"\n",
"        # The direction of adjustment (closer, away, or stay)\n",
"        direction = sel_direction(x, x_adv, x_adv + epsilon * (x - x_adv) + magnitude)\n",
"\n",
"        candidate = np.clip(x_adv + direction * (epsilon * magnitude), feature_min, feature_max)\n",
"        # Rows that already fool the model keep their current position\n",
"        x_adv = np.where(active[:, None], candidate, x_adv)\n",
"\n",
"        # Re-score only the rows that are still being attacked, one label per row\n",
"        active_idx = np.flatnonzero(active)\n",
"        adv_labels = _hard_labels(model.predict(x_adv[active_idx]))\n",
"        active[active_idx] = adv_labels == y_true[active_idx]\n",
"\n",
"        if not active.any():\n",
"            break  # every row is misclassified\n",
"\n",
"    # If misclassification doesn't occur for a row, its last recorded position is returned\n",
"    return x_adv\n"
],
"metadata": {
"id": "btabcjsiglLZ"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"source": [
"X_test_adv = boundary_attack_tabular(model, X_test_scaled, y_test)"
],
"metadata": {
"id": "z-Y93kPngx8Q",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "06dd9d17-00b3-4289-e03e-870839742f22"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 10s 5ms/step\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Calculate the accuracy of the model on the adversarial examples\n",
"adv_accuracy = model.evaluate(X_test_adv, y_test)"
],
"metadata": {
"id": "gK7LNnpPg0gp",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1305f556-f100-424c-fa8f-f6296b2e15e4"
},
"execution_count": 25,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 10s 5ms/step - loss: 12.9580 - accuracy: 0.0809\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Compare accuracies pre and post attack, both measured on the scaled test features\n",
"# (the model was trained on scaled inputs, so the raw X_test is not a valid baseline)\n",
"org_accuracy = model.evaluate(X_test_scaled, y_test)\n",
"\n",
"print(\"Comparing Accuracy Pre and Post Boundary Attack\")\n",
"print(\"Pre Attack: \" ,org_accuracy[1])\n",
"print(\"Post Attack: \",adv_accuracy[1])"
],
"metadata": {
"id": "ZuV-pwBbg3Bv",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "9b9780a0-9ba5-49c2-a47d-f0f3dc25b128"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 10s 5ms/step - loss: 0.9550 - accuracy: 0.6878\n",
"Comparing Accuracy Pre and Post Boundary Attack\n",
"Pre Attack: 0.8003432154655457\n",
"Post Attack: 0.08094413578510284\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"y_pred = model.predict(X_test_adv) > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
],
"metadata": {
"id": "7VFKr5c6g5j2",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4707d3c8-190f-41e3-eaf5-95c6957f6803"
},
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1931/1931 [==============================] - 10s 5ms/step\n",
"[[ 3 56771]\n",
" [ 0 4997]]\n",
" precision recall f1-score support\n",
"\n",
" 0 1.00 0.00 0.00 56774\n",
" 1 0.08 1.00 0.15 4997\n",
"\n",
" accuracy 0.08 61771\n",
" macro avg 0.54 0.50 0.07 61771\n",
"weighted avg 0.93 0.08 0.01 61771\n",
"\n"
]
}
]
}
]
}
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WXK1h7IS5ua0"
},
"outputs": [],
"source": [
"#Import Libraries\n",
"\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"%matplotlib inline\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.layers import Flatten\n",
"from sklearn.model_selection import train_test_split\n",
"import time\n",
"\n",
"\n",
"#Data Balancing libraries\n",
"from imblearn.under_sampling import NearMiss\n",
"from imblearn.over_sampling import ADASYN\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN #Hybrid method\n",
"\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, auc\n",
"\n",
"\n",
"#apply standardization\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"#Visual libraries\n",
"import seaborn as sns\n",
"\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import classification_report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MihfTY1_l4j0",
"outputId": "0560e2d4-429b-411c-eca1-c57ad70d1fff"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZCZ6F-vSF187"
},
"outputs": [],
"source": [
"data = pd.read_csv('/content/drive/MyDrive/ML Model Attack/disease_preprocess4.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fMhfrc507QwL"
},
"outputs": [],
"source": [
"data = pd.read_csv('/content/disease_preprocess4.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 226
},
"id": "JryGjddVC8xL",
"outputId": "49d6a37b-6b92-445f-b115-f4989ec40ef4"
},
"outputs": [
{
"data": {
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "data"
},
"text/html": [
"\n",
" <div id=\"df-f7ee6c0e-29e6-47d6-bfb8-ab66e14a21cf\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GeneralHealth</th>\n",
" <th>Checkup</th>\n",
" <th>Exercise</th>\n",
" <th>HeartDisease</th>\n",
" <th>Depression</th>\n",
" <th>Diabetes</th>\n",
" <th>Arthritis</th>\n",
" <th>Gender</th>\n",
" <th>AgeCategory</th>\n",
" <th>BMI</th>\n",
" <th>SmokingHistory</th>\n",
" <th>AlcoholConsumption</th>\n",
" <th>FriedPotatoConsumption</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-2.159696</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>0.664502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>-0.051548</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.267579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>0.742649</td>\n",
" <td>0</td>\n",
" <td>-0.133707</td>\n",
" <td>1.130543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" <td>0.015913</td>\n",
" <td>0</td>\n",
" <td>-0.621527</td>\n",
" <td>0.198462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>12</td>\n",
" <td>-0.652562</td>\n",
" <td>1</td>\n",
" <td>-0.621527</td>\n",
" <td>-0.733620</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f7ee6c0e-29e6-47d6-bfb8-ab66e14a21cf')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-f7ee6c0e-29e6-47d6-bfb8-ab66e14a21cf button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-f7ee6c0e-29e6-47d6-bfb8-ab66e14a21cf');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-f5949bf6-8493-4bae-90ed-a325b814bc5c\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f5949bf6-8493-4bae-90ed-a325b814bc5c')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-f5949bf6-8493-4bae-90ed-a325b814bc5c button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"text/plain": [
" GeneralHealth Checkup Exercise HeartDisease Depression Diabetes \\\n",
"0 1 2 0 0 0 0 \n",
"1 5 1 0 1 0 1 \n",
"2 5 1 1 0 0 1 \n",
"3 1 1 1 1 0 1 \n",
"4 4 1 0 0 0 0 \n",
"\n",
" Arthritis Gender AgeCategory BMI SmokingHistory \\\n",
"0 1 1 10 -2.159696 1 \n",
"1 0 1 10 -0.051548 0 \n",
"2 0 1 8 0.742649 0 \n",
"3 0 0 11 0.015913 0 \n",
"4 0 0 12 -0.652562 1 \n",
"\n",
" AlcoholConsumption FriedPotatoConsumption \n",
"0 -0.621527 0.664502 \n",
"1 -0.621527 -0.267579 \n",
"2 -0.133707 1.130543 \n",
"3 -0.621527 0.198462 \n",
"4 -0.621527 -0.733620 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "F_PjX618F5l6"
},
"outputs": [],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Jkmi2N0aC8nZ",
"outputId": "1df3449b-b629-4a79-fc10-e54d1b6b27a6"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" GeneralHealth Checkup Exercise Depression Diabetes Arthritis \\\n",
"192601 5 1 1 0 0 0 \n",
"196337 5 1 0 0 0 0 \n",
"99888 5 1 1 0 0 0 \n",
"282228 4 3 1 0 0 0 \n",
"216188 3 1 1 0 0 0 \n",
"\n",
" Gender AgeCategory BMI SmokingHistory AlcoholConsumption \\\n",
"192601 1 5 -0.368920 1 0.841932 \n",
"196337 1 11 -0.333656 1 -0.499572 \n",
"99888 1 8 1.898681 0 -0.377617 \n",
"282228 0 11 0.728850 0 2.427347 \n",
"216188 0 7 0.314887 0 -0.621527 \n",
"\n",
" FriedPotatoConsumption \n",
"192601 1.596584 \n",
"196337 0.198462 \n",
"99888 0.198462 \n",
"282228 0.198462 \n",
"216188 -0.267579 \n",
"192601 0\n",
"196337 0\n",
"99888 0\n",
"282228 0\n",
"216188 0\n",
"Name: HeartDisease, dtype: int64\n"
]
}
],
"source": [
"# Define the features (X) and the target (y)\n",
"X = data.drop('HeartDisease', axis=1)  # Features\n",
"y = data['HeartDisease']  # Target variable\n",
"\n",
"# Performing the train-test split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"# A bare expression in the middle of a cell is evaluated and discarded, so print the shapes\n",
"print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)\n",
"\n",
"print(X_train.head())\n",
"print(y_train.head())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E2WufmJdz8g_"
},
"source": [
"## Perform Scaling"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y3FJTGSrUJIh"
},
"outputs": [],
"source": [
"#apply standardization\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Create a StandardScaler instance\n",
"scaler = StandardScaler()\n",
"\n",
"\n",
"# Fit the scaler on the training data and transform it\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"\n",
"# Use the same scaler to transform the test data\n",
"X_test_scaled = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HZThz9by_GfH",
"outputId": "160a500a-073e-40ab-ec29-9e981f26a1b9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HeartDisease\n",
"0 227109\n",
"1 19974\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"# Print the count of each class in the before resample data\n",
"print(y_train.value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zJtzN8ctUoIo"
},
"source": [
"## SMOTE and Random Combined"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DX0w3ww2UUFw"
},
"outputs": [],
"source": [
"# Resample the training data\n",
"\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN # A hybrid method\n",
"\n",
"\n",
"# Apply SMOTE to oversample the minority class\n",
"smote=SMOTE(sampling_strategy='auto', random_state=23)\n",
"X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)\n",
"\n",
"# Apply undersampling to the majority class\n",
"under_sampler = RandomUnderSampler(sampling_strategy='auto', random_state=23)\n",
"X_train_combined, y_train_combined = under_sampler.fit_resample(X_train_smote, y_train_smote)\n",
"\n",
"# Train and evaluate your machine learning model using X_train_combined and y_train_combined\n",
"# Evaluate the model on X_test_scaled and y_test\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9YO5Ql4l_Bl0"
},
"outputs": [],
"source": [
"# Print the count of each class in the resampled data\n",
"# (wrapped in a Series so this works whether the resampler returned a Series or an array)\n",
"print(pd.Series(y_train_combined).value_counts())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "v9-BQ1bpiN3i"
},
"outputs": [],
"source": [
"# Visualization of target variable after resampling\n",
"\n",
"# Plot the resampled labels directly as a vector; no `data=` frame is involved\n",
"g = sns.countplot(x=y_train_combined, palette=\"muted\")\n",
"g.set_ylabel(\"Patients\", fontsize=14)\n",
"g.set_xlabel(\"Heart Disease\", fontsize=14)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "71z03akwbnSu"
},
"source": [
"### Model Training"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0rkXxtX4bqOH"
},
"outputs": [],
"source": [
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization\n",
"from tensorflow.keras.optimizers import Adam\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"\n",
"# Every sample is fed to the network as a sequence of features with one channel\n",
"input_shape = (X_train_combined.shape[1], 1)\n",
"\n",
"# Declare the whole layer stack in a single list\n",
"model = Sequential([\n",
"    # Convolutional feature extractor: Conv1D -> BatchNorm -> MaxPool, twice\n",
"    Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Conv1D(filters=256, kernel_size=3, activation='relu'),\n",
"    BatchNormalization(),\n",
"    MaxPooling1D(pool_size=2),\n",
"    Flatten(),\n",
"    # Fully connected head: Dense -> BatchNorm -> Dropout with shrinking widths\n",
"    Dense(units=512, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=256, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=128, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    Dense(units=64, activation='relu'),\n",
"    BatchNormalization(),\n",
"    Dropout(0.5),\n",
"    # Single sigmoid unit for the binary target\n",
"    Dense(units=1, activation='sigmoid'),\n",
"])\n",
"\n",
"# Compile the model\n",
"model.compile(\n",
"    optimizer=Adam(learning_rate=0.0001),\n",
"    loss='binary_crossentropy',\n",
"    metrics=['accuracy'],\n",
")\n",
"#model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qcpsJFNQbyEw"
},
"outputs": [],
"source": [
"# Keras' validation_split takes the LAST 20% of the arrays as-is (before any\n",
"# shuffling), and the resampled training set is not randomly ordered, so that\n",
"# tail would not be representative of both classes.\n",
"# Draw an explicit shuffled, stratified validation set instead.\n",
"# NOTE(review): the validation set still contains resampled rows; hold out a\n",
"# validation split before resampling if an unbiased estimate is needed.\n",
"X_fit, X_val, y_fit, y_val = train_test_split(\n",
"    X_train_combined, y_train_combined,\n",
"    test_size=0.2, stratify=y_train_combined, random_state=23)\n",
"\n",
"start_time = time.time()\n",
"history = model.fit(X_fit, y_fit, epochs=10, validation_data=(X_val, y_val), verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZuYctwmBb0Rw"
},
"outputs": [],
"source": [
"# Score the trained network on the test set; index 1 of the evaluate() result\n",
"# is the accuracy metric requested at compile time (index 0 is the loss).\n",
"test_metrics = model.evaluate(X_test_scaled, y_test)\n",
"original_model_accuracy = test_metrics[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FDlrN-l0b1JI"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix , classification_report\n",
"\n",
"# Threshold the model outputs at 0.5 to obtain hard class predictions\n",
"test_probabilities = model.predict(X_test_scaled)\n",
"y_pred = test_probabilities > 0.5\n",
"\n",
"# Print the confusion matrix, then the per-class classification report\n",
"for summarize in (confusion_matrix, classification_report):\n",
"    print(summarize(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "EEdrLiFpvdWz"
},
"outputs": [],
"source": [
"#Import the necessary libraries\n",
"import numpy as np\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Predicted probabilities for the test set\n",
"y_pred = model.predict(X_test_scaled)\n",
"\n",
"# Convert predicted probabilities to binary labels (0 or 1)\n",
"y_pred_binary = (y_pred > 0.5).astype(int)\n",
"\n",
"# Compute the confusion matrix: rows are the true labels, columns the predictions\n",
"cm = confusion_matrix(y_test, y_pred_binary)\n",
"\n",
"# Plot the confusion matrix. The heatmap draws matrix rows along the y-axis and\n",
"# columns along the x-axis, so y is the actual class and x the predicted class.\n",
"sns.heatmap(cm,\n",
"            annot=True,\n",
"            fmt='g',\n",
"            xticklabels=['Class 0','Class 1'],\n",
"            yticklabels=['Class 0','Class 1'])\n",
"plt.ylabel('Actual',fontsize=13)\n",
"plt.xlabel('Prediction',fontsize=13)\n",
"plt.title('Confusion Matrix',fontsize=17)\n",
"plt.show()"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"gpuType": "V28",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment