#Import Libraries
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from sklearn.model_selection import train_test_split
import time
#Data Balancing libraries
from imblearn.under_sampling import NearMiss
from imblearn.over_sampling import ADASYN
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN #Hybrid method
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, auc
#apply standardization
from sklearn.preprocessing import StandardScaler
#Visual libraries
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from google.colab import drive
"execution_count": null,
Mounted at /content/drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')
"cell_type": "code",
"execution_count": null,
# Load the dataset CSV from the mounted Drive folder into a DataFrame.
# NOTE(review): the file name suggests the data is already preprocessed — confirm.
data = pd.read_csv('/content/drive/MyDrive/ML Model Attack/disease_preprocess4.csv')
"cell_type": "code",
"execution_count": null,
# Read a CSV with the same file name directly from /content.
# Running this rebinds `data`, replacing the frame loaded from the Drive path.
data = pd.read_csv('/content/disease_preprocess4.csv')
"cell_type": "code",
"execution_count": null,
" GeneralHealth Checkup Exercise HeartDisease Depression Diabetes \\\n",
"0 1 2 0 0 0 0 \n",
"1 5 1 0 1 0 1 \n",
"2 5 1 1 0 0 1 \n",
"3 1 1 1 1 0 1 \n",
"4 4 1 0 0 0 0 \n",
" Arthritis Gender AgeCategory BMI SmokingHistory \\\n",
"0 1 1 10 -2.159696 1 \n",
"1 0 1 10 -0.051548 0 \n",
"2 0 1 8 0.742649 0 \n",
"3 0 0 11 0.015913 0 \n",
"4 0 0 12 -0.652562 1 \n",
" AlcoholConsumption FriedPotatoConsumption \n",
"0 -0.621527 0.664502 \n",
"1 -0.621527 -0.267579 \n",
"2 -0.133707 1.130543 \n",
"3 -0.621527 0.198462 \n",
"4 -0.621527 -0.733620 "
# Preview the first rows to sanity-check the loaded columns and values.
data.head()
"execution_count": null,
# (number of rows, number of columns) of the loaded frame.
data.shape
"execution_count": null,
" GeneralHealth Checkup Exercise Depression Diabetes Arthritis \\\n",
"192601 5 1 1 0 0 0 \n",
"196337 5 1 0 0 0 0 \n",
"99888 5 1 1 0 0 0 \n",
"282228 4 3 1 0 0 0 \n",
"216188 3 1 1 0 0 0 \n",
" Gender AgeCategory BMI SmokingHistory AlcoholConsumption \\\n",
"192601 1 5 -0.368920 1 0.841932 \n",
"196337 1 11 -0.333656 1 -0.499572 \n",
"99888 1 8 1.898681 0 -0.377617 \n",
"282228 0 11 0.728850 0 2.427347 \n",
"216188 0 7 0.314887 0 -0.621527 \n",
" FriedPotatoConsumption \n",
"192601 1.596584 \n",
"196337 0.198462 \n",
"99888 0.198462 \n",
"282228 0.198462 \n",
"216188 -0.267579 \n",
"192601 0\n",
"196337 0\n",
"99888 0\n",
"282228 0\n",
"216188 0\n",
"Name: HeartDisease, dtype: int64\n"
"# Separate the label column from the predictors\n",
"target_column = 'HeartDisease'\n",
"y = data[target_column]  # label to predict\n",
"X = data.drop(columns=[target_column])  # every remaining column is a feature\n",
"# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible\n",
"split_parts = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train, X_test, y_train, y_test = split_parts\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape\n",
"cell_type": "markdown",
## Perform Scaling
"cell_type": "code",
"execution_count": null,
"# Standardize the features using statistics learned from the training split only\n",
"from sklearn.preprocessing import StandardScaler\n",
"scaler = StandardScaler()\n",
"# Learn the per-column mean and standard deviation from the training rows\n",
"scaler.fit(X_train)\n",
"# Apply those statistics to the training rows\n",
"X_train_scaled = scaler.transform(X_train)\n",
"# Reuse the same fitted scaler for the test rows so both splits share one scale\n",
"X_test_scaled = scaler.transform(X_test)"
"cell_type": "code",
"0 227109\n",
"1 19974\n",
"Name: count, dtype: int64\n"
# Print how many training labels fall in each class before any resampling is applied.
print(y_train.value_counts())
"cell_type": "markdown",
## SMOTE and Random Combined
"cell_type": "code",
"# Rebalance the training data in two stages: SMOTE grows the minority class,\n",
"# then random under-sampling trims the majority class down to match it.\n",
"from imblearn.over_sampling import SMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from imblearn.combine import SMOTEENN # A hybrid method\n",
"# With sampling_strategy='auto' SMOTE alone already equalises the two classes, which\n",
"# leaves the under-sampler nothing to remove. Explicit ratios make both stages count.\n",
"SMOTE_MINORITY_RATIO = 0.5  # minority/majority ratio after over-sampling\n",
"UNDER_MINORITY_RATIO = 1.0  # minority/majority ratio after under-sampling (balanced)\n",
"# Stage 1: synthesise minority samples until the minority is half the majority size\n",
"smote = SMOTE(sampling_strategy=SMOTE_MINORITY_RATIO, random_state=23)\n",
"X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)\n",
"# Stage 2: randomly drop majority samples until both classes have the same size\n",
"under_sampler = RandomUnderSampler(sampling_strategy=UNDER_MINORITY_RATIO, random_state=23)\n",
"X_train_combined, y_train_combined = under_sampler.fit_resample(X_train_smote, y_train_smote)\n",
"# Only the training split is resampled; X_test_scaled / y_test are left as they were\n"
"cell_type": "code",
"# Print the count of each class in the resampled data\n",
"# Wrapping in pd.Series keeps this working whether the resampler returned a Series or an array\n",
"print(pd.Series(y_train_combined).value_counts())"
"cell_type": "code",
"# Visualization of target variable after resampling\n",
"# The resampled labels are passed directly as a vector, so no `data=` frame is given:\n",
"# `data` is the un-resampled frame and does not contain this vector.\n",
"g = sns.countplot(x=y_train_combined, palette=\"muted\")\n",
"g.set_ylabel(\"Patients\", fontsize=14)\n",
"g.set_xlabel(\"Heart Disease\", fontsize=14)"
"cell_type": "markdown",
### Model Training
"cell_type": "code",
"# Build a 1D-CNN binary classifier: two convolution blocks feed a stack of dense layers\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization\n",
"from tensorflow.keras.optimizers import Adam\n",
"from tensorflow.keras.losses import BinaryCrossentropy\n",
"# Each sample is treated as a sequence of n_features steps with a single channel\n",
"input_shape = (X_train_combined.shape[1], 1)\n",
"model = Sequential()\n",
"# Add Convolutional layers\n",
"model.add(Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=input_shape))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"model.add(Conv1D(filters=256, kernel_size=3, activation='relu'))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"# Collapse the (steps, filters) feature map into one vector per sample. Without this the\n",
"# Dense layers act on every step separately and the model emits one value per step\n",
"# instead of a single probability per sample.\n",
"model.add(Flatten())\n",
"# Add Dense layers\n",
"model.add(Dense(units=512, activation='relu'))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"model.add(Dense(units=256, activation='relu'))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"model.add(Dense(units=128, activation='relu'))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"model.add(Dense(units=64, activation='relu'))\n",
"model.add(BatchNormalization()) # Add batch normalization\n",
"# Single sigmoid unit: the output is the predicted probability of the positive class\n",
"model.add(Dense(units=1, activation='sigmoid'))\n",
"# Compile the model\n",
"model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])\n",
"cell_type": "code",
"# Train the network and report the wall-clock training time.\n",
"# Keras takes the validation_split rows from the END of the arrays, before shuffling,\n",
"# and the resampled arrays come back grouped by class, so shuffle once up front;\n",
"# otherwise the validation slice would consist of a single class.\n",
"shuffle_order = np.random.default_rng(23).permutation(len(y_train_combined))\n",
"X_fit = np.asarray(X_train_combined)[shuffle_order]\n",
"y_fit = np.asarray(y_train_combined)[shuffle_order]\n",
"start_time = time.time()\n",
"history = model.fit(X_fit, y_fit, epochs=10, validation_split=0.2, verbose=2)\n",
"end_time = time.time()\n",
"execution_time = end_time - start_time\n",
"print(\"Execution time:\", execution_time, \"seconds\")"
"cell_type": "code",
"# Score the trained network on the held-out test split\n",
"test_loss_and_metrics = model.evaluate(X_test_scaled, y_test)\n",
"# Position 0 is the loss; position 1 is the accuracy metric the model was compiled with\n",
"original_model_accuracy = test_loss_and_metrics[1]\n",
"print(\"Original Model Accuracy:\", original_model_accuracy)"
"cell_type": "code",
"# Confusion matrix and per-class report on the test split at a 0.5 decision threshold\n",
"from sklearn.metrics import confusion_matrix , classification_report\n",
"test_probabilities = model.predict(X_test_scaled)\n",
"# Boolean predictions: True where the predicted probability exceeds 0.5\n",
"y_pred = test_probabilities > 0.5\n",
"print(confusion_matrix(y_test, y_pred))\n",
"print(classification_report(y_test, y_pred))"
"cell_type": "code",
"#Import the necessary libraries\n",
"import numpy as np\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"y_pred = model.predict(X_test_scaled) # Replace 'model' with your trained model\n",
"# Convert predicted probabilities to binary labels (0 or 1)\n",
"y_pred_binary = (y_pred > 0.5).astype(int)\n",
"#compute the confusion matrix.\n",
"cm = confusion_matrix(y_test,y_pred_binary)\n",
"#Plot the confusion matrix.\n",
"# Draw `cm` as an annotated heatmap; fmt='g' prints the cell counts as plain numbers\n",
"sns.heatmap(cm,\n",
"            annot=True,\n",
"            fmt='g',\n",
"            xticklabels=['Class 0','Class 1'],\n",
"            yticklabels=['Class 0','Class 1'])\n",
"plt.title('Confusion Matrix',fontsize=17)\n",
