R24-102 · Commit 726d1656
Authored Sep 08, 2024 by Savindhya Bandara

Brendel_and_bethge_defensive_distillation

Parent: 8f15763b
Pipeline #7322 failed with stages
1 changed file with 116 additions and 0 deletions

Brendel_and_bethge_defensive_distillation.py (new file, 0 → 100644, +116 −0)
# -*- coding: utf-8 -*-
"""Brendel and Bethge Defensive Distillation.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/19eFeJhzXHAmenx5EiwCUoSTiHy1t1GIy
"""
# Import necessary libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import time

from google.colab import drive

drive.mount('/content/drive')
# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/Research/Dataset/disease_preprocess.csv')
X = data.drop('HeartDisease', axis=1).values  # Use .values to get a NumPy array
y = data['HeartDisease'].values
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Load your model
model = load_model('/content/drive/MyDrive/Research/Models/1D_CNN_model_Final_1.h5')
def brendel_bethge_attack(model, x_test, y_test, epsilon=0.1, iterations=100, alpha=0.02):
    x_adv = x_test.copy()  # Start with copies of the original inputs
    for i in range(iterations):
        # Introduce a random perturbation (epsilon is in units of the
        # standardized feature space, since the inputs were scaled above)
        perturbation = np.random.normal(loc=0.0, scale=epsilon, size=x_test.shape)
        x_temp = x_adv + perturbation  # Temporarily add noise
        preds = model.predict(x_temp)  # Make predictions on the modified inputs
        preds_class = (preds > 0.5).astype(int)  # Assuming binary classification with a sigmoid output
        mask = preds_class.flatten() != y_test  # Identify where the attack changed the prediction
        # Only keep changes that successfully fooled the model
        x_adv[mask] = x_temp[mask]
        # Gradually reduce epsilon to fine-tune the adversarial examples
        epsilon *= (1 - alpha)
    return x_adv
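# Note: the search above is a random-perturbation heuristic rather than the
# decision-based boundary attack of Brendel & Bethge. For reference, a hedged
# sketch of the published attack via the third-party foolbox library is shown
# commented out below; it assumes foolbox is installed and that the model
# exposes a two-logit class axis (foolbox's misclassification criterion takes
# an argmax over classes, so a single sigmoid unit would need a two-column head):
#
# import foolbox as fb
# fmodel = fb.TensorFlowModel(model, bounds=(float(X_test_scaled.min()),
#                                            float(X_test_scaled.max())))
# attack = fb.attacks.L2BrendelBethgeAttack()
# _, x_adv_bb, success = attack(fmodel,
#                               tf.convert_to_tensor(X_test_scaled, tf.float32),
#                               tf.convert_to_tensor(y_test, tf.int64),
#                               epsilons=None)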
# Evaluate the clean model on original test data
# (assumes the saved model accepts 2-D (samples, features) input; the Conv1D
# teacher and student defined below need the 3-D reshape applied later)
original_accuracy = accuracy_score(y_test, (model.predict(X_test_scaled) > 0.5).astype(int))
print("Original Model Accuracy: ", original_accuracy)
# Generate adversarial examples
X_test_adv = brendel_bethge_attack(model, X_test_scaled, y_test)
# Evaluate the model on adversarial examples
y_pred_adv = (model.predict(X_test_adv) > 0.5).astype(int)
adv_accuracy = accuracy_score(y_test, y_pred_adv.flatten())  # Flatten if needed depending on the shape of y_test
print("Adversarial Model Accuracy: ", adv_accuracy)
# Train the teacher model
teacher_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
teacher_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
# Reshape the inputs to (samples, features, 1) for the Conv1D layers
X_train_scaled = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
X_test_scaled = X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1)
teacher_model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.1)
# Generate soft labels using the teacher model
soft_labels = teacher_model.predict(X_train_scaled)
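# Classic defensive distillation (Papernot et al.) softens the teacher's
# outputs with a temperature T before training the student. A minimal sketch,
# assuming a hypothetical temperature T that is not part of the original
# pipeline; illustrative only, the training below keeps the raw soft_labels:
T = 20.0
probs = np.clip(soft_labels, 1e-7, 1 - 1e-7)       # avoid log(0) at the extremes
logits = np.log(probs / (1 - probs))               # invert the sigmoid to recover logits
soft_labels_T = 1.0 / (1.0 + np.exp(-logits / T))  # re-squash at temperature T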
# Train the student model using soft labels
student_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
student_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
student_model.fit(X_train_scaled, soft_labels, epochs=10, batch_size=32, validation_split=0.1)
# Evaluate the student model on the original and adversarial examples
original_distilled_accuracy = accuracy_score(y_test, (student_model.predict(X_test_scaled) > 0.5).astype(int))
X_test_adv_defended = brendel_bethge_attack(student_model, X_test_scaled, y_test)
adv_distilled_accuracy = accuracy_score(y_test, (student_model.predict(X_test_adv_defended) > 0.5).astype(int))
print("Distilled Model Accuracy on Original Data: ", original_distilled_accuracy)
print("Distilled Model Accuracy on Adversarial Data: ", adv_distilled_accuracy)
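# The check above re-attacks the student directly (an adaptive attack). A
# common complementary measurement is a transfer attack: score the student on
# the adversarial examples crafted against the undefended model. A minimal
# sketch (X_test_adv was built before the reshape, so it is reshaped here):
transfer_preds = (student_model.predict(X_test_adv.reshape(-1, X_test_adv.shape[1], 1)) > 0.5).astype(int)
print("Distilled Model Accuracy on Transferred Adversarial Data: ",
      accuracy_score(y_test, transfer_preds.flatten()))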
# Compare accuracies pre-attack, post-attack, and post-defense
distill_data_metrics = student_model.evaluate(X_test_scaled, y_test)
print("Comparing Accuracies")
print("Pre Attack: ", original_accuracy)
print("Post Attack: ", adv_accuracy)
print("Post Defense - Defensive Distillation: ", distill_data_metrics[1])  # index 1 is the 'accuracy' metric