Commit 726d1656 authored by Savindhya Bandara

Brendel_and_bethge_defensive_distillation

parent 8f15763b
Pipeline #7322 failed
# -*- coding: utf-8 -*-
"""Brendel and Bethge Defensive Distillation.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/19eFeJhzXHAmenx5EiwCUoSTiHy1t1GIy
"""
# Import necessary libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import time
from google.colab import drive
drive.mount('/content/drive')
# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/Research/Dataset/disease_preprocess.csv')
X = data.drop('HeartDisease', axis=1).values  # .values converts the DataFrame to a NumPy array
y = data['HeartDisease'].values
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Add a channel dimension so the inputs match the 1D CNN's expected shape (samples, features, 1)
X_train_scaled = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
X_test_scaled = X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1)
# Load the pre-trained 1D CNN model
model = load_model('/content/drive/MyDrive/Research/Models/1D_CNN_model_Final_1.h5')
def brendel_bethge_attack(model, x_test, y_test, epsilon=0.1, iterations=100, alpha=0.02):
    """Simplified decision-based attack: keep random perturbations only when they
    flip the model's prediction, shrinking the noise scale every iteration."""
    x_adv = x_test.copy()  # Start from copies of the original inputs
    for i in range(iterations):
        # Draw a random perturbation
        perturbation = np.random.normal(loc=0.0, scale=epsilon, size=x_test.shape)
        x_temp = x_adv + perturbation  # Temporarily add the noise
        preds = model.predict(x_temp, verbose=0)  # Predict on the perturbed inputs
        preds_class = (preds > 0.5).astype(int)  # Binary classification with a sigmoid output
        mask = preds_class.flatten() != y_test  # Samples where the perturbation flipped the prediction
        # Keep only the changes that successfully fooled the model
        x_adv[mask] = x_temp[mask]
        # Gradually reduce epsilon to fine-tune the adversarial examples
        epsilon *= (1 - alpha)
    return x_adv
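# For reference: the function above keeps only random perturbations that happen to
# flip the prediction, a simplified stand-in for the Brendel & Bethge boundary
# attack. The full gradient-based attack is implemented in the Foolbox library.
# A minimal sketch, assuming `foolbox` and `numba` are installed and that the
# classifier outputs one logit per class (two units), unlike the single-sigmoid
# model above; this function is illustrative and not part of the original notebook.
def foolbox_brendel_bethge_attack(two_logit_model, x, y, steps=1000):
    import foolbox as fb  # assumed installed; only needed for this sketch
    fmodel = fb.TensorFlowModel(two_logit_model, bounds=(float(x.min()), float(x.max())))
    attack = fb.attacks.L2BrendelBethgeAttack(steps=steps)
    x_adv, _, success = attack(fmodel,
                               tf.constant(x, dtype=tf.float32),
                               tf.constant(y, dtype=tf.int64),
                               epsilons=None)
    return x_adv.numpy(), success.numpy()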
# Evaluate the clean model on original test data
original_accuracy = accuracy_score(y_test, (model.predict(X_test_scaled) > 0.5).astype(int))
print("Original Model Accuracy: ", original_accuracy)
# Generate adversarial examples
X_test_adv = brendel_bethge_attack(model, X_test_scaled, y_test)
# Evaluate the model on the adversarial examples
y_pred_adv = (model.predict(X_test_adv) > 0.5).astype(int)
adv_accuracy = accuracy_score(y_test, y_pred_adv.flatten())  # Flatten so the shape matches y_test
print("Model Accuracy on Adversarial Examples: ", adv_accuracy)
# Train the teacher model
teacher_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
teacher_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
teacher_model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.1)
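# Optional check (not in the original notebook): the teacher's own test accuracy,
# as a reference point before distilling its knowledge into the student.
teacher_test_accuracy = teacher_model.evaluate(X_test_scaled, y_test, verbose=0)[1]
print("Teacher Model Test Accuracy: ", teacher_test_accuracy)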
# Generate soft labels using the teacher model
soft_labels = teacher_model.predict(X_train_scaled)
# Train the student model using soft labels
student_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
student_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
student_model.fit(X_train_scaled, soft_labels, epochs=10, batch_size=32, validation_split=0.1)
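# The student above is trained directly on the teacher's sigmoid outputs. Classic
# defensive distillation (Papernot et al.) instead softens the outputs with a
# temperature T applied to the logits during teacher and student training. A
# minimal sketch, assuming the same architecture as above; build_distillation_model
# and TEMPERATURE are illustrative names that are not part of the original notebook.
TEMPERATURE = 20.0

def build_distillation_model(n_features, temperature):
    inputs = Input(shape=(n_features, 1))
    x = Conv1D(64, kernel_size=3, activation='relu')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)
    logits = Dense(1)(x)  # raw logit, no activation
    # Divide the logit by the temperature before the sigmoid to soften the output
    outputs = tf.keras.layers.Lambda(lambda z: tf.sigmoid(z / temperature))(logits)
    return Model(inputs, outputs)
# Usage (sketch): train a teacher and a student built this way at TEMPERATURE,
# fit the student on the teacher's softened predictions, and predict with the
# plain sigmoid (temperature 1) at test time.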
# Evaluate the student model on the original and adversarial examples
original_distilled_accuracy = accuracy_score(y_test, (student_model.predict(X_test_scaled) > 0.5).astype(int))
X_test_adv_defended = brendel_bethge_attack(student_model, X_test_scaled, y_test)
adv_distilled_accuracy = accuracy_score(y_test, (student_model.predict(X_test_adv_defended) > 0.5).astype(int))
print("Distilled Model Accuracy on Original Data: ", original_distilled_accuracy)
print("Distilled Model Accuracy on Adversarial Data: ", adv_distilled_accuracy)
# Comparing accuracies pre and post attack, and post-defense
distill_data_metrics = student_model.evaluate(X_test_scaled, y_test)
print("Comparing Accuracies")
print("Pre Attack: ", original_accuracy)
print("Post Attack: ", adv_accuracy)
print("Post Defense - Defensive Distillation: ", distill_data_metrics[1])