Commit 808fd2ea authored by Dinushe Jayasekera

Merge branch 'Dinushe' into 'master'

DL model

See merge request 2021-005/2021-005!6
parents e96023e6 8df08fde
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.utils import np_utils
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random
import warnings
warnings.filterwarnings('ignore')
# In[2]:
# Read Data
data = pd.read_csv('ASDmeta.csv')
data.head(5)
# In[3]:
data.shape
# In[4]:
# Get data over 3 seconds long
valid_data = data[data['end'] - data['start'] >= 3][['slice_file_name', 'fold', 'classID', 'class']]
valid_data.shape
# In[5]:
# Example of a children_playing spectrogram
# duration=2.97 s at librosa's default sr=22050 and hop_length=512 gives 128 frames,
# so each mel spectrogram (128 mel bands) comes out 128 x 128
y, sr = librosa.load('audio/fold5/100263-2-0-137.wav', duration=2.97)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
ps.shape
# In[6]:
librosa.display.specshow(ps, y_axis='mel', x_axis='time')
# In[7]:
### For one audio file: mean MFCC feature vector (defined here, not used below)
def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features
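# In[ ]:
# Usage sketch (added for illustration, not in the original notebook): apply
# features_extractor to the fold5 example clip used above.
example_mfccs = features_extractor('audio/fold5/100263-2-0-137.wav')
print(example_mfccs.shape)  # expected: (40,), one averaged value per MFCC coefficient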
# In[39]:
import os
from tqdm import tqdm #to see progress
#audio_dataset_path='D:/4thYear/Research/AudioClassifier(ETA)/UrbanSound8K/audio'
extracted_features = []
for index_num, row in tqdm(valid_data.iterrows()):
    y, sr = librosa.load('audio/fold' + str(row["fold"]) + '/' + row["slice_file_name"], duration=2.97)
    # If a speed-augmented copy (created in the cells below) exists, extract
    # features from it as well; loading the augmented file here is the assumed
    # intent -- the original cell recomputed features on the same clip twice.
    aug_path = 'audio/fold' + str(row["fold"]) + '/speed_107/' + row["slice_file_name"]
    if os.path.exists(aug_path):
        y_aug, sr_aug = librosa.load(aug_path, duration=2.97)
        ps_aug = librosa.feature.melspectrogram(y=y_aug, sr=sr_aug)
        if ps_aug.shape == (128, 128):
            extracted_features.append((ps_aug, row.classID))
    # Skip clips whose spectrogram is not exactly 128 x 128
    ps = librosa.feature.melspectrogram(y=y, sr=sr)
    if ps.shape != (128, 128):
        continue
    extracted_features.append((ps, row.classID))
# In[41]:
print("Number of samples: ", len(extracted_features))
# In[10]:
dataset = extracted_features
random.shuffle(dataset)
train = dataset[:1500]  # fixed split: first 1500 shuffled samples for training
test = dataset[1500:]
X_train, y_train = zip(*train)
X_test, y_test = zip(*test)
# Reshape for CNN input
X_train = np.array([x.reshape( (128, 128, 1) ) for x in X_train])
X_test = np.array([x.reshape( (128, 128, 1) ) for x in X_test])
# One-Hot encoding for classes
y_train = np.array(np_utils.to_categorical(y_train, 10))
y_test = np.array(np_utils.to_categorical(y_test, 10))
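# In[ ]:
# Sanity-check sketch (an addition): confirm the CNN inputs are 128 x 128 x 1
# and the labels are 10-way one-hot vectors before building the model.
print(X_train.shape, y_train.shape)  # (1500, 128, 128, 1) (1500, 10) if >= 1500 samples
print(X_test.shape, y_test.shape)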
# In[11]:
# CNN: three Conv2D blocks followed by a dense softmax classifier over the 10 classes
model = Sequential()
input_shape = (128, 128, 1)
model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))
model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))
model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dropout(rate=0.5))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
# In[44]:
model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=['accuracy'])
model.fit(x=X_train, y=y_train, epochs=50, batch_size=128, validation_data=(X_test, y_test))
score = model.evaluate(x=X_test,y=y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
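# In[ ]:
# Inference sketch (an addition, assuming the model trained above): classify a
# single clip by recomputing its mel spectrogram and reshaping for the CNN.
y_clip, sr_clip = librosa.load('audio/fold5/100263-2-0-137.wav', duration=2.97)
ps_clip = librosa.feature.melspectrogram(y=y_clip, sr=sr_clip)
if ps_clip.shape == (128, 128):
    probs = model.predict(ps_clip.reshape(1, 128, 128, 1))
    print('Predicted classID:', np.argmax(probs))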
# In[18]:
# Data augmentation: time stretching (speed change, pitch preserved)
import soundfile as sf
y, sr = librosa.load('audio/fold1/14113-4-0-1.wav', duration=2.97)
y_changed = librosa.effects.time_stretch(y, rate=0.81)
sf.write('augmented/fold1/speed_81/14113-4-0-1.wav', y_changed, sr)
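# In[ ]:
# Quick check (an addition): time_stretch with rate < 1 slows the clip down,
# so the stretched signal should be roughly 1/rate times longer.
print(len(y), len(y_changed), len(y_changed) / len(y))  # ratio ~ 1/0.81 ≈ 1.23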
# In[ ]:
# Create the output folder for an augmentation run if it does not already exist
# (uses `row` and `rate` from the neighbouring cells)
aug_dir = 'audio/fold' + str(row["fold"]) + '/speed_' + str(int(rate*100))
if not os.path.exists(aug_dir):
    os.makedirs(aug_dir)
# In[34]:
rate = 1.07  # replace with 0.81 and execute again
for index_num, row in tqdm(valid_data.iterrows()):
    if row["class"] == 'children_playing' or row["class"] == 'children_playing2':
        y, sr = librosa.load('audio/fold' + str(row["fold"]) + '/' + row["slice_file_name"])
        y_changed = librosa.effects.time_stretch(y, rate=rate)
        out_dir = 'audio/fold' + str(row["fold"]) + '/speed_' + str(int(rate*100))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
            print('new path created: ' + out_dir)
        sf.write(out_dir + '/' + row["slice_file_name"], y_changed, sr)
# In[37]:
len(extracted_features)
# In[45]:
model.save_weights('model_weights_acc_new.h5')
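# In[ ]:
# Restore sketch (an addition): save_weights stores weights only, so the same
# Sequential architecture must be rebuilt (as in the model cell above) before
# loading them back.
model.load_weights('model_weights_acc_new.h5')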
# In[ ]:
# Data augmentation: pitch shifting (whole semitone steps)
n_steps = 2  # -1, -2, 2, 1
for row in valid_data.itertuples():
    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    out_dir = 'augmented/fold' + str(row.fold) + '/ps1_' + str(int(n_steps))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # librosa.output.write_wav was removed in librosa 0.8; use soundfile instead
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)
# In[ ]:
n_steps = 2.5  # -2.5, -3.5, 2.5, 3.5
for row in valid_data.itertuples():
    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    out_dir = 'code/augmented/fold' + str(row.fold) + '/ps2_m' + str(int(n_steps*10))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)