Commit 8df08fde authored by Dinushe Jayasekera

DL model

parent fa77e102
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.utils import np_utils
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random
import warnings
warnings.filterwarnings('ignore')
# In[2]:
# Read Data
data = pd.read_csv('ASDmeta.csv')
data.head(5)
# In[3]:
data.shape
# In[4]:
# Get data over 3 seconds long
valid_data = data[['slice_file_name', 'fold' ,'classID', 'class']][ data['end']-data['start'] >= 3 ]
valid_data.shape
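# In[ ]:
# Optional sanity check (a small added sketch using only pandas calls):
# class distribution of the clips that survive the >= 3 s filter.
valid_data['class'].value_counts()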
# In[5]:
# Example spectrogram for a children_playing clip
y, sr = librosa.load('audio/fold5/100263-2-0-137.wav', duration=2.97)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
ps.shape
# In[6]:
librosa.display.specshow(ps, y_axis='mel', x_axis='time')
# In[7]:
###for one audio file
def features_extractor(file):
audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
return mfccs_scaled_features
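# In[ ]:
# Example call for the helper above (a minimal sketch reusing the
# children_playing clip loaded earlier; any valid .wav path works).
mfccs = features_extractor('audio/fold5/100263-2-0-137.wav')
mfccs.shape  # one averaged value per MFCC coefficient, i.e. (40,)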
# In[39]:
import os
from tqdm import tqdm #to see progress
#audio_dataset_path='D:/4thYear/Research/AudioClassifier(ETA)/UrbanSound8K/audio'
extracted_features=[]
for index_num, row in tqdm(valid_data.iterrows()):
    #if row["class"] == 'children_playing' or row["class"] == 'children_playing2':
    y, sr = librosa.load('audio/fold' + str(row["fold"]) + '/' + row["slice_file_name"], duration=2.97)
    # for the newly created (time-stretched) set: if a speed_107 copy of this
    # clip was generated earlier, include its spectrogram as an extra sample
    aug_file = 'audio/fold' + str(row["fold"]) + '/speed_107/' + row["slice_file_name"]
    if os.path.exists(aug_file):
        y_aug, sr_aug = librosa.load(aug_file, duration=2.97)
        ps = librosa.feature.melspectrogram(y=y_aug, sr=sr_aug)
        if ps.shape == (128, 128):
            extracted_features.append((ps, row.classID))
    # spectrogram of the original clip; skip clips that do not yield 128x128
    ps = librosa.feature.melspectrogram(y=y, sr=sr)
    if ps.shape != (128, 128):
        continue
    extracted_features.append((ps, row.classID))
# In[41]:
print("Number of samples: ", len(extracted_features))
# In[10]:
dataset = extracted_features
random.shuffle(dataset)
train = dataset[:1500]
test = dataset[1500:]
X_train, y_train = zip(*train)
X_test, y_test = zip(*test)
# Reshape for CNN input
X_train = np.array([x.reshape( (128, 128, 1) ) for x in X_train])
X_test = np.array([x.reshape( (128, 128, 1) ) for x in X_test])
# One-Hot encoding for classes
y_train = np.array(np_utils.to_categorical(y_train, 10))
y_test = np.array(np_utils.to_categorical(y_test, 10))
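# In[ ]:
# Sanity check on the CNN inputs: the feature arrays should be
# (n, 128, 128, 1) and the one-hot labels (n, 10).
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)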
# In[11]:
model = Sequential()
input_shape=(128, 128, 1)
model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))
model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))
model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dropout(rate=0.5))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
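# In[ ]:
# Inspect layer output shapes and parameter counts before training.
model.summary()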
# In[44]:
model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=['accuracy'])
model.fit(x=X_train, y=y_train, epochs=50, batch_size=128, validation_data=(X_test, y_test))
score = model.evaluate(x=X_test,y=y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
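# In[ ]:
# Spot-check a few predictions against the ground truth (a small sketch;
# np.argmax undoes the one-hot encoding applied earlier).
predicted = np.argmax(model.predict(X_test[:5]), axis=1)
actual = np.argmax(y_test[:5], axis=1)
print('predicted:', predicted, ' actual:', actual)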
# In[18]:
#data augmentation
#time variation
import soundfile as sf
y, sr = librosa.load('audio/fold1/14113-4-0-1.wav', duration=2.97)
y_changed = librosa.effects.time_stretch(y, rate=0.81)
sf.write('augmented/fold1/speed_81/14113-4-0-1.wav' ,y_changed, sr)
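# In[ ]:
# Verify the stretch: rate < 1 slows the clip down, so the stretched
# signal should be roughly len(y) / 0.81 samples long.
print(len(y), len(y_changed))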
# In[ ]:
#newpath = r'C:\Program Files\arbitrary'
# create the output folder for the stretched copies if it does not exist yet
if not os.path.exists('audio/fold' + str(row["fold"]) + '/speed_' + str(int(rate*100))):
    os.makedirs('audio/fold' + str(row["fold"]) + '/speed_' + str(int(rate*100)))
# In[34]:
rate = 1.07 # replace with 0.81 and execute again
for index_num, row in tqdm(valid_data.iterrows()):
    if row["class"] == 'children_playing' or row["class"] == 'children_playing2':
        y, sr = librosa.load('audio/fold' + str(row["fold"]) + '/' + row["slice_file_name"])
        y_changed = librosa.effects.time_stretch(y, rate=rate)
        new_dir = 'audio/fold' + str(row["fold"]) + '/speed_' + str(int(rate*100))
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
            print('new path created : ' + new_dir)
        sf.write(new_dir + '/' + row["slice_file_name"], y_changed, sr)
# In[37]:
len(extracted_features)
# In[45]:
model.save_weights('model_weights_acc_new.h5')
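# In[ ]:
# The saved weights can be restored later by rebuilding the same
# architecture and loading them back (standard Keras API).
model.load_weights('model_weights_acc_new.h5')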
# In[ ]:
#for varying pitch
n_steps = 2 #-1, -2, 2, 1
for row in valid_data.itertuples():
    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    # write with soundfile (librosa.output.write_wav was removed in librosa 0.8)
    out_dir = 'augmented/fold' + str(row.fold) + '/ps1_' + str(int(n_steps))
    os.makedirs(out_dir, exist_ok=True)
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)
# In[ ]:
n_steps = 2.5 #-2.5, -3.5, 2.5, 3.5
for row in valid_data.itertuples():
    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    out_dir = 'code/augmented/fold' + str(row.fold) + '/ps2_m' + str(int(n_steps*10))
    os.makedirs(out_dir, exist_ok=True)
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)