Commit 29ddc2e0 authored by Dinushe Jayasekera's avatar Dinushe Jayasekera

python notebook for audio recognition

parent 5091c0b3
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import random\n",
"import warnings\n",
"\n",
"import keras\n",
"from keras.layers import Activation, Dense, Dropout, Conv2D, \\\n",
"    Flatten, MaxPooling2D\n",
"from keras.models import Sequential\n",
"from keras.utils import np_utils\n",
"import librosa\n",
"import librosa.display\n",
"import numpy as np\n",
"import pandas as pd\n",
"import soundfile as sf\n",
"from tqdm import tqdm\n",
"\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>slice_file_name</th>\n",
" <th>fsID</th>\n",
" <th>start</th>\n",
" <th>end</th>\n",
" <th>salience</th>\n",
" <th>fold</th>\n",
" <th>classID</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100032-3-0-0.wav</td>\n",
" <td>100032</td>\n",
" <td>0.0</td>\n",
" <td>0.317551</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>children_playing2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100263-2-0-117.wav</td>\n",
" <td>100263</td>\n",
" <td>58.5</td>\n",
" <td>62.500000</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>children_playing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100263-2-0-121.wav</td>\n",
" <td>100263</td>\n",
" <td>60.5</td>\n",
" <td>64.500000</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>children_playing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100263-2-0-126.wav</td>\n",
" <td>100263</td>\n",
" <td>63.0</td>\n",
" <td>67.000000</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>children_playing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100263-2-0-137.wav</td>\n",
" <td>100263</td>\n",
" <td>68.5</td>\n",
" <td>72.500000</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>children_playing</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" slice_file_name fsID start end salience fold classID \\\n",
"0 100032-3-0-0.wav 100032 0.0 0.317551 1 5 3 \n",
"1 100263-2-0-117.wav 100263 58.5 62.500000 1 5 2 \n",
"2 100263-2-0-121.wav 100263 60.5 64.500000 1 5 2 \n",
"3 100263-2-0-126.wav 100263 63.0 67.000000 1 5 2 \n",
"4 100263-2-0-137.wav 100263 68.5 72.500000 1 5 2 \n",
"\n",
" class \n",
"0 children_playing2 \n",
"1 children_playing \n",
"2 children_playing \n",
"3 children_playing \n",
"4 children_playing "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read Data\n",
"data = pd.read_csv('ASDmeta.csv')\n",
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(8732, 8)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7468, 4)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep clips lasting at least 3 seconds, restricted to the columns used downstream\n",
"clip_seconds = data['end'] - data['start']\n",
"kept_columns = ['slice_file_name', 'fold', 'classID', 'class']\n",
"valid_data = data.loc[clip_seconds >= 3, kept_columns]\n",
"valid_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(128, 128)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Example of a children play spectrogram\n",
"y, sr = librosa.load('audio/fold5/100263-2-0-137.wav', duration=2.97)\n",
"ps = librosa.feature.melspectrogram(y=y, sr=sr)\n",
"ps.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.QuadMesh at 0x2ae074fe280>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Plot on a decibel scale: raw mel power spans several orders of magnitude,\n",
"# so a linear colour map shows little beyond the loudest bins.\n",
"# (Display only - `ps` itself is left untouched.)\n",
"ps_db = librosa.power_to_db(ps, ref=np.max)\n",
"mesh = librosa.display.specshow(ps_db, sr=sr, y_axis='mel', x_axis='time')\n",
"mesh.axes.set_title('Mel spectrogram (dB) - children_playing example')\n",
"mesh.axes.figure.colorbar(mesh, ax=mesh.axes, format='%+2.0f dB');"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def features_extractor(file):\n",
"    \"\"\"Summarise one audio file as a fixed-length MFCC vector.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file : path accepted by ``librosa.load``\n",
"        Audio file to analyse.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        The 40 MFCC coefficients, each averaged over all frames.\n",
"    \"\"\"\n",
"    # Bug fix: the body used to read an undefined global `file_name`\n",
"    # instead of the `file` argument.\n",
"    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')\n",
"    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)\n",
"    # mfcc returns (n_mfcc, frames); average over the frame axis\n",
"    return np.mean(mfccs_features, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"7468it [05:14, 23.76it/s]\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Build the (mel spectrogram, classID) pairs used for training.\n",
"#\n",
"# Fixes relative to the previous version of this cell:\n",
"#   * the augmented-file check was missing the '/' after 'speed_107', so it\n",
"#     tested a path that the augmentation cell never writes;\n",
"#   * when the check did pass, the spectrogram of the ORIGINAL clip was appended\n",
"#     a second time instead of loading the time-stretched file;\n",
"#   * specshow() was called for every clip, piling thousands of plots onto one figure.\n",
"#\n",
"# NOTE(review): originals and their time-stretched copies are shuffled together\n",
"# later on, so near-duplicates can land on both sides of the train/test split.\n",
"CLIP_SECONDS = 2.97        # gives 128 frames for the example clip loaded earlier\n",
"SPEC_SHAPE = (128, 128)    # input size expected by the CNN\n",
"AUGMENTED_SUBDIR = 'speed_107'\n",
"\n",
"\n",
"def load_mel_spectrogram(path):\n",
"    \"\"\"Return the mel spectrogram of the first CLIP_SECONDS of `path`, or None if its shape is not SPEC_SHAPE.\"\"\"\n",
"    signal, sample_rate = librosa.load(path, duration=CLIP_SECONDS)\n",
"    spectrogram = librosa.feature.melspectrogram(y=signal, sr=sample_rate)\n",
"    return spectrogram if spectrogram.shape == SPEC_SHAPE else None\n",
"\n",
"\n",
"extracted_features = []\n",
"for index_num, row in tqdm(valid_data.iterrows(), total=len(valid_data)):\n",
"    fold_dir = 'audio/fold' + str(row['fold'])\n",
"    candidate_paths = [fold_dir + '/' + row['slice_file_name']]\n",
"    # Include the time-stretched copy when the augmentation cell has produced one\n",
"    augmented_path = fold_dir + '/' + AUGMENTED_SUBDIR + '/' + row['slice_file_name']\n",
"    if os.path.exists(augmented_path):\n",
"        candidate_paths.append(augmented_path)\n",
"    for path in candidate_paths:\n",
"        ps = load_mel_spectrogram(path)\n",
"        if ps is not None:\n",
"            extracted_features.append((ps, row.classID))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of samples: 1677\n"
]
}
],
"source": [
"print(\"Number of samples: \", len(extracted_features))\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"SPLIT_SEED = 42\n",
"N_TRAIN = 1500\n",
"N_CLASSES = 10\n",
"\n",
"# Shuffle a copy with a seeded RNG: the split is reproducible and re-running\n",
"# this cell no longer reshuffles `extracted_features` in place.\n",
"dataset = list(extracted_features)\n",
"random.Random(SPLIT_SEED).shuffle(dataset)\n",
"\n",
"# Without this check an undersized dataset leaves `test` empty and the\n",
"# zip(*test) unpacking below fails with an unhelpful ValueError.\n",
"if len(dataset) <= N_TRAIN:\n",
"    raise ValueError(f'need more than {N_TRAIN} samples to hold out a test set, got {len(dataset)}')\n",
"\n",
"train = dataset[:N_TRAIN]\n",
"test = dataset[N_TRAIN:]\n",
"\n",
"X_train, y_train = zip(*train)\n",
"X_test, y_test = zip(*test)\n",
"\n",
"# Add the trailing channel axis expected by Conv2D: (n, 128, 128) -> (n, 128, 128, 1)\n",
"X_train = np.stack(X_train)[..., np.newaxis]\n",
"X_test = np.stack(X_test)[..., np.newaxis]\n",
"\n",
"# One-hot encode the class ids. keras.utils.to_categorical is used directly\n",
"# because the keras.utils.np_utils module is not present in newer Keras releases.\n",
"y_train = keras.utils.to_categorical(y_train, N_CLASSES)\n",
"y_test = keras.utils.to_categorical(y_test, N_CLASSES)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# CNN over 128 x 128 single-channel spectrograms, ending in a 10-way softmax\n",
"input_shape = (128, 128, 1)\n",
"\n",
"model = Sequential([\n",
"    # Convolution block 1\n",
"    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),\n",
"    MaxPooling2D((4, 2), strides=(4, 2)),\n",
"    Activation('relu'),\n",
"    # Convolution block 2\n",
"    Conv2D(48, (5, 5), padding=\"valid\"),\n",
"    MaxPooling2D((4, 2), strides=(4, 2)),\n",
"    Activation('relu'),\n",
"    # Convolution block 3 (no pooling)\n",
"    Conv2D(48, (5, 5), padding=\"valid\"),\n",
"    Activation('relu'),\n",
"    # Classifier head\n",
"    Flatten(),\n",
"    Dropout(rate=0.5),\n",
"    Dense(64),\n",
"    Activation('relu'),\n",
"    Dropout(rate=0.5),\n",
"    Dense(10),\n",
"    Activation('softmax'),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/50\n",
"12/12 [==============================] - 19s 1s/step - loss: 0.5703 - accuracy: 0.7495 - val_loss: 0.4692 - val_accuracy: 0.8023\n",
"Epoch 2/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.6080 - accuracy: 0.7511 - val_loss: 0.4458 - val_accuracy: 0.8079\n",
"Epoch 3/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.6012 - accuracy: 0.7419 - val_loss: 0.4120 - val_accuracy: 0.8305\n",
"Epoch 4/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.5109 - accuracy: 0.7659 - val_loss: 0.4143 - val_accuracy: 0.8249\n",
"Epoch 5/50\n",
"12/12 [==============================] - 18s 1s/step - loss: 0.4989 - accuracy: 0.7868 - val_loss: 0.4059 - val_accuracy: 0.8136\n",
"Epoch 6/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.4659 - accuracy: 0.7853 - val_loss: 0.4052 - val_accuracy: 0.8362\n",
"Epoch 7/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.4537 - accuracy: 0.8102 - val_loss: 0.4432 - val_accuracy: 0.8249\n",
"Epoch 8/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.4390 - accuracy: 0.7917 - val_loss: 0.4371 - val_accuracy: 0.8362\n",
"Epoch 9/50\n",
"12/12 [==============================] - 19s 2s/step - loss: 0.4265 - accuracy: 0.8203 - val_loss: 0.4093 - val_accuracy: 0.8192\n",
"Epoch 10/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.4112 - accuracy: 0.8354 - val_loss: 0.3964 - val_accuracy: 0.8305\n",
"Epoch 11/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.4019 - accuracy: 0.8209 - val_loss: 0.4434 - val_accuracy: 0.7740\n",
"Epoch 12/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3762 - accuracy: 0.8350 - val_loss: 0.4082 - val_accuracy: 0.8192\n",
"Epoch 13/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.3343 - accuracy: 0.8576 - val_loss: 0.4074 - val_accuracy: 0.8588\n",
"Epoch 14/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3491 - accuracy: 0.8521 - val_loss: 0.4439 - val_accuracy: 0.8192\n",
"Epoch 15/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3548 - accuracy: 0.8498 - val_loss: 0.5008 - val_accuracy: 0.8305\n",
"Epoch 16/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.4044 - accuracy: 0.8507 - val_loss: 0.6734 - val_accuracy: 0.8136\n",
"Epoch 17/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.5081 - accuracy: 0.8129 - val_loss: 0.4393 - val_accuracy: 0.7797\n",
"Epoch 18/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.4016 - accuracy: 0.8106 - val_loss: 0.4132 - val_accuracy: 0.8305\n",
"Epoch 19/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3454 - accuracy: 0.8516 - val_loss: 0.4011 - val_accuracy: 0.8588\n",
"Epoch 20/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3868 - accuracy: 0.8440 - val_loss: 0.4531 - val_accuracy: 0.8305\n",
"Epoch 21/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3538 - accuracy: 0.8414 - val_loss: 0.3996 - val_accuracy: 0.8418\n",
"Epoch 22/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.3289 - accuracy: 0.8568 - val_loss: 0.4396 - val_accuracy: 0.8531\n",
"Epoch 23/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2906 - accuracy: 0.8743 - val_loss: 0.4508 - val_accuracy: 0.8192\n",
"Epoch 24/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2776 - accuracy: 0.8785 - val_loss: 0.4434 - val_accuracy: 0.8418\n",
"Epoch 25/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.3088 - accuracy: 0.8714 - val_loss: 0.4205 - val_accuracy: 0.8644\n",
"Epoch 26/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2936 - accuracy: 0.8863 - val_loss: 0.3697 - val_accuracy: 0.8531\n",
"Epoch 27/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2680 - accuracy: 0.8900 - val_loss: 0.3482 - val_accuracy: 0.8870\n",
"Epoch 28/50\n",
"12/12 [==============================] - 18s 1s/step - loss: 0.2504 - accuracy: 0.9007 - val_loss: 0.4004 - val_accuracy: 0.8870\n",
"Epoch 29/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2617 - accuracy: 0.8909 - val_loss: 0.5071 - val_accuracy: 0.8644\n",
"Epoch 30/50\n",
"12/12 [==============================] - 19s 2s/step - loss: 0.2602 - accuracy: 0.8958 - val_loss: 0.5950 - val_accuracy: 0.8531\n",
"Epoch 31/50\n",
"12/12 [==============================] - 21s 2s/step - loss: 0.2396 - accuracy: 0.8976 - val_loss: 0.6384 - val_accuracy: 0.8644\n",
"Epoch 32/50\n",
"12/12 [==============================] - 18s 2s/step - loss: 0.2983 - accuracy: 0.8772 - val_loss: 0.6448 - val_accuracy: 0.8305\n",
"Epoch 33/50\n",
"12/12 [==============================] - 18s 2s/step - loss: 0.2954 - accuracy: 0.8778 - val_loss: 0.3525 - val_accuracy: 0.8531\n",
"Epoch 34/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.2521 - accuracy: 0.9003 - val_loss: 0.3455 - val_accuracy: 0.8644\n",
"Epoch 35/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2271 - accuracy: 0.8958 - val_loss: 0.4037 - val_accuracy: 0.8644\n",
"Epoch 36/50\n",
"12/12 [==============================] - 18s 1s/step - loss: 0.2035 - accuracy: 0.9187 - val_loss: 0.3996 - val_accuracy: 0.8983\n",
"Epoch 37/50\n",
"12/12 [==============================] - 20s 2s/step - loss: 0.2229 - accuracy: 0.9141 - val_loss: 0.5673 - val_accuracy: 0.8644\n",
"Epoch 38/50\n",
"12/12 [==============================] - 17s 1s/step - loss: 0.2107 - accuracy: 0.9098 - val_loss: 0.4846 - val_accuracy: 0.8475\n",
"Epoch 39/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2309 - accuracy: 0.8998 - val_loss: 0.6008 - val_accuracy: 0.8588\n",
"Epoch 40/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2562 - accuracy: 0.9124 - val_loss: 0.4937 - val_accuracy: 0.8588\n",
"Epoch 41/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2103 - accuracy: 0.9180 - val_loss: 0.5335 - val_accuracy: 0.8644\n",
"Epoch 42/50\n",
"12/12 [==============================] - 18s 1s/step - loss: 0.1739 - accuracy: 0.9195 - val_loss: 0.5026 - val_accuracy: 0.8701\n",
"Epoch 43/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2074 - accuracy: 0.9024 - val_loss: 0.8531 - val_accuracy: 0.8362\n",
"Epoch 44/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2637 - accuracy: 0.9094 - val_loss: 0.4599 - val_accuracy: 0.8588\n",
"Epoch 45/50\n",
"12/12 [==============================] - 16s 1s/step - loss: 0.2997 - accuracy: 0.8787 - val_loss: 0.4988 - val_accuracy: 0.8588\n",
"Epoch 46/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2285 - accuracy: 0.9101 - val_loss: 0.5542 - val_accuracy: 0.8588\n",
"Epoch 47/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2146 - accuracy: 0.9145 - val_loss: 0.7282 - val_accuracy: 0.7853\n",
"Epoch 48/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2460 - accuracy: 0.9063 - val_loss: 0.9213 - val_accuracy: 0.7288\n",
"Epoch 49/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.4337 - accuracy: 0.8456 - val_loss: 0.4394 - val_accuracy: 0.8531\n",
"Epoch 50/50\n",
"12/12 [==============================] - 15s 1s/step - loss: 0.2752 - accuracy: 0.8978 - val_loss: 0.4272 - val_accuracy: 0.8531\n",
"6/6 [==============================] - 1s 87ms/step - loss: 0.4272 - accuracy: 0.8531\n",
"Test loss: 0.427224338054657\n",
"Test accuracy: 0.8531073331832886\n"
]
}
],
"source": [
"# NOTE(review): the test split is also passed as validation data, so the\n",
"# 'Test' figures printed below are not an independent estimate.\n",
"model.compile(\n",
"    optimizer=\"Adam\",\n",
"    loss=\"categorical_crossentropy\",\n",
"    metrics=['accuracy'],\n",
")\n",
"\n",
"model.fit(\n",
"    x=X_train,\n",
"    y=y_train,\n",
"    epochs=50,\n",
"    batch_size=128,\n",
"    validation_data=(X_test, y_test),\n",
")\n",
"\n",
"# evaluate() returns [loss, accuracy] for the metrics compiled above\n",
"score = model.evaluate(x=X_test, y=y_test)\n",
"test_loss, test_accuracy = score\n",
"\n",
"print('Test loss:', test_loss)\n",
"print('Test accuracy:', test_accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Data augmentation demo: time-stretch a single clip and save the result\n",
"source_path = 'audio/fold1/14113-4-0-1.wav'\n",
"stretched_path = 'augmented/fold1/speed_81/14113-4-0-1.wav'\n",
"\n",
"y, sr = librosa.load(source_path, duration=2.97)\n",
"y_changed = librosa.effects.time_stretch(y, rate=0.81)\n",
"\n",
"# sf.write does not create missing directories, so make sure the target exists\n",
"os.makedirs(os.path.dirname(stretched_path), exist_ok=True)\n",
"sf.write(stretched_path, y_changed, sr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pre-create the per-fold output directories for the time-stretched clips.\n",
"# The previous version of this cell referenced `row`, `rate` and `newpath`,\n",
"# none of which are defined at this point on a fresh kernel.\n",
"STRETCH_RATES = (0.81, 1.07)\n",
"\n",
"for fold in sorted(valid_data['fold'].unique()):\n",
"    for stretch_rate in STRETCH_RATES:\n",
"        os.makedirs('audio/fold' + str(fold) + '/speed_' + str(round(stretch_rate * 100)), exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"1it [00:00, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold5/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"9it [00:02, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold2/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"13it [00:02, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold10/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"48it [00:03, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold6/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"57it [00:05, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold1/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"95it [00:06, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold4/speed_107\n",
"new path created : audio/fold3/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"400it [00:12, 13.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold9/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"580it [00:21, 10.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold8/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"591it [00:22, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"new path created : audio/fold7/speed_107\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"7468it [06:22, 19.50it/s]\n"
]
}
],
"source": [
"rate = 1.07  # replace with 0.81 and execute again\n",
"\n",
"# round() rather than int(): rate * 100 is a float product that can land just\n",
"# below the intended integer (int(0.57 * 100) == 56).\n",
"speed_dir_name = 'speed_' + str(round(rate * 100))\n",
"\n",
"# Only the children_playing classes are augmented; filtering up front avoids\n",
"# iterating rows that would be skipped and gives tqdm a meaningful total.\n",
"rows_to_stretch = valid_data[valid_data['class'].isin(['children_playing', 'children_playing2'])]\n",
"\n",
"for index_num, row in tqdm(rows_to_stretch.iterrows(), total=len(rows_to_stretch)):\n",
"    fold_dir = 'audio/fold' + str(row['fold'])\n",
"    out_dir = fold_dir + '/' + speed_dir_name\n",
"    y, sr = librosa.load(fold_dir + '/' + row['slice_file_name'])\n",
"    y_changed = librosa.effects.time_stretch(y, rate=rate)\n",
"    if not os.path.exists(out_dir):\n",
"        os.makedirs(out_dir, exist_ok=True)\n",
"        print('new path created : ' + out_dir)\n",
"    sf.write(out_dir + '/' + row['slice_file_name'], y_changed, sr)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1677"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(extracted_features)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"model.save_weights('model_weights_acc_new.h5')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pitch-shift augmentation (whole semitone steps)\n",
"n_steps = 2  # -1, -2, 2, 1\n",
"\n",
"for row in valid_data.itertuples():\n",
"    # valid_data has no `path` column; rebuild the location from fold + file name\n",
"    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)\n",
"    # sr is passed by keyword so the call works whether or not it is keyword-only\n",
"    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)\n",
"    out_dir = 'augmented/fold' + str(row.fold) + '/ps1_' + str(int(n_steps))\n",
"    os.makedirs(out_dir, exist_ok=True)\n",
"    # librosa.output.write_wav no longer exists in current librosa; soundfile writes the file instead\n",
"    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pitch-shift augmentation (fractional semitone steps)\n",
"n_steps = 2.5  # -2.5, -3.5, 2.5, 3.5\n",
"\n",
"# NOTE(review): this cell writes under 'code/augmented/' while the whole-step\n",
"# cell writes under 'augmented/' - confirm which location is intended.\n",
"for row in valid_data.itertuples():\n",
"    # valid_data has no `path` column; rebuild the location from fold + file name\n",
"    y, sr = librosa.load('audio/fold' + str(row.fold) + '/' + row.slice_file_name)\n",
"    # sr is passed by keyword so the call works whether or not it is keyword-only\n",
"    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)\n",
"    out_dir = 'code/augmented/fold' + str(row.fold) + '/ps2_m' + str(int(n_steps*10))\n",
"    os.makedirs(out_dir, exist_ok=True)\n",
"    # librosa.output.write_wav no longer exists in current librosa; soundfile writes the file instead\n",
"    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment