Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2021-005
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
IT18019896 A.R.M.R.M.Mudalinayake
2021-005
Commits
8df08fde
Commit
8df08fde
authored
Nov 26, 2021
by
Dinushe Jayasekera
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
DL model
parent
fa77e102
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
235 additions
and
0 deletions
+235
-0
Audio.py
Audio.py
+235
-0
No files found.
Audio.py
0 → 100644
View file @
8df08fde
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import
keras
from
keras.layers
import
Activation
,
Dense
,
Dropout
,
Conv2D
,
Flatten
,
MaxPooling2D
from
keras.models
import
Sequential
from
keras.utils
import
np_utils
import
librosa
import
librosa.display
import
numpy
as
np
import
pandas
as
pd
import
random
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
# In[2]:
# Read Data
# Load the clip metadata table; every later cell works from this frame.
data = pd.read_csv('ASDmeta.csv')
data.head(5)

# In[3]:
data.shape

# In[4]:
# Keep only the clips that last at least 3 seconds, and only the columns
# the rest of the notebook reads.
wanted_columns = ['slice_file_name', 'fold', 'classID', 'class']
is_long_enough = (data['end'] - data['start']) >= 3
valid_data = data[wanted_columns][is_long_enough]
valid_data.shape
# In[5]:
# Example of a children play spectrogram
# Compute the mel spectrogram of one sample clip (first 2.97 s only).
example_clip = 'audio/fold5/100263-2-0-137.wav'
y, sr = librosa.load(example_clip, duration=2.97)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
ps.shape

# In[6]:
# Plot it on a mel-frequency / time grid.
librosa.display.specshow(ps, y_axis='mel', x_axis='time')
# In[7]:
###for one audio file
def features_extractor(file):
    """Return the time-averaged MFCC feature vector for one audio file.

    Args:
        file: Path (or any other source accepted by ``librosa.load``) of
            the audio clip to analyse.

    Returns:
        A 1-D NumPy array of 40 values: each of the 40 MFCC coefficients
        averaged over all frames of the clip.
    """
    # Load from the ``file`` argument itself; 'kaiser_fast' trades a little
    # resampling quality for speed.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Transpose so frames run along axis 0, then average them away to get
    # one fixed-length vector per clip regardless of its duration.
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features
# In[39]:
import
os
from
tqdm
import
tqdm
#to see progress
#audio_dataset_path='D:/4thYear/Research/AudioClassifier(ETA)/UrbanSound8K/audio'
def _mel_spectrogram(path):
    """Load at most 2.97 s of audio from *path* and return its mel spectrogram."""
    signal, sample_rate = librosa.load(path, duration=2.97)
    return librosa.feature.melspectrogram(y=signal, sr=sample_rate)


# Build the (spectrogram, classID) pairs used to train the CNN.
extracted_features = []
for index_num, row in tqdm(valid_data.iterrows(), total=len(valid_data)):
    fold_dir = 'audio/fold' + str(row["fold"])
    clip_paths = [fold_dir + '/' + row["slice_file_name"]]

    # for newly created set: the time-stretch cell writes its output to
    # <fold>/speed_107/<slice_file_name>, so look for the clip there and, when
    # it exists, extract features from that augmented file as well.
    augmented_path = fold_dir + '/speed_107/' + row["slice_file_name"]
    if os.path.exists(augmented_path):
        clip_paths.append(augmented_path)

    for clip_path in clip_paths:
        ps = _mel_spectrogram(clip_path)
        # The network expects 128x128 inputs; clips too short to fill that
        # many frames are skipped rather than padded.
        if ps.shape != (128, 128):
            continue
        extracted_features.append((ps, row.classID))

# In[41]:
print("Number of samples: ", len(extracted_features))
# In[10]:
# Shuffle in place (``dataset`` is the same list object as
# ``extracted_features``), then split: first 1500 pairs train, the rest test.
dataset = extracted_features
random.shuffle(dataset)
train, test = dataset[:1500], dataset[1500:]

# Separate spectrograms from labels.
X_train, y_train = zip(*train)
X_test, y_test = zip(*test)

# Reshape for CNN input: add a trailing single-channel axis to each spectrogram.
X_train = np.array([spec.reshape((128, 128, 1)) for spec in X_train])
X_test = np.array([spec.reshape((128, 128, 1)) for spec in X_test])

# One-Hot encoding for classes (10 classes).
y_train = np.array(keras.utils.np_utils.to_categorical(y_train, 10))
y_test = np.array(keras.utils.np_utils.to_categorical(y_test, 10))
# In[11]:
# CNN classifier over single-channel 128x128 mel spectrograms.
input_shape = (128, 128, 1)

model = Sequential([
    # Convolution stage 1: 5x5 conv, then pooling with a 4x2 window.
    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),
    # Convolution stage 2.
    Conv2D(48, (5, 5), padding="valid"),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),
    # Convolution stage 3 (no pooling).
    Conv2D(48, (5, 5), padding="valid"),
    Activation('relu'),
    # Classifier head: dropout-regularised dense layer, then 10-way softmax.
    Flatten(),
    Dropout(rate=0.5),
    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),
    Dense(10),
    Activation('softmax'),
])
# In[44]:
# Train with Adam on categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

# The held-out split doubles as the validation set during training.
model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    batch_size=128,
    validation_data=(X_test, y_test),
)

# evaluate() returns [loss, accuracy] for the metrics compiled above.
score = model.evaluate(x=X_test, y=y_test)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)
# In[18]:
#data augmentation
#time variation
import
soundfile
as
sf
# Time-stretch one sample clip and save the result.
y, sr = librosa.load('audio/fold1/14113-4-0-1.wav', duration=2.97)
y_changed = librosa.effects.time_stretch(y, rate=0.81)

out_path = 'augmented/fold1/speed_81/14113-4-0-1.wav'
# soundfile does not create missing directories, so make sure the target
# folder exists before writing.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
sf.write(out_path, y_changed, sr)
# In[ ]:
#newpath = r'C:\Program Files\arbitrary'
# In[34]:
# Time-stretch every 'children_playing' clip and store the result next to the
# originals, in audio/fold<N>/speed_<rate*100>/<slice_file_name>.
rate = 1.07  # replace with 0.81 and execute again

# round() before int() so float error (e.g. 0.29 * 100 == 28.999...) cannot
# produce an off-by-one directory name.
speed_dir_name = 'speed_' + str(int(round(rate * 100)))

for index_num, row in tqdm(valid_data.iterrows()):
    if row["class"] not in ('children_playing', 'children_playing2'):
        continue

    fold_dir = 'audio/fold' + str(row["fold"])
    y, sr = librosa.load(fold_dir + '/' + row["slice_file_name"])
    y_changed = librosa.effects.time_stretch(y, rate=rate)

    # Create the per-fold output directory the first time it is needed.
    out_dir = fold_dir + '/' + speed_dir_name
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
        print('new path created : ' + out_dir)

    sf.write(out_dir + '/' + row["slice_file_name"], y_changed, sr)
# In[37]:
# Number of (spectrogram, classID) pairs collected so far.
len(extracted_features)

# In[45]:
# Persist the trained weights only (not the architecture) to an HDF5 file.
weights_file = 'model_weights_acc_new.h5'
model.save_weights(weights_file)
# In[ ]:
#for varying pitch
# Pitch-shift every valid clip by a whole number of semitones and write it to
# augmented/fold<N>/ps1_<n_steps>/<slice_file_name>.
n_steps = 2  # -1, -2, 2, 1
for row in valid_data.itertuples():
    fold_name = 'fold' + str(row.fold)
    # valid_data has no 'path' column, so build the source path from the
    # fold and file name.
    y, sr = librosa.load('audio/' + fold_name + '/' + row.slice_file_name)
    # sr is passed by keyword: newer librosa releases accept it only that way.
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

    out_dir = 'augmented/' + fold_name + '/ps1_' + str(int(n_steps))
    os.makedirs(out_dir, exist_ok=True)
    # librosa.output.write_wav no longer exists in current librosa; write with
    # soundfile instead.
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)
# In[ ]:
# Pitch-shift every valid clip by a fractional number of semitones and write
# it to code/augmented/fold<N>/ps2_m<n_steps*10>/<slice_file_name>.
n_steps = 2.5  # -2.5, -3.5, 2.5, 3.5
for row in valid_data.itertuples():
    fold_name = 'fold' + str(row.fold)
    # valid_data has no 'path' column, so build the source path from the
    # fold and file name.
    y, sr = librosa.load('audio/' + fold_name + '/' + row.slice_file_name)
    # sr is passed by keyword: newer librosa releases accept it only that way.
    y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

    out_dir = 'code/augmented/' + fold_name + '/ps2_m' + str(int(n_steps * 10))
    os.makedirs(out_dir, exist_ok=True)
    # librosa.output.write_wav no longer exists in current librosa; write with
    # soundfile instead.
    sf.write(out_dir + '/' + row.slice_file_name, y_changed, sr)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment