T.H.C. Heshan / 2023-232 / Commits

Commit c5934ed9 authored May 24, 2023 by thirani

f:script

parent 94db1936

Showing 2 changed files with 102 additions and 0 deletions
audio_classification/audio_classify.py  +102  -0
audio_classification/sound.tflite  +0  -0
audio_classification/audio_classify.py  0 → 100644
import os
import tensorflow as tf
import numpy as np
import joblib
import librosa
from pydub import AudioSegment
import simpleaudio as sa


# Define audio feature extraction function using librosa
def extract_features(file_path):
    audio, _ = librosa.load(file_path, sr=16000)
    mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=20)
    return mfccs


# Function to convert MP3 to WAV format
def convert_to_wav(file_path):
    audio = AudioSegment.from_mp3(file_path)
    wav_file_path = file_path.replace('.mp3', '.wav')
    audio.export(wav_file_path, format='wav')
    return wav_file_path


# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path='sound.tflite')
interpreter.allocate_tensors()

# Load the label encoder
label_encoder_path = 'labels_encoder.pkl'
label_encoder = joblib.load(label_encoder_path)

# Load the label names from the label.txt file
label_file_path = 'label.txt'
with open(label_file_path, 'r') as f:
    labels = f.read().splitlines()

# Define the output messages
output_messages = {
    'cat': 'There is a cat somewhere',
    'dog': 'There is a dog somewhere',
    'door': 'There is a door near you',
    'person': 'People are around you',
    'computer': "That's a computer"
}

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Define the path to the audio folder
audio_folder = 'audio'

# Get a list of all files in the audio folder
audio_files = os.listdir(audio_folder)

for file_name in audio_files:
    file_path = os.path.join(audio_folder, file_name)
    sample_audio_format = os.path.splitext(file_path)[1][1:].lower()  # Get the file format

    if sample_audio_format == 'mp3':
        file_path = convert_to_wav(file_path)  # Convert MP3 to WAV if it's an MP3 file

    # Extract features for the audio file
    sample_features = extract_features(file_path)

    expected_shape = (20, 157)
    if sample_features.shape[1] < expected_shape[1]:
        # Pad the features with zeros
        pad_width = expected_shape[1] - sample_features.shape[1]
        sample_features = np.pad(sample_features, ((0, 0), (0, pad_width)))
    elif sample_features.shape[1] > expected_shape[1]:
        # Truncate the features
        sample_features = sample_features[:, :expected_shape[1]]

    # Reshape and expand dimensions to match the input tensor shape
    sample_features = np.expand_dims(sample_features, axis=0)
    sample_features = np.expand_dims(sample_features, axis=-1)

    expected_shape = input_details[0]['shape']

    # Reshape the sample_features array
    sample_features = np.reshape(sample_features, expected_shape)

    # Run inference on the audio file
    interpreter.set_tensor(input_details[0]['index'], sample_features)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    predicted_class_index = np.argmax(output)
    predicted_class = labels[predicted_class_index]

    # Map the predicted class index to the actual class label
    predicted_label = label_encoder.inverse_transform([predicted_class_index])[0]

    # Play the audio file
    wave_obj = sa.WaveObject.from_wave_file(file_path)
    play_obj = wave_obj.play()
    play_obj.wait_done()

    # Display the predicted class
    output_message = output_messages.get(predicted_class, 'Unknown sound')
    print(f'Audio File: {file_name}\nOutput Message: {output_message}\n')
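
For context, the fixed feature width of 157 frames corresponds to roughly five seconds of 16 kHz audio under librosa's default hop length of 512 samples (1 + 80000 // 512 = 157). A minimal sketch, assuming a clip at a hypothetical path audio/example.wav, for checking a file against that expectation before inference:

import librosa

# Load up to five seconds at the same sample rate the script uses
audio, _ = librosa.load('audio/example.wav', sr=16000, duration=5.0)
mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=20)

# Prints (20, 157) for a full five-second clip with librosa defaults;
# shorter clips are zero-padded and longer ones truncated by the script above
print(mfccs.shape)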
audio_classification/sound.tflite  0 → 100644
File added
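
Because sound.tflite is committed as a binary, its expected tensor layout is not visible in the diff. A quick sketch, assuming TensorFlow is installed and the path is relative to audio_classification/, for inspecting what the interpreter expects:

import tensorflow as tf

# Load the committed model and report its input/output tensor details
interpreter = tf.lite.Interpreter(model_path='sound.tflite')
interpreter.allocate_tensors()

print(interpreter.get_input_details()[0]['shape'])   # expected input tensor shape
print(interpreter.get_input_details()[0]['dtype'])   # expected input dtype
print(interpreter.get_output_details()[0]['shape'])  # output tensor shape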