Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
21_22-J-02
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
21_22-J-02
21_22-J-02
Commits
15fe6897
Commit
15fe6897
authored
Jan 07, 2022
by
Shehara AKGH - IT18205152
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: Audio Comparison
parent
b0e09f67
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
153 additions
and
0 deletions
+153
-0
BE-Pronunciation/Audio Comparison.py
BE-Pronunciation/Audio Comparison.py
+153
-0
No files found.
BE-Pronunciation/Audio Comparison.py
0 → 100644
View file @
15fe6897
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# Notebook convenience cell: install librosa from inside the running kernel.
# ``get_ipython`` is injected by IPython/Jupyter only, so when this exported
# file is executed as a plain Python script the name does not exist; skip the
# install in that case instead of aborting the whole script with NameError.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    _ipython_shell.system('pip install librosa')
# In[2]:
import
matplotlib.pyplot
as
plt
import
librosa
import
librosa.display
import
numpy
as
np
import
warnings
# Suppress every warning for the rest of the session (keeps cell output clean).
warnings.filterwarnings('ignore')

# Raw string: '\P' is not a valid escape sequence, and newer Python versions
# warn about it. The runtime value of the path is unchanged.
path = r'D:\Presently/test.wav'

# Load the recording, limited to 10 seconds via ``duration``.
# y is the sample array, sr the sampling rate reported by librosa.
y, sr = librosa.load(path, duration=10)

# Pre-emphasized version of the signal, used for the comparisons below.
y_filt = librosa.effects.preemphasis(y)
# In[3]:
# Plot the spectrograms of the original and the pre-emphasized signal
# one above the other for comparison.
def _stft_db(signal):
    """Return the STFT magnitude of *signal* in dB, referenced to its maximum."""
    magnitude = np.abs(librosa.stft(signal))
    return librosa.amplitude_to_db(magnitude, ref=np.max, top_db=None)


S_orig = _stft_db(y)
S_preemph = _stft_db(y_filt)

fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)

# Both panels use the same axis configuration.
_spec_axes = {'y_axis': 'log', 'x_axis': 'time'}

librosa.display.specshow(S_orig, ax=ax[0], **_spec_axes)
ax[0].set(title='Original signal')
ax[0].label_outer()

img = librosa.display.specshow(S_preemph, ax=ax[1], **_spec_axes)
ax[1].set(title='Pre-emphasized signal')

# One colorbar attached to both axes.
fig.colorbar(img, ax=ax, format="%+2.f dB")
# Apply pre-emphasis in pieces, as would be done for block streaming.
# The second call is seeded (zi) with the final filter state (zf) returned by
# the first call.
# In[4]:
_split_at = 1000

y_filt_1, zf = librosa.effects.preemphasis(y[:_split_at], return_zf=True)
y_filt_2, zf = librosa.effects.preemphasis(
    y[_split_at:], zi=zf, return_zf=True
)

# Compare the block-wise result with the one-shot result computed earlier.
# The value is the cell's output in a notebook; it is not stored.
_y_filt_streamed = np.concatenate([y_filt_1, y_filt_2])
np.allclose(y_filt, _y_filt_streamed)
# Framing and windowing of voice signals
# In[21]:
def framing(sig, fs=16000, win_len=0.025, win_hop=0.01):
    """Split a signal into a series of equally sized, overlapping frames.

    The signal is zero-padded at the end when necessary so that the last
    frame has full length and no trailing samples are dropped.

    Args:
        sig (array) : a mono (1-D) audio signal to cut into frames.
        fs (int) : the sampling frequency of the signal.
            Default is 16000.
        win_len (float) : window length in sec.
            Default is 0.025.
        win_hop (float) : step between successive windows in sec.
            Default is 0.01.

    Returns:
        2-D array of frames with shape (num_frames, frame_length), where
        frame_length is ``win_len * fs`` rounded to whole samples. An empty
        signal yields an array of shape (0, frame_length); any non-empty
        signal yields at least one frame.

    Raises:
        ValueError: if the window or the hop is shorter than one sample.
    """
    sig = np.asarray(sig)

    # Convert from seconds to whole samples up front. Rounding (rather than
    # truncating) protects against float error such as 0.29 * 100 == 28.99...
    frame_length = int(round(win_len * fs))
    frame_step = int(round(win_hop * fs))
    if frame_length < 1 or frame_step < 1:
        raise ValueError(
            "win_len and win_hop must each span at least one sample "
            f"(got frame_length={frame_length}, frame_step={frame_step})"
        )

    signal_length = len(sig)
    if signal_length == 0:
        return np.empty((0, frame_length), dtype=sig.dtype)

    if signal_length <= frame_length:
        # A short signal still produces one (zero-padded) frame.
        num_frames = 1
    else:
        # One frame at offset 0 plus ceil((N - L) / step) further frames.
        remaining = signal_length - frame_length
        num_frames = 1 + -(-remaining // frame_step)

    # Pad so that the last frame ends exactly at the end of the signal.
    padded_length = (num_frames - 1) * frame_step + frame_length
    pad_amount = padded_length - signal_length
    if pad_amount > 0:
        pad_signal = np.append(sig, np.zeros(pad_amount))
    else:
        pad_signal = sig

    # Row i holds the sample indices of frame i: start offset + 0..L-1.
    frame_starts = frame_step * np.arange(num_frames)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_length)

    frames = pad_signal[indices]
    return frames
# In[ ]:
# In[ ]:
# In[ ]:
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment