feat: Audio Comparison

parent b0e09f67
#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().system('pip install librosa')
# In[2]:
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
import warnings
warnings.filterwarnings('ignore')
path = 'D:/Presently/test.wav'
y, sr = librosa.load(path, duration=10)
y_filt = librosa.effects.preemphasis(y)
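# Conceptual check (assumption: this block is not part of the original notebook): pre-emphasis
# is a first-order difference filter, y_out[n] = y[n] - coef * y[n-1]. Apart from how the very
# first sample is initialized, a direct NumPy version matches librosa's output.
coef = 0.97  # librosa's default pre-emphasis coefficient
y_manual = np.append(y[0], y[1:] - coef * y[:-1])
np.allclose(y_filt[1:], y_manual[1:])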
# In[3]:
# Plot the original and pre-emphasized spectrograms for comparison
S_orig = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max, top_db=None)
S_preemph = librosa.amplitude_to_db(np.abs(librosa.stft(y_filt)), ref=np.max, top_db=None)
fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
librosa.display.specshow(S_orig, y_axis='log', x_axis='time', ax=ax[0])
ax[0].set(title='Original signal')
ax[0].label_outer()
img = librosa.display.specshow(S_preemph, y_axis='log', x_axis='time', ax=ax[1])
ax[1].set(title='Pre-emphasized signal')
fig.colorbar(img, ax=ax, format="%+2.f dB")
# Apply pre-emphasis in pieces for block streaming.
# Note that the second block initializes zi with the final state zf returned by the first call.
# In[4]:
y_filt_1, zf = librosa.effects.preemphasis(y[:1000], return_zf=True)
y_filt_2, zf = librosa.effects.preemphasis(y[1000:], zi=zf, return_zf=True)
np.allclose(y_filt, np.concatenate([y_filt_1, y_filt_2]))
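# The same pattern extends to any number of blocks by carrying the filter state zi/zf from
# one call to the next. A minimal sketch (assumption: the 1000-sample block size and this
# loop are illustrative, not part of the original notebook):
block_size = 1000
zi = None  # first call uses librosa's default initial state
filtered_blocks = []
for start in range(0, len(y), block_size):
    block = y[start:start + block_size]
    y_block, zi = librosa.effects.preemphasis(block, zi=zi, return_zf=True)
    filtered_blocks.append(y_block)
# The concatenated result should match filtering the whole signal in one call.
np.allclose(y_filt, np.concatenate(filtered_blocks))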
# Framing and windowing of voice signals
# In[21]:
def framing(sig, fs=16000, win_len=0.025, win_hop=0.01):
    """
    Transform a signal into a series of overlapping frames.

    Args:
        sig (array)     : a mono audio signal (Nx1) from which to compute features.
        fs (int)        : the sampling frequency of the signal we are working with.
                          Default is 16000.
        win_len (float) : window length in seconds.
                          Default is 0.025.
        win_hop (float) : step between successive windows in seconds.
                          Default is 0.01.

    Returns:
        array of frames and the frame length in samples.
    """
    # compute frame length and frame step (convert from seconds to samples)
    frame_length = win_len * fs
    frame_step = win_hop * fs
    signal_length = len(sig)
    frames_overlap = frame_length - frame_step

    # make sure that we have at least 1 frame
    num_frames = np.abs(signal_length - frames_overlap) // np.abs(frame_length - frames_overlap)
    rest_samples = np.abs(signal_length - frames_overlap) % np.abs(frame_length - frames_overlap)

    # pad the signal so that all frames have an equal number of samples
    # without truncating any samples from the original signal
    if rest_samples != 0:
        pad_signal_length = int(frame_step - rest_samples)
        z = np.zeros((pad_signal_length))
        pad_signal = np.append(sig, z)
        num_frames += 1
    else:
        pad_signal = sig

    # make sure to use integers as indices
    frame_length = int(frame_length)
    frame_step = int(frame_step)
    num_frames = int(num_frames)

    # compute the sample indices of every frame and slice the padded signal
    idx1 = np.tile(np.arange(0, frame_length), (num_frames, 1))
    idx2 = np.tile(np.arange(0, num_frames * frame_step, frame_step),
                   (frame_length, 1)).T
    indices = idx1 + idx2
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames, frame_length


# framing() is one step of the usual feature-extraction pipeline
# (pre_emphasis and windowing are defined elsewhere):
#
#     # pre-emphasis
#     if pre_emph:
#         sig = pre_emphasis(sig=sig, pre_emph_coeff=0.97)
#
#     # -> framing
#     frames, frame_length = framing(sig=sig,
#                                    fs=fs,
#                                    win_len=win_len,
#                                    win_hop=win_hop)
#
#     # -> windowing
#     windows = windowing(frames=frames,
#                         frame_len=frame_length,
#                         win_type=win_type)
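# In[ ]:
# The windowing() helper referenced in the pipeline comment above is not defined in this
# notebook. A minimal sketch of what it could look like (assumption: the name, signature,
# and use of scipy.signal.get_window are illustrative, not taken from the original):
from scipy import signal

def windowing(frames, frame_len, win_type="hamming"):
    """Apply a window function of length frame_len to every frame."""
    window = signal.get_window(win_type, frame_len, fftbins=True)
    return frames * window

# Example usage on the signal loaded earlier (sr comes from librosa.load):
frames, frame_length = framing(y, fs=sr, win_len=0.025, win_hop=0.01)
windows = windowing(frames, frame_length, win_type="hamming")
print(frames.shape, windows.shape)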