feat: Audio Comparison

15fe6897 · Shehara AKGH - IT18205152 · b0e09f67 · 15fe6897
Commit 15fe6897 authored Jan 07, 2022 by Shehara AKGH - IT18205152
Hide whitespace changes
Inline Side-by-side

Showing with 153 additions and 0 deletions

BE-Pronunciation/Audio Comparison.py BE-Pronunciation/Audio Comparison.py +153 -0

No files found.
--- a/BE-Pronunciation/Audio Comparison.py
+++ b/BE-Pronunciation/Audio Comparison.py
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+# Install librosa into the interpreter that is running this file.
+# get_ipython() only exists inside IPython/Jupyter, so the exported script
+# raised NameError under plain `python`; invoking pip through sys.executable
+# works in both environments. The return code is deliberately not checked:
+# like the original shell call, a failed install (e.g. no network) must not
+# stop a run where librosa is already available.
+import subprocess
+import sys
+
+subprocess.run([sys.executable, '-m', 'pip', 'install', 'librosa'])
+
+
+# In[2]:
+
+
+import matplotlib.pyplot as plt
+import librosa
+import librosa.display
+import numpy as np
+import warnings
+# Silence all warnings for the rest of the run.
+warnings.filterwarnings('ignore')
+
+# Raw string so the backslash of the Windows path is taken literally: '\P' is
+# not a valid escape sequence and an ordinary string literal triggers a
+# compile-time warning for it. The resulting path value is unchanged.
+path = r'D:\Presently/test.wav'
+
+# Load at most the first 10 seconds of the recording, then pre-emphasize it.
+y, sr = librosa.load(path, duration=10)
+y_filt = librosa.effects.preemphasis(y)
+
+
+# In[3]:
+
+
+# Plot the original and the pre-emphasized signal for comparison.
+# Both signals go through the same STFT -> magnitude -> dB conversion.
+S_orig, S_preemph = (
+    librosa.amplitude_to_db(np.abs(librosa.stft(signal)), ref=np.max, top_db=None)
+    for signal in (y, y_filt)
+)
+
+# Two stacked panels sharing both axes so the spectrograms line up.
+fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
+
+# Top panel: original signal.
+librosa.display.specshow(S_orig, y_axis='log', x_axis='time', ax=ax[0])
+ax[0].set(title='Original signal')
+ax[0].label_outer()
+
+# Bottom panel: pre-emphasized signal; its image handle drives the colorbar.
+img = librosa.display.specshow(S_preemph, y_axis='log', x_axis='time', ax=ax[1])
+ax[1].set(title='Pre-emphasized signal')
+
+fig.colorbar(img, ax=ax, format="%+2.f dB")
+
+
+# Apply pre-emphasis in pieces for block streaming.
+# Note that the second block initializes zi with the final state zf returned by the first call.
+
+# In[4]:
+
+
+# Filter the first 1000 samples, then continue on the remainder, handing the
+# filter state returned by the first call (zf) to the second call as zi.
+y_filt_1, zf = librosa.effects.preemphasis(y[:1000], return_zf=True)
+y_filt_2, zf = librosa.effects.preemphasis(y[1000:], zi=zf, return_zf=True)
+
+# Check that the block-wise result matches the one-shot result. The value is
+# printed because a bare expression is only echoed by a notebook; in the
+# exported script it would be computed and silently thrown away.
+print(np.allclose(y_filt, np.concatenate([y_filt_1, y_filt_2])))
+
+
+# Framing and windowing of voice signals
+
+# In[21]:
+
+
+
+def framing(sig, fs=16000, win_len=0.025, win_hop=0.01):
+    """
+    Transform a signal into a series of overlapping frames.
+
+    The signal is zero-padded at the end so that every frame holds exactly
+    ``frame_length`` samples and no sample of the original signal is
+    truncated. A signal shorter than one frame (including an empty one)
+    yields a single zero-padded frame.
+
+    Args:
+        sig     (array) : a mono audio signal from which to compute frames;
+                          it is flattened to 1-D before framing.
+        fs        (int) : the sampling frequency of the signal we are working with.
+                          Default is 16000.
+        win_len (float) : window length in sec.
+                          Default is 0.025.
+        win_hop (float) : step between successive windows in sec.
+                          Default is 0.01.
+
+    Returns:
+        tuple of
+          - array of frames with shape (num_frames, frame_length).
+          - frame length in samples (int).
+
+    Raises:
+        ValueError: if the window length or the hop is shorter than one sample.
+    """
+    samples = np.asarray(sig).reshape(-1)
+
+    # Compute frame length and frame step (convert from seconds to samples).
+    # Round instead of truncating so that floating-point noise in the product
+    # (e.g. 0.29 * 100 evaluating to 28.999999999999996) cannot drop a sample.
+    frame_length = int(round(win_len * fs))
+    frame_step = int(round(win_hop * fs))
+    if frame_length < 1 or frame_step < 1:
+        raise ValueError(
+            "win_len and win_hop must each span at least one sample "
+            "(got frame_length=%d, frame_step=%d)" % (frame_length, frame_step)
+        )
+
+    # Make sure that we have at least 1 frame: the first frame covers
+    # `frame_length` samples, and every further (possibly partial) hop over
+    # the remaining samples adds one more frame.
+    remaining = max(0, samples.size - frame_length)
+    num_frames = 1 + (remaining + frame_step - 1) // frame_step
+
+    # Pad the signal so that all frames have an equal number of samples
+    # without truncating any samples from the original signal.
+    padded_length = frame_length + (num_frames - 1) * frame_step
+    pad = padded_length - samples.size
+    if pad > 0:
+        samples = np.concatenate([samples, np.zeros(pad, dtype=samples.dtype)])
+
+    # Compute indices: row i lists the sample positions of frame i, i.e. the
+    # frame's start offset plus 0 .. frame_length - 1.
+    starts = np.arange(num_frames) * frame_step
+    offsets = np.arange(frame_length)
+    indices = starts[:, np.newaxis] + offsets[np.newaxis, :]
+
+    frames = samples[indices]
+    return frames, frame_length
+
+
+# Sketch of the intended feature pipeline around framing(). It is kept
+# commented out: pre_emphasis and windowing are not defined in the visible
+# part of this file.
+#
+#     # pre-emphasis
+#     if pre_emph:
+#         sig = pre_emphasis(sig=sig, pre_emph_coeff=0.97)
+#
+#     # -> framing
+#     frames, frame_length = framing(sig=sig,
+#                                    fs=fs,
+#                                    win_len=win_len,
+#                                    win_hop=win_hop)
+#
+#     # -> windowing
+#     windows = windowing(frames=frames,
+#                         frame_len=frame_length,
+#                         win_type=win_type)
+#
+
+
+# In[ ]:
+
+
+
+
+
+# In[ ]:
+
+
+
+
+
+# In[ ]:
+
+
+
+