Commit 353fcd25 authored by Linisha Siriwardana

Test projects: background-noise-reduce

parent 2d26c54d
# noise_reduction
> See test results on: [https://dodiku.github.io/noise_reduction/](https://dodiku.github.io/noise_reduction/)
## Audio enhancement feature tests in Python 3
#### Installation
To install:
1. ``$ brew install sox``
1. ``$ brew install vorbis-tools``
1. Create a virtualenv
1. Install the dependencies in one of two ways:
   - manually *(recommended)*:
     ``$ pip3 install librosa``
     ``$ pip3 install pysndfx``
   - or automatically using pip:
     ``$ pip3 install -r requirements.txt``

To run:
``$ python3 noise.py``
#### Interesting resources:
- LibROSA ([documentation](http://librosa.github.io/librosa/index.html) + [repository](https://github.com/librosa/librosa) + [paper](https://bmcfee.github.io/papers/scipy2015_librosa.pdf))
- Think DSP ([book](http://greenteapress.com/wp/think-dsp/) + [repository](https://github.com/AllenDowney/ThinkDSP/))
- Pyo ([blog post](http://www.matthieuamiguet.ch/blog/diy-guitar-effects-python) + [repository](https://github.com/belangeo/pyo))
- pysndfx ([repository](https://github.com/carlthome/python-audio-effects/tree/04dbee6063b0537b63346bb1e55deb03406e1170/pysndfx))
#### Less directly relevant papers:
- Noise Cancellation Method for Robust Speech Recognition ([PDF](http://research.ijcaonline.org/volume45/number11/pxc3879438.pdf))
- Robust Features for Noisy Speech Recognition using MFCC Computation from Magnitude Spectrum of Higher Order Autocorrelation Coefficients ([PDF](https://pdfs.semanticscholar.org/a483/5f28c02f07e6bef04ff9db948505dc990af7.pdf))
- Improving the Noise-Robustness of Mel-Frequency Cepstral Coefficients for Speech Processing ([PDF](http://www.sapaworkshops.org/2006/2006/papers/131.pdf))
import librosa
from pysndfx import AudioEffectsChain
import numpy as np
import math
import python_speech_features
import scipy as sp
from scipy import signal
import soundfile
# http://python-speech-features.readthedocs.io/en/latest/
# https://github.com/jameslyons/python_speech_features
# http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/#deltas-and-delta-deltas
# http://dsp.stackexchange.com/search?q=noise+reduction/
'''------------------------------------
FILE READER:
receives filename,
returns audio time series (y) and sampling rate of y (sr)
------------------------------------'''
def read_file(file_name):
    sample_file = file_name
    sample_directory = '00_samples/'
    sample_path = sample_directory + sample_file

    # generating audio time series and a sampling rate (int)
    y, sr = librosa.load(sample_path)

    return y, sr
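
# A minimal variant (a sketch, not part of the original test set): librosa.load
# resamples everything to 22050 Hz by default, so passing sr=None keeps the
# file's native sampling rate instead. The function name is hypothetical and it
# is not called below.
def read_file_native(file_name):
    sample_path = '00_samples/' + file_name
    y, sr = librosa.load(sample_path, sr=None)
    return y, sr
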
'''------------------------------------
NOISE REDUCTION USING POWER:
receives an audio matrix,
returns the matrix after gain reduction on noise
------------------------------------'''
def reduce_noise_power(y, sr):

    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    threshold_h = round(np.median(cent))*1.5
    threshold_l = round(np.median(cent))*0.1

    less_noise = AudioEffectsChain().lowshelf(gain=-30.0, frequency=threshold_l, slope=0.8).highshelf(gain=-12.0, frequency=threshold_h, slope=0.5)#.limiter(gain=6.0)
    y_clean = less_noise(y)

    return y_clean
'''------------------------------------
NOISE REDUCTION USING CENTROID ANALYSIS:
receives an audio matrix,
returns the matrix after gain reduction on noise
------------------------------------'''
def reduce_noise_centroid_s(y, sr):

    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    threshold_h = np.max(cent)
    threshold_l = np.min(cent)

    less_noise = AudioEffectsChain().lowshelf(gain=-12.0, frequency=threshold_l, slope=0.5).highshelf(gain=-12.0, frequency=threshold_h, slope=0.5).limiter(gain=6.0)
    y_cleaned = less_noise(y)

    return y_cleaned

def reduce_noise_centroid_mb(y, sr):

    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    threshold_h = np.max(cent)
    threshold_l = np.min(cent)

    less_noise = AudioEffectsChain().lowshelf(gain=-30.0, frequency=threshold_l, slope=0.5).highshelf(gain=-30.0, frequency=threshold_h, slope=0.5).limiter(gain=10.0)
    # less_noise = AudioEffectsChain().lowpass(frequency=threshold_h).highpass(frequency=threshold_l)
    y_cleaned = less_noise(y)

    cent_cleaned = librosa.feature.spectral_centroid(y=y_cleaned, sr=sr)
    columns, rows = cent_cleaned.shape

    boost_h = math.floor(rows/3*2)
    boost_l = math.floor(rows/6)
    boost = math.floor(rows/3)

    # boost_bass = AudioEffectsChain().lowshelf(gain=20.0, frequency=boost, slope=0.8)
    boost_bass = AudioEffectsChain().lowshelf(gain=16.0, frequency=boost_h, slope=0.5)#.lowshelf(gain=-20.0, frequency=boost_l, slope=0.8)
    y_clean_boosted = boost_bass(y_cleaned)

    return y_clean_boosted
'''------------------------------------
NOISE REDUCTION USING MFCC:
receives an audio matrix,
returns the matrix after gain reduction on noise
------------------------------------'''
def reduce_noise_mfcc_down(y, sr):

    hop_length = 512

    ## librosa
    # mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    # librosa.mel_to_hz(mfcc)

    ## mfcc
    # note: the liftered log filterbank energies computed below overwrite the
    # plain MFCCs from the first call
    mfcc = python_speech_features.base.mfcc(y)
    mfcc = python_speech_features.base.logfbank(y)
    mfcc = python_speech_features.base.lifter(mfcc)

    sum_of_squares = []
    index = -1
    for r in mfcc:
        sum_of_squares.append(0)
        index = index + 1
        for n in r:
            sum_of_squares[index] = sum_of_squares[index] + n**2

    strongest_frame = sum_of_squares.index(max(sum_of_squares))
    hz = python_speech_features.base.mel2hz(mfcc[strongest_frame])

    max_hz = max(hz)
    min_hz = min(hz)

    speech_booster = AudioEffectsChain().highshelf(frequency=min_hz*(-1)*1.2, gain=-12.0, slope=0.6).limiter(gain=8.0)
    y_speach_boosted = speech_booster(y)

    return (y_speach_boosted)
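
# Sketch only (not called anywhere in this file, helper name is hypothetical):
# the per-frame energy loop above can be written in one vectorized step with
# NumPy.
def strongest_frame_index(mfcc):
    # sum of squares per frame (row), then the index of the largest value
    return int(np.argmax(np.sum(mfcc ** 2, axis=1)))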

def reduce_noise_mfcc_up(y, sr):

    hop_length = 512

    ## librosa
    # mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    # librosa.mel_to_hz(mfcc)

    ## mfcc
    mfcc = python_speech_features.base.mfcc(y)
    mfcc = python_speech_features.base.logfbank(y)
    mfcc = python_speech_features.base.lifter(mfcc)

    sum_of_squares = []
    index = -1
    for r in mfcc:
        sum_of_squares.append(0)
        index = index + 1
        for n in r:
            sum_of_squares[index] = sum_of_squares[index] + n**2

    strongest_frame = sum_of_squares.index(max(sum_of_squares))
    hz = python_speech_features.base.mel2hz(mfcc[strongest_frame])

    max_hz = max(hz)
    min_hz = min(hz)

    speech_booster = AudioEffectsChain().lowshelf(frequency=min_hz*(-1), gain=12.0, slope=0.5)#.highshelf(frequency=min_hz*(-1)*1.2, gain=-12.0, slope=0.5)#.limiter(gain=8.0)
    y_speach_boosted = speech_booster(y)

    return (y_speach_boosted)
'''------------------------------------
NOISE REDUCTION USING MEDIAN:
receives an audio matrix,
returns the matrix after gain reduction on noise
------------------------------------'''
def reduce_noise_median(y, sr):
    y = sp.signal.medfilt(y, 3)
    return (y)
'''------------------------------------
SILENCE TRIMMER:
receives an audio matrix,
returns an audio matrix with less silence and the amount of time that was trimmed
------------------------------------'''
def trim_silence(y):
    y_trimmed, index = librosa.effects.trim(y, top_db=20, frame_length=2, hop_length=500)
    trimmed_length = librosa.get_duration(y) - librosa.get_duration(y_trimmed)

    return y_trimmed, trimmed_length
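
# Sketch (kept as a reference only, not called below; the helper name is
# hypothetical): the `index` returned by librosa.effects.trim is the
# [start, end] sample interval of the non-silent region, so the trimmed time
# can also be computed from it directly instead of via two get_duration calls.
def trimmed_seconds(y, index, sr):
    kept_samples = index[1] - index[0]
    return (len(y) - kept_samples) / float(sr)
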
'''------------------------------------
AUDIO ENHANCER:
receives an audio matrix,
returns the same matrix after audio manipulation
------------------------------------'''
def enhance(y):
    apply_audio_effects = AudioEffectsChain().lowshelf(gain=10.0, frequency=260, slope=0.1).reverb(reverberance=25, hf_damping=5, room_scale=5, stereo_depth=50, pre_delay=20, wet_gain=0, wet_only=False)#.normalize()
    y_enhanced = apply_audio_effects(y)

    return y_enhanced
'''------------------------------------
OUTPUT GENERATOR:
receives a destination path, file name, audio matrix, and sample rate,
generates a wav file based on input
------------------------------------'''
def output_file(destination, filename, y, sr, ext=""):
    destination = destination + filename[:-4] + ext + '.wav'
    librosa.output.write_wav(destination, y, sr)
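
# Note: librosa.output.write_wav exists in the pinned librosa 0.5.0 but was
# removed in later librosa releases. A minimal alternative sketch using the
# already-imported soundfile package (the function name is hypothetical and it
# is not called below):
def output_file_sf(destination, filename, y, sr, ext=""):
    destination = destination + filename[:-4] + ext + '.wav'
    soundfile.write(destination, y, sr)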
'''------------------------------------
LOGIC:
[1] load file
[2] reduce noise
[3] trim silence
[4] output file
sample files:
01_counting.m4a
02_wind_and_cars.m4a
03_truck.m4a
04_voices.m4a
05_ambeint.m4a
06_office.m4a
------------------------------------'''
samples = ['01_counting.m4a','02_wind_and_cars.m4a','03_truck.m4a','04_voices.m4a','05_ambeint.m4a','06_office.m4a']
for s in samples:
    # reading a file
    filename = s
    y, sr = read_file(filename)

    # reducing noise using db power
    y_reduced_power = reduce_noise_power(y, sr)
    y_reduced_centroid_s = reduce_noise_centroid_s(y, sr)
    y_reduced_centroid_mb = reduce_noise_centroid_mb(y, sr)
    y_reduced_mfcc_up = reduce_noise_mfcc_up(y, sr)
    y_reduced_mfcc_down = reduce_noise_mfcc_down(y, sr)
    y_reduced_median = reduce_noise_median(y, sr)

    # trimming silences
    y_reduced_power, time_trimmed = trim_silence(y_reduced_power)
    # print (time_trimmed)

    y_reduced_centroid_s, time_trimmed = trim_silence(y_reduced_centroid_s)
    # print (time_trimmed)

    y_reduced_power, time_trimmed = trim_silence(y_reduced_power)
    # print (time_trimmed)

    y_reduced_centroid_mb, time_trimmed = trim_silence(y_reduced_centroid_mb)
    # print (time_trimmed)

    y_reduced_mfcc_up, time_trimmed = trim_silence(y_reduced_mfcc_up)
    # print (time_trimmed)

    y_reduced_mfcc_down, time_trimmed = trim_silence(y_reduced_mfcc_down)
    # print (time_trimmed)

    y_reduced_median, time_trimmed = trim_silence(y_reduced_median)

    # generating output file [1]
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_power, sr, '_pwr')
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_centroid_s, sr, '_ctr_s')
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_centroid_mb, sr, '_ctr_mb')
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_mfcc_up, sr, '_mfcc_up')
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_mfcc_down, sr, '_mfcc_down')
    output_file('01_samples_trimmed_noise_reduced/', filename, y_reduced_median, sr, '_median')
    output_file('01_samples_trimmed_noise_reduced/', filename, y, sr, '_org')
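

# Refactoring sketch (not used above, names are hypothetical): the same
# pipeline driven by a (suffix, reducer) table, so adding a new reduction
# method only needs one extra entry.
def run_all(filename, out_dir='01_samples_trimmed_noise_reduced/'):
    reducers = {
        '_pwr': reduce_noise_power,
        '_ctr_s': reduce_noise_centroid_s,
        '_ctr_mb': reduce_noise_centroid_mb,
        '_mfcc_up': reduce_noise_mfcc_up,
        '_mfcc_down': reduce_noise_mfcc_down,
        '_median': reduce_noise_median,
    }
    y, sr = read_file(filename)
    for ext, reducer in reducers.items():
        y_reduced, _ = trim_silence(reducer(y, sr))
        output_file(out_dir, filename, y_reduced, sr, ext)
    output_file(out_dir, filename, y, sr, '_org')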
appdirs==1.4.3
audioread==2.1.4
Cython==0.25.2
decorator==4.0.11
joblib==0.11
librosa==0.5.0
numpy==1.12.1
packaging==16.8
pyparsing==2.2.0
pysndfx==0.1.0
python-speech-features==0.5
resampy==0.1.5
scikit-learn==0.18.1
scipy==0.19.0
six==1.10.0
html {
  background-color: LightSeaGreen;
}

body {
  font-family: "Courier New", Courier, monospace;
  color: #222;
  font-size: 0.9em;
}

h1 {
  font-size: 4em;
  text-align: center;
  margin-bottom: 0.5em;
  margin-top: 2em;
}

h2 {
  font-size: 2.2em;
  font-weight: normal;
  text-align: center;
  margin-bottom: 48px;
  margin-top: 0px;
}

.text_container {
  margin: auto;
  margin-top: 76px;
  max-width: 600px;
  line-height: 2em;
  margin-bottom: 100px;
}

h3 {
  font-size: 1.6em;
  font-weight: bold;
  /* text-align: center; */
  margin-bottom: 0.5em;
  margin-top: 4em;
}

h4 {
  font-size: 1.2em;
  /*font-weight: bold;*/
  /* text-align: center; */
  margin-bottom: 0.5em;
  margin-top: 4em;
}

.code {
  border: 0.5px solid lightgray;
  background-color: GhostWhite;
  border-radius: 3px;
  padding-top: 4px;
  padding-bottom: 4px;
  padding-right: 12px;
  padding-left: 12px;
  color: gray;
}

.duration {
  color: rgba(34,34,34,0.6);
  font-style: italic;
  /*margin-top: -20px;*/
}

ul {
  line-height: 36px;
}

footer {
  text-align: center;
  font-size: 14px;
  margin-top: 128px;
  margin-bottom: 64px;
  color: rgba(255, 255, 255, 0.4);
}
import librosa
'''
01_counting.m4a
02_wind_and_cars.m4a
03_truck.m4a
04_voices.m4a
05_ambeint.m4a
06_office.m4a
'''
sample_file = '06_office.m4a'
sample_directory = '00_samples/'
sample_path = sample_directory + sample_file
trimmed_destination = 'samples_trimmed/'
silenced_destination = 'samples_silence_reduced/'
y, sr = librosa.load(sample_path)
y_trimmed, index = librosa.effects.trim(y, top_db=12, frame_length=2)
print(librosa.get_duration(y), librosa.get_duration(y_trimmed))
destination = trimmed_destination + sample_file[:-4] + '.wav'
librosa.output.write_wav(destination, y_trimmed, sr)