Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2022-066
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
2022-066
2022-066
Commits
e29e7ea8
Commit
e29e7ea8
authored
May 12, 2022
by
Tandin Wangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
trying out initial implementation
parent
d51f7fd3
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
256 additions
and
0 deletions
+256
-0
capturing_audio_noise/audio_signal.py
capturing_audio_noise/audio_signal.py
+24
-0
capturing_audio_noise/listenaudio.py
capturing_audio_noise/listenaudio.py
+8
-0
converting2waFormate/wavConvert.py
converting2waFormate/wavConvert.py
+10
-0
silenceremover/silenceremove.py
silenceremover/silenceremove.py
+147
-0
silenceremover/silenceremover1.py
silenceremover/silenceremover1.py
+21
-0
splitAudioFiles/split.py
splitAudioFiles/split.py
+31
-0
video2audio_ffmpeg.py
video2audio_ffmpeg.py
+15
-0
No files found.
capturing_audio_noise/audio_signal.py
0 → 100644
View file @
e29e7ea8
# Visualise the waveform of a WAV file with matplotlib.
from scipy.io.wavfile import read
import numpy as np
import matplotlib.pyplot as plt

# Read the audio file: returns the sample rate (Hz) and the sample array.
samplerate, data = read('/home/tandin/notebook/Research/videosrc/myvoice.wav')

# Frame rate for the audio
print(samplerate)

# Duration of the audio in seconds
duration = len(data) / samplerate
print("Duration of Audio in Seconds", duration)
print("Duration of Audio in Minutes", duration / 60)

# Time axis in seconds, one entry per sample.  Derived from the sample count
# rather than np.arange(0, duration, 1/samplerate): float rounding in the
# step form can yield one element more or fewer than len(data), which makes
# plt.plot(time, data) fail with a shape mismatch.
time = np.arange(len(data)) / samplerate

# Plotting the graph using matplotlib
plt.plot(time, data)
plt.xlabel('Time [s]')
plt.ylabel('Amplitude')
plt.title('data1.wav')
plt.show()
\ No newline at end of file
capturing_audio_noise/listenaudio.py
0 → 100644
View file @
e29e7ea8
# Load an MP3 with pydub and play it through the default audio device.
from pydub import AudioSegment
from pydub.playback import play

AUDIO_PATH = "/home/tandin/notebook/Research/videosrc/IT4010ResearchProject34384144_full_video.mp3"

# Decode the file, then hand the resulting segment to pydub's playback helper.
segment = AudioSegment.from_file(AUDIO_PATH, format="mp3")
play(segment)
\ No newline at end of file
converting2waFormate/wavConvert.py
0 → 100644
View file @
e29e7ea8
from os import path
from pydub import AudioSegment

# Source (mp3) and destination (wav) paths.
src = "/home/tandin/notebook/Research/videosrc/try1.mp3"
dst = "/home/tandin/notebook/Research/videosrc/myvoice.wav"

# Convert mp3 to wav.  (The original comment said "wav to mp3", which is
# backwards: from_mp3 reads the mp3 source and export writes a wav file.)
sound = AudioSegment.from_mp3(src)
sound.export(dst, format="wav")
\ No newline at end of file
silenceremover/silenceremove.py
0 → 100644
View file @
e29e7ea8
import
collections
import
contextlib
import
sys
import
wave
import
webrtcvad
def read_wave(path):
    """Load a mono 16-bit PCM .wav file.

    Returns a ``(pcm_bytes, sample_rate)`` tuple.  Asserts that the file is
    single-channel, 2 bytes per sample, and at one of the sample rates the
    VAD accepts (8/16/32/48 kHz).
    """
    with contextlib.closing(wave.open(path, 'rb')) as handle:
        assert handle.getnchannels() == 1
        assert handle.getsampwidth() == 2
        rate = handle.getframerate()
        assert rate in (8000, 16000, 32000, 48000)
        frames = handle.readframes(handle.getnframes())
    return frames, rate
def write_wave(path, audio, sample_rate):
    """Save raw PCM bytes as a mono 16-bit .wav file at *sample_rate* Hz."""
    out = wave.open(path, 'wb')
    with contextlib.closing(out):
        out.setnchannels(1)
        out.setsampwidth(2)
        out.setframerate(sample_rate)
        out.writeframes(audio)
class Frame(object):
    """Represents a "frame" of audio data.

    Attributes:
        bytes: raw PCM payload of the frame (name kept for API
            compatibility even though it shadows the builtin).
        timestamp: start offset within the source audio, in seconds.
        duration: frame length in seconds.
    """
    def __init__(self, bytes, timestamp, duration):
        self.bytes = bytes
        self.timestamp = timestamp
        self.duration = duration


def frame_generator(frame_duration_ms, audio, sample_rate):
    """Generates audio frames from PCM audio data.

    Takes the desired frame duration in milliseconds, the PCM data, and
    the sample rate.  Yields Frames of the requested duration; only
    complete frames are produced (a trailing partial frame is dropped).
    """
    # Bytes per frame: samples-per-frame times 2 bytes per 16-bit sample.
    n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
    offset = 0
    timestamp = 0.0
    # Frame duration in seconds (n/2 samples at sample_rate Hz).
    duration = (float(n) / sample_rate) / 2.0
    # Fix: use <= so that audio whose length is an exact multiple of the
    # frame size still yields its final frame.  The original `<` dropped
    # the last complete frame in that (common) case.
    while offset + n <= len(audio):
        yield Frame(audio[offset:offset + n], timestamp, duration)
        timestamp += duration
        offset += n
def vad_collector(sample_rate, frame_duration_ms,
                  padding_duration_ms, vad, frames):
    """Filters out non-voiced audio frames.

    Given a webrtcvad.Vad and a source of audio frames, yields only
    the voiced audio.

    Uses a padded, sliding window algorithm over the audio frames.
    When more than 90% of the frames in the window are voiced (as
    reported by the VAD), the collector triggers and begins yielding
    audio frames. Then the collector waits until 90% of the frames in
    the window are unvoiced to detrigger.

    The window is padded at the front and back to provide a small
    amount of silence or the beginnings/endings of speech around the
    voiced frames.

    Arguments:
    sample_rate - The audio sample rate, in Hz.
    frame_duration_ms - The frame duration in milliseconds.
    padding_duration_ms - The amount to pad the window, in milliseconds.
    vad - An instance of webrtcvad.Vad.
    frames - a source of audio frames (sequence or generator).

    Returns: A generator that yields PCM audio data (one bytes object
    per detected voiced segment).

    NOTE(review): writes a debug trace to stdout as a side effect —
    '1'/'0' per frame plus '+(t)'/'-(t)' markers at trigger boundaries.
    """
    # Window size in frames, e.g. 300 ms padding / 30 ms frames = 10.
    num_padding_frames = int(padding_duration_ms / frame_duration_ms)
    # We use a deque for our sliding window/ring buffer.
    ring_buffer = collections.deque(maxlen=num_padding_frames)
    # We have two states: TRIGGERED and NOTTRIGGERED. We start in the
    # NOTTRIGGERED state.
    triggered = False

    voiced_frames = []
    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)

        sys.stdout.write('1' if is_speech else '0')
        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = len([f for f, speech in ring_buffer if speech])
            # If we're NOTTRIGGERED and more than 90% of the frames in
            # the ring buffer are voiced frames, then enter the
            # TRIGGERED state.
            if num_voiced > 0.9 * ring_buffer.maxlen:
                triggered = True
                sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
                # We want to yield all the audio we see from now until
                # we are NOTTRIGGERED, but we have to start with the
                # audio that's already in the ring buffer.
                for f, s in ring_buffer:
                    voiced_frames.append(f)
                ring_buffer.clear()
        else:
            # We're in the TRIGGERED state, so collect the audio data
            # and add it to the ring buffer.
            voiced_frames.append(frame)
            ring_buffer.append((frame, is_speech))
            num_unvoiced = len([f for f, speech in ring_buffer if not speech])
            # If more than 90% of the frames in the ring buffer are
            # unvoiced, then enter NOTTRIGGERED and yield whatever
            # audio we've collected.
            if num_unvoiced > 0.9 * ring_buffer.maxlen:
                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                triggered = False
                yield b''.join([f.bytes for f in voiced_frames])
                ring_buffer.clear()
                voiced_frames = []
    # End-of-input: report where the final (unterminated) segment ended.
    if triggered:
        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
    sys.stdout.write('\n')
    # If we have any leftover voiced audio when we run out of input,
    # yield it.
    if voiced_frames:
        yield b''.join([f.bytes for f in voiced_frames])
def main(args):
    """CLI entry point: split a wav file into voiced chunk files.

    ``args`` is ``sys.argv[1:]``: the VAD aggressiveness (an int passed to
    webrtcvad.Vad) followed by the path of the wav file to process.
    """
    if len(args) != 2:
        sys.stderr.write('Usage: example.py <aggressiveness> <path to wav file>\n')
        sys.exit(1)
    aggressiveness, wav_path = args
    audio, sample_rate = read_wave(wav_path)
    detector = webrtcvad.Vad(int(aggressiveness))
    # Materialise the 30 ms frames, then run the VAD segment collector
    # with a 300 ms padding window.
    frames = list(frame_generator(30, audio, sample_rate))
    segments = vad_collector(sample_rate, 30, 300, detector, frames)
    # Write each voiced segment out as its own numbered wav file.
    for index, segment in enumerate(segments):
        path = 'chunk-%002d.wav' % (index,)
        print(' Writing %s' % (path,))
        write_wave(path, segment, sample_rate)


if __name__ == '__main__':
    main(sys.argv[1:])
\ No newline at end of file
silenceremover/silenceremover1.py
0 → 100644
View file @
e29e7ea8
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Input audio file and its container format.
file_path = "/home/tandin/notebook/Research/splitAudioFiles/splitaudio1/chunk0.mp4"
file_name = file_path.split('/')[-1]
audio_format = "mp4"

# Read the audio and split it into chunks at stretches of silence
# (>= 100 ms quieter than -45 dBFS), keeping 50 ms of silence per chunk edge.
sound = AudioSegment.from_file(file_path, format=audio_format)
audio_chunks = split_on_silence(sound,
                                min_silence_len=100,
                                silence_thresh=-45,
                                keep_silence=50)

# Concatenate the chunks back together, dropping the removed silence.
combined = AudioSegment.empty()
for chunk in audio_chunks:
    combined += chunk

# Fix: the original f-string was missing the '/' before {file_name}, so the
# result was written beside the 'silenceremover' directory as
# 'silenceremover<name>' instead of inside it.
combined.export(f'/home/tandin/notebook/Research/silenceremover/{file_name}',
                format=audio_format)
\ No newline at end of file
splitAudioFiles/split.py
0 → 100644
View file @
e29e7ea8
from pydub import AudioSegment
import os

# Ensure the output directory exists.  Fix: the original created
# "splitaudio" but exported into "splitaudio1", so every export failed
# whenever "splitaudio1" was missing — create the directory actually used.
if not os.path.isdir("splitaudio1"):
    os.mkdir("splitaudio1")

audio = AudioSegment.from_file("/home/tandin/notebook/Research/videosrc/IT4010ResearchProject34384144_full_video.mp4")
lengthaudio = len(audio)
print("Length of Audio File", lengthaudio)

start = 0
# Chunk size in milliseconds: 300000 ms = 5 minutes per chunk.
# (The original comment claimed 10 seconds, which did not match the value.)
threshold = 300000
end = 0
counter = 0

# Walk the audio in fixed-size windows and export each slice; pydub
# slicing clamps past-the-end indices, so the final short chunk is safe.
while start < len(audio):
    end += threshold
    print(start, end)
    chunk = audio[start:end]
    filename = f'splitaudio1/chunk{counter}.mp4'
    chunk.export(filename, format="mp4")
    counter += 1
    start += threshold
\ No newline at end of file
video2audio_ffmpeg.py
0 → 100644
View file @
e29e7ea8
import
subprocess
import
os
import
sys
def convert_video_to_audio_ffmpeg(video_file, output_ext="mp3"):
    """Converts video to audio directly using `ffmpeg` command
    with the help of subprocess module.

    The audio is written next to the input file, with the same base name
    and *output_ext* as extension (default: "mp3").  ffmpeg's output is
    discarded; '-y' overwrites an existing output file without prompting.
    """
    filename, ext = os.path.splitext(video_file)
    # Fix: use the computed base name for the output path.  Previously
    # `filename` was computed but unused and the output name was a fixed
    # literal, so every run wrote to the same file regardless of input.
    subprocess.call(["ffmpeg", "-y", "-i", video_file,
                     f"{filename}.{output_ext}"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)
if __name__ == "__main__":
    # CLI entry: the first argument is the video file to convert.
    convert_video_to_audio_ffmpeg(sys.argv[1])
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment