Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2023-286
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2023-286
2023-286
Commits
659f9e8a
Commit
659f9e8a
authored
May 27, 2023
by
Pulasthi Tharaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
upload preprocessing files
parent
2d0a9ddc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
229 additions
and
0 deletions
+229
-0
Training_models/Preprocessing and Training_voices.py
Training_models/Preprocessing and Training_voices.py
+229
-0
No files found.
Training_models/Preprocessing and Training_voices.py
0 → 100644
View file @
659f9e8a
# -*- coding: utf-8 -*-
"""Untitled1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1VT1vD9lWM4WJeb0Y6JmRbsH89yR5C2dE
"""
import
os
import
torch
import
torchaudio
import
torch.nn
as
nn
import
torch.optim
as
optim
from
torch.utils.data
import
Dataset
,
DataLoader
# Root folder of the dataset; every entry directly inside it is counted as a class.
data_dir = 'drive/MyDrive/DatasetNew'

# Audio-processing parameters.
batch_size = 32
sample_rate = 44_100  # Adjust based on your dataset (presumably Hz - confirm)
duration = 4  # Adjust based on your dataset (presumably seconds - confirm)

# Number of output classes = number of entries found under data_dir.
num_classes = len(os.listdir(data_dir))
# Custom dataset class
class VoiceDataset(Dataset):
    """Audio-classification dataset laid out as ``data_dir/<class name>/<audio file>``.

    Class indices are assigned by enumerating the alphabetically sorted entries
    of ``data_dir``. Every file inside a class folder becomes one sample.
    """

    def __init__(self, data_dir):
        """Index every audio file under ``data_dir``.

        Args:
            data_dir: Path of the folder that holds one sub-folder per class.
        """
        self.data_dir = data_dir
        self.file_list = []  # full path of each sample
        self.labels = []  # integer class index of each sample, parallel to file_list

        class_labels = sorted(os.listdir(data_dir))
        for i, label in enumerate(class_labels):
            class_dir = os.path.join(data_dir, label)
            for audio_file in os.listdir(class_dir):
                audio_path = os.path.join(class_dir, audio_file)
                self.file_list.append(audio_path)
                self.labels.append(i)

    def __getitem__(self, index):
        """Load one sample and return ``(waveform, label)``.

        At most ``duration * sample_rate`` frames are read (both are
        module-level settings). The sample rate reported by ``torchaudio.load``
        is discarded, so no resampling takes place here.
        """
        audio_path = self.file_list[index]
        waveform, _ = torchaudio.load(audio_path, num_frames=duration * sample_rate)
        label = self.labels[index]
        return waveform, label

    def __len__(self):
        """Return the number of indexed audio files."""
        return len(self.file_list)
class VoiceModel(nn.Module):
    """Small 1-D convolutional classifier for raw mono waveforms.

    Input:  tensor of shape ``(batch, 1, frames)`` (``conv1`` has one input channel).
    Output: tensor of shape ``(batch, num_classes)`` holding softmax probabilities.

    NOTE: ``nn.CrossEntropyLoss`` expects unnormalised logits. If this model is
    trained with that loss, feed it the output of ``fc2`` rather than the
    softmax output, otherwise softmax is effectively applied twice.
    """

    def __init__(self, num_classes):
        """Build the layers.

        Args:
            num_classes: Size of the output layer.
        """
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # fc1 is fixed at 64 * 25 inputs, but the conv output length grows with
        # the clip length. Pooling to exactly 25 frames makes any clip length
        # fit; when the conv output already has 25 frames this is the identity.
        # The layer has no parameters, so state_dict keys are unchanged.
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities for a batch of waveforms ``x``."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)  # (batch, 64, 25) regardless of input length
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x
# Build the classifier with one output unit per class.
model = VoiceModel(num_classes=num_classes)

# Loss function and optimiser.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Batch collation helper
def collate_fn(batch):
    """Collate ``(waveform, label)`` samples into padded batch tensors.

    Each waveform with a channel axis, i.e. shape ``(channels, frames)``, is
    averaged over its channels to a mono ``(frames,)`` signal. All signals are
    then zero-padded on the right to the longest one in the batch.

    Args:
        batch: Sequence of ``(waveform, label)`` pairs; ``label`` may be an int
            or a zero-dimensional tensor.

    Returns:
        ``(waveforms, labels)`` where ``waveforms`` has shape
        ``(batch, max_frames)`` and ``labels`` is a ``torch.long`` tensor of
        shape ``(batch,)``.

    Raises:
        ValueError: If ``batch`` is empty.
    """
    # Imported locally: the functional module is not imported above this point.
    import torch.nn.functional as F

    if not batch:
        raise ValueError("collate_fn received an empty batch")

    signals = []
    targets = []
    for waveform, label in batch:
        if waveform.dim() == 2:
            # Drop the channel axis so callers can add their own with unsqueeze(1).
            waveform = waveform.mean(dim=0)
        signals.append(waveform)
        targets.append(int(label))

    # Pad along the time (last) axis up to the longest signal.
    max_length = max(signal.size(-1) for signal in signals)
    waveforms = torch.stack(
        [F.pad(signal, (0, max_length - signal.size(-1))) for signal in signals],
        dim=0,
    )
    # Labels are plain ints, so build a tensor instead of stacking.
    labels = torch.tensor(targets, dtype=torch.long)
    return waveforms, labels
# Build the dataset from the class folders and wrap it in a DataLoader.
batch_size = 32  # Specify your desired batch size
shuffle = True  # Specify whether you want to shuffle the data
num_workers = 4  # Specify the number of worker processes for data loading

# DataLoader needs a Dataset object, not the folder path string.
dataset = VoiceDataset(data_dir)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    collate_fn=collate_fn,  # clips may differ in length, so batches must be padded
)
# Evaluate the model
# NOTE(review): no training step is visible before this point, so this
# measures the network in whatever state `model` currently holds.
model.eval()

# Run the inputs on whichever device the model's weights live on.
device = next(model.parameters()).device

total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        waveform = waveform.to(device)
        label = label.to(device)
        # Conv1d needs (batch, channels, frames); add the channel axis only
        # when the batch does not already have one.
        inputs = waveform if waveform.dim() == 3 else waveform.unsqueeze(1)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

# An empty dataloader would otherwise divide by zero.
accuracy = 100 * correct / total if total else 0.0
print('Accuracy:', accuracy)
import
os
import
torch
import
torchaudio
import
torch.nn
as
nn
import
torch.optim
as
optim
from
torch.utils.data
import
Dataset
,
DataLoader
import
torch.nn.functional
as
F
# Root folder of the dataset; every entry directly inside it is counted as a class.
data_dir = 'drive/MyDrive/DatasetNew'

# Audio-processing parameters.
batch_size = 32
sample_rate = 44_100  # Adjust based on your dataset (presumably Hz - confirm)
duration = 4  # Adjust based on your dataset (presumably seconds - confirm)

# Number of output classes = number of entries found under data_dir.
num_classes = len(os.listdir(data_dir))
# Dataset over a folder tree of the form data_dir/<class name>/<audio file>
class VoiceDataset(Dataset):
    """Index audio files by class folder and load them on demand.

    The alphabetically sorted entries of ``data_dir`` define the class
    indices; each file inside a class folder is one sample.
    """

    def __init__(self, data_dir):
        """Collect the path and class index of every file under ``data_dir``."""
        self.data_dir = data_dir
        self.file_list = []
        self.labels = []

        for class_index, class_name in enumerate(sorted(os.listdir(data_dir))):
            folder = os.path.join(data_dir, class_name)
            clips = [os.path.join(folder, name) for name in os.listdir(folder)]
            self.file_list.extend(clips)
            self.labels.extend([class_index] * len(clips))

    def __getitem__(self, index):
        """Return ``(waveform, label)`` for sample ``index``.

        Reads at most ``duration * sample_rate`` frames; the sample rate
        returned by ``torchaudio.load`` is ignored.
        """
        clip, _ = torchaudio.load(
            self.file_list[index],
            num_frames=duration * sample_rate,
        )
        return clip, self.labels[index]

    def __len__(self):
        """Number of samples indexed at construction time."""
        return len(self.file_list)
class VoiceModel(nn.Module):
    """Small 1-D convolutional classifier for raw mono waveforms.

    Input:  tensor of shape ``(batch, 1, frames)`` (``conv1`` has one input channel).
    Output: tensor of shape ``(batch, num_classes)`` holding softmax probabilities.

    NOTE: ``nn.CrossEntropyLoss`` expects unnormalised logits. If this model is
    trained with that loss, feed it the output of ``fc2`` rather than the
    softmax output, otherwise softmax is effectively applied twice.
    """

    def __init__(self, num_classes):
        """Build the layers.

        Args:
            num_classes: Size of the output layer.
        """
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # fc1 is fixed at 64 * 25 inputs, but the conv output length grows with
        # the clip length. Pooling to exactly 25 frames makes any clip length
        # fit; when the conv output already has 25 frames this is the identity.
        # The layer has no parameters, so state_dict keys are unchanged.
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities for a batch of waveforms ``x``."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)  # (batch, 64, 25) regardless of input length
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x
# Build the classifier with one output unit per class.
model = VoiceModel(num_classes=num_classes)

# Loss function and optimiser.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Batch collation helper
def collate_fn(batch):
    """Collate ``(waveform, label)`` samples into padded batch tensors.

    Each waveform with a channel axis, i.e. shape ``(channels, frames)``, is
    averaged over its channels to a mono ``(frames,)`` signal. All signals are
    then zero-padded on the right to the longest one in the batch.

    Args:
        batch: Sequence of ``(waveform, label)`` pairs; ``label`` may be an int
            or a zero-dimensional tensor.

    Returns:
        ``(waveforms, labels)`` where ``waveforms`` has shape
        ``(batch, max_frames)`` and ``labels`` is a ``torch.long`` tensor of
        shape ``(batch,)``.

    Raises:
        ValueError: If ``batch`` is empty.
    """
    if not batch:
        raise ValueError("collate_fn received an empty batch")

    signals = []
    targets = []
    for waveform, label in batch:
        if waveform.dim() == 2:
            # Drop the channel axis so callers can add their own with unsqueeze(1).
            waveform = waveform.mean(dim=0)
        signals.append(waveform)
        targets.append(int(label))

    # Measure and pad along the time (last) axis; axis 0 of a loaded clip is
    # the channel axis, not its length.
    max_length = max(signal.size(-1) for signal in signals)
    waveforms = torch.stack(
        [F.pad(signal, (0, max_length - signal.size(-1))) for signal in signals],
        dim=0,
    )
    # Labels are plain ints, so build a tensor instead of stacking.
    labels = torch.tensor(targets, dtype=torch.long)
    return waveforms, labels
# Index the audio files, then batch them with the custom collate function.
dataset = VoiceDataset(data_dir)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)
# Evaluate the model
# NOTE(review): no training step is visible before this point, so this
# measures the network in whatever state `model` currently holds.
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        # Conv1d needs (batch, channels, frames); add the channel axis only
        # when the batch does not already have one.
        inputs = waveform if waveform.dim() == 3 else waveform.unsqueeze(1)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

# An empty dataloader would otherwise divide by zero.
accuracy = 100 * correct / total if total else 0.0
print('Accuracy:', accuracy)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment