Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
I
Intelligent English Tutor
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2023-24-027
Intelligent English Tutor
Commits
0a50abe9
Commit
0a50abe9
authored
Nov 07, 2023
by
Piumi Navoda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
train the model
parent
4cab3664
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
198 additions
and
0 deletions
+198
-0
voicerecognizion/train.py
voicerecognizion/train.py
+198
-0
No files found.
voicerecognizion/train.py
0 → 100644
View file @
0a50abe9
import
json
import
numpy
as
np
import
tensorflow
as
tf
import
matplotlib.pyplot
as
plt
from
sklearn.model_selection
import
train_test_split
# Path of the json file the dataset is loaded from (passed to prepare_dataset in main).
DATA_PATH = "data.json"
# Path the trained model is saved to at the end of main.
SAVED_MODEL_PATH = "model.h5"
# Number of training epochs handed to train().
EPOCHS = 40
# Samples per batch handed to train().
BATCH_SIZE = 32
# Early-stopping patience (in epochs) handed to train().
PATIENCE = 5
# Learning rate handed to build_model() for the optimiser.
LEARNING_RATE = 0.0001
def load_data(data_path):
    """Read the training dataset stored in a json file.

    :param data_path (str): Location of the json file holding the data
    :return X (ndarray): Inputs, built from the "MFCCs" entry of the file
    :return y (ndarray): Targets, built from the "labels" entry of the file
    """
    with open(data_path, "r") as json_file:
        dataset = json.load(json_file)

    # turn the two entries of interest into numpy arrays (inputs first, then targets)
    inputs, targets = (np.array(dataset[key]) for key in ("MFCCs", "labels"))

    print("Training sets loaded!")
    return inputs, targets
def prepare_dataset(data_path, test_size=0.2, validation_size=0.2, random_state=None):
    """Creates train, validation and test sets.

    The dataset is split twice: first into train/test, then the remaining
    train portion is split again into train/validation. A trailing axis is
    appended to every input array before returning.

    :param data_path (str): Path to json file containing data
    :param test_size (float): Proportion of the dataset used for testing
    :param validation_size (float): Proportion of the train set used for validation
    :param random_state (int or None): Seed forwarded to both splits so they can be
        reproduced; None (the default) keeps the splits unseeded

    :return X_train (ndarray): Inputs for the train set
    :return y_train (ndarray): Targets for the train set
    :return X_validation (ndarray): Inputs for the validation set
    :return y_validation (ndarray): Targets for the validation set
    :return X_test (ndarray): Inputs for the test set
    :return y_test (ndarray): Targets for the test set
    """
    # load dataset
    X, y = load_data(data_path)

    # create train, validation, test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # add a trailing axis to each input array
    X_train = X_train[..., np.newaxis]
    X_test = X_test[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]

    return X_train, y_train, X_validation, y_validation, X_test, y_test
def build_model(input_shape, loss="sparse_categorical_crossentropy", learning_rate=0.0001,
                num_classes=33):
    """Build neural network using keras.

    The network is three Conv2D/BatchNormalization/MaxPooling2D stages followed
    by a 64-unit dense layer, dropout and a softmax output layer. The model is
    compiled with an Adam optimiser and an "accuracy" metric, and its summary
    is printed before it is returned.

    :param input_shape (tuple): Shape of array representing a sample train. E.g.: (44, 13, 1)
    :param loss (str): Loss function to use
    :param learning_rate (float): Learning rate handed to the Adam optimiser
    :param num_classes (int): Number of units in the softmax output layer
        (defaults to 33, the value that used to be hard-coded)

    :return model: TensorFlow model
    """
    layers = tf.keras.layers
    l2 = tf.keras.regularizers.l2

    # build network architecture using convolutional layers
    model = tf.keras.models.Sequential()

    # 1st conv layer
    model.add(layers.Conv2D(64, (3, 3), activation='relu',
                            input_shape=input_shape,
                            kernel_regularizer=l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))

    # 2nd conv layer
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            kernel_regularizer=l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))

    # 3rd conv layer
    model.add(layers.Conv2D(32, (2, 2), activation='relu',
                            kernel_regularizer=l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # flatten output and feed into dense layer
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    # The dropout layer used to be constructed without being added to the
    # model, so it had no effect; it must go through model.add like the others.
    model.add(layers.Dropout(0.3))

    # softmax output layer
    model.add(layers.Dense(num_classes, activation='softmax'))

    optimiser = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # compile model
    model.compile(optimizer=optimiser,
                  loss=loss,
                  metrics=["accuracy"])

    # print model parameters on console
    model.summary()

    return model
def train(model, epochs, batch_size, patience, X_train, y_train, X_validation, y_validation):
    """Fit the model on the train set, with early stopping.

    :param model: Compiled model to fit
    :param epochs (int): Maximum number of training epochs
    :param batch_size (int): Number of samples in each batch
    :param patience (int): Epochs to wait without an improvement of the monitored
        "accuracy" metric before training is stopped early
    :param X_train (ndarray): Train set inputs
    :param y_train (ndarray): Train set targets
    :param X_validation (ndarray): Validation set inputs
    :param y_validation (ndarray): Validation set targets

    :return history: Training history returned by model.fit
    """
    # stop once "accuracy" has not improved by at least min_delta for `patience` epochs
    stop_when_stalled = tf.keras.callbacks.EarlyStopping(
        monitor="accuracy",
        min_delta=0.001,
        patience=patience,
    )

    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "validation_data": (X_validation, y_validation),
        "callbacks": [stop_when_stalled],
    }

    # train model and hand the resulting history straight back
    return model.fit(X_train, y_train, **fit_options)
def plot_history(history):
    """Draw accuracy and loss curves (training and validation) against the epochs.

    Two stacked subplots are created: the upper one for accuracy, the lower
    one for loss. The figure is displayed with plt.show().

    :param history: Training history of model
    :return:
    """
    recorded = history.history
    figure, (accuracy_axis, loss_axis) = plt.subplots(2)

    # upper panel: accuracy
    accuracy_axis.plot(recorded["accuracy"], label="accuracy")
    accuracy_axis.plot(recorded['val_accuracy'], label="val_accuracy")
    accuracy_axis.set_ylabel("Accuracy")
    accuracy_axis.legend(loc="lower right")
    accuracy_axis.set_title("Accuracy evaluation")

    # lower panel: loss
    loss_axis.plot(recorded["loss"], label="loss")
    loss_axis.plot(recorded['val_loss'], label="val_loss")
    loss_axis.set_xlabel("Epoch")
    loss_axis.set_ylabel("Loss")
    loss_axis.legend(loc="upper right")
    loss_axis.set_title("Loss evaluation")

    plt.show()
def main():
    """Run the whole pipeline: load data, build, train, plot, evaluate and save."""
    # split the dataset into train, validation and test portions
    splits = prepare_dataset(DATA_PATH)
    X_train, y_train, X_validation, y_validation, X_test, y_test = splits

    # the network input is a single sample: axes 1 and 2 of the train set plus one channel
    first_dim = X_train.shape[1]
    second_dim = X_train.shape[2]
    model = build_model((first_dim, second_dim, 1), learning_rate=LEARNING_RATE)

    # fit the network
    history = train(
        model,
        EPOCHS,
        BATCH_SIZE,
        PATIENCE,
        X_train,
        y_train,
        X_validation,
        y_validation,
    )

    # show accuracy/loss curves for the training and validation sets
    plot_history(history)

    # measure performance on the held-out test set
    test_loss, test_acc = model.evaluate(X_test, y_test)
    report = "\nTest loss: {}, test accuracy: {}".format(test_loss, 100*test_acc)
    print(report)

    # persist the trained model
    model.save(SAVED_MODEL_PATH)
# Run the training pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment