Project: 2023-362

Commit 44fbceb5, authored Sep 13, 2023 by Thathsarani R.P.H.S.R
Parent: c0155364

    model file uploaded.

Showing 2 changed files, with 213 additions and 0 deletions:

    IT20201364/CNN_Model_Train.py       +93  -0
    IT20201364/Dialogue_Model_TRain.py  +120 -0

IT20201364/CNN_Model_Train.py (new file, mode 100644)
import os
import warnings

import librosa
import librosa.display
import numpy as np
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.models import Sequential
from keras.utils import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm import tqdm

warnings.filterwarnings("ignore")

dataList = os.listdir('C:/Users/dell/Desktop/AI_Virtual_C/RawData')
classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')
data = []
labels = []

for number, path in enumerate(tqdm(dataList)):
    # Load 2.5 s of audio at 44.1 kHz, skipping the first 0.5 s.
    X, sample_rate = librosa.load('C:/Users/dell/Desktop/AI_Virtual_C/RawData/' + path,
                                  res_type='kaiser_best', duration=2.5,
                                  sr=22050 * 2, offset=0.5)
    sample_rate = np.array(sample_rate)
    # 39 MFCC coefficients per frame serve as the input feature map.
    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=39)
    feature = mfccs
    data.append(feature)
    # Map the emotion code embedded in the filename (positions 6:8,
    # or a leading letter prefix) to a class index in classLabels.
    if path[6:8] == '01' or path[0:1] == 'n':
        labels.append(6)  # Neutral
    if path[6:8] == '02':
        labels.append(6)  # Calm, folded into Neutral
    if path[6:8] == '03' or path[0:1] == 'h':
        labels.append(3)  # Happy
    if path[6:8] == '04' or path[0:2] == 'sa':
        labels.append(4)  # Sad
    if path[6:8] == '05' or path[0:1] == 'a':
        labels.append(0)  # Angry
    if path[6:8] == '06' or path[0:1] == 'f':
        labels.append(1)  # Fear
    if path[6:8] == '07' or path[0:1] == 'd':
        labels.append(2)  # Disgust
    if path[6:8] == '08' or path[0:2] == 'su':
        labels.append(5)  # Surprised

# Pad/truncate every MFCC matrix to a fixed 216 frames. dtype='float32'
# preserves the coefficients (pad_sequences defaults to int32, which
# would truncate the float features).
max_len = 216
data = np.array([pad_sequences(x, maxlen=max_len, padding='post',
                               truncating='post', dtype='float32')
                 for x in data])
labels = np.array(labels)

X_train, X_test, Y_train, Y_test = train_test_split(data, labels,
                                                    test_size=0.3,
                                                    random_state=42)
numLabels = len(classLabels)
Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)

# Add a channel axis so each (39, 216) MFCC map feeds a 2D CNN.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1:])))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(numLabels, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

# Train one epoch at a time so the best test accuracy can be tracked.
best_acc = 0
epochs = 50
for i in tqdm(range(epochs)):
    model.fit(X_train, Y_train, batch_size=32, epochs=1)
    loss, acc = model.evaluate(X_test, Y_test)
    if acc > best_acc:
        best_acc = acc

model.evaluate(X_test, Y_test)
print("Best Accuracy:", best_acc)
model.save("my_model.h5")
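For reference, a minimal inference sketch (not part of the commit) showing how the saved classifier might be applied to a single clip, assuming the same preprocessing as above (n_mfcc=39, max_len=216, 44.1 kHz); the input path is hypothetical:

# Minimal inference sketch: load my_model.h5 and classify one audio file,
# mirroring the training-time preprocessing. 'some_clip.wav' is hypothetical.
import librosa
import numpy as np
from keras.models import load_model
from keras.utils import pad_sequences

model = load_model("my_model.h5")
classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')

X, sr = librosa.load('some_clip.wav', res_type='kaiser_best',
                     duration=2.5, sr=22050 * 2, offset=0.5)
mfccs = librosa.feature.mfcc(y=X, sr=sr, n_mfcc=39)
mfccs = pad_sequences(mfccs, maxlen=216, padding='post',
                      truncating='post', dtype='float32')
x = mfccs[np.newaxis, ..., np.newaxis]  # shape (1, 39, 216, 1)
pred = model.predict(x)
print(classLabels[int(np.argmax(pred))])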
IT20201364/Dialogue_Model_TRain.py (new file, mode 100644)
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW


class MovieDialogDataset(Dataset):
    """Pairs of consecutive utterances from the movie-dialog corpus."""

    def __init__(self, lines_file, conversations_file):
        self.load_dataset(lines_file, conversations_file)

    def load_dataset(self, lines_file, conversations_file):
        # Parse the lines file: line id -> {dialog_id, character_id, text}.
        lines = {}
        with open(lines_file, "r", encoding="iso-8859-1") as file:
            for line in file:
                parts = line.strip().split(" +++$+++ ")
                if len(parts) == 5:
                    line_id = int(parts[0][1:])
                    dialog_id = parts[1]
                    character_id = parts[2]
                    text = parts[4]
                    lines[line_id] = {"dialog_id": dialog_id,
                                      "character_id": character_id,
                                      "text": text}

        # Parse the conversations file: each conversation lists its line ids,
        # and every adjacent pair becomes an (input, target) training example.
        dialogues = []
        with open(conversations_file, "r", encoding="iso-8859-1") as file:
            for line in file:
                parts = line.strip().split(" +++$+++ ")
                if len(parts) > 4:
                    # parts[3] holds a Python list literal of line ids
                    # (ast.literal_eval would be a safer parser than eval).
                    dialogue_ids = eval(parts[3])
                    for i in range(len(dialogue_ids) - 1):
                        input_line_id = dialogue_ids[i]
                        target_line_id = dialogue_ids[i + 1]
                        input_text = lines[input_line_id]["text"].strip()
                        target_text = lines[target_line_id]["text"].strip()
                        if input_text and target_text:
                            dialogues.append({"input_text": input_text,
                                              "target_text": target_text})
        self.dialogues = dialogues

    def __len__(self):
        return len(self.dialogues)

    def __getitem__(self, index):
        return self.dialogues[index]


# Initialize the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# Set the device for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # move the model to the same device as the batches

# Load the dataset
dataset = MovieDialogDataset(
    lines_file="C:/Users/dell/Desktop/AI_Virtual_C/cornels_dataset/movie_lines.tsv",
    conversations_file="C:/Users/dell/Desktop/AI_Virtual_C/cornels_dataset/movie_conversations.tsv")

# Create data loader
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)

# Set the model in training mode
model.train()

# Initialize the optimizer
optimizer = AdamW(model.parameters(), lr=1e-4)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    for batch in data_loader:
        # Preprocess the input and target sequences
        inputs = tokenizer.batch_encode_plus(batch["input_text"],
                                             padding=True, truncation=True,
                                             max_length=128,
                                             return_tensors="pt").to(device)
        targets = tokenizer.batch_encode_plus(batch["target_text"],
                                              padding=True, truncation=True,
                                              max_length=128,
                                              return_tensors="pt").to(device)

        # Clear gradients
        optimizer.zero_grad()

        # Forward pass; T5 computes the cross-entropy loss internally
        # when labels are supplied.
        outputs = model(input_ids=inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        labels=targets["input_ids"],
                        decoder_attention_mask=targets["attention_mask"])

        # Compute the loss
        loss = outputs.loss

        # Backward pass
        loss.backward()

        # Update model parameters
        optimizer.step()

        total_loss += loss.item()

    # Print the average loss for the epoch
    print(f"Epoch {epoch+1} Loss: {total_loss / len(data_loader)}")

# Save the trained model
model.save_pretrained("dialogue_model")
tokenizer.save_pretrained("tokenizer")
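A minimal generation sketch (not part of the commit) showing how the saved checkpoint and tokenizer might be used to produce a reply; the prompt string and generation settings (beam search, max_length) are illustrative assumptions, not values from the training script:

# Minimal generation sketch: load the saved checkpoint and generate one reply.
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("tokenizer")
model = T5ForConditionalGeneration.from_pretrained("dialogue_model")
model.eval()

prompt = "How are you doing today?"  # hypothetical input utterance
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(inputs["input_ids"],
                                max_length=64, num_beams=4,
                                early_stopping=True)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))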