Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
LearnJoy-ML
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chamodi Yapa
LearnJoy-ML
Commits
78da6b02
Commit
78da6b02
authored
Jan 27, 2024
by
Prabuddha Gimhan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
API developed for dyslexia
parent
65affb14
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
293 additions
and
0 deletions
+293
-0
API/app/Service 3/lj_functiono3.py
API/app/Service 3/lj_functiono3.py
+293
-0
No files found.
API/app/Service 3/lj_functiono3.py
0 → 100644
View file @
78da6b02
import
librosa
import
torch
#import IPython.display as display
#import transformers
import
numpy
as
np
import
os
import
nltk
import
torchaudio
from
transformers
import
SpeechT5Processor
,
SpeechT5ForTextToSpeech
,
SpeechT5HifiGan
from
datasets
import
load_dataset
from
transformers
import
Wav2Vec2ForCTC
,
Wav2Vec2Processor
from
nltk.tokenize
import
sent_tokenize
,
word_tokenize
from
transformers
import
pipeline
# Fetch the NLTK "punkt" data package at import time.
nltk.download("punkt")

#### Model locations and loading (speech-to-text and text-to-speech) ####
# Every model directory is resolved relative to the directory of this file,
# so the module works regardless of the process's working directory.
_this_file = os.path.abspath(__file__)
current_dir = os.path.dirname(_this_file)

# Speech-to-text (Wav2Vec2) directories.
processor_path_stt = os.path.join(current_dir, "fun03_model/Wav2Vec2Processor")
model_path_stt = os.path.join(current_dir, "fun03_model/Wav2Vec2ForCTC")

# Text-to-speech (SpeechT5) directories: processor, acoustic model, vocoder.
processor_path_tts = os.path.join(
    current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5Processor"
)
model_path_tts = os.path.join(
    current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5model"
)
vocoder_path_tts = os.path.join(
    current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5vocoder"
)

# Load the saved speech-to-text processor and model once, at import time.
# Every load below passes local_files_only=True.
processor_stt = Wav2Vec2Processor.from_pretrained(
    processor_path_stt,
    local_files_only=True,
)
model_stt = Wav2Vec2ForCTC.from_pretrained(
    model_path_stt,
    local_files_only=True,
)

# Load the saved text-to-speech processor, model and vocoder.
processor_tts = SpeechT5Processor.from_pretrained(
    processor_path_tts,
    local_files_only=True,
)
model_tts = SpeechT5ForTextToSpeech.from_pretrained(
    model_path_tts,
    local_files_only=True,
)
vocoder_tts = SpeechT5HifiGan.from_pretrained(
    vocoder_path_tts,
    local_files_only=True,
)
def speech_to_text(audio_file):
    """Transcribe an audio file with the module-level Wav2Vec2 model.

    The waveform is down-mixed to mono and resampled to 16 kHz before it is
    handed to ``processor_stt``. The previous version told the processor the
    audio was 16 kHz without checking, so files recorded at any other rate
    were transcribed from mislabelled samples.

    Args:
        audio_file: Anything ``torchaudio.load`` accepts (path or file-like).

    Returns:
        The decoded transcription string for the clip.
    """
    # Sampling rate declared to the processor (same value the original used).
    target_rate = 16000

    waveform, source_rate = torchaudio.load(audio_file, normalize=True)

    # torchaudio.load yields (channels, frames) by default. Average the
    # channels so a stereo file is not mistaken for a batch of two clips.
    if waveform.dim() > 1 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Bring the audio to the rate we are about to declare to the processor.
    if source_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, source_rate, target_rate
        )

    input_values = processor_stt(
        waveform.squeeze().numpy(),
        return_tensors="pt",
        sampling_rate=target_rate,
    ).input_values

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model_stt(input_values).logits

    # Greedy CTC decoding: take the most likely token at every frame.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor_stt.batch_decode(predicted_ids)[0]
#########scoring#############
# Punctuation tokens ignored when comparing a text with its transcription.
_UNWANTED_TOKENS = frozenset(
    [".", ",", "/", "?", "-", ";", ":", "`", "@", "&", "%", "*"]
)


def _content_tokens(text):
    """Tokenize *text* with NLTK and drop the ignored punctuation tokens."""
    return [
        token
        for token in nltk.word_tokenize(text)
        if token not in _UNWANTED_TOKENS
    ]


def scoring(words, transcriptions):
    """Score how closely a spoken transcription matches a reference text.

    Both inputs are lower-cased and punctuation tokens are ignored. Two
    techniques are evaluated and the better result of each is returned:

    * Technique 01 pairs sentences by position. A pair whose cleaned text is
      identical counts as a correct sentence and all of its words count as
      correct. Otherwise the words of the pair are compared by position.
    * Technique 02 ignores sentences. Its "sentence" score is the share of
      words that match by position across the whole text. Its word score is
      the share of distinct reference words that occur anywhere in the
      transcription.

    Args:
        words: The reference text the speaker was asked to read.
        transcriptions: The text recognised from the speaker's audio.

    Returns:
        A tuple ``(sentence_score, word_score, missing_words)``. The scores
        are percentages. ``missing_words`` lists the reference words (in
        order, duplicates kept) that never occur in the transcription. If the
        reference contains no scorable words the result is ``(0.0, 0.0, [])``
        rather than a ``ZeroDivisionError``.
    """
    words = words.lower()
    transcriptions = transcriptions.lower()

    clean_words = _content_tokens(words)
    clean_voices = _content_tokens(transcriptions)
    words_sent = nltk.sent_tokenize(words)

    # An empty or punctuation-only reference leaves nothing to divide by.
    if not clean_words or not words_sent:
        return 0.0, 0.0, []

    voice_sent = nltk.sent_tokenize(transcriptions)

    #### Technique 01: sentence-by-sentence comparison
    right_sentences = 0
    right_words = 0
    # zip() pairs sentences with the same index and stops at the shorter list.
    for ref_sentence, heard_sentence in zip(words_sent, voice_sent):
        ref_clean = " ".join(_content_tokens(ref_sentence))
        heard_clean = " ".join(_content_tokens(heard_sentence))

        if ref_clean == heard_clean:
            right_sentences += 1
            # Every word of a fully correct sentence counts as correct.
            right_words += len(nltk.word_tokenize(heard_clean))
        else:
            ref_tokens = nltk.word_tokenize(ref_clean)
            heard_tokens = nltk.word_tokenize(heard_clean)
            right_words += sum(
                1 for ref, heard in zip(ref_tokens, heard_tokens) if ref == heard
            )

    sentences_score1 = right_sentences / len(words_sent) * 100
    word_score1 = right_words / len(clean_words) * 100

    #### Technique 02: whole-text word comparison
    positional_hits = sum(
        1 for ref, heard in zip(clean_words, clean_voices) if ref == heard
    )
    ref_vocab = set(clean_words)
    heard_vocab = set(clean_voices)
    missing_voice2 = [word for word in clean_words if word not in heard_vocab]

    sentences_score2 = positional_hits / len(clean_words) * 100
    word_score2 = len(ref_vocab & heard_vocab) / len(ref_vocab) * 100

    #### Final score: the more favourable of the two techniques
    final_sent_score = max(sentences_score1, sentences_score2)
    final_word_score = max(word_score1, word_score2)
    return final_sent_score, final_word_score, missing_voice2
##################scoring letter###################
# Accepted transcriptions for each spoken letter name, keyed by the letter.
# All entries are lower-case because the transcription is lower-cased before
# the lookup; the former "C" and "Esed" entries could never match.
_LETTER_PRONUNCIATIONS = {
    "a": ("ah", "a", "aa", "ae"),
    "b": ("b", "be", "bhe", "bee", "e"),
    "c": ("c", "cee", "see", "s"),
    "d": ("d", "de", "dee", "the", "tha"),
    "e": ("e", "ae", "ee"),
    "f": ("af", "f", "ahf"),
    "g": ("g", "gee", "jee"),
    "h": ("h", "ah", "ag", "age"),
    "i": ("i", "ai", "ii"),
    "j": ("j", "ja", "jee"),
    "k": ("k", "kha", "k`"),
    "l": ("l", "al", "el"),
    "m": ("am", "m", "em", "eam"),
    "n": ("n", "en", "an"),
    "o": ("o`", "oo", "o", "oh"),
    "p": ("p", "pe", "pee", "pi", "phi", "phe"),
    "q": ("q", "que", "queue"),
    "r": ("r", "ar", "aer", "er"),
    "s": ("as", "s", "es"),
    "t": ("t", "tee", "tea", "ti"),
    "u": ("u", "you", "yuu", "yu"),
    "v": ("v", "ve", "we", "wee"),
    "w": ("w", "dabluev"),
    "x": ("x", "ax", "ex", "xe"),
    "y": ("y", "why", "whe"),
    "z": ("z", "ezed", "esed", "zed", "sed"),
}


def scoring_letter(words, transcriptions):
    """Score the pronunciation of a single letter.

    Both arguments are stripped of surrounding whitespace and lower-cased
    before comparison.

    Args:
        words: The expected letter ("a" to "z", any case).
        transcriptions: The text recognised from the speaker's audio.

    Returns:
        ``100`` if the transcription is one of the accepted spellings for the
        expected letter, otherwise ``0``. An expected value that is not a
        single letter a-z also scores ``0`` instead of raising ``KeyError``.
    """
    expected = words.strip().lower()
    heard = transcriptions.strip().lower()

    accepted = _LETTER_PRONUNCIATIONS.get(expected, ())
    return 100 if heard in accepted else 0
#########Text to speech#####
# Speaker x-vector shared by every text_to_speech call. It is loaded lazily on
# first use so the dataset is not re-read for each request.
_speaker_embeddings_cache = None


def _get_speaker_embeddings():
    """Return the speaker x-vector tensor, loading the dataset only once."""
    global _speaker_embeddings_cache
    if _speaker_embeddings_cache is None:
        embeddings_dataset = load_dataset(
            "Matthijs/cmu-arctic-xvectors", split="validation"
        )
        # Entry 7306 supplies the voice; unsqueeze(0) adds a leading
        # dimension of size 1.
        _speaker_embeddings_cache = torch.tensor(
            embeddings_dataset[7306]["xvector"]
        ).unsqueeze(0)
    return _speaker_embeddings_cache


def text_to_speech(text, return_tensors="pt"):
    """Synthesise speech for *text* with the module-level SpeechT5 models.

    Args:
        text: The text to speak.
        return_tensors: Tensor format requested from ``processor_tts``. The
            default ``"pt"`` is what the rest of this function is written
            for; other values are passed through unchanged.

    Returns:
        The generated speech flattened to a one-dimensional NumPy array.
    """
    inputs = processor_tts(text=text, return_tensors=return_tensors)

    speech = model_tts.generate_speech(
        inputs["input_ids"],
        _get_speaker_embeddings(),
        vocoder=vocoder_tts,
    )

    # Flatten so callers always receive a one-dimensional array.
    return speech.numpy().flatten()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment