API developed for dyslexia

78da6b02 · Prabuddha Gimhan · 65affb14 · 78da6b02
Commit 78da6b02 authored Jan 27, 2024 by Prabuddha Gimhan
Show whitespace changes
Inline Side-by-side

Showing with 293 additions and 0 deletions

API/app/Service 3/lj_functiono3.py API/app/Service 3/lj_functiono3.py +293 -0

No files found.
--- a/API/app/Service 3/lj_functiono3.py
+++ b/API/app/Service 3/lj_functiono3.py
+import librosa
+import torch
+#import IPython.display as display
+#import transformers
+import numpy as np
+import os
+import nltk
+import torchaudio
+
+
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from datasets import load_dataset
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+from nltk.tokenize import sent_tokenize, word_tokenize
+from transformers import pipeline
+nltk.download('punkt')
+
+
+#### Model loading: speech-to-text (Wav2Vec2) and text-to-speech (SpeechT5) ####
+
+# Directory that contains this module; the model folders are resolved against it.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+# On-disk locations of the saved speech-to-text processor and model.
+processor_path_stt = os.path.join(current_dir, "fun03_model/Wav2Vec2Processor")
+model_path_stt = os.path.join(current_dir, "fun03_model/Wav2Vec2ForCTC")
+
+# On-disk locations of the saved text-to-speech processor, model and vocoder.
+processor_path_tts = os.path.join(current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5Processor")
+model_path_tts = os.path.join(current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5model")
+vocoder_path_tts = os.path.join(current_dir, "fun03_model/SpeechT5_TTS-model/SpeechT5vocoder")
+
+# Speech-to-text objects, loaded once at import time from local files only.
+processor_stt = Wav2Vec2Processor.from_pretrained(
+    processor_path_stt,
+    local_files_only=True,
+)
+model_stt = Wav2Vec2ForCTC.from_pretrained(
+    model_path_stt,
+    local_files_only=True,
+)
+
+# Text-to-speech objects, loaded once at import time from local files only.
+processor_tts = SpeechT5Processor.from_pretrained(
+    processor_path_tts,
+    local_files_only=True,
+)
+model_tts = SpeechT5ForTextToSpeech.from_pretrained(
+    model_path_tts,
+    local_files_only=True,
+)
+vocoder_tts = SpeechT5HifiGan.from_pretrained(
+    vocoder_path_tts,
+    local_files_only=True,
+)
+
+
+
+
+def speech_to_text(audio_file):
+    """Transcribe an audio file with the module-level Wav2Vec2 model.
+
+    The audio is loaded with torchaudio, down-mixed to a single channel and
+    resampled to 16 kHz when the file uses another rate, then decoded greedily
+    (arg-max over the CTC logits).
+
+    Args:
+        audio_file: Path or file-like object accepted by ``torchaudio.load``.
+
+    Returns:
+        The decoded transcription string for the audio.
+    """
+    # The processor is told the audio is 16 kHz, so the waveform has to really
+    # be at that rate; previously the file's own rate was ignored.
+    target_sampling_rate = 16000
+
+    waveform, source_sampling_rate = torchaudio.load(audio_file, normalize=True)
+
+    # torchaudio returns (channels, frames). Averaging over the channel axis
+    # yields a 1-D signal; for mono input the values are unchanged, and a
+    # stereo file no longer reaches the processor as a 2-D array.
+    waveform = waveform.mean(dim=0)
+
+    if source_sampling_rate != target_sampling_rate:
+        waveform = torchaudio.functional.resample(
+            waveform, source_sampling_rate, target_sampling_rate
+        )
+
+    input_values = processor_stt(
+        waveform.numpy(),
+        return_tensors="pt",
+        sampling_rate=target_sampling_rate,
+    ).input_values
+
+    # Inference only: no gradients are needed.
+    with torch.no_grad():
+        logits = model_stt(input_values).logits
+
+    predicted_ids = torch.argmax(logits, dim=-1)
+    return processor_stt.batch_decode(predicted_ids)[0]
+
+#########scoring#############
+
+# Punctuation tokens that are ignored when comparing text with a transcription.
+_UNWANTED_TOKENS = frozenset(
+    [".", ",", "/", "?", "-", ";", ":", "`", "@", "&", "%", "*"]
+)
+
+
+def _percentage(part, whole):
+    """Return ``part / whole * 100``, or 0.0 when ``whole`` is zero."""
+    if not whole:
+        return 0.0
+    return part / whole * 100
+
+
+def _drop_unwanted(tokens):
+    """Return ``tokens`` without the punctuation listed in _UNWANTED_TOKENS."""
+    return [token for token in tokens if token not in _UNWANTED_TOKENS]
+
+
+def _clean_sentence(sentence):
+    """Re-join the non-punctuation tokens of ``sentence`` with single spaces.
+
+    A sentence made only of unwanted tokens is returned unchanged.
+    """
+    kept = _drop_unwanted(nltk.word_tokenize(sentence))
+    return " ".join(kept) if kept else sentence
+
+
+def scoring(words, transcriptions):
+    """Score how closely a transcription matches the expected text.
+
+    Two techniques are evaluated and the higher score of each kind is kept:
+
+    1. Sentence-wise: sentences are paired by position; identical (cleaned)
+       sentences count as correct sentences, otherwise their words are
+       compared position by position.
+    2. Word-wise: cleaned words are compared position by position, and the
+       share of distinct expected words that occur anywhere in the
+       transcription is measured.
+
+    Args:
+        words: The expected (reference) text.
+        transcriptions: The text recognised from the speaker's audio.
+
+    Returns:
+        A tuple ``(sentence_score, word_score, missing_words)`` where both
+        scores are percentages and ``missing_words`` lists the expected words
+        (in order, duplicates kept) that do not occur in the transcription.
+        A score whose denominator would be zero (empty or punctuation-only
+        reference text) is reported as 0.0 instead of raising
+        ZeroDivisionError.
+    """
+    words = words.lower()
+    transcriptions = transcriptions.lower()
+
+    clean_words = _drop_unwanted(nltk.word_tokenize(words))
+    clean_voices = _drop_unwanted(nltk.word_tokenize(transcriptions))
+
+    # ---- Technique 1: sentence-wise comparison ----
+    words_sent = nltk.sent_tokenize(words)
+    voice_sent = nltk.sent_tokenize(transcriptions)
+
+    matched_sentences = []
+    matched_words = []
+
+    # zip pairs sentences by position and stops at the shorter list.
+    for expected, spoken in zip(words_sent, voice_sent):
+        expected = _clean_sentence(expected)
+        spoken = _clean_sentence(spoken)
+
+        if expected == spoken:
+            matched_sentences.append(spoken)
+            continue
+
+        # Sentences differ: credit the words that agree at the same position.
+        expected_tokens = nltk.word_tokenize(expected)
+        spoken_tokens = nltk.word_tokenize(spoken)
+        matched_words.extend(
+            expected_token
+            for expected_token, spoken_token in zip(expected_tokens, spoken_tokens)
+            if expected_token == spoken_token
+        )
+
+    # Every word of a fully matched sentence also counts as a matched word.
+    for sentence in matched_sentences:
+        matched_words.extend(nltk.word_tokenize(sentence))
+
+    sentences_score1 = _percentage(len(matched_sentences), len(words_sent))
+    word_score1 = _percentage(len(matched_words), len(clean_words))
+
+    # ---- Technique 2: word-wise comparison ----
+    positional_matches = sum(
+        1
+        for expected_word, spoken_word in zip(clean_words, clean_voices)
+        if expected_word == spoken_word
+    )
+
+    expected_vocabulary = set(clean_words)
+    spoken_vocabulary = set(clean_voices)
+
+    missing_voice2 = [
+        word for word in clean_words if word not in spoken_vocabulary
+    ]
+
+    sentences_score2 = _percentage(positional_matches, len(clean_words))
+    word_score2 = _percentage(
+        len(expected_vocabulary & spoken_vocabulary), len(expected_vocabulary)
+    )
+
+    # ---- Final result: keep the better score of each kind ----
+    final_sent_score = max(sentences_score1, sentences_score2)
+    final_word_score = max(word_score1, word_score2)
+
+    return final_sent_score, final_word_score, missing_voice2
+
+##################scoring letter###################
+# Accepted transcriptions for each spoken letter. All entries are lower-case
+# because the transcription is lower-cased before the lookup.
+_LETTER_PRONUNCIATIONS = {
+    "a": ["ah", "a", "aa", "ae"],
+    "b": ["b", "be", "bhe", "bee", "e"],
+    "c": ["c", "cee", "see", "s"],
+    "d": ["d", "de", "dee", "the", "tha"],
+    "e": ["e", "ae", "ee"],
+    "f": ["af", "f", "ahf"],
+    "g": ["g", "gee", "jee"],
+    "h": ["h", "ah", "ag", "age"],
+    "i": ["i", "ai", "ii"],
+    "j": ["j", "ja", "jee"],
+    "k": ["k", "kha", "k`"],
+    "l": ["l", "al", "el"],
+    "m": ["am", "m", "em", "eam"],
+    "n": ["n", "en", "an"],
+    "o": ["o`", "oo", "o", "oh"],
+    "p": ["p", "pe", "pee", "pi", "phi", "phe"],
+    "q": ["q", "que", "queue"],
+    "r": ["r", "ar", "aer", "er"],
+    "s": ["as", "s", "es"],
+    "t": ["t", "tee", "tea", "ti"],
+    "u": ["u", "you", "yuu", "yu"],
+    "v": ["v", "ve", "we", "wee"],
+    "w": ["w", "dabluev"],
+    "x": ["x", "ax", "ex", "xe"],
+    "y": ["y", "why", "whe"],
+    "z": ["z", "ezed", "esed", "zed", "sed"],
+}
+
+
+def scoring_letter(words, transcriptions):
+    """Score the pronunciation of a single letter.
+
+    Both arguments are stripped of surrounding whitespace and lower-cased
+    before the comparison.
+
+    Args:
+        words: The expected letter ("a".."z", any case).
+        transcriptions: The text recognised from the speaker's audio.
+
+    Returns:
+        100 when the transcription is one of the accepted pronunciations of
+        the letter, otherwise 0. An input that is not a single known letter
+        scores 0 instead of raising KeyError.
+    """
+    letter = words.strip().lower()
+    heard = transcriptions.strip().lower()
+
+    accepted = _LETTER_PRONUNCIATIONS.get(letter, ())
+    return 100 if heard in accepted else 0
+
+#########Text to speech#####
+
+
+# Row of the x-vector dataset's "validation" split used as the speaker voice.
+_SPEAKER_XVECTOR_INDEX = 7306
+
+# Filled on first use so the x-vector dataset is read once per process rather
+# than on every text_to_speech call.
+_speaker_embeddings_cache = {}
+
+
+def _get_speaker_embeddings(index=_SPEAKER_XVECTOR_INDEX):
+    """Return the cached speaker x-vector for ``index`` with a leading batch axis."""
+    if index not in _speaker_embeddings_cache:
+        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+        _speaker_embeddings_cache[index] = torch.tensor(
+            embeddings_dataset[index]["xvector"]
+        ).unsqueeze(0)
+    return _speaker_embeddings_cache[index]
+
+
+def text_to_speech(text, return_tensors="pt"):
+    """Synthesise speech for ``text`` with the module-level SpeechT5 model.
+
+    Args:
+        text: The text to speak.
+        return_tensors: Forwarded to the SpeechT5 processor. The result is
+            passed to the torch model and converted with ``.numpy()``, so the
+            default "pt" is presumably the only value that works here.
+
+    Returns:
+        A flattened (1-D) NumPy array holding the generated waveform.
+    """
+    inputs = processor_tts(text=text, return_tensors=return_tensors)
+
+    # The x-vector carries the speaker's voice characteristics.
+    speaker_embeddings = _get_speaker_embeddings()
+
+    speech = model_tts.generate_speech(
+        inputs["input_ids"], speaker_embeddings, vocoder=vocoder_tts
+    )
+
+    # Ensure the result is a 1-D NumPy array.
+    speech_array = speech.numpy().flatten()
+
+    return speech_array
\ No newline at end of file