voice verification

aceb2402 · Emika Chamodi · b698b996 · aceb2402 · aceb2402 · aceb2402
Commit aceb2402 authored Mar 30, 2023 by Emika Chamodi
19 changed files
--- a/NodeServer/src/config/types.ts
+++ b/NodeServer/src/config/types.ts
@@ -73,6 +73,7 @@ export type CandidateType = {
  state: "INTIAL" | "READY";
  resume?: string;
  resumeData?: ResumeDataType;
+  selfIntro?: string;
 };

 export type OrganizationType = {
@@ -124,6 +125,7 @@ export type ApplicationType = {
    time: string;
    link: string;
    videoRef?: string;
+    voiceVerification?: number;
  };
  score: {
    primary: number;
@@ -135,6 +137,12 @@ export type ApplicationType = {
  };
 };

+export type AnalyseApplicationPayload = { // request body of the POST "/analyse" application route
+  applicationId: string; // forwarded to VoiceAPI.analyseVoice as `application_id`
+  startTime: number; // forwarded as `start`; unit is not visible here — TODO confirm
+  endTime: number; // forwarded as `end`; unit is not visible here — TODO confirm
+};
+
 export interface TypedRequest<T extends Query, U> extends Request {
  body: U;
  query: T;

--- a/NodeServer/src/index.ts
+++ b/NodeServer/src/index.ts
@@ -10,6 +10,7 @@ app.use(cors());
 const authRoute = require("./routes/auth");
 const userRoute = require("./routes/user");
 const jobsRoute = require("./routes/jobs");
+const applicationsRoute = require("./routes/application");

 // Service Initialisation
 mongoose.connect(MONGO_URL, {
@@ -29,5 +30,6 @@ app.use(express.urlencoded({ extended: false }));
 app.use("/auth", authRoute);
 app.use("/user", userRoute);
 app.use("/jobs", jobsRoute);
+app.use("/applications", applicationsRoute);

 app.listen(API_PORT, () => console.log(`Listening on port ${API_PORT}`));
--- a/NodeServer/src/models/Application.ts
+++ b/NodeServer/src/models/Application.ts
@@ -11,6 +11,7 @@ const applicationSchema = new Schema<ApplicationType>({
      time: String,
      link: String,
      videoRef: String,
+      voiceVerification: Number,
    },
    require: false,
  },

--- a/NodeServer/src/models/Candidate.ts
+++ b/NodeServer/src/models/Candidate.ts
@@ -42,6 +42,7 @@ const candidateSchema = new Schema<CandidateType>({
  state: { type: String, default: "INTIAL" },
  resume: { type: String, require: false },
  resumeData: { type: ResumeDataSchema, require: false },
+  selfIntro: { type: String, require: false },
 });

 const Candidates = model<CandidateType>("candidates", candidateSchema);

--- a/NodeServer/src/routes/application.ts
+++ b/NodeServer/src/routes/application.ts
+import { Router } from "express";
+import {
+  ApplicationType,
+  TypedRequest,
+  AnalyseApplicationPayload,
+} from "../config/types";
+import { authMiddleware, candidateMiddleware } from "../middlewares/auth";
+import Application from "../models/Application";
+import Jobs from "../models/Job";
+import ResumeAPI from "../utilities/apis/resume";
+import VoiceAPI from "../utilities/apis/voice";
+
+const router = Router();
+
+/**
+ * POST /apply
+ *
+ * Scores the submitted resume against the target job through
+ * `ResumeAPI.getResumeScores`, stores a new application carrying that score
+ * and appends the new application's id to the job's `applications` list.
+ *
+ * Reads `application` and `resumeUrl` from the body and `userId` from the
+ * query string.
+ *
+ * Responds with `{ success: true, applicationId }` on success and with
+ * `{ success: false, error }` on any failure.
+ */
+router.post(
+  "/apply",
+  authMiddleware,
+  candidateMiddleware,
+  async (
+    req: TypedRequest<
+      { userId: string },
+      { application: ApplicationType; resumeUrl: string }
+    >,
+    res
+  ) => {
+    try {
+      const { application, resumeUrl } = req.body;
+      const job = await Jobs.findById(application.job);
+
+      // findById resolves to null for an unknown id; fail with a readable
+      // message instead of a TypeError on `job.primarySkills` below.
+      if (!job) {
+        return res.json({ success: false, error: "Job not found" });
+      }
+
+      const data: any = await ResumeAPI.getResumeScores({
+        user_id: req.query.userId,
+        resume_url: resumeUrl,
+        primary_skills: job.primarySkills,
+        secondary_skills: job.secondarySkills,
+        job_desc: job.description,
+      });
+
+      const score: ApplicationType["score"] = {
+        primary: data.primary_score,
+        primatyMatch: data.primary_match,
+        secondary: data.secondary_score,
+        secondaryMatch: data.secondary_match,
+        similarity: data.similarity,
+        // Overall score is the plain sum of the three partial scores.
+        total: data.primary_score + data.secondary_score + data.similarity,
+      };
+
+      const newApplication = new Application({ ...application, score });
+      const _application = await newApplication.save();
+
+      // Link the stored application back to the job it was submitted for.
+      job.applications.push(_application.id);
+      await job.save();
+
+      return res.json({
+        success: true,
+        applicationId: _application.id,
+      });
+    } catch (error) {
+      // Error instances serialise to `{}` in JSON, so send the message.
+      return res.json({
+        success: false,
+        error: error instanceof Error ? error.message : error,
+      });
+    }
+  }
+);
+
+/**
+ * PUT /update
+ *
+ * Applies `update` to the application identified by `applicationId`.
+ * When the update carries `interview.videoRef`, the video is first sent to
+ * `VoiceAPI.verifyVoice` together with `candidateId` and `applicationId`,
+ * and the result is stored as `interview.voiceVerification`.
+ *
+ * Responds with `{ success: true, application }`, where `application` is the
+ * stored document overlaid with the update, or with
+ * `{ success: false, error }` on any failure.
+ */
+router.put(
+  "/update",
+  authMiddleware,
+  async (
+    req: TypedRequest<
+      {},
+      {
+        applicationId: string;
+        update: Partial<ApplicationType>;
+        candidateId: string;
+      }
+    >,
+    res
+  ) => {
+    // One try/catch covers the voice service call as well as the database
+    // write, so a rejected promise always produces a response.
+    try {
+      const { applicationId, candidateId, update } = req.body;
+
+      if (update.interview?.videoRef) {
+        // NOTE(review): assumes verifyVoice resolves to the numeric score
+        // itself rather than a wrapper object — confirm against the service.
+        const score: any = await VoiceAPI.verifyVoice({
+          video_url: update.interview.videoRef,
+          user_id: candidateId,
+          application_id: applicationId,
+        });
+
+        update.interview.voiceVerification = score;
+      }
+
+      // NOTE(review): `$set` with a nested `interview` object presumably
+      // replaces the whole stored sub-document — confirm that callers send
+      // the complete `interview` value.
+      const _application = await Application.findByIdAndUpdate(applicationId, {
+        $set: update,
+      });
+
+      if (!_application) {
+        return res.json({ success: false, error: "Application not found" });
+      }
+
+      return res.json({
+        success: true,
+        // findByIdAndUpdate resolves to the pre-update document by default,
+        // so overlay the update. toObject() strips the Mongoose wrapper that
+        // a plain spread of the document would leak into the response.
+        application: { ..._application.toObject(), ...update },
+      });
+    } catch (error) {
+      // Error instances serialise to `{}` in JSON, so send the message.
+      return res.json({
+        success: false,
+        error: error instanceof Error ? error.message : error,
+      });
+    }
+  }
+);
+
+/**
+ * POST /analyse
+ *
+ * Forwards `applicationId`, `startTime` and `endTime` from the body to
+ * `VoiceAPI.analyseVoice` and returns its result.
+ *
+ * Responds with `{ voice }` on success and with `{ success: false, error }`
+ * when the voice service call fails.
+ */
+router.post(
+  "/analyse",
+  authMiddleware,
+  async (req: TypedRequest<{}, AnalyseApplicationPayload>, res) => {
+    // Without this guard a rejected voice service call would surface as an
+    // unhandled rejection and the request would never receive a response.
+    try {
+      const { applicationId, startTime, endTime } = req.body;
+      const data = await VoiceAPI.analyseVoice({
+        start: startTime,
+        end: endTime,
+        application_id: applicationId,
+      });
+
+      return res.json({ voice: data });
+    } catch (error) {
+      // Error instances serialise to `{}` in JSON, so send the message.
+      return res.json({
+        success: false,
+        error: error instanceof Error ? error.message : error,
+      });
+    }
+  }
+);
+
+module.exports = router;
--- a/NodeServer/src/routes/jobs.ts
+++ b/NodeServer/src/routes/jobs.ts
@@ -45,6 +45,7 @@ router.get(
              "profilePicture",
              "resume",
              "resumeData",
+              "selfIntro",
            ],
          },
        });
@@ -105,50 +106,4 @@ router.delete(
  }
 );

-router.put(
-  "/apply",
-  authMiddleware,
-  candidateMiddleware,
-  async (
-    req: TypedRequest<
-      { userId: string },
-      { application: ApplicationType; resumeUrl: string }
-    >,
-    res
-  ) => {
-    try {
-      const { application, resumeUrl } = req.body;
-      const job = await Jobs.findById(application.job);
-
-      const data: any = await ResumeAPI.getResumeScores({
-        user_id: req.query.userId,
-        resume_url: resumeUrl,
-        primary_skills: job.primarySkills,
-        secondary_skills: job.secondarySkills,
-        job_desc: job.description,
-      });
-
-      const score: ApplicationType["score"] = {
-        primary: data.primary_score,
-        primatyMatch: data.primary_match,
-        secondary: data.secondary_score,
-        secondaryMatch: data.secondary_match,
-        similarity: data.similarity,
-        total: data.primary_score + data.secondary_score + data.similarity,
-      };
-
-      const newApplication = new Application({ ...application, score });
-      const _application = await newApplication.save();
-      job.applications.push(_application.id);
-      await job.save();
-      return res.json({
-        success: true,
-        applicationId: _application.id,
-      });
-    } catch (error) {
-      return res.json({ success: false, error });
-    }
-  }
-);
-
 module.exports = router;
--- a/NodeServer/src/routes/user.ts
+++ b/NodeServer/src/routes/user.ts
@@ -3,6 +3,7 @@ import { CandidateType, TypedRequest } from "../config/types";
 import { authMiddleware } from "../middlewares/auth";
 import Candidates from "../models/Candidate";
 import ResumeAPI from "../utilities/apis/resume";
+import VoiceAPI from "../utilities/apis/voice";

 const router = Router();

@@ -19,6 +20,12 @@ router.post(
        });
        update.resumeData = data;
      }
+      if (req.body?.selfIntro) {
+        await VoiceAPI.enrollVoice({
+          user_id: req.query.userId,
+          video_url: req.body.selfIntro,
+        });
+      }
      await Candidates.findByIdAndUpdate(req.query.userId, { $set: req.body });
      return res.status(200).json({ data: req.body });
    } catch (error) {

--- a/NodeServer/src/utilities/apis/resume.ts
+++ b/NodeServer/src/utilities/apis/resume.ts
-import { ResumeDataType } from "../../config/types";
 import { request } from "../requests";

 export default class ResumeAPI {

--- a/NodeServer/src/utilities/apis/voice.ts
+++ b/NodeServer/src/utilities/apis/voice.ts
+import { request } from "../requests";
+
+/** Payload sent to the voice enrolment endpoint. */
+type EnrollVoicePayload = { video_url: string; user_id: string };
+
+/** Payload sent to the voice verification endpoint. */
+type VerifyVoicePayload = {
+  video_url: string;
+  user_id: string;
+  application_id: string;
+};
+
+/** Payload sent to the voice analysis endpoint. */
+type AnalyseVoicePayload = {
+  start: number;
+  end: number;
+  application_id: string;
+};
+
+/**
+ * Static client for the voice service. Each member issues a POST through
+ * `request` and returns whatever `request` returns.
+ */
+export default class VoiceAPI {
+  /** POSTs the payload to `/voice/enroll`. */
+  static enrollVoice = (payload: EnrollVoicePayload) =>
+    request("<BASE_URL>/voice/enroll", "POST", payload);
+
+  /** POSTs the payload to `/voice/verify`. */
+  static verifyVoice = (payload: VerifyVoicePayload) =>
+    request("<BASE_URL>/voice/verify", "POST", payload);
+
+  /** POSTs the payload to `/voice/analyse`. */
+  static analyseVoice = (payload: AnalyseVoicePayload) =>
+    request("<BASE_URL>/voice/analyse", "POST", payload);
+}
--- a/voice_analyzer/Voice_Emotion/ReadMe.md
+++ b/voice_analyzer/Voice_Emotion/ReadMe.md
-DataSet: https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view
--- a/voice_analyzer/Voice_Emotion/convert_wavs.py
+++ b/voice_analyzer/Voice_Emotion/convert_wavs.py
-"""
-A utility script used for converting audio samples to be 
-suitable for feature extraction
-"""
-
-import os
-
-def convert_audio(audio_path, target_path, remove=False):
-    """This function sets the audio `audio_path` to:
-        - 16000Hz Sampling rate
-        - one audio channel ( mono )
-            Params:
-                audio_path (str): the path of audio wav file you want to convert
-                target_path (str): target path to save your new converted wav file
-                remove (bool): whether to remove the old file after converting
-        Note that this function requires ffmpeg installed in your system."""
-
-    os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
-    # os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
-    if remove:
-        os.remove(audio_path)
-
-
-def convert_audios(path, target_path, remove=False):
-    """Converts a path of wav files to:
-        - 16000Hz Sampling rate
-        - one audio channel ( mono )
-        and then put them into a new folder called `target_path`
-            Params:
-                audio_path (str): the path of audio wav file you want to convert
-                target_path (str): target path to save your new converted wav file
-                remove (bool): whether to remove the old file after converting
-        Note that this function requires ffmpeg installed in your system."""
-
-    for dirpath, dirnames, filenames in os.walk(path):
-        for dirname in dirnames:
-            dirname = os.path.join(dirpath, dirname)
-            target_dir = dirname.replace(path, target_path)
-            if not os.path.isdir(target_dir):
-                os.mkdir(target_dir)
-
-    for dirpath, _, filenames in os.walk(path):
-        for filename in filenames:
-            file = os.path.join(dirpath, filename)
-            if file.endswith(".wav"):
-                # it is a wav file
-                target_file = file.replace(path, target_path)
-                convert_audio(file, target_file, remove=remove)
-
-
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="""Convert ( compress ) wav files to 16MHz and mono audio channel ( 1 channel )
-                                                    This utility helps for compressing wav files for training and testing""")
-    parser.add_argument("audio_path", help="Folder that contains wav files you want to convert")
-    parser.add_argument("target_path", help="Folder to save new wav files")
-    parser.add_argument("-r", "--remove", type=bool, help="Whether to remove the old wav file after converting", default=False)
-
-    args = parser.parse_args()
-    audio_path = args.audio_path
-    target_path = args.target_path
-
-    if os.path.isdir(audio_path):
-        if not os.path.isdir(target_path):
-            os.makedirs(target_path)
-            convert_audios(audio_path, target_path, remove=args.remove)
-    elif os.path.isfile(audio_path) and audio_path.endswith(".wav"):
-        if not target_path.endswith(".wav"):
-            target_path += ".wav"
-        convert_audio(audio_path, target_path, remove=args.remove)
-    else:
-        raise TypeError("The audio_path file you specified isn't appropriate for this operation")
--- a/voice_analyzer/Voice_Emotion/lib.py
+++ b/voice_analyzer/Voice_Emotion/lib.py
-import soundfile
-import numpy as np
-import librosa
-import glob
-import os
-from sklearn.model_selection import train_test_split
-
-EMOTIONS = {
-    "01": "neutral",
-    "02": "calm",
-    "03": "happy",
-    "04": "sad",
-    "05": "angry",
-    "06": "fearful",
-    "07": "disgust",
-    "08": "surprised"
-}
-
-
-AVAILABLE_EMOTIONS = {
-    "angry",
-    "sad",
-    "neutral",
-    "happy"
-}
-
-def extract_feature(file_name, **kwargs):
-    mfcc = kwargs.get("mfcc")
-    chroma = kwargs.get("chroma")
-    mel = kwargs.get("mel")
-    contrast = kwargs.get("contrast")
-    tonnetz = kwargs.get("tonnetz")
-    with soundfile.SoundFile(file_name) as sound_file:
-        X = sound_file.read(dtype="float32")
-        sample_rate = sound_file.samplerate
-        if chroma or contrast:
-            stft = np.abs(librosa.stft(X))
-        result = np.array([])
-        if mfcc:
-            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
-            result = np.hstack((result, mfccs))
-        if chroma:
-            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
-            result = np.hstack((result, chroma))
-        if mel:
-            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
-            result = np.hstack((result, mel))
-        if contrast:
-            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
-            result = np.hstack((result, contrast))
-        if tonnetz:
-            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
-            result = np.hstack((result, tonnetz))
-    return result
-
-# update random_state=9
-def load_data(test_size=0.2, random_state=7):
-    X, y = [], []
-    for file in glob.glob("data/Actor_*/*.wav"):
-        basename = os.path.basename(file)
-        emotion = EMOTIONS[basename.split("-")[2]]
-
-        if emotion not in AVAILABLE_EMOTIONS:
-            continue
-
-        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
-        X.append(features)
-        y.append(emotion)
-
-    return train_test_split(np.array(X), y, test_size=test_size, random_state=random_state)
-
-
-import os, glob, pickle
-import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.neural_network import MLPClassifier
-from sklearn.metrics import accuracy_score
-
-def extract_feature_2(file_name, mfcc, chroma, mel):
-    with soundfile.SoundFile(file_name) as sound_file:
-        X = sound_file.read(dtype="float32")
-        sample_rate=sound_file.samplerate
-        if chroma:
-            stft=np.abs(librosa.stft(X))
-        result=np.array([])
-        if mfcc:
-            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
-            result=np.hstack((result, mfccs))
-        if chroma:
-            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
-            result=np.hstack((result, chroma))
-        if mel:
-            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
-            result=np.hstack((result, mel))
-    return result
-
--- a/voice_analyzer/Voice_Emotion/main
+++ b/voice_analyzer/Voice_Emotion/main
-import pyaudio
-import os
-import wave
-import pickle
-from sys import byteorder
-from array import array
-from struct import pack
-from sklearn.neural_network import MLPClassifier
-
-from lib import extract_feature
-
-THRESHOLD = 500
-CHUNK_SIZE = 1024
-FORMAT = pyaudio.paInt16
-RATE = 16000
-
-SILENCE = 30
-
-def is_silent(snd_data):
-    return max(snd_data) < THRESHOLD
-
-def normalize(snd_data):
-    MAXIMUM = 16384
-    times = float(MAXIMUM)/max(abs(i) for i in snd_data)
-
-    r = array('h')
-    for i in snd_data:
-        r.append(int(i*times))
-    return r
-
-def trim(snd_data):
-    def _trim(snd_data):
-        snd_started = False
-        r = array('h')
-
-        for i in snd_data:
-            if not snd_started and abs(i)>THRESHOLD:
-                snd_started = True
-                r.append(i)
-
-            elif snd_started:
-                r.append(i)
-        return r
-
-    snd_data = _trim(snd_data)
-
-    snd_data.reverse()
-    snd_data = _trim(snd_data)
-    snd_data.reverse()
-    return snd_data
-
-def add_silence(snd_data, seconds):
-    r = array('h', [0 for i in range(int(seconds*RATE))])
-    r.extend(snd_data)
-    r.extend([0 for i in range(int(seconds*RATE))])
-    return r
-
-def record():
-    p = pyaudio.PyAudio()
-    stream = p.open(format=FORMAT, channels=1, rate=RATE,
-        input=True, output=True,
-        frames_per_buffer=CHUNK_SIZE)
-
-    num_silent = 0
-    snd_started = False
-
-    r = array('h')
-
-    while 1:
-        # little endian, signed short
-        snd_data = array('h', stream.read(CHUNK_SIZE))
-        if byteorder == 'big':
-            snd_data.byteswap()
-        r.extend(snd_data)
-
-        silent = is_silent(snd_data)
-
-        if silent and snd_started:
-            num_silent += 1
-        elif not silent and not snd_started:
-            snd_started = True
-
-        if snd_started and num_silent > SILENCE:
-            break
-
-    sample_width = p.get_sample_size(FORMAT)
-    stream.stop_stream()
-    stream.close()
-    p.terminate()
-
-    r = normalize(r)
-    r = trim(r)
-    r = add_silence(r, 0.5)
-    return sample_width, r
-
-def record_to_file(path):
-    sample_width, data = record()
-    data = pack('<' + ('h'*len(data)), *data)
-
-    wf = wave.open(path, 'wb')
-    wf.setnchannels(1)
-    wf.setsampwidth(sample_width)
-    wf.setframerate(RATE)
-    wf.writeframes(data)
-    wf.close()
-
-
-
-if __name__ == "__main__":
-    model = pickle.load(open("result/mlp_classifier.model", "rb"))
-    print("Please talk")
-    filename = "test.wav"
-
-    record_to_file(filename)
-    features = extract_feature(filename, mfcc=True, chroma=True, mel=True).reshape(1, -1)
-    result = model.predict(features)[0]
-    print("result:", result)
\ No newline at end of file
--- a/voice_analyzer/Voice_Emotion/train.py
+++ b/voice_analyzer/Voice_Emotion/train.py
-from sklearn.neural_network import MLPClassifier
-
-from sklearn.metrics import accuracy_score
-from lib import load_data
-
-import os
-import pickle
-
-
-X_train, X_test, y_train, y_test = load_data(test_size=0.25)
-
-model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
-
-print("Training the model...")
-model.fit(X_train, y_train)
-
-y_pred = model.predict(X_test)
-
-accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
-
-print("Accuracy: {:.2f}%".format(accuracy*100))
-
-if not os.path.isdir("result"):
-    os.mkdir("result")
-
-pickle.dump(model, open("result/mlp_classifier.model", "wb"))
\ No newline at end of file
--- a/voice_analyzer/Voice_recognizer/Pipfile
+++ b/voice_analyzer/Voice_recognizer/Pipfile
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-vosk = "*"
-pydub = "*"
-transformers = "*"
-torch = "*"
-pyaudio = "*"
-regex = "*"
-ipywidgets = "*"
-spacy = "*"
-
-[dev-packages]
-
-[requires]
-python_version = "3.9"
--- a/voice_analyzer/Voice_recognizer/Pipfile.lock
+++ b/voice_analyzer/Voice_recognizer/Pipfile.lock
--- a/voice_analyzer/Voice_recognizer/ReadMe.md
+++ b/voice_analyzer/Voice_recognizer/ReadMe.md
-Pretrained models:
-English : https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip or https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
-
-Punctuation : https://alphacephei.com/vosk/models/vosk-recasepunc-en-0.22.zip
--- a/voice_analyzer/Voice_recognizer/main.py
+++ b/voice_analyzer/Voice_recognizer/main.py
-from vosk import Model, KaldiRecognizer
-from pydub import AudioSegment
-from transformers import pipeline
-import json
-import subprocess
-import spacy
-from spacy.lang.en.stop_words import STOP_WORDS
-from string import punctuation
-from heapq import nlargest
-
-FRAME_RATE = 16000
-CHANNELS=1
-
-def voice_recognition(filename):
-    model = Model(model_name="vosk-model-en-us-0.22")
-    rec = KaldiRecognizer(model, FRAME_RATE)
-    rec.SetWords(True)
-    
-    mp3 = AudioSegment.from_mp3(filename)
-    mp3 = mp3.set_channels(CHANNELS)
-    mp3 = mp3.set_frame_rate(FRAME_RATE)
-    
-    step = 45000
-    transcript = ""
-    for i in range(0, len(mp3), step):
-        print(f"Progress: {i/len(mp3)}")
-        segment = mp3[i:i+step]
-        rec.AcceptWaveform(segment.raw_data)
-        result = rec.Result()
-        text = json.loads(result)["text"]
-        transcript += text
-    
-    cased = subprocess.check_output('python recasepunc/recasepunc.py predict recasepunc/checkpoint', shell=True, text=True, input=transcript)
-    return cased
-
-def summarize(text, per):
-    nlp = spacy.load('en_core_web_sm')
-    doc= nlp(text)
-    tokens=[token.text for token in doc]
-    word_frequencies={}
-    for word in doc:
-        if word.text.lower() not in list(STOP_WORDS):
-            if word.text.lower() not in punctuation:
-                if word.text not in word_frequencies.keys():
-                    word_frequencies[word.text] = 1
-                else:
-                    word_frequencies[word.text] += 1
-    max_frequency=max(word_frequencies.values())
-    for word in word_frequencies.keys():
-        word_frequencies[word]=word_frequencies[word]/max_frequency
-    sentence_tokens= [sent for sent in doc.sents]
-    sentence_scores = {}
-    for sent in sentence_tokens:
-        for word in sent:
-            if word.text.lower() in word_frequencies.keys():
-                if sent not in sentence_scores.keys():                            
-                    sentence_scores[sent]=word_frequencies[word.text.lower()]
-                else:
-                    sentence_scores[sent]+=word_frequencies[word.text.lower()]
-    select_length=int(len(sentence_tokens)*per)
-    summary=nlargest(select_length, sentence_scores,key=sentence_scores.get)
-    final_summary=[word.text for word in summary]
-    summary=''.join(final_summary)
-    return summary 
-
-transcript = voice_recognition("sample_voice.mp3")
-summary = summarize(transcript, 0.05)
-print(summary)
\ No newline at end of file
--- a/voice_analyzer/Voice_recognizer/requirements.txt
+++ b/voice_analyzer/Voice_recognizer/requirements.txt
 Binary files a/voice_analyzer/Voice_recognizer/requirements.txt and /dev/null differ