Commit aceb2402 authored by Emika Chamodi

voice verification

parent b698b996
......@@ -73,6 +73,7 @@ export type CandidateType = {
state: "INTIAL" | "READY";
resume?: string;
resumeData?: ResumeDataType;
selfIntro?: string;
};
export type OrganizationType = {
......@@ -124,6 +125,7 @@ export type ApplicationType = {
time: string;
link: string;
videoRef?: string;
voiceVerification?: number;
};
score: {
primary: number;
......@@ -135,6 +137,12 @@ export type ApplicationType = {
};
};
export type AnalyseApplicationPayload = {
applicationId: string;
startTime: number;
endTime: number;
};
export interface TypedRequest<T extends Query, U> extends Request {
body: U;
query: T;
......
......@@ -10,6 +10,7 @@ app.use(cors());
const authRoute = require("./routes/auth");
const userRoute = require("./routes/user");
const jobsRoute = require("./routes/jobs");
const applicationsRoute = require("./routes/application");
// Service Initialisation
mongoose.connect(MONGO_URL, {
......@@ -29,5 +30,6 @@ app.use(express.urlencoded({ extended: false }));
app.use("/auth", authRoute);
app.use("/user", userRoute);
app.use("/jobs", jobsRoute);
app.use("/applications", applicationsRoute);
app.listen(API_PORT, () => console.log(`Listening on port ${API_PORT}`));
......@@ -11,6 +11,7 @@ const applicationSchema = new Schema<ApplicationType>({
time: String,
link: String,
videoRef: String,
voiceVerification: Number,
},
    required: false,
},
......
......@@ -42,6 +42,7 @@ const candidateSchema = new Schema<CandidateType>({
  state: { type: String, default: "INITIAL" },
  resume: { type: String, required: false },
  resumeData: { type: ResumeDataSchema, required: false },
  selfIntro: { type: String, required: false },
});
const Candidates = model<CandidateType>("candidates", candidateSchema);
......
import { Router } from "express";
import {
ApplicationType,
TypedRequest,
AnalyseApplicationPayload,
} from "../config/types";
import { authMiddleware, candidateMiddleware } from "../middlewares/auth";
import Application from "../models/Application";
import Jobs from "../models/Job";
import ResumeAPI from "../utilities/apis/resume";
import VoiceAPI from "../utilities/apis/voice";
const router = Router();
router.post(
"/apply",
authMiddleware,
candidateMiddleware,
async (
req: TypedRequest<
{ userId: string },
{ application: ApplicationType; resumeUrl: string }
>,
res
) => {
try {
const { application, resumeUrl } = req.body;
const job = await Jobs.findById(application.job);
const data: any = await ResumeAPI.getResumeScores({
user_id: req.query.userId,
resume_url: resumeUrl,
primary_skills: job.primarySkills,
secondary_skills: job.secondarySkills,
job_desc: job.description,
});
const score: ApplicationType["score"] = {
primary: data.primary_score,
        primaryMatch: data.primary_match,
secondary: data.secondary_score,
secondaryMatch: data.secondary_match,
similarity: data.similarity,
total: data.primary_score + data.secondary_score + data.similarity,
};
const newApplication = new Application({ ...application, score });
const _application = await newApplication.save();
job.applications.push(_application.id);
await job.save();
return res.json({
success: true,
applicationId: _application.id,
});
} catch (error) {
return res.json({ success: false, error });
}
}
);
router.put(
"/update",
authMiddleware,
async (
req: TypedRequest<
{},
{
applicationId: string;
update: Partial<ApplicationType>;
candidateId: string;
}
>,
res
) => {
    try {
      let update = req.body.update;
      if (update.interview?.videoRef) {
        // re-run speaker verification whenever a new interview recording is attached
        const score: any = await VoiceAPI.verifyVoice({
          video_url: update.interview.videoRef,
          user_id: req.body.candidateId,
          application_id: req.body.applicationId,
        });
        update.interview.voiceVerification = score;
      }
      const _application = await Application.findByIdAndUpdate(
        req.body.applicationId,
        { $set: update },
        { new: true } // return the updated document rather than the original
      );
      return res.json({ success: true, application: _application });
    } catch (error) {
      return res.json({ success: false, error });
    }
}
);
router.post(
"/analyse",
authMiddleware,
async (req: TypedRequest<{}, AnalyseApplicationPayload>, res) => {
    try {
      const { applicationId, startTime, endTime } = req.body;
      const data = await VoiceAPI.analyseVoice({
        start: startTime,
        end: endTime,
        application_id: applicationId,
      });
      return res.json({ voice: data });
    } catch (error) {
      return res.json({ success: false, error });
    }
}
);
module.exports = router;
......@@ -45,6 +45,7 @@ router.get(
"profilePicture",
"resume",
"resumeData",
"selfIntro",
],
},
});
......@@ -105,50 +106,4 @@ router.delete(
}
);
router.put(
"/apply",
authMiddleware,
candidateMiddleware,
async (
req: TypedRequest<
{ userId: string },
{ application: ApplicationType; resumeUrl: string }
>,
res
) => {
try {
const { application, resumeUrl } = req.body;
const job = await Jobs.findById(application.job);
const data: any = await ResumeAPI.getResumeScores({
user_id: req.query.userId,
resume_url: resumeUrl,
primary_skills: job.primarySkills,
secondary_skills: job.secondarySkills,
job_desc: job.description,
});
const score: ApplicationType["score"] = {
primary: data.primary_score,
        primaryMatch: data.primary_match,
secondary: data.secondary_score,
secondaryMatch: data.secondary_match,
similarity: data.similarity,
total: data.primary_score + data.secondary_score + data.similarity,
};
const newApplication = new Application({ ...application, score });
const _application = await newApplication.save();
job.applications.push(_application.id);
await job.save();
return res.json({
success: true,
applicationId: _application.id,
});
} catch (error) {
return res.json({ success: false, error });
}
}
);
module.exports = router;
......@@ -3,6 +3,7 @@ import { CandidateType, TypedRequest } from "../config/types";
import { authMiddleware } from "../middlewares/auth";
import Candidates from "../models/Candidate";
import ResumeAPI from "../utilities/apis/resume";
import VoiceAPI from "../utilities/apis/voice";
const router = Router();
......@@ -19,6 +20,12 @@ router.post(
});
update.resumeData = data;
}
if (req.body?.selfIntro) {
await VoiceAPI.enrollVoice({
user_id: req.query.userId,
video_url: req.body.selfIntro,
});
}
await Candidates.findByIdAndUpdate(req.query.userId, { $set: req.body });
return res.status(200).json({ data: req.body });
} catch (error) {
......
import { ResumeDataType } from "../../config/types";
import { request } from "../requests";
export default class ResumeAPI {
......
import { request } from "../requests";
export default class VoiceAPI {
static enrollVoice = (payload: { video_url: string; user_id: string }) =>
request("<BASE_URL>/voice/enroll", "POST", payload);
static verifyVoice = (payload: {
video_url: string;
user_id: string;
application_id: string;
}) => request("<BASE_URL>/voice/verify", "POST", payload);
static analyseVoice = (payload: {
start: number;
end: number;
application_id: string;
}) => request("<BASE_URL>/voice/analyse", "POST", payload);
}
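The <BASE_URL> placeholder points at a separate Python voice service that is not part of this commit. As a hedged sketch of what that service has to expose, the hypothetical FastAPI stub below mirrors the three routes and payload shapes VoiceAPI sends; the framework choice, handler bodies, and return values are assumptions, only the paths and fields come from the client code above.

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class EnrollPayload(BaseModel):
    video_url: str
    user_id: str

class VerifyPayload(BaseModel):
    video_url: str
    user_id: str
    application_id: str

class AnalysePayload(BaseModel):
    start: float
    end: float
    application_id: str

@app.post("/voice/enroll")
def enroll(payload: EnrollPayload):
    # would extract audio from payload.video_url and store a speaker profile
    return {"success": True}

@app.post("/voice/verify")
def verify(payload: VerifyPayload):
    # would compare interview audio to the enrolled profile; the TypeScript
    # client stores the returned number as interview.voiceVerification
    return 0.0

@app.post("/voice/analyse")
def analyse(payload: AnalysePayload):
    # would analyse the [start, end] window of the interview audio
    return {"emotion": "neutral"}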
Dataset: https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view
"""
A utility script used for converting audio samples to be
suitable for feature extraction
"""
import os
def convert_audio(audio_path, target_path, remove=False):
"""This function sets the audio `audio_path` to:
- 16000Hz Sampling rate
- one audio channel ( mono )
Params:
audio_path (str): the path of audio wav file you want to convert
target_path (str): target path to save your new converted wav file
remove (bool): whether to remove the old file after converting
Note that this function requires ffmpeg installed in your system."""
os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
# os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
if remove:
os.remove(audio_path)
def convert_audios(path, target_path, remove=False):
"""Converts a path of wav files to:
- 16000Hz Sampling rate
- one audio channel ( mono )
and then put them into a new folder called `target_path`
Params:
audio_path (str): the path of audio wav file you want to convert
target_path (str): target path to save your new converted wav file
remove (bool): whether to remove the old file after converting
Note that this function requires ffmpeg installed in your system."""
for dirpath, dirnames, filenames in os.walk(path):
for dirname in dirnames:
dirname = os.path.join(dirpath, dirname)
target_dir = dirname.replace(path, target_path)
if not os.path.isdir(target_dir):
os.mkdir(target_dir)
for dirpath, _, filenames in os.walk(path):
for filename in filenames:
file = os.path.join(dirpath, filename)
if file.endswith(".wav"):
# it is a wav file
target_file = file.replace(path, target_path)
convert_audio(file, target_file, remove=remove)
if __name__ == "__main__":
import argparse
    parser = argparse.ArgumentParser(description="""Convert (compress) wav files to a 16 kHz sampling rate and a mono audio channel (1 channel).
    This utility helps compress wav files for training and testing""")
parser.add_argument("audio_path", help="Folder that contains wav files you want to convert")
parser.add_argument("target_path", help="Folder to save new wav files")
parser.add_argument("-r", "--remove", type=bool, help="Whether to remove the old wav file after converting", default=False)
args = parser.parse_args()
audio_path = args.audio_path
target_path = args.target_path
if os.path.isdir(audio_path):
if not os.path.isdir(target_path):
os.makedirs(target_path)
convert_audios(audio_path, target_path, remove=args.remove)
elif os.path.isfile(audio_path) and audio_path.endswith(".wav"):
if not target_path.endswith(".wav"):
target_path += ".wav"
convert_audio(audio_path, target_path, remove=args.remove)
else:
raise TypeError("The audio_path file you specified isn't appropriate for this operation")
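As an aside, os.system runs ffmpeg through a shell, so the conversion stays sensitive to how paths are quoted. A minimal shell-free sketch, assuming only that ffmpeg is on the PATH as the docstrings already require, passes the arguments as a list via subprocess.run:

import subprocess

def convert_audio_subprocess(audio_path, target_path, remove=False):
    # same conversion as convert_audio above, but without invoking a shell
    subprocess.run(
        ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", target_path],
        check=True,  # raise CalledProcessError if ffmpeg fails
    )
    if remove:
        os.remove(audio_path)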
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split
EMOTIONS = {
"01": "neutral",
"02": "calm",
"03": "happy",
"04": "sad",
"05": "angry",
"06": "fearful",
"07": "disgust",
"08": "surprised"
}
AVAILABLE_EMOTIONS = {
"angry",
"sad",
"neutral",
"happy"
}
def extract_feature(file_name, **kwargs):
mfcc = kwargs.get("mfcc")
chroma = kwargs.get("chroma")
mel = kwargs.get("mel")
contrast = kwargs.get("contrast")
tonnetz = kwargs.get("tonnetz")
with soundfile.SoundFile(file_name) as sound_file:
X = sound_file.read(dtype="float32")
sample_rate = sound_file.samplerate
if chroma or contrast:
stft = np.abs(librosa.stft(X))
result = np.array([])
if mfcc:
mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
result = np.hstack((result, mfccs))
if chroma:
chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
result = np.hstack((result, chroma))
if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
result = np.hstack((result, mel))
if contrast:
contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
result = np.hstack((result, contrast))
if tonnetz:
tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
result = np.hstack((result, tonnetz))
return result
# update random_state=9
def load_data(test_size=0.2, random_state=7):
X, y = [], []
for file in glob.glob("data/Actor_*/*.wav"):
basename = os.path.basename(file)
emotion = EMOTIONS[basename.split("-")[2]]
if emotion not in AVAILABLE_EMOTIONS:
continue
features = extract_feature(file, mfcc=True, chroma=True, mel=True)
X.append(features)
y.append(emotion)
return train_test_split(np.array(X), y, test_size=test_size, random_state=random_state)
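load_data reads the emotion label out of the file name (basename.split("-")[2]), which matches the RAVDESS naming convention: the third dash-separated field of modality-channel-emotion-intensity-statement-repetition-actor is the emotion code. A quick illustration with a made-up file name:

# hypothetical RAVDESS-style file name; the third field ("05") is the emotion
basename = "03-01-05-01-02-01-12.wav"
print(EMOTIONS[basename.split("-")[2]])  # -> "angry"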
import os, glob, pickle
import numpy as np
import soundfile
import librosa
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
def extract_feature_2(file_name, mfcc, chroma, mel):
with soundfile.SoundFile(file_name) as sound_file:
X = sound_file.read(dtype="float32")
sample_rate=sound_file.samplerate
if chroma:
stft=np.abs(librosa.stft(X))
result=np.array([])
if mfcc:
mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
result=np.hstack((result, mfccs))
if chroma:
chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
result=np.hstack((result, chroma))
if mel:
        mel=np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
result=np.hstack((result, mel))
return result
import pyaudio
import os
import wave
import pickle
from sys import byteorder
from array import array
from struct import pack
from sklearn.neural_network import MLPClassifier
from lib import extract_feature
THRESHOLD = 500      # amplitude below which a chunk is considered silent
CHUNK_SIZE = 1024    # frames read from the microphone per iteration
FORMAT = pyaudio.paInt16
RATE = 16000         # sampling rate, matching the converted training audio
SILENCE = 30         # silent chunks heard after speech before recording stops
def is_silent(snd_data):
return max(snd_data) < THRESHOLD
def normalize(snd_data):
MAXIMUM = 16384
times = float(MAXIMUM)/max(abs(i) for i in snd_data)
r = array('h')
for i in snd_data:
r.append(int(i*times))
return r
def trim(snd_data):
def _trim(snd_data):
snd_started = False
r = array('h')
for i in snd_data:
if not snd_started and abs(i)>THRESHOLD:
snd_started = True
r.append(i)
elif snd_started:
r.append(i)
return r
snd_data = _trim(snd_data)
snd_data.reverse()
snd_data = _trim(snd_data)
snd_data.reverse()
return snd_data
def add_silence(snd_data, seconds):
r = array('h', [0 for i in range(int(seconds*RATE))])
r.extend(snd_data)
r.extend([0 for i in range(int(seconds*RATE))])
return r
def record():
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=1, rate=RATE,
input=True, output=True,
frames_per_buffer=CHUNK_SIZE)
num_silent = 0
snd_started = False
r = array('h')
    while True:
# little endian, signed short
snd_data = array('h', stream.read(CHUNK_SIZE))
if byteorder == 'big':
snd_data.byteswap()
r.extend(snd_data)
silent = is_silent(snd_data)
if silent and snd_started:
num_silent += 1
elif not silent and not snd_started:
snd_started = True
if snd_started and num_silent > SILENCE:
break
sample_width = p.get_sample_size(FORMAT)
stream.stop_stream()
stream.close()
p.terminate()
r = normalize(r)
r = trim(r)
r = add_silence(r, 0.5)
return sample_width, r
def record_to_file(path):
sample_width, data = record()
data = pack('<' + ('h'*len(data)), *data)
wf = wave.open(path, 'wb')
wf.setnchannels(1)
wf.setsampwidth(sample_width)
wf.setframerate(RATE)
wf.writeframes(data)
wf.close()
if __name__ == "__main__":
    with open("result/mlp_classifier.model", "rb") as f:
        model = pickle.load(f)
print("Please talk")
filename = "test.wav"
record_to_file(filename)
features = extract_feature(filename, mfcc=True, chroma=True, mel=True).reshape(1, -1)
result = model.predict(features)[0]
print("result:", result)
\ No newline at end of file
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from lib import load_data
import os
import pickle
X_train, X_test, y_train, y_test = load_data(test_size=0.25)
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
print("Training the model...")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))
if not os.path.isdir("result"):
os.mkdir("result")
pickle.dump(model, open("result/mlp_classifier.model", "wb"))
\ No newline at end of file
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
vosk = "*"
pydub = "*"
transformers = "*"
torch = "*"
pyaudio = "*"
regex = "*"
ipywidgets = "*"
spacy = "*"
[dev-packages]
[requires]
python_version = "3.9"
Pretrained models:
English : https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip or https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
Punctuation : https://alphacephei.com/vosk/models/vosk-recasepunc-en-0.22.zip
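voice_recognition below loads the English model by name, which vosk resolves from (and may download into) its local model cache. As a hedged alternative, a model zip downloaded from the links above can be extracted and loaded by path; the folder name here is illustrative:

from vosk import Model, KaldiRecognizer

model = Model("vosk-model-small-en-us-0.15")  # path to the extracted model folder
rec = KaldiRecognizer(model, 16000)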
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
from transformers import pipeline
import json
import subprocess
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
FRAME_RATE = 16000
CHANNELS = 1
def voice_recognition(filename):
model = Model(model_name="vosk-model-en-us-0.22")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
mp3 = AudioSegment.from_mp3(filename)
mp3 = mp3.set_channels(CHANNELS)
mp3 = mp3.set_frame_rate(FRAME_RATE)
    # transcribe in 45-second chunks (pydub slices are indexed in milliseconds)
    step = 45000
    transcript = ""
    for i in range(0, len(mp3), step):
        print(f"Progress: {i / len(mp3):.0%}")
        segment = mp3[i:i + step]
        rec.AcceptWaveform(segment.raw_data)
        result = rec.Result()
        text = json.loads(result)["text"]
        transcript += text + " "  # keep a space between chunk transcripts
cased = subprocess.check_output('python recasepunc/recasepunc.py predict recasepunc/checkpoint', shell=True, text=True, input=transcript)
return cased
def summarize(text, per):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)
    # count how often each non-stopword, non-punctuation token appears
    word_frequencies = {}
    for word in doc:
        lower = word.text.lower()
        if lower not in STOP_WORDS and lower not in punctuation:
            word_frequencies[lower] = word_frequencies.get(lower, 0) + 1
    # normalise the counts to [0, 1]
    max_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / max_frequency
    # score each sentence by the normalised frequencies of the words it contains
    sentence_tokens = list(doc.sents)
    sentence_scores = {}
    for sent in sentence_tokens:
        for word in sent:
            if word.text.lower() in word_frequencies:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_frequencies[word.text.lower()]
    # keep the highest-scoring `per` fraction of sentences as the summary
    select_length = int(len(sentence_tokens) * per)
    summary = nlargest(select_length, sentence_scores, key=sentence_scores.get)
    # join with spaces so the selected sentences do not run together
    return ' '.join(sent.text for sent in summary)
transcript = voice_recognition("sample_voice.mp3")
summary = summarize(transcript, 0.05)
print(summary)
\ No newline at end of file
Binary files a/voice_analyzer/Voice_recognizer/requirements.txt and /dev/null differ