Commit 9d39d3a0 authored by H.M.C. Nadunithara Wijerathne

Merge branch 'it19953298' into 'master'

Voice verification

See merge request !4
parents 59c6a55f aceb2402
@@ -73,6 +73,7 @@ export type CandidateType = {
  state: "INTIAL" | "READY";
  resume?: string;
  resumeData?: ResumeDataType;
  selfIntro?: string;
};

export type OrganizationType = {
@@ -124,6 +125,7 @@ export type ApplicationType = {
    time: string;
    link: string;
    videoRef?: string;
    voiceVerification?: number;
  };
  score: {
    primary: number;
@@ -135,6 +137,12 @@ export type ApplicationType = {
  };
};

export type AnalyseApplicationPayload = {
  applicationId: string;
  startTime: number;
  endTime: number;
};

export interface TypedRequest<T extends Query, U> extends Request {
  body: U;
  query: T;
...
@@ -10,6 +10,7 @@ app.use(cors());
const authRoute = require("./routes/auth");
const userRoute = require("./routes/user");
const jobsRoute = require("./routes/jobs");
const applicationsRoute = require("./routes/application");

// Service Initialisation
mongoose.connect(MONGO_URL, {
@@ -29,5 +30,6 @@ app.use(express.urlencoded({ extended: false }));
app.use("/auth", authRoute);
app.use("/user", userRoute);
app.use("/jobs", jobsRoute);
app.use("/applications", applicationsRoute);

app.listen(API_PORT, () => console.log(`Listening on port ${API_PORT}`));
@@ -11,6 +11,7 @@ const applicationSchema = new Schema<ApplicationType>({
      time: String,
      link: String,
      videoRef: String,
      voiceVerification: Number,
    },
    require: false,
  },
...
@@ -42,6 +42,7 @@ const candidateSchema = new Schema<CandidateType>({
  state: { type: String, default: "INTIAL" },
  resume: { type: String, require: false },
  resumeData: { type: ResumeDataSchema, require: false },
  selfIntro: { type: String, require: false },
});

const Candidates = model<CandidateType>("candidates", candidateSchema);
...
import { Router } from "express";
import {
  ApplicationType,
  TypedRequest,
  AnalyseApplicationPayload,
} from "../config/types";
import { authMiddleware, candidateMiddleware } from "../middlewares/auth";
import Application from "../models/Application";
import Jobs from "../models/Job";
import ResumeAPI from "../utilities/apis/resume";
import VoiceAPI from "../utilities/apis/voice";

const router = Router();

// Score the resume against the job's skill lists via the Python service,
// then store the application and attach it to the job
router.post(
  "/apply",
  authMiddleware,
  candidateMiddleware,
  async (
    req: TypedRequest<
      { userId: string },
      { application: ApplicationType; resumeUrl: string }
    >,
    res
  ) => {
    try {
      const { application, resumeUrl } = req.body;
      const job = await Jobs.findById(application.job);
      const data: any = await ResumeAPI.getResumeScores({
        user_id: req.query.userId,
        resume_url: resumeUrl,
        primary_skills: job.primarySkills,
        secondary_skills: job.secondarySkills,
        job_desc: job.description,
      });
      const score: ApplicationType["score"] = {
        primary: data.primary_score,
        primatyMatch: data.primary_match,
        secondary: data.secondary_score,
        secondaryMatch: data.secondary_match,
        similarity: data.similarity,
        total: data.primary_score + data.secondary_score + data.similarity,
      };
      const newApplication = new Application({ ...application, score });
      const _application = await newApplication.save();
      job.applications.push(_application.id);
      await job.save();
      return res.json({
        success: true,
        applicationId: _application.id,
      });
    } catch (error) {
      return res.json({ success: false, error });
    }
  }
);

router.put(
  "/update",
  authMiddleware,
  async (
    req: TypedRequest<
      {},
      {
        applicationId: string;
        update: Partial<ApplicationType>;
        candidateId: string;
      }
    >,
    res
  ) => {
    let update = req.body.update;
    // A new interview recording was uploaded: verify the speaker against the
    // candidate's enrolled voice print and store the similarity score
    if (update.interview?.videoRef) {
      const score: any = await VoiceAPI.verifyVoice({
        video_url: update.interview?.videoRef,
        user_id: req.body.candidateId,
        application_id: req.body.applicationId,
      });
      update.interview.voiceVerification = score;
    }
    Application.findByIdAndUpdate(req.body.applicationId, {
      $set: update,
    })
      .then((_application) => {
        res.json({
          success: true,
          application: { ..._application, ...update },
        });
      })
      .catch((err) => res.send(err));
  }
);

// Emotion analysis of a time slice of the interview recording
router.post(
  "/analyse",
  authMiddleware,
  async (req: TypedRequest<{}, AnalyseApplicationPayload>, res) => {
    const { applicationId, startTime, endTime } = req.body;
    const data = await VoiceAPI.analyseVoice({
      start: startTime,
      end: endTime,
      application_id: applicationId,
    });
    return res.json({ voice: data });
  }
);

module.exports = router;
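Note: a minimal client sketch of the new `/applications` flow, for reference. Assumptions not stated in the diff: the Express server listens on http://localhost:4000 and authMiddleware accepts a bearer token; all IDs and URLs are placeholders.

```python
import requests

BASE = "http://localhost:4000"  # assumption: port not specified in the diff
headers = {"Authorization": "Bearer <token>"}  # assumption: whatever authMiddleware expects

# 1. apply: the resume is scored against the job and the application is stored
application = {"job": "<jobId>"}  # plus the other ApplicationType fields
r = requests.post(f"{BASE}/applications/apply", params={"userId": "<candidateId>"},
                  headers=headers,
                  json={"application": application,
                        "resumeUrl": "https://example.com/resume.pdf"})
application_id = r.json()["applicationId"]

# 2. update: attaching interview.videoRef triggers voice verification against the
#    enrolled voice print; the score lands in interview.voiceVerification
requests.put(f"{BASE}/applications/update", headers=headers,
             json={"applicationId": application_id, "candidateId": "<candidateId>",
                   "update": {"interview": {"videoRef": "https://example.com/interview.mp4"}}})

# 3. analyse: emotion analysis of a time slice (in seconds) of the interview recording
r = requests.post(f"{BASE}/applications/analyse", headers=headers,
                  json={"applicationId": application_id, "startTime": 0, "endTime": 10})
print(r.json()["voice"])
```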
@@ -45,6 +45,7 @@ router.get(
          "profilePicture",
          "resume",
          "resumeData",
          "selfIntro",
        ],
      },
    });
@@ -105,50 +106,4 @@ router.delete(
  }
);
router.put(
  "/apply",
  authMiddleware,
  candidateMiddleware,
  async (
    req: TypedRequest<
      { userId: string },
      { application: ApplicationType; resumeUrl: string }
    >,
    res
  ) => {
    try {
      const { application, resumeUrl } = req.body;
      const job = await Jobs.findById(application.job);
      const data: any = await ResumeAPI.getResumeScores({
        user_id: req.query.userId,
        resume_url: resumeUrl,
        primary_skills: job.primarySkills,
        secondary_skills: job.secondarySkills,
        job_desc: job.description,
      });
      const score: ApplicationType["score"] = {
        primary: data.primary_score,
        primatyMatch: data.primary_match,
        secondary: data.secondary_score,
        secondaryMatch: data.secondary_match,
        similarity: data.similarity,
        total: data.primary_score + data.secondary_score + data.similarity,
      };
      const newApplication = new Application({ ...application, score });
      const _application = await newApplication.save();
      job.applications.push(_application.id);
      await job.save();
      return res.json({
        success: true,
        applicationId: _application.id,
      });
    } catch (error) {
      return res.json({ success: false, error });
    }
  }
);
module.exports = router;
@@ -3,6 +3,7 @@ import { CandidateType, TypedRequest } from "../config/types";
import { authMiddleware } from "../middlewares/auth";
import Candidates from "../models/Candidate";
import ResumeAPI from "../utilities/apis/resume";
import VoiceAPI from "../utilities/apis/voice";

const router = Router();
@@ -19,6 +20,12 @@ router.post(
      });
      update.resumeData = data;
    }
    // Enroll the candidate's voice print from the self-introduction video
    if (req.body?.selfIntro) {
      await VoiceAPI.enrollVoice({
        user_id: req.query.userId,
        video_url: req.body.selfIntro,
      });
    }
    await Candidates.findByIdAndUpdate(req.query.userId, { $set: req.body });
    return res.status(200).json({ data: req.body });
  } catch (error) {
...
import { ResumeDataType } from "../../config/types";
import { request } from "../requests";
export default class ResumeAPI {
...
import { request } from "../requests";

export default class VoiceAPI {
  static enrollVoice = (payload: { video_url: string; user_id: string }) =>
    request("<BASE_URL>/voice/enroll", "POST", payload);

  static verifyVoice = (payload: {
    video_url: string;
    user_id: string;
    application_id: string;
  }) => request("<BASE_URL>/voice/verify", "POST", payload);

  static analyseVoice = (payload: {
    start: number;
    end: number;
    application_id: string;
  }) => request("<BASE_URL>/voice/analyse", "POST", payload);
}
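For reference, the same three calls made directly against the Python service; the payload shapes match the FastAPI models further down. `<BASE_URL>` is left unresolved in the diff, so uvicorn's default address is assumed here:

```python
import requests

VOICE_BASE = "http://127.0.0.1:8000"  # assumption: uvicorn default; <BASE_URL> is a placeholder

requests.post(f"{VOICE_BASE}/voice/enroll",
              json={"video_url": "https://example.com/intro.mp4", "user_id": "user123"})

score = requests.post(f"{VOICE_BASE}/voice/verify",
                      json={"video_url": "https://example.com/interview.mp4",
                            "user_id": "user123", "application_id": "app456"}).json()

emotion = requests.post(f"{VOICE_BASE}/voice/analyse",
                        json={"start": 0, "end": 10, "application_id": "app456"}).json()
```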
__pycache__
**/__pycache__
\ No newline at end of file
voices/auth/embed/**
voices/auth/temp/**
voices/interviews/**
resumes
models/**
data/**
\ No newline at end of file
filename,name,mobile_number,email,company_names,college_name,experience,skills,experience_age,degree,words,primary_score,primary_match,secondary_score,secondary_match,no_of_pages,document_similarity,document_score,Score
resumes/Dhaval_Thakkar_Resume.pdf,Dhaval Thakkar,9191729595,thakkar.dhaval.haresh@gmail.com,['UNIFYND TECHNOLOGIES PVT. LTD'],None,"['UNIFYND TECHNOLOGIES PVT. LTD. | Data Scientist', 'Mumbai, MH, India | June 2018 – Present', '• Led the development of a Templatized OCR Engine with GUI to onboard 2000+ retailers from different malls. The', 'microservice deployed is currently operating at an accuracy of 81%', '• Built a Customer Segmentation model to target customers with relevant coupons, rewards, and content resulting', 'in a 3x increase in revenue and 2x increase in coupon utilization', '• Built a Dynamic Coupon Pricing Engine for malls that led to a 5x increase in coupon consumption on the coupon', 'marketplace', '• Built a Pricing Engine and Customer Segmentation Model for a logistics company which saw a 32% reduction in', 'Customer Attrition and a 12% increase in Repeat Purchase Rate', '• Developed an Automated End to End Reporting system to track KPIs performance for 10 malls that saves 60', 'hours of manual labour each month', 'UNIFYND TECHNOLOGIES PVT. LTD. | Intern Data Scientist Mumbai, MH, India | Sept 2017 - June 2018', '• Built a Smart Cryptocurrency trading platform which used social data and historical prices to optimize current', 'portfolio. Boosted overall profit from the portfolio by 30%', '• Worked with Product and Marketing teams to identify the power users of an app which resulted in 43% increase in', 'activity and a 65% increase in revenue from these users', 'ZIFF, INC | Deep Learning Intern', 'Provo, UT, USA | May 2017 – Aug 2017', '• Demonstrated competency in Hyperparameter Optimization, Image Augmentation and Learning Rate decay', 'strategies using the Keras Library', '• Deployed a Multi-Class Image classifier microservice written on Flask as a container on AWS EC2 using Docker']","['Html', 'Data analytics', 'Marketing', 'Segmentation', 'Content', 'Algorithms', 'Numpy', 'Pandas', 'Github', 'R', 'Logistics', 'Css', 'Operating systems', 'Testing', 'Flask', 'Mysql', 'Scrapy', 'Machine learning', 'Security', 'Keras', 'Python', 'Kpis', 'System', 'Docker', 'Reporting', 'Analytics', 'Aws', 'Engineering', 'Anaconda', 'Networking', 'Sql']",5.75,['Bachelor of Engineering'],350,44,"['ocr', 'aws', 'python', 'gcp']",42,"['data', 'ocr', 'science']",1,32,50.0,168.0
resumes/python-developer-resume-2.pdf,Python Developer,456-7890,ggonzalez@email.com,None,None,"['Python Developer Intern', 'Knewton', 'April 2016 - April 2017', '· Worked alongside another developer to implement RESTful APIs', 'Chicago, IL', 'in Django that enabled internal analytics team to increase', 'reporting speed by 24%', '· Using Selenium, built out a unit testing infrastructure for a client', 'web application that reduced the number of bugs reported by', 'the client by 11% month over month']","['Django', 'Math', 'Oracle', 'Requests', 'Github', 'Api', 'Database', 'Css', 'Design', 'Postgresql', 'Testing', 'Agile', 'Apis', 'Selenium', 'Rest', 'Python', 'Writing', 'System', 'Updates', 'Javascript', 'Reporting', 'Analytics', 'Aws', 'Sql', 'Process']",1.0,"['B.S.', 'M.S.']",223,22,"['python', 'aws']",28,"['science', 'data']",1,20,50.0,120.0
resumes/software-engineer-resume-1.pdf,New York,456-7890,cmcturland@email.com,None,None,"['Software Engineer', 'Embark', 'January 2015 - current / New York, NY', 'Worked with product managers to re-architect a multi-page web', 'app into a single page web-app, boosting yearly revenue by $1.4M', 'Constructed the logic for a streamlined ad-serving platform that', 'scaled to our 35M users, which improved the page speed by 15%', 'after implementation', 'Tested software for bugs and operating speed, fixing bugs and', 'documenting processes to increase efficiency by 18%', 'Iterated platform for college admissions, collaborating with a group', 'of 4 engineers to create features across the software', 'Software Engineer', 'MarketSmart', 'April 2012 - January 2015 / Washington, DC', 'Built RESTful APIs that served data to the JavaScript front-end', 'based on dynamically chosen user inputs that handled over 500,000', 'concurrent users', 'Built internal tool using NodeJS and Pupeteer.js to automate QA and', 'monitoring of donor-facing web app, which improved CTR by 3%', 'Reviewed code and conducted testing for 3 additional features on', 'donor-facing web app that increased contributions by 12%', 'Software Engineer Intern', 'Marketing Science Company', 'April 2011 - March 2012 / Pittsburgh, PA', 'Partnered with a developer to implement RESTful APIs in Django,', 'enabling analytics team to increase reporting speed by 24%', 'Using Selenium I built out a unit testing infrastructure for a client', 'application that reduced the number of bugs reported by the client', 'by 11% month over month']","['Django', 'Marketing', 'Unix', 'Nosql', 'R', 'Css', 'Postgresql', 'Testing', 'Mysql', 'Sci', 'Apis', 'Selenium', 'Admissions', 'Python', 'Html5', 'Javascript', 'Reporting', 'Analytics', 'C', 'Sql', 'Aws']",3.67,['B.S.'],233,22,"['python', 'aws']",28,"['science', 'data']",1,10,50.0,110.0
resumes/Santhosh_Narayanan.pdf,SANTHOSH NARAYANAN,417-6755,santhosn@usc.edu,None,None,"['on an EC2 server supported by S3 and RDS.', '\uf0a7 Maintained AWS infrastructure for institute’s annual technical festival website, by hosting the website', 'on an EC2 Ubuntu server.', 'K J Somaiya Inst. of Engg. & I.T – Penetration tester', 'December 2016 – January 2016', '\uf0a7 Conducted penetration testing for institute’s online admission and examination portal.', '\uf0a7 Performed authentication checks, access control checks, per screen checks (XSS, SQL injection.).', '\uf0a7 Delivered error free application, incorporating patches for the respective bugs using ASP.NET']","['Html', 'Jupyter', 'Access', 'Numpy', 'Php', 'Matplotlib', 'Oracle', 'Pandas', 'Computer science', 'Css', 'Purchasing', 'Schedule', 'Scheduling', 'Flask', 'Testing', 'Lan', 'Mysql', 'Scrapy', 'Security', 'Programming', 'Website', 'Keras', 'Python', 'System', 'Wordpress', 'Spyder', 'Technical', 'Ubuntu', 'Javascript', 'Java', 'Aws', 'Engineering', 'Sql', 'Certification']",,None,367,22,"['python', 'aws']",14,['science'],1,7,50.0,93.0
About the job
Borneo.io is building the next-generation ML-powered data privacy platform for hyper-growth companies. The Data Scientist role is at the core of Borneo's engineering. You will be building models, manipulating big data, and working with APIs essential to the Borneo product.
We are growing fast and expanding our data science family with outstanding minds and diverse personalities.
As a Data Scientist at Borneo, you'll have the opportunity to:
Work with some of the largest data sets used by some of the leading global technology companies
Help build a predictive product and inform features at the ground level.
Lead the way in leveraging unstructured data for predictive modeling, anomaly detection, and privacy compliance.
Responsibilities:
Identify and automate data collection processes
Dive into complex data sets to analyze trends and identify opportunities for improvement.
Build predictive models and machine-learning algorithms
Present information using data visualization techniques
Propose solutions and strategies to business challenges
Have a data-driven decision making approach
Requirements:
5-8 years of relevant experience, B2B startup experience preferred
Proven experience as a Data Scientist or Data Analyst
Experience in building ML models and deploying them to production
A solid understanding of data science fundamentals, statistical techniques, NLP algorithms
Understand research papers and create quick proof of concept relevant to the product
Expert in implementing quick prototypes that show business value
Experience with programming languages such as NodeJS/Python/JavaScript; cloud technologies such as AWS/GCP/K8s, etc.
### Create conda python 3.9 env
conda create -n server tensorflow python=3.9

### PIP Packages
gensim==3.8.1
texthero==1.1.0
- install gfortran : https://fortran-lang.org/learn/os_setup/install_gfortran
- download and install the correct scipy wheel from https://pypi.org/project/scipy/#files : pip install <filename>.whl
- conda install scipy=1.9.3
pyresparser
- conda install -c conda-forge importlib_metadata
soundfile
pip install librosa==0.9.2
### CONDA Packages
tqdm
pdf2image
pandas
pytesseract
"uvicorn[standard]"
fastapi
moviepy
conda install -c blaze sqlite3

### Run server
uvicorn main:app --reload
### Datasets & models
voice: https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view
models: https://drive.google.com/file/d/1TWjIiyyInXHaXySKymM6VcsIR5YMVQ1V/view?usp=share_link
conda list --export > conda-requirements.txt
pip freeze > pip-requirements.txt
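A quick sanity check that the environment resolved correctly (run inside the `server` env; the package set is taken from the lists above):

```python
# fails fast if any of the core dependencies is missing or mis-built
import tensorflow, librosa, moviepy.editor, pyresparser, fastapi, pyannote.audio
print(tensorflow.__version__, librosa.__version__)
```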
@@ -10,4 +10,18 @@ class ResumeScores(BaseModel):
    user_id: str
    primary_skills: List[str] = []
    secondary_skills: List[str] = []
    job_desc: str
\ No newline at end of file
class EnrollVoice(BaseModel):
    video_url: str
    user_id: str

class VerifyVoice(BaseModel):
    video_url: str
    application_id: str
    user_id: str

class AnalyseVoice(BaseModel):
    start: int
    end: int
    application_id: str
\ No newline at end of file
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: win-64
absl-py=1.4.0=pypi_0
aiohttp=3.8.4=pypi_0
aiosignal=1.3.1=pypi_0
alembic=1.10.2=pypi_0
antlr4-python3-runtime=4.9.3=pypi_0
anyio=3.6.2=pyhd8ed1ab_0
aom=3.5.0=h63175ca_0
asteroid-filterbanks=0.4.0=pypi_0
astunparse=1.6.3=pypi_0
async-timeout=4.0.2=pypi_0
attrs=22.2.0=pypi_0
audioread=3.0.0=pypi_0
backports-cached-property=1.0.2=pypi_0
blis=0.7.9=pypi_0
brotli=1.0.9=hcfcfb64_8
brotli-bin=1.0.9=hcfcfb64_8
brotlipy=0.7.0=py39ha55989b_1005
bzip2=1.0.8=h8ffe710_4
ca-certificates=2022.12.7=h5b45459_0
cachetools=5.3.0=pypi_0
catalogue=1.0.2=pypi_0
certifi=2022.12.7=pyhd8ed1ab_0
cffi=1.15.1=py39h68f70e3_3
chardet=5.1.0=pypi_0
charset-normalizer=3.1.0=pypi_0
click=8.1.3=win_pyhd8ed1ab_2
cmaes=0.9.1=pypi_0
colorama=0.4.6=pyhd8ed1ab_0
colorlog=6.7.0=pypi_0
commonmark=0.9.1=pypi_0
contourpy=1.0.7=py39h1f6ef14_0
cryptography=40.0.1=py39hb6bd5e6_0
cycler=0.11.0=pyhd8ed1ab_0
cymem=2.0.7=pypi_0
decorator=5.1.1=pyhd8ed1ab_0
docopt=0.6.2=pypi_0
docx2txt=0.8=pypi_0
einops=0.3.2=pypi_0
en-core-web-sm=2.3.1=pypi_0
expat=2.5.0=h1537add_0
fastapi=0.95.0=pyhd8ed1ab_0
ffmpeg=5.1.2=gpl_h5b1d025_106
filelock=3.10.7=pypi_0
flatbuffers=23.3.3=pypi_0
font-ttf-dejavu-sans-mono=2.37=hab24e00_0
font-ttf-inconsolata=3.000=h77eed37_0
font-ttf-source-code-pro=2.038=h77eed37_0
font-ttf-ubuntu=0.83=hab24e00_0
fontconfig=2.14.2=hbde0cde_0
fonts-conda-ecosystem=1=0
fonts-conda-forge=1=0
fonttools=4.39.3=py39ha55989b_0
freetype=2.12.1=h546665d_1
frozenlist=1.3.3=pypi_0
fsspec=2023.3.0=pypi_0
future=0.18.3=pyhd8ed1ab_0
gast=0.4.0=pypi_0
gensim=3.8.1=pypi_0
gettext=0.21.1=h5728263_0
glib=2.74.1=h12be248_1
glib-tools=2.74.1=h12be248_1
google-auth=2.17.0=pypi_0
google-auth-oauthlib=0.4.6=pypi_0
google-pasta=0.2.0=pypi_0
greenlet=2.0.2=pypi_0
grpcio=1.53.0=pypi_0
gst-plugins-base=1.22.0=h001b923_2
gstreamer=1.22.0=h6b5321d_2
h11=0.14.0=pyhd8ed1ab_0
h5py=3.8.0=pypi_0
hmmlearn=0.2.8=pypi_0
huggingface-hub=0.13.3=pypi_0
hyperpyyaml=1.1.0=pypi_0
icu=70.1=h0e60522_0
idna=3.4=pyhd8ed1ab_0
imageio=2.27.0=pyh24c5eb1_0
imageio-ffmpeg=0.4.8=pyhd8ed1ab_0
importlib-metadata=6.1.0=pyha770c72_0
importlib-resources=5.12.0=pyhd8ed1ab_0
importlib_metadata=6.1.0=hd8ed1ab_0
importlib_resources=5.12.0=pyhd8ed1ab_0
intel-openmp=2023.0.0=h57928b3_25922
joblib=1.2.0=pyhd8ed1ab_0
jsonschema=4.17.3=pypi_0
julius=0.2.7=pypi_0
keras=2.10.0=pypi_0
keras-preprocessing=1.1.2=pypi_0
kiwisolver=1.4.4=py39h1f6ef14_1
krb5=1.20.1=heb0366b_0
lcms2=2.15=h3e3b177_1
lerc=4.0.0=h63175ca_0
libblas=3.9.0=16_win64_mkl
libbrotlicommon=1.0.9=hcfcfb64_8
libbrotlidec=1.0.9=hcfcfb64_8
libbrotlienc=1.0.9=hcfcfb64_8
libcblas=3.9.0=16_win64_mkl
libclang=16.0.0=pypi_0
libclang13=15.0.7=default_h77d9078_1
libdeflate=1.18=hcfcfb64_0
libffi=3.4.2=h8ffe710_5
libglib=2.74.1=he8f3873_1
libhwloc=2.9.0=h51c2c0f_0
libiconv=1.17=h8ffe710_0
libjpeg-turbo=2.1.5.1=hcfcfb64_0
liblapack=3.9.0=16_win64_mkl
libogg=1.3.4=h8ffe710_1
libopus=1.3.1=h8ffe710_1
libpng=1.6.39=h19919ed_0
librosa=0.9.2=pypi_0
libsqlite=3.40.0=hcfcfb64_0
libtiff=4.5.0=h6c8260b_6
libvorbis=1.3.7=h0e60522_0
libwebp-base=1.3.0=hcfcfb64_0
libxcb=1.13=hcd874cb_1004
libxml2=2.10.3=hc3477c8_6
libzlib=1.2.13=hcfcfb64_4
llvmlite=0.39.1=pypi_0
m2w64-gcc-libgfortran=5.3.0=6
m2w64-gcc-libs=5.3.0=7
m2w64-gcc-libs-core=5.3.0=7
m2w64-gmp=6.1.0=2
m2w64-libwinpthread-git=5.0.0.4634.697f757=2
mako=1.2.4=pypi_0
markdown=3.4.3=pypi_0
markupsafe=2.1.2=pypi_0
matplotlib=3.7.1=py39hcbf5309_0
matplotlib-base=3.7.1=py39haf65ace_0
mkl=2022.1.0=h6a75c08_874
moviepy=1.0.3=pyhd8ed1ab_1
mpmath=1.3.0=pypi_0
msys2-conda-epoch=20160418=1
multidict=6.0.4=pypi_0
munkres=1.1.4=pyh9f0ad1d_0
murmurhash=1.0.9=pypi_0
networkx=2.8.8=pypi_0
nltk=3.8.1=pypi_0
numba=0.56.4=pypi_0
numpy=1.23.5=pypi_0
oauthlib=3.2.2=pypi_0
olefile=0.46=pypi_0
omegaconf=2.3.0=pypi_0
openh264=2.3.1=h63175ca_2
openjpeg=2.5.0=ha2aaf27_2
openssl=3.1.0=hcfcfb64_0
opt-einsum=3.3.0=pypi_0
optuna=3.1.0=pypi_0
packaging=23.0=pyhd8ed1ab_0
pandas=1.5.3=py39h2ba5b7c_0
pcre2=10.40=h17e33f8_0
pdfminer=20191125=pyhd8ed1ab_1
pdfminer-six=20221105=pypi_0
pillow=9.4.0=py39haa1d754_2
pip=23.0.1=pyhd8ed1ab_0
plac=1.1.3=pypi_0
platformdirs=3.2.0=pypi_0
plotly=5.13.1=pypi_0
ply=3.11=py_1
pooch=1.7.0=pypi_0
preprocess=1.2.3=py_1
preshed=3.0.8=pypi_0
primepy=1.3=pypi_0
proglog=0.1.9=py_0
protobuf=3.19.6=pypi_0
pthread-stubs=0.4=hcd874cb_1001
pthreads-win32=2.9.1=hfa6e2cd_3
pyannote-audio=2.1.1=pypi_0
pyannote-core=4.5=pypi_0
pyannote-database=4.1.3=pypi_0
pyannote-metrics=3.2.1=pypi_0
pyannote-pipeline=2.3=pypi_0
pyasn1=0.4.8=pypi_0
pyasn1-modules=0.2.8=pypi_0
pyaudio=0.2.13=pypi_0
pycparser=2.21=pyhd8ed1ab_0
pycryptodome=3.17=pypi_0
pydantic=1.10.7=py39ha55989b_0
pydeprecate=0.3.2=pypi_0
pygments=2.14.0=pypi_0
pyopenssl=23.1.1=pyhd8ed1ab_0
pyparsing=3.0.9=pyhd8ed1ab_0
pyqt=5.15.7=py39hb77abff_3
pyqt5-sip=12.11.0=py39h99910a6_3
pyresparser=1.0.6=pypi_0
pyrsistent=0.19.3=pypi_0
pysocks=1.7.1=pyh0701188_6
python=3.9.16=h4de0772_0_cpython
python-dateutil=2.8.2=pyhd8ed1ab_0
python-speech-features=0.6=pypi_0
python_abi=3.9=3_cp39
pytorch-lightning=1.6.5=pypi_0
pytorch-metric-learning=1.7.3=pypi_0
pytz=2023.3=pyhd8ed1ab_0
pyyaml=6.0=pypi_0
qt-main=5.15.8=h88fe7eb_7
regex=2023.3.23=pypi_0
requests=2.28.2=pyhd8ed1ab_0
requests-oauthlib=1.3.1=pypi_0
resampy=0.4.2=pypi_0
rich=12.6.0=pypi_0
rsa=4.9=pypi_0
ruamel-yaml=0.17.21=pypi_0
ruamel-yaml-clib=0.2.7=pypi_0
scikit-learn=1.2.0=py39hd77b12b_1
scipy=1.9.3=py39hfbf2dce_2
semver=2.13.0=pypi_0
sentencepiece=0.1.97=pypi_0
setuptools=67.6.1=pyhd8ed1ab_0
shellingham=1.5.0.post1=pypi_0
simplejson=3.18.4=pypi_0
singledispatchmethod=1.0=pypi_0
sip=6.7.7=py39h99910a6_0
six=1.16.0=pyh6c4a22f_0
smart-open=6.3.0=pypi_0
sniffio=1.3.0=pyhd8ed1ab_0
sortedcontainers=2.4.0=pypi_0
soundfile=0.10.3.post1=pypi_0
spacy=2.3.9=pypi_0
speechbrain=0.5.14=pypi_0
sqlalchemy=2.0.7=pypi_0
sqlite3=3.8.6=0
srsly=1.0.6=pypi_0
starlette=0.26.1=pyhd8ed1ab_0
svt-av1=1.4.1=h63175ca_0
sympy=1.11.1=pypi_0
tabulate=0.9.0=pypi_0
tbb=2021.8.0=h91493d7_0
tenacity=8.2.2=pypi_0
tensorboard=2.10.1=pypi_0
tensorboard-data-server=0.6.1=pypi_0
tensorboard-plugin-wit=1.8.1=pypi_0
tensorflow=2.10.1=pypi_0
tensorflow-estimator=2.10.0=pypi_0
tensorflow-io-gcs-filesystem=0.31.0=pypi_0
termcolor=2.2.0=pypi_0
texthero=1.1.0=pypi_0
thinc=7.4.6=pypi_0
threadpoolctl=3.1.0=pyh8a188c0_0
tk=8.6.12=h8ffe710_0
toml=0.10.2=pyhd8ed1ab_0
torch=1.13.1=pypi_0
torch-audiomentations=0.11.0=pypi_0
torch-pitch-shift=1.2.2=pypi_0
torchaudio=0.13.1=pypi_0
torchmetrics=0.11.4=pypi_0
tornado=6.2=py39ha55989b_1
tqdm=4.65.0=pyhd8ed1ab_1
typer=0.7.0=pypi_0
typing-extensions=4.5.0=hd8ed1ab_0
typing_extensions=4.5.0=pyha770c72_0
tzdata=2023c=h71feb2d_0
ucrt=10.0.22621.0=h57928b3_0
unicodedata2=15.0.0=py39ha55989b_0
unidecode=1.3.6=pypi_0
urllib3=1.26.15=pyhd8ed1ab_0
uvicorn=0.21.1=py39hcbf5309_0
vc=14.3=hb6edc58_10
vs2015_runtime=14.34.31931=h4c5c07a_10
wasabi=0.10.1=pypi_0
werkzeug=2.2.3=pypi_0
wheel=0.40.0=pyhd8ed1ab_0
win_inet_pton=1.1.0=pyhd8ed1ab_6
wordcloud=1.8.2.2=pypi_0
wrapt=1.15.0=pypi_0
x264=1!164.3095=h8ffe710_2
x265=3.5=h2d74725_3
xorg-libxau=1.0.9=hcd874cb_0
xorg-libxdmcp=1.1.3=hcd874cb_0
xz=5.2.6=h8d14728_0
yarl=1.8.2=pypi_0
zipp=3.15.0=pyhd8ed1ab_0
zstd=1.5.2=h12be248_6
from fastapi import FastAPI
import routes.resume as resumes
import routes.voice as voice

app = FastAPI()
app.include_router(resumes.router)
app.include_router(voice.router)

@app.get("/")
def read_root():
...
absl-py==1.4.0
aiohttp==3.8.4
aiosignal==1.3.1
alembic==1.10.2
antlr4-python3-runtime==4.9.3
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1666191106763/work/dist
asteroid-filterbanks==0.4.0
astunparse==1.6.3
async-timeout==4.0.2
attrs==22.2.0
audioread==3.0.0
backports.cached-property==1.0.2
blis==0.7.9
brotlipy @ file:///D:/bld/brotlipy_1666764815687/work
cachetools==5.3.0
catalogue==1.0.2
certifi==2022.12.7
cffi @ file:///D:/bld/cffi_1671179514672/work
chardet==5.1.0
charset-normalizer==3.1.0
click @ file:///D:/bld/click_1666798499870/work
cmaes==0.9.1
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
colorlog==6.7.0
commonmark==0.9.1
contourpy @ file:///D:/bld/contourpy_1673633852898/work
cryptography @ file:///D:/bld/cryptography-split_1679811407000/work
cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1635519461629/work
cymem==2.0.7
decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
docopt==0.6.2
docx2txt==0.8
einops==0.3.2
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
fastapi @ file:///home/conda/feedstock_root/build_artifacts/fastapi_1679196090342/work
filelock==3.10.7
flatbuffers==23.3.3
fonttools @ file:///D:/bld/fonttools_1680021390608/work
frozenlist==1.3.3
fsspec==2023.3.0
future @ file:///home/conda/feedstock_root/build_artifacts/future_1673596611778/work
gast==0.4.0
gensim==3.8.1
google-auth==2.17.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
greenlet==2.0.2
grpcio==1.53.0
h11 @ file:///home/conda/feedstock_root/build_artifacts/h11_1664132893548/work
h5py==3.8.0
hmmlearn==0.2.8
huggingface-hub==0.13.3
HyperPyYAML==1.1.0
idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1663625384323/work
imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1679914882579/work
imageio-ffmpeg @ file:///home/conda/feedstock_root/build_artifacts/imageio-ffmpeg_1673483481485/work
importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1679167925176/work
importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1676919000169/work
joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1663332044897/work
jsonschema==4.17.3
julius==0.2.7
keras==2.10.0
Keras-Preprocessing==1.1.2
kiwisolver @ file:///D:/bld/kiwisolver_1666805897768/work
libclang==16.0.0
librosa==0.9.2
llvmlite==0.39.1
Mako==1.2.4
Markdown==3.4.3
MarkupSafe==2.1.2
matplotlib @ file:///D:/bld/matplotlib-suite_1678135799522/work
moviepy @ file:///home/conda/feedstock_root/build_artifacts/moviepy_1665160419595/work
mpmath==1.3.0
multidict==6.0.4
munkres==1.1.4
murmurhash==1.0.9
networkx==2.8.8
nltk==3.8.1
numba==0.56.4
numpy==1.23.5
oauthlib==3.2.2
olefile==0.46
omegaconf==2.3.0
opt-einsum==3.3.0
optuna==3.1.0
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1673482170163/work
pandas @ file:///D:/bld/pandas_1674136542219/work
pdfminer @ file:///home/conda/feedstock_root/build_artifacts/pdfminer_1613401440402/work
pdfminer.six==20221105
Pillow @ file:///D:/bld/pillow_1678273632076/work
plac==1.1.3
platformdirs==3.2.0
plotly==5.13.1
ply==3.11
pooch==1.7.0
preprocess==1.2.3
preshed==3.0.8
primePy==1.3
proglog==0.1.9
protobuf==3.19.6
pyannote.audio==2.1.1
pyannote.core==4.5
pyannote.database==4.1.3
pyannote.metrics==3.2.1
pyannote.pipeline==2.3
pyasn1==0.4.8
pyasn1-modules==0.2.8
PyAudio==0.2.13
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
pycryptodome==3.17
pydantic @ file:///D:/bld/pydantic_1679565539355/work
pyDeprecate==0.3.2
Pygments==2.14.0
pyOpenSSL @ file:///home/conda/feedstock_root/build_artifacts/pyopenssl_1680037383858/work
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1652235407899/work
PyQt5==5.15.7
PyQt5-sip @ file:///D:/bld/pyqt-split_1674666735227/work/pyqt_sip
pyresparser==1.0.6
pyrsistent==0.19.3
PySocks @ file:///D:/bld/pysocks_1661604991356/work
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
python-speech-features==0.6
pytorch-lightning==1.6.5
pytorch-metric-learning==1.7.3
pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1680088766131/work
PyYAML==6.0
regex==2023.3.23
requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1673863902341/work
requests-oauthlib==1.3.1
resampy==0.4.2
rich==12.6.0
rsa==4.9
ruamel.yaml==0.17.21
ruamel.yaml.clib==0.2.7
scikit-learn @ file:///C:/b/abs_e01rh8f1vi/croot/scikit-learn_1675454931501/work
scipy==1.9.3
semver==2.13.0
sentencepiece==0.1.97
shellingham==1.5.0.post1
simplejson==3.18.4
singledispatchmethod==1.0
sip @ file:///D:/bld/sip_1675696791179/work
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
smart-open==6.3.0
sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work
sortedcontainers==2.4.0
SoundFile==0.10.3.post1
spacy==2.3.9
speechbrain==0.5.14
SQLAlchemy==2.0.7
srsly==1.0.6
starlette @ file:///home/conda/feedstock_root/build_artifacts/starlette-recipe_1678817698143/work
sympy==1.11.1
tabulate==0.9.0
tenacity==8.2.2
tensorboard==2.10.1
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorflow==2.10.1
tensorflow-estimator==2.10.0
tensorflow-io-gcs-filesystem==0.31.0
termcolor==2.2.0
texthero==1.1.0
thinc==7.4.6
threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1643647933166/work
toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work
torch==1.13.1
torch-audiomentations==0.11.0
torch-pitch-shift==1.2.2
torchaudio==0.13.1
torchmetrics==0.11.4
tornado @ file:///D:/bld/tornado_1666788767305/work
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1677948868469/work
typer==0.7.0
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1678559861143/work
unicodedata2 @ file:///D:/bld/unicodedata2_1667240049903/work
Unidecode==1.3.6
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1678635778344/work
uvicorn @ file:///D:/bld/uvicorn-split_1678984112139/work
wasabi==0.10.1
Werkzeug==2.2.3
win-inet-pton @ file:///D:/bld/win_inet_pton_1667051142467/work
wordcloud==1.8.2.2
wrapt==1.15.0
yarl==1.8.2
zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1677313463193/work
tqdm==4.61.2
pdf2image==1.16.0
pandas==1.3.0
pytesseract==0.3.8
from fastapi import APIRouter
import moviepy.editor
import pickle
import os
import urllib.request
import numpy as np
from baseModels.payloads import EnrollVoice, AnalyseVoice, VerifyVoice
from scipy.spatial.distance import euclidean
from scripts.processing import extract_input_feature
from keras.models import load_model
from scripts.parameters import MODEL_FILE, MAX_SEC
from scripts.voice_feature_extraction import get_embedding

router = APIRouter(prefix='/voice')

@router.post("/enroll")
def enroll(payload: EnrollVoice):
    # Download the self-introduction video and extract its audio track
    video_filename = 'voices/' + payload.user_id + '.mp4'
    urllib.request.urlretrieve(payload.video_url, video_filename)
    video = moviepy.editor.VideoFileClip(video_filename)
    audio = video.audio
    audio_filename = 'voices/auth/temp/' + payload.user_id + '.wav'
    audio.write_audiofile(audio_filename, codec='pcm_s16le', bitrate='50k')
    # Extract and enroll the voice embedding
    model = load_model(MODEL_FILE)
    enroll_result = get_embedding(model, audio_filename, MAX_SEC)
    enroll_embs = np.array(enroll_result.tolist())
    np.save("voices/auth/embed/" + payload.user_id + ".npy", enroll_embs)
    # Clean up the downloaded video and temporary audio
    try:
        os.remove(video_filename)
    except OSError:
        print('error')
    try:
        os.remove(audio_filename)
    except OSError:
        print('error')
    return 'SUCCESS'

@router.post("/verify")
def verify(payload: VerifyVoice):
    # Download the interview video and extract its audio track
    video_filename = 'voices/interviews/' + payload.application_id + '.mp4'
    urllib.request.urlretrieve(payload.video_url, video_filename)
    video = moviepy.editor.VideoFileClip(video_filename)
    audio = video.audio
    audio_filename = 'voices/interviews/' + payload.application_id + '.wav'
    audio.write_audiofile(audio_filename, codec='pcm_s16le', bitrate='50k')
    # Embed the interview audio and compare it with the enrolled embedding
    model = load_model(MODEL_FILE)
    test_result = get_embedding(model, audio_filename, MAX_SEC)
    test_embs = np.array(test_result.tolist())
    enroll_embs = np.load("voices/auth/embed/" + payload.user_id + ".npy")
    distance = euclidean(test_embs, enroll_embs)  # smaller distance = more similar voices
    try:
        os.remove(video_filename)
    except OSError:
        print('error')
    return round(1 - distance, 5)

@router.post("/analyse")
def analyse(payload: AnalyseVoice):
    # Classify the emotion of the requested [start, end] slice of the interview audio
    model = pickle.load(open("models/voice_classifier.model", "rb"))
    filename = 'voices/interviews/' + payload.application_id + '.wav'
    features = extract_input_feature(filename, mfcc=True, chroma=True, mel=True,
                                     start=float(payload.start), end=float(payload.end)).reshape(1, -1)
    result = model.predict(features)[0]
    return result
\ No newline at end of file
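An offline equivalent of the enroll/verify pair above, useful for testing without HTTP. Paths follow the conventions in these routes: the .npy file is what /voice/enroll writes, and the model files come from the Drive links in the README.

```python
import numpy as np
from keras.models import load_model
from scipy.spatial.distance import euclidean
from scripts.parameters import MODEL_FILE, MAX_SEC
from scripts.voice_feature_extraction import get_embedding

model = load_model(MODEL_FILE)
test_embs = np.array(get_embedding(model, "voices/interviews/app456.wav", MAX_SEC).tolist())
enroll_embs = np.load("voices/auth/embed/user123.npy")
print(round(1 - euclidean(test_embs, enroll_embs), 5))  # the same score /voice/verify returns
```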
from pyaudio import paInt16
# Signal processing
SAMPLE_RATE = 16000
PREEMPHASIS_ALPHA = 0.97
FRAME_LEN = 0.025
FRAME_STEP = 0.01
NUM_FFT = 512
BUCKET_STEP = 1
MAX_SEC = 10
# Model
MODEL_FILE = "models/voice_auth_model_cnn"
COST_METRIC = "cosine" # euclidean or cosine
INPUT_SHAPE=(NUM_FFT,None,1)
# IO
EMBED_LIST_FILE = "voices/auth/embed"
# Recognition
THRESHOLD = 0.2
\ No newline at end of file
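A note on how these constants are consumed: MODEL_FILE and MAX_SEC feed the embedding code, but THRESHOLD is never applied anywhere in this diff, so interpreting the /voice/verify score is left to the caller. A hedged sketch of that acceptance check (an assumption about intent, not committed behavior):

```python
from scripts.parameters import THRESHOLD

def is_same_speaker(verify_score: float) -> bool:
    # /voice/verify returns 1 - euclidean(test, enrolled), so
    # distance < THRESHOLD  <=>  score > 1 - THRESHOLD (0.8 with THRESHOLD = 0.2)
    return verify_score > 1 - THRESHOLD
```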
import pandas as pd
import texthero as hero
import numpy as np
import librosa
from pyresparser.utils import extract_text
from PIL import Image
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from pyannote.audio import Audio
from pyannote.core import Segment

# skills = {
#     "primary" : ['Python', 'Machine Learning', 'node.js', 'AWS', 'Kubernetese', 'NLP', 'GCP', 'predective', 'OCR'],
#     "secondary" : ['data', 'science', 'modeling', 'anomaly', 'privacy', 'visualization', 'OCR'],
# }

class document_processing:
@@ -119,3 +118,39 @@ class document_processing:
            'secondary_score': sec_score,
            'secondary_match': sec_match,
            'similarity': int(doc_sim)}
def extract_input_feature(file_name, **kwargs):
    """Extract an acoustic feature vector from a time slice of an audio file."""
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    start = kwargs.get("start")
    end = kwargs.get("end")
    sample_rate = 16000
    # crop the requested [start, end] segment and take the mono waveform
    audio = Audio(sample_rate=sample_rate, mono=True)
    segment = Segment(start, end)
    sound, sample_rate = audio.crop(file_name, segment)
    X = sound[0].numpy()
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
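With the flags /voice/analyse passes (mfcc + chroma + mel), the returned vector is 180-dimensional: 40 MFCC means, 12 chroma means, and 128 mel-band means. A small usage sketch (the file path is hypothetical):

```python
from scripts.processing import extract_input_feature

feats = extract_input_feature("voices/interviews/app456.wav",
                              mfcc=True, chroma=True, mel=True, start=0.0, end=5.0)
assert feats.shape == (180,)  # 40 + 12 + 128
```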
import glob
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import librosa
import soundfile
import pickle

# emotion codes embedded in the RAVDESS-style file names (third dash-separated field)
EMOTIONS = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}
AVAILABLE_EMOTIONS = {
    "angry",
    "sad",
    "neutral",
    "happy"
}

def extract_feature(file_name, **kwargs):
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    sample_rate = 16000
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result

# update random_state=9
def load_data(test_size=0.2, random_state=7):
    X, y = [], []
    for file in glob.glob("../data/voice/Actor_*/*.wav"):
        basename = os.path.basename(file)
        emotion = EMOTIONS[basename.split("-")[2]]
        if emotion not in AVAILABLE_EMOTIONS:
            continue
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        X.append(features)
        y.append(emotion)
    return train_test_split(np.array(X), y, test_size=test_size, random_state=random_state)

def train_voice():
    X_train, X_test, y_train, y_test = load_data(test_size=0.25)
    model = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,
                          hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    if not os.path.isdir("../result"):
        os.mkdir("../result")
    pickle.dump(model, open("../result/mlp_classifier.model", "wb"))

train_voice()
\ No newline at end of file
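The script saves the classifier as ../result/mlp_classifier.model, while /voice/analyse loads models/voice_classifier.model; presumably the trained file is renamed into the models directory (or taken from the Drive link in the README). A loading and prediction sketch under that assumption:

```python
import pickle

model = pickle.load(open("models/voice_classifier.model", "rb"))
features = extract_feature("clip.wav", mfcc=True, chroma=True, mel=True).reshape(1, -1)
print(model.predict(features)[0])  # one of: angry, sad, neutral, happy
```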
import os
import numpy as np
import pandas as pd
# from scipy.spatial.distance import cdist, euclidean, cosine
from scripts.voice_preprocess import get_fft_spectrum
from scripts.parameters import BUCKET_STEP, FRAME_STEP, MAX_SEC

def buckets(max_time, steptime, frameskip):
    buckets = {}
    frames_per_sec = int(1 / frameskip)
    end_frame = int(max_time * frames_per_sec)
    step_frame = int(steptime * frames_per_sec)
    for i in range(0, end_frame + 1, step_frame):
        s = i
        s = np.floor((s - 7 + 2) / 2) + 1  # for first conv layer
        s = np.floor((s - 3) / 2) + 1      # for first maxpool layer
        s = np.floor((s - 5 + 2) / 2) + 1  # for second conv layer
        s = np.floor((s - 3) / 2) + 1      # for second maxpool layer
        s = np.floor((s - 3 + 2) / 1) + 1  # for third conv layer
        s = np.floor((s - 3 + 2) / 1) + 1  # for fourth conv layer
        s = np.floor((s - 3 + 2) / 1) + 1  # for fifth conv layer
        s = np.floor((s - 3) / 2) + 1      # for fifth maxpool layer
        s = np.floor((s - 1) / 1) + 1      # for sixth fully connected layer
        if s > 0:
            buckets[i] = int(s)
    return buckets

def get_embedding(model, wav_file, max_time):
    buckets_var = buckets(MAX_SEC, BUCKET_STEP, FRAME_STEP)
    signal = get_fft_spectrum(wav_file, buckets_var)
    embedding = np.squeeze(model.predict(signal.reshape(1, *signal.shape, 1)))
    return embedding

def get_embedding_batch(model, wav_files, max_time):
    return [get_embedding(model, wav_file, max_time) for wav_file in wav_files]

def get_embeddings_from_list_file(model, list_file, max_time):
    buckets_var = buckets(MAX_SEC, BUCKET_STEP, FRAME_STEP)
    result = pd.read_csv(list_file, delimiter=",")
    result['features'] = result['filename'].apply(lambda x: get_fft_spectrum(x, buckets_var))
    result['embedding'] = result['features'].apply(lambda x: np.squeeze(model.predict(x.reshape(1, *x.shape, 1))))
    return result[['filename', 'speaker', 'embedding']]
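buckets() precomputes, for each candidate input length in frames, the width of the CNN's final feature map (traced layer by layer in the comments above), so get_fft_spectrum can truncate a spectrum to the nearest width the model accepts. With the defaults from scripts/parameters.py:

```python
# keys: input frame counts in 1 s steps at 100 frames/s; values: feature-map widths
print(buckets(10, 1, 0.01))  # MAX_SEC=10, BUCKET_STEP=1, FRAME_STEP=0.01
```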
import librosa
import numpy as np
from scipy.signal import lfilter, butter
from python_speech_features import sigproc
from scripts.parameters import SAMPLE_RATE, PREEMPHASIS_ALPHA, FRAME_LEN, FRAME_STEP, NUM_FFT

def load(filename, sample_rate):
    audio, sr = librosa.load(filename, sr=sample_rate, mono=True)
    audio = audio.flatten()
    return audio

def normalize_frames(m, epsilon=1e-12):
    return np.array([(v - np.mean(v)) / max(np.std(v), epsilon) for v in m])

# DC and dither removal, adapted from:
# https://github.com/christianvazquez7/ivector/blob/master/MSRIT/rm_dc_n_dither.m
def remove_dc_and_dither(sin, sample_rate):
    if sample_rate == 16e3:
        alpha = 0.99
    elif sample_rate == 8e3:
        alpha = 0.999
    else:
        print("Sample rate must be 16kHz or 8kHz only")
        exit(1)
    sin = lfilter([1, -1], [1, -alpha], sin)
    dither = np.random.random_sample(len(sin)) + np.random.random_sample(len(sin)) - 1
    spow = np.std(dither)
    sout = sin + 1e-6 * spow * dither
    return sout

def get_fft_spectrum(filename, buckets):
    signal = load(filename, SAMPLE_RATE)
    signal *= 2**15  # scale librosa's [-1, 1] floats back to 16-bit integer range
    # get FFT spectrum
    signal = remove_dc_and_dither(signal, SAMPLE_RATE)
    signal = sigproc.preemphasis(signal, coeff=PREEMPHASIS_ALPHA)
    frames = sigproc.framesig(signal, frame_len=FRAME_LEN*SAMPLE_RATE, frame_step=FRAME_STEP*SAMPLE_RATE, winfunc=np.hamming)
    fft = abs(np.fft.fft(frames, n=NUM_FFT))
    fft_norm = normalize_frames(fft.T)
    # truncate to the largest bucket size that fits
    rsize = max(k for k in buckets if k <= fft_norm.shape[1])
    rstart = int((fft_norm.shape[1] - rsize) / 2)
    out = fft_norm[:, rstart:rstart + rsize]
    return out
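The spectrum handed to the CNN is therefore (NUM_FFT, width): one normalized 512-point FFT column per 10 ms frame, center-truncated to the largest bucket width that fits. For example (clip length hypothetical):

```python
spec = get_fft_spectrum("voices/auth/temp/user123.wav", buckets(10, 1, 0.01))
print(spec.shape)  # e.g. (512, 300) for a clip of roughly 3 seconds
```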