Commit 4bfd17e0 authored by Birahavi Kugathasan's avatar Birahavi Kugathasan

Resume analyser

parent ed59c38e
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
"author": "Namit Nathwani", "author": "Namit Nathwani",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"axios": "^1.3.4",
"bcryptjs": "^2.4.3", "bcryptjs": "^2.4.3",
"cors": "^2.8.5", "cors": "^2.8.5",
"express": "^4.18.2", "express": "^4.18.2",
......
...@@ -13,3 +13,4 @@ export const DEFAULT_CONTROLS = { ...@@ -13,3 +13,4 @@ export const DEFAULT_CONTROLS = {
use: true, use: true,
}, },
}; };
export const UTILITY_SERVER = "http://127.0.0.1:8000";
...@@ -47,6 +47,16 @@ export type AuthType = { ...@@ -47,6 +47,16 @@ export type AuthType = {
controls?: ControlsType; controls?: ControlsType;
}; };
// Shape of the parsed-resume payload returned by the Python utility server
// (see utilities/apis/resume.ts). Field names are snake_case because they
// mirror the pyresparser output verbatim.
export type ResumeDataType = {
  skills: string[] | null;
  degree: string[] | null;
  designation: string[] | null;
  experience: string[] | null;
  company_names: string[] | null;
  no_of_pages: number;
  // Total years of experience as computed by the parser.
  total_experience: number;
};
export type CandidateType = { export type CandidateType = {
_id?: string; _id?: string;
name: string; name: string;
...@@ -60,6 +70,9 @@ export type CandidateType = { ...@@ -60,6 +70,9 @@ export type CandidateType = {
dateOfBirth: string; dateOfBirth: string;
jobIds: string[]; jobIds: string[];
profilePicture: string; profilePicture: string;
state: "INTIAL" | "READY";
resume?: string;
resumeData?: ResumeDataType;
}; };
export type OrganizationType = { export type OrganizationType = {
...@@ -87,6 +100,41 @@ export type ControlsType = { ...@@ -87,6 +100,41 @@ export type ControlsType = {
}; };
}; };
// A job posting created by an organization.
export type JobType = {
  _id: string;
  title: string;
  description: string;
  // Must-have skills; used for the primary resume-match score.
  primarySkills: string[];
  // Nice-to-have skills; used for the secondary resume-match score.
  secondarySkills?: string[];
  salary: {
    min: number;
    max: number;
    currency: string;
  };
  // Application document ids (refs into the "applications" collection).
  applications: string[];
  // Owning organization's user id.
  organization: string;
};
// A candidate's application to a job, including the resume-match scores
// computed by the utility server at apply time.
export type ApplicationType = {
  candidate: string;
  job: string;
  status: "Pending" | "Accepted" | "In progress" | "Rejected";
  interview?: {
    date: string;
    time: string;
    link: string;
    videoRef?: string;
  };
  score: {
    primary: number;
    // NOTE(review): "primatyMatch" is a typo for "primaryMatch", but it is
    // persisted in the schema and populated by routes/jobs — renaming it
    // would require a coordinated data + code migration.
    primatyMatch: string[];
    secondary: number;
    secondaryMatch: string[];
    similarity: number;
    // primary + secondary + similarity.
    total: number;
  };
};
export interface TypedRequest<T extends Query, U> extends Request { export interface TypedRequest<T extends Query, U> extends Request {
body: U; body: U;
query: T; query: T;
......
...@@ -8,11 +8,10 @@ app.use(cors()); ...@@ -8,11 +8,10 @@ app.use(cors());
// Routes // Routes
const authRoute = require("./routes/auth"); const authRoute = require("./routes/auth");
const userRoute = require("./routes/user");
// Environment constants const jobsRoute = require("./routes/jobs");
// Service Initialisation // Service Initialisation
mongoose.connect(MONGO_URL, { mongoose.connect(MONGO_URL, {
useFindAndModify: false, useFindAndModify: false,
useNewUrlParser: true, useNewUrlParser: true,
...@@ -20,13 +19,6 @@ mongoose.connect(MONGO_URL, { ...@@ -20,13 +19,6 @@ mongoose.connect(MONGO_URL, {
useCreateIndex: true, useCreateIndex: true,
}); });
// const db = mongoose.connection;
// db.on("error", (error) => console.log(error, "connection error:"));
// db.once("open", () => {
// console.log("Connected to MongoDB Instance");
// });
// Express Initialisation // Express Initialisation
app.use(express.urlencoded({ extended: true })); app.use(express.urlencoded({ extended: true }));
app.use(express.json()); app.use(express.json());
...@@ -35,5 +27,7 @@ app.use(express.urlencoded({ extended: false })); ...@@ -35,5 +27,7 @@ app.use(express.urlencoded({ extended: false }));
// Routes // Routes
app.use("/auth", authRoute); app.use("/auth", authRoute);
app.use("/user", userRoute);
app.use("/jobs", jobsRoute);
app.listen(API_PORT, () => console.log(`Listening on port ${API_PORT}`)); app.listen(API_PORT, () => console.log(`Listening on port ${API_PORT}`));
import { Request, Response, NextFunction } from "express"; import { Request, Response, NextFunction } from "express";
import * as jwt from "jsonwebtoken"; import * as jwt from "jsonwebtoken";
import { JWT_SECRET } from "../config/contants"; import { JWT_SECRET } from "../config/contants";
import { TypedRequest, USER_TYPE } from "../config/types";
import Auth from "../models/Auth";
export const authMiddleware = ( export const authMiddleware = (
req: Request, req: Request,
...@@ -21,3 +23,37 @@ export const authMiddleware = ( ...@@ -21,3 +23,37 @@ export const authMiddleware = (
} }
} }
}; };
// Allows the request through only when `userId` (query param) belongs to an
// ORGANIZATION account; otherwise responds 400 with the reason.
export const organizationMiddleware = async (
  req: TypedRequest<{ userId: string }, any>,
  res: Response,
  next: NextFunction
) => {
  try {
    const account = await Auth.findOne({ userId: req.query.userId });
    if (account && account.userType === USER_TYPE.ORGANIZATION) {
      return next();
    }
    throw new Error("Organization not found");
  } catch (error) {
    // Fix: res.send(error) on an Error instance JSON-serializes to "{}",
    // hiding the reason from clients — send the message text instead.
    return res
      .status(400)
      .send(error instanceof Error ? error.message : String(error));
  }
};
// Allows the request through only when `userId` (query param) belongs to a
// CANDIDATE account; otherwise responds 400 with the reason.
export const candidateMiddleware = async (
  req: TypedRequest<{ userId: string }, any>,
  res: Response,
  next: NextFunction
) => {
  try {
    const account = await Auth.findOne({ userId: req.query.userId });
    if (account && account.userType === USER_TYPE.CANDIDATE) {
      return next();
    }
    throw new Error("Candidate not found");
  } catch (error) {
    // Fix: res.send(error) on an Error instance JSON-serializes to "{}",
    // hiding the reason from clients — send the message text instead.
    return res
      .status(400)
      .send(error instanceof Error ? error.message : String(error));
  }
};
import { Schema, model } from "mongoose";
import { ApplicationType } from "../config/types";

// Mongoose schema for a candidate's job application.
// Fix: the original used `require:` — mongoose's validator option is spelled
// `required:`; `require` is silently ignored, so optionality was never
// actually expressed.
const applicationSchema = new Schema<ApplicationType>({
  candidate: { type: Schema.Types.ObjectId, ref: "candidates" },
  job: { type: Schema.Types.ObjectId, ref: "jobs" },
  status: { type: String, required: false, default: "Pending" },
  interview: {
    type: {
      date: String,
      time: String,
      link: String,
      videoRef: String,
    },
    required: false,
  },
  score: {
    type: {
      primary: Number,
      // "primatyMatch" typo kept: it matches ApplicationType and the
      // score mapping in routes/jobs.ts; renaming needs a data migration.
      primatyMatch: [String],
      secondary: Number,
      secondaryMatch: [String],
      similarity: Number,
      total: Number,
    },
    required: false,
  },
});

const Application = model<ApplicationType>("applications", applicationSchema);
export default Application;
import { Schema, model } from "mongoose"; import { Schema, model } from "mongoose";
import { AddressType, CandidateType } from "../config/types"; import { AddressType, CandidateType, ResumeDataType } from "../config/types";
const AddressSchema = new Schema<AddressType>( const AddressSchema = new Schema<AddressType>(
{ {
...@@ -19,6 +19,18 @@ const ContactsSchema = new Schema<AddressType>( ...@@ -19,6 +19,18 @@ const ContactsSchema = new Schema<AddressType>(
}, },
{ id: false } { id: false }
); );
// Sub-schema for parsed resume data (see ResumeDataType).
// Fix: the original used `require:` — mongoose's validator option is spelled
// `required:`; `require` is silently ignored.
const ResumeDataSchema = new Schema<ResumeDataType>(
  {
    skills: { type: [String], required: false },
    degree: { type: [String], required: false },
    designation: { type: [String], required: false },
    experience: { type: [String], required: false },
    company_names: { type: [String], required: false },
    no_of_pages: { type: Number, required: false },
    total_experience: { type: Number, required: false },
  },
  // Sub-document: no virtual `id` getter needed.
  { id: false }
);
const candidateSchema = new Schema<CandidateType>({ const candidateSchema = new Schema<CandidateType>({
name: String, name: String,
...@@ -27,6 +39,9 @@ const candidateSchema = new Schema<CandidateType>({ ...@@ -27,6 +39,9 @@ const candidateSchema = new Schema<CandidateType>({
dateOfBirth: String, dateOfBirth: String,
jobIds: [{ type: Schema.Types.ObjectId, ref: "jobs" }], jobIds: [{ type: Schema.Types.ObjectId, ref: "jobs" }],
profilePicture: String, profilePicture: String,
state: { type: String, default: "INTIAL" },
resume: { type: String, require: false },
resumeData: { type: ResumeDataSchema, require: false },
}); });
const Candidates = model<CandidateType>("candidates", candidateSchema); const Candidates = model<CandidateType>("candidates", candidateSchema);
......
import { Schema, model } from "mongoose";
import { JobType } from "../config/types";

// Mongoose schema for job postings.
// Fixes:
// - `require:` → `required:` (mongoose's validator option; `require` is ignored).
// - `organization` was declared as an ARRAY of refs while JobType declares a
//   single `organization: string` and routes/jobs.ts assigns/queries it as a
//   single id — store it as one ref.
const jobSchema = new Schema<JobType>({
  title: String,
  description: String,
  primarySkills: { type: [String], required: true },
  secondarySkills: { type: [String], required: false },
  salary: {
    min: Number,
    max: Number,
    currency: String,
  },
  applications: [{ type: Schema.Types.ObjectId, ref: "applications" }],
  organization: { type: Schema.Types.ObjectId, ref: "organizations" },
});

const Jobs = model<JobType>("jobs", jobSchema);
export default Jobs;
...@@ -137,7 +137,7 @@ router.post("/login", async (req: TypedRequest<{}, SignInPayload>, res) => { ...@@ -137,7 +137,7 @@ router.post("/login", async (req: TypedRequest<{}, SignInPayload>, res) => {
} }
const token = await jwt.sign({ userId: auth.userId }, JWT_SECRET, { const token = await jwt.sign({ userId: auth.userId }, JWT_SECRET, {
expiresIn: "2h", expiresIn: "5h",
}); });
return res.json({ return res.json({
......
import { Router } from "express";
import {
ApplicationType,
JobType,
TypedRequest,
USER_TYPE,
} from "../config/types";
import {
authMiddleware,
candidateMiddleware,
organizationMiddleware,
} from "../middlewares/auth";
import Application from "../models/Application";
import Auth from "../models/Auth";
import Jobs from "../models/Job";
import ResumeAPI from "../utilities/apis/resume";
const router = Router();
// GET /jobs — candidates see every job (with slim application info);
// organizations see only their own jobs, with applicant details populated.
router.get(
  "/",
  authMiddleware,
  async (req: TypedRequest<{ userId: string }, null>, res) => {
    try {
      const user = await Auth.findOne({ userId: req.query.userId });
      // Fix: `user` may be null (no auth record) — previously this fell
      // through to `user.userType` and surfaced as an opaque TypeError.
      if (!user) {
        return res.json({ error: "User not found", success: false });
      }
      let jobs;
      if (user.userType === USER_TYPE.CANDIDATE) {
        jobs = await Jobs.find()
          .populate({
            path: "applications",
            // Candidates only need to know whether/for-whom applications exist.
            select: ["candidate", "status"],
          })
          .populate({ path: "organization" });
      } else {
        jobs = await Jobs.find({ organization: req.query.userId }).populate({
          path: "applications",
          populate: {
            path: "candidate",
            select: [
              "name",
              "contacts",
              "dateOfBirth",
              "profilePicture",
              "resume",
              "resumeData",
            ],
          },
        });
      }
      return res.json({ jobs, success: true });
    } catch (error) {
      return res.json({ error, success: false });
    }
  }
);
router.post(
"/",
authMiddleware,
organizationMiddleware,
async (req: TypedRequest<{ userId: string }, JobType>, res) => {
try {
const newJob = new Jobs({ ...req.body, organization: req.query.userId });
const job = await newJob.save();
return res.json({ success: true, job });
} catch (error) {
return res.json({ success: false, error });
}
}
);
// PUT /jobs — update a job posting.
router.put(
  "/",
  authMiddleware,
  organizationMiddleware,
  async (req: TypedRequest<{ userId: string }, JobType>, res) => {
    try {
      // Fix: scope the update to the requesting organization. The original
      // findByIdAndUpdate let ANY organization modify ANY job (the DELETE
      // route already scopes by organization). Also strip `_id` from $set —
      // _id is immutable and must not appear in the update document.
      const { _id, ...fields } = req.body;
      const job = await Jobs.findOneAndUpdate(
        { _id, organization: req.query.userId },
        { $set: fields }
      );
      return res.json({ success: true, job });
    } catch (error) {
      return res.json({ success: false, error });
    }
  }
);
// DELETE /jobs — remove a job owned by the requesting organization.
router.delete(
  "/",
  authMiddleware,
  organizationMiddleware,
  async (req: TypedRequest<{ userId: string }, { jobId: string }>, res) => {
    try {
      await Jobs.deleteOne({
        organization: req.query.userId,
        _id: req.body.jobId,
      });
      return res.json({ success: true });
    } catch (error) {
      // Fix: the error was silently dropped — include it like every
      // sibling route does so failures are diagnosable.
      return res.json({ success: false, error });
    }
  }
);
// PUT /jobs/apply — candidate applies to a job. Scores the candidate's
// resume against the job via the Python utility server, persists the
// application with those scores, and links it onto the job document.
router.put(
  "/apply",
  authMiddleware,
  candidateMiddleware,
  async (
    req: TypedRequest<
      { userId: string },
      { application: ApplicationType; resumeUrl: string }
    >,
    res
  ) => {
    try {
      const { application, resumeUrl } = req.body;
      // NOTE(review): no null-check — a bad application.job id throws here
      // and is reported through the catch-all below.
      const job = await Jobs.findById(application.job);
      // Remote scoring call (snake_case payload mirrors the Python API).
      const data: any = await ResumeAPI.getResumeScores({
        user_id: req.query.userId,
        resume_url: resumeUrl,
        primary_skills: job.primarySkills,
        secondary_skills: job.secondarySkills,
        job_desc: job.description,
      });
      // Map API response onto the stored score shape.
      // ("primatyMatch" typo matches ApplicationType/the schema.)
      const score: ApplicationType["score"] = {
        primary: data.primary_score,
        primatyMatch: data.primary_match,
        secondary: data.secondary_score,
        secondaryMatch: data.secondary_match,
        similarity: data.similarity,
        total: data.primary_score + data.secondary_score + data.similarity,
      };
      const newApplication = new Application({ ...application, score });
      const _application = await newApplication.save();
      // Link the saved application onto the job's applications list.
      job.applications.push(_application.id);
      await job.save();
      return res.json({
        success: true,
        applicationId: _application.id,
      });
    } catch (error) {
      return res.json({ success: false, error });
    }
  }
);
module.exports = router;
import { Router } from "express";
import { CandidateType, TypedRequest } from "../config/types";
import { authMiddleware } from "../middlewares/auth";
import Candidates from "../models/Candidate";
import ResumeAPI from "../utilities/apis/resume";
const router = Router();
// POST /user/candidate — update the candidate profile; when a resume URL is
// supplied, extract structured resume data via the utility server first.
router.post(
  "/candidate",
  authMiddleware,
  async (req: TypedRequest<{ userId: string }, CandidateType>, res) => {
    try {
      const update = req.body;
      if (req.body?.resume) {
        const data: any = await ResumeAPI.extractResumeData({
          user_id: req.query.userId,
          resume_url: req.body.resume,
        });
        update.resumeData = data;
      }
      // Fix: persist `update` explicitly. The original passed `$set: req.body`,
      // which only included resumeData because `update` aliases `req.body` —
      // a fragile implicit dependency.
      await Candidates.findByIdAndUpdate(req.query.userId, { $set: update });
      return res.status(200).json({ data: update });
    } catch (error) {
      return res.status(400).send(error);
    }
  }
);
module.exports = router;
import { ResumeDataType } from "../../config/types";
import { request } from "../requests";
/** Thin client for the Python resume-analysis utility server. */
export default class ResumeAPI {
  /** POST /resume/extract — parse structured fields out of a resume PDF. */
  static extractResumeData(payload: { resume_url: string; user_id: string }) {
    return request("<BASE_URL>/resume/extract", "POST", payload);
  }

  /** POST /resume/get-scores — score a resume against a job's skills/description. */
  static getResumeScores(payload: {
    resume_url: string;
    user_id: string;
    primary_skills: string[];
    secondary_skills: string[];
    job_desc: string;
  }) {
    return request("<BASE_URL>/resume/get-scores", "POST", payload);
  }
}
...@@ -401,8 +401,15 @@ export const processAttempt = ({ ...@@ -401,8 +401,15 @@ export const processAttempt = ({
accepted: false, accepted: false,
}; };
result.accepted = const standardCheck = controls.standard.use
result.standard.inRange.full || result.fullStandard.inRange.full; ? result.standard.inRange.full
: true;
const fullStandardCheck = controls.fullStandard.use
? result.fullStandard.inRange.full
: true;
result.accepted = standardCheck || fullStandardCheck;
return result; return result;
}; };
......
import axios, { AxiosError, AxiosResponse, AxiosRequestConfig } from "axios";
import { UTILITY_SERVER } from "../config/contants";
// Pass-through response interceptor: currently a no-op placeholder
// (forwards successes and re-rejects errors unchanged). Kept as the hook
// point for future global response/error handling.
axios.interceptors.response.use(
  (response) => response,
  (error: AxiosError) => {
    return Promise.reject(error);
  }
);
/**
 * Issue an HTTP request to the utility server and resolve with the response
 * body. `<BASE_URL>` in `url` is replaced with UTILITY_SERVER. GET payloads
 * are sent as query params; other methods send a JSON body.
 * Rejects with the server's error body when available, else the AxiosError.
 *
 * Fix: the original wrapped axios in `new Promise(async (resolve, reject) =>`
 * — the async-executor anti-pattern (a synchronous throw inside the executor
 * becomes an unhandled rejection) and a redundant promise wrapper. Returning
 * the axios chain directly preserves the exact resolve/reject values.
 */
export const request = (
  url: AxiosRequestConfig["url"],
  method: AxiosRequestConfig["method"],
  requestData?: AxiosRequestConfig["data"] | AxiosRequestConfig["params"],
  contentType?: string
) => {
  const endpoint = url?.replace?.("<BASE_URL>", UTILITY_SERVER);
  const params = method === "GET" ? requestData : null;
  const data = method === "GET" ? null : requestData;
  return axios({
    url: endpoint,
    method,
    data,
    params,
    headers: {
      "Content-Type": contentType || "application/json",
    },
    timeout: 30000,
  })
    .then((response: AxiosResponse) => response.data)
    .catch((error: AxiosError) => {
      // Prefer the server's error payload when one exists.
      if (error?.response) {
        return Promise.reject(error.response.data);
      }
      return Promise.reject(error);
    });
};
...@@ -276,6 +276,20 @@ astral-regex@^2.0.0: ...@@ -276,6 +276,20 @@ astral-regex@^2.0.0:
resolved "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz" resolved "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz"
integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ== integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
asynckit@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
axios@^1.3.4:
version "1.3.4"
resolved "https://registry.yarnpkg.com/axios/-/axios-1.3.4.tgz#f5760cefd9cfb51fd2481acf88c05f67c4523024"
integrity sha512-toYm+Bsyl6VC5wSkfkbbNB6ROv7KY93PEBBL6xyDczaIHasAiv4wPqQ/c4RjoQzipxRD2W5g21cOqQulZ7rHwQ==
dependencies:
follow-redirects "^1.15.0"
form-data "^4.0.0"
proxy-from-env "^1.1.0"
balanced-match@^1.0.0: balanced-match@^1.0.0:
version "1.0.2" version "1.0.2"
resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz" resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz"
...@@ -441,6 +455,13 @@ color-name@~1.1.4: ...@@ -441,6 +455,13 @@ color-name@~1.1.4:
resolved "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz" resolved "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
combined-stream@^1.0.8:
version "1.0.8"
resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
dependencies:
delayed-stream "~1.0.0"
complex.js@^2.0.11: complex.js@^2.0.11:
version "2.1.1" version "2.1.1"
resolved "https://registry.npmjs.org/complex.js/-/complex.js-2.1.1.tgz" resolved "https://registry.npmjs.org/complex.js/-/complex.js-2.1.1.tgz"
...@@ -546,6 +567,11 @@ define-properties@^1.1.3, define-properties@^1.1.4: ...@@ -546,6 +567,11 @@ define-properties@^1.1.3, define-properties@^1.1.4:
has-property-descriptors "^1.0.0" has-property-descriptors "^1.0.0"
object-keys "^1.1.1" object-keys "^1.1.1"
delayed-stream@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
denque@^1.4.1: denque@^1.4.1:
version "1.5.1" version "1.5.1"
resolved "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz" resolved "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz"
...@@ -934,6 +960,20 @@ flatted@^3.1.0: ...@@ -934,6 +960,20 @@ flatted@^3.1.0:
resolved "https://registry.npmjs.org/flatted/-/flatted-3.2.7.tgz" resolved "https://registry.npmjs.org/flatted/-/flatted-3.2.7.tgz"
integrity sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ== integrity sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==
follow-redirects@^1.15.0:
version "1.15.2"
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13"
integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
dependencies:
asynckit "^0.4.0"
combined-stream "^1.0.8"
mime-types "^2.1.12"
forwarded@0.2.0: forwarded@0.2.0:
version "0.2.0" version "0.2.0"
resolved "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz" resolved "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz"
...@@ -1410,7 +1450,7 @@ mime-db@1.52.0: ...@@ -1410,7 +1450,7 @@ mime-db@1.52.0:
resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz" resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz"
integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==
mime-types@~2.1.24, mime-types@~2.1.34: mime-types@^2.1.12, mime-types@~2.1.24, mime-types@~2.1.34:
version "2.1.35" version "2.1.35"
resolved "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz" resolved "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz"
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==
...@@ -1682,6 +1722,11 @@ proxy-addr@~2.0.7: ...@@ -1682,6 +1722,11 @@ proxy-addr@~2.0.7:
forwarded "0.2.0" forwarded "0.2.0"
ipaddr.js "1.9.1" ipaddr.js "1.9.1"
proxy-from-env@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
pstree.remy@^1.1.8: pstree.remy@^1.1.8:
version "1.1.8" version "1.1.8"
resolved "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz" resolved "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz"
......
__pycache__
**/__pycache__
\ No newline at end of file
About the job About the job
Borneo.io is building the next-generation ML Powered data privacy platform for hyper-growth companies. The Data Scientist role is at the core of Borneos engineering. You will be building models, manipulating big data, and working with APIs essential to the Borneo product. Borneo.io is building the next-generation ML Powered data privacy platform for hyper-growth companies. The Data Scientist role is at the core of Borneo's engineering. You will be building models, manipulating big data, and working with APIs essential to the Borneo product.
We are growing fast and expanding our data science family with outstanding minds and diverse personalities. We are growing fast and expanding our data science family with outstanding minds and diverse personalities.
As a Data Scientist at Borneo, youll have the opportunity to: As a Data Scientist at Borneo, you'll have the opportunity to:
Work with some of the largest data sets used by some of the leading global technology companies Work with some of the largest data sets used by some of the leading global technology companies
Help build a predictive product and inform features at the ground level. Help build a predictive product and inform features at the ground level.
......
### Create conda python 3.9 env
conda create -n server python=3.9
### PIP Packages
gensim==3.8.1
texthero==1.1.0
pyresparser
### CONDA Packages
tqdm
pdf2image
pandas
pytesseract
"uvicorn[standard]"
fastapi
### Run server
uvicorn main:app --reload
from typing import List, Union
from pydantic import BaseModel
class ExtractResume(BaseModel):
    """Request body for POST /resume/extract."""
    # URL the server will download the resume PDF from.
    resume_url: str
    user_id:str
class ResumeScores(BaseModel):
    """Request body for POST /resume/get-scores."""
    # URL the server will download the resume PDF from.
    resume_url: str
    user_id:str
    # Must-have / nice-to-have skills to match the resume against.
    primary_skills:List[str] = []
    secondary_skills:List[str] = []
    # Full job description text, used for the similarity score.
    job_desc:str
\ No newline at end of file
from fastapi import FastAPI
import routes.resume as resumes

# FastAPI entry point for the resume-analysis utility server.
app = FastAPI()

# Mount the /resume routes (extract, get-scores).
app.include_router(resumes.router)

@app.get("/")
def read_root():
    # Simple liveness probe.
    return {"status": "running"}
\ No newline at end of file
tqdm==4.61.2 tqdm==4.61.2
pdf2image==1.16.0 pdf2image==1.16.0
pandas==1.3.0 pandas==1.3.0
pyresparser==1.0.6
pytesseract==0.3.8 pytesseract==0.3.8
texthero==1.1.0
numpy==1.21.0
requests==2.25.1
cleantext==1.1.3
Pillow==8.3.1
scikit_learn==0.24.2
import pandas as pd import pandas as pd
import os import os
......
from fastapi import APIRouter
import os
from pyresparser import ResumeParser
# Fix: `import urllib` alone does NOT expose urllib.request —
# urllib.request.urlretrieve raised AttributeError unless some other module
# happened to import the submodule first.
import urllib.request

from baseModels.payloads import ExtractResume, ResumeScores
from scripts.processing import document_processing

router = APIRouter(prefix='/resume')


def _download_resume(user_id, resume_url):
    """Download the resume PDF to a per-user temp path and return that path."""
    # Ensure the working directory exists on a fresh deployment.
    os.makedirs('resumes', exist_ok=True)
    filename = 'resumes/' + user_id + '.pdf'
    # NOTE(review): fetches an arbitrary caller-supplied URL (SSRF risk) —
    # resume_url should be validated/allow-listed upstream.
    urllib.request.urlretrieve(resume_url, filename)
    return filename


@router.post("/extract")
def extract(payload: ExtractResume):
    """Download the resume and return pyresparser's extracted fields."""
    filename = _download_resume(payload.user_id, payload.resume_url)
    try:
        return ResumeParser(filename).get_extracted_data()
    finally:
        # Fix: clean up the temp file even when parsing raises.
        os.remove(filename)


@router.post("/get-scores")
def get_scores(payload: ResumeScores):
    """Score the resume against the job's skills and description."""
    filename = _download_resume(payload.user_id, payload.resume_url)
    try:
        skills = {"primary": payload.primary_skills, "secondary": payload.secondary_skills}
        result = document_processing(filename, skills, payload.job_desc)
        return result.skills_match()
    finally:
        # Fix: clean up the temp file even when processing raises.
        os.remove(filename)
\ No newline at end of file
import pandas as pd import pandas as pd
import numpy as np
import pytesseract
import os
import glob
import texthero as hero import texthero as hero
from pyresparser import ResumeParser
from pyresparser.utils import extract_text from pyresparser.utils import extract_text
from PIL import Image from PIL import Image
from pdf2image import convert_from_path
from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
...@@ -19,81 +12,22 @@ from sklearn.metrics.pairwise import cosine_similarity ...@@ -19,81 +12,22 @@ from sklearn.metrics.pairwise import cosine_similarity
# "secondary" : ['data', 'science', 'modeling', 'anomaly', 'privacy', 'visualization', 'OCR'], # "secondary" : ['data', 'science', 'modeling', 'anomaly', 'privacy', 'visualization', 'OCR'],
# } # }
class document_processing: class document_processing:
def __init__(self, resume, skills, job_desc): #(resumes/Dhaval_Thakkar_Resume.pdf, skills, Job_description.txt) def __init__(self, resume, skills, job_desc):
with open('Job_description.txt', 'rb') as file:
job_desc = file.read()
self.resume = resume self.resume = resume
self.skills = skills self.skills = skills
self.job_desc = job_desc self.job_desc = job_desc
def extract_resume(self): def extract_resume(self):
filepath = self.resume #resumes/Dhaval_Thakkar_Resume.pdf
extension = filepath.split('.')[-1] #pdf
extension = '.'+extension #.pdf
resume_ner = ResumeParser(filepath).get_extracted_data()
resume_txt = extract_text(filepath, extension=extension)
return resume_ner, resume_txt
def ocr_text(self):
filepath = self.resume filepath = self.resume
files = glob.glob('temp/*') extension = filepath.split('.')[-1]
for f in files: extension = '.'+extension
os.remove(f)
# Store all the pages of the PDF in a variable
pages = convert_from_path(filepath, 500)
# Counter to store images of each page of PDF to image
image_counter = 1
# Iterate through all the pages stored above
for page in pages:
# PDF page n -> page_n.jpg resume_txt = extract_text(filepath, extension=extension)
filename = "page_"+str(image_counter)+".jpg"
# Save the image of the page in system
page.save('temp/'+filename, 'JPEG')
# Increment the counter to update filename
image_counter = image_counter + 1
########## OCR ##########
# Variable to get count of total number of pages
filelimit = image_counter-1
text_op = ''
count = 0
# Iterate from 1 to total number of pages
for i in range(1, filelimit + 1):
filename = "temp/page_"+str(i)+".jpg"
# Recognize the text as string in image using pytesserct
text = str(((pytesseract.image_to_string(Image.open(filename)))))
text = text.replace('-\n', '')
# Finally, write the processed text to the file.
text_op+=text
count+=1
with open('out_text.txt', 'w') as f:
f.write(text_op)
return text_op, count return resume_txt
def find_unigram(df, column): def find_unigram(df, column):
...@@ -166,64 +100,22 @@ class document_processing: ...@@ -166,64 +100,22 @@ class document_processing:
skills = self.skills skills = self.skills
# Load data from pyresparser pyres_text = self.extract_resume()
pyres_data, pyres_text = self.extract_resume()
self.data = pyres_data
self.text = pyres_text self.text = pyres_text
ocr_ser = pd.Series(pyres_text) ocr_ser = pd.Series(pyres_text)
cleaned_words = hero.clean(ocr_ser) #[ [clean words set], [@,#,$%,_,1234567890,], [] ] cleaned_words = hero.clean(ocr_ser)
# Main dataframe for manipulation
main_df = pd.DataFrame(cleaned_words[0].split(), columns = ['text']) main_df = pd.DataFrame(cleaned_words[0].split(), columns = ['text'])
self.clean_data = main_df self.clean_data = main_df
# Add the primary match and score
pri_score, pri_match = self.find_match(main_df, pd.DataFrame(skills['primary'])) pri_score, pri_match = self.find_match(main_df, pd.DataFrame(skills['primary']))
sec_score, sec_match = self.find_match(main_df, pd.DataFrame(skills['secondary'])) sec_score, sec_match = self.find_match(main_df, pd.DataFrame(skills['secondary']))
columns = ['filename', 'name', 'mobile_number', 'email', 'company_names',
'college_name', 'experience', 'skills', 'experience_age',
'degree', 'words',
'primary_score', 'primary_match',
'secondary_score', 'secondary_match',
'no_of_pages', 'document_similarity']
details = pd.DataFrame(columns = columns)
# Add the document similarity score
doc_sim = self.resume_cosine_score(cleaned_words[0]) doc_sim = self.resume_cosine_score(cleaned_words[0])
words = len(main_df)
# Add details in a dataframe
details.loc[0, 'filename'] = self.resume
details = self.fill_data(details, pyres_data, 'name')
details = self.fill_data(details, pyres_data, 'mobile_number')
details = self.fill_data(details, pyres_data, 'email')
details = self.fill_data(details, pyres_data, 'company_names')
details = self.fill_data(details, pyres_data, 'college_name')
details = self.fill_data(details, pyres_data, 'degree')
details = self.fill_data(details, pyres_data, 'experience')
details = self.fill_data(details, pyres_data, 'skills')
details.loc[0, 'words'] = words
if pyres_data['no_of_pages'] == None:
details.loc[0, 'no_of_pages'] = 0
else:
details = self.fill_data(details, pyres_data, 'no_of_pages')
details.loc[0, 'primary_score'] = pri_score
details.loc[0, 'primary_match'] = str(pri_match)
details.loc[0, 'secondary_score'] = sec_score
details.loc[0, 'secondary_match'] = str(sec_match)
details.loc[0, 'document_similarity'] = int(doc_sim)
if pyres_data['total_experience'] > 0:
details.loc[0, 'experience_age'] = pyres_data['total_experience']
else:
details.loc[0, 'experience_age'] = np.NaN
details['no_of_pages'] = details['no_of_pages'].astype(int)
return details
return {'primary_score': pri_score,
'primary_match': pri_match,
'secondary_score': sec_score,
'secondary_match': sec_match,
'similarity': int(doc_sim)}
__pycache__
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment