# Commit 1504eea2 authored by Chamila Dilshan
#
# Front end
#
# parent ec524bb8
import streamlit as st
import numpy as np
import pandas as pd
import nltk
nltk.download('stopwords')
import spacy
import sys
import sklearn
import pickle
import io
from sklearn.feature_extraction.text import TfidfVectorizer
import neattext.functions as nfx
import re
import string
from pyresparser import ResumeParser
from docx import Document
from PIL import Image
import os
# --- Page setup -------------------------------------------------------------
# Use the full browser width for the app.
st.set_page_config(layout="wide")
# Inject custom CSS for the main container and images.  The f-string has no
# placeholders, so the doubled braces {{ }} render as literal CSS braces.
st.markdown(
f"""
<style>
.reportview-container .main .block-container{{
max-width: 100%;
background-color: #F5F5F5;
padding-top: 5rem;
padding-right: 5rem;
padding-left: 5rem;
padding-bottom: 5rem;
}}
img{{
max-width:99%;
margin-bottom:99px;
justify-content: center;
}}
</style>
""",
unsafe_allow_html=True,
)
# Named page sections, rendered in declaration order (top to bottom).
# NOTE(review): only `header` and `input_file` are populated below; the last
# three containers appear to be unused placeholders.
header = st.container()
input_file = st.container()
correlation = st.container()
feature_imp = st.container()
feature_dist = st.container()
def save_uploadedfile(uploadedfile):
    """Persist a Streamlit UploadedFile into the local ``tempDir`` folder.

    Parameters
    ----------
    uploadedfile : streamlit.runtime.uploaded_file_manager.UploadedFile
        Any object exposing ``.name`` (str) and ``.getbuffer()`` (bytes-like).

    Returns
    -------
    The Streamlit success banner produced by ``st.success``.
    """
    # Create the target directory on first use; without this the open() below
    # raises FileNotFoundError when tempDir does not already exist.
    os.makedirs("tempDir", exist_ok=True)
    with open(os.path.join("tempDir", uploadedfile.name), "wb") as f:
        f.write(uploadedfile.getbuffer())
    return st.success("Saved File:{} to tempDir".format(uploadedfile.name))
# Load the trained classifier and its fitted TF-IDF vectorizer.
# Context managers close the file handles (pickle.load(open(...)) leaked them).
# SECURITY NOTE(review): pickle.load executes arbitrary code from the file —
# only load artifacts produced by this project, never untrusted uploads.
with open("resume_screening_model.pickle", "rb") as _model_file:
    loaded_model = pickle.load(_model_file)
with open("resume_screening_vectorizer.pickle", "rb") as _vec_file:
    loaded_vec = pickle.load(_vec_file)
# --- Landing section: title, feature overview, and usage instructions -------
with header:
    st.markdown("<h1 style='text-align: center;'>Welcome To The Smart Labour Turnover Solutions' Resume Screening System.</h1>", unsafe_allow_html=True)
    st.markdown("")
    st.markdown("* Companies receive hundreds of resumes for their job postings, and it is very time-consuming to go through every resume.")
    st.markdown("* Our System is mainly capable of automating the resume screening process using machine learning algorithms and natural language processing (NLP) techniques.")
    st.markdown(""" * Following are the features of our resume screening system.
- Can input five resumes at once,
- Display the best job position for each candidate,
- Display each candidate's probability for all the job positions available,
- All the records can be downloaded in .csv format. """)
    st.markdown("* You can input 5 resumes at once in .pdf, .doc, or .txt formats.")
    st.markdown("* When you upload the resume, you must name the file with the candidate's name. ex: John Wick.pdf")
    st.markdown("* Then our system will parse the resumes using NLP's name entity recognition (NER) techniques and display each candidate's name, skills, and experience.")
    st.markdown("* Based on complete extracted data (skills, experience, degree, designation), our system will choose the best position for each candidate from 25 different job positions listed below.")
    # NOTE(review): assumes position.png is in the app's working directory.
    image = Image.open("position.png")
    st.image(image)
# Resume files Inpuation
with input_file:
    # resumes Upoload
    st.subheader("1. Please Upload All Your Five Resumes in .pdf, .doc, .docx or .txt Formats")
    # One uploader per resume slot; each entry is None until a file is chosen.
    uploaded_files = [st.file_uploader(f"Choose the Resume {i}") for i in range(1, 6)]

    def _clean_resume_text(raw_data):
        """Flatten the parsed-resume dict into lowercase text with no
        punctuation or digits, matching the vectorizer's training input."""
        text = re.sub(r'[^\w\s]', '', str(raw_data))  # remove special characters and punctuations
        text = text.lower()  # lower case conversion
        return re.sub(r"\d+", "", text)  # remove numbers (raw string: \d is a regex escape)

    def _process_resume(uploaded):
        """Parse one uploaded resume and score it with the loaded model.

        Returns a ``(record, probabilities)`` pair: ``record`` is a dict of the
        extracted fields plus ``Name`` and ``Best_Position``; ``probabilities``
        is a one-row DataFrame of per-class probabilities indexed by the
        candidate name (the file name with its extension stripped).
        """
        # splitext handles .pdf/.doc/.txt AND .docx alike — the old
        # file.name[:-4] slice left a stray "x" for .docx uploads.
        candidate_name = os.path.splitext(uploaded.name)[0]
        data = ResumeParser(uploaded).get_extracted_data()
        data = {key: data[key] for key in data.keys() & {'skills', 'designation', 'experience', 'degree'}}
        features = loaded_vec.transform([_clean_resume_text(data)])[0]
        data["Name"] = candidate_name
        data["Best_Position"] = str(loaded_model.predict(features))
        proba = loaded_model.predict_proba(features)
        return data, pd.DataFrame(proba, columns=loaded_model.classes_, index=[candidate_name])

    # Process whichever resumes were actually uploaded.  The original
    # copy-pasted version raised NameError unless Resume 1 (and, for the final
    # report, all five resumes) were present.
    records = []
    prob_frames = []
    for uploaded in uploaded_files:
        if uploaded is not None:
            record, probs = _process_resume(uploaded)
            records.append(record)
            prob_frames.append(probs)

    if records:
        # DataFrame(records) unions the keys across resumes (NaN-filled),
        # replacing the DataFrame.append API removed in pandas 2.0.
        df = pd.DataFrame(records)
        final_pred = pd.concat(prob_frames)

        st.markdown("<h2 style='text-align: center;'>Welcome to the Resume Parser</h2>", unsafe_allow_html=True)
        st.subheader("2. Here is the Most Critical Extracted Information in the Resumes.")
        st.markdown("* You can get an overview of the candidate's profile according to the below-extracted data.")
        st.dataframe(data=df.loc[:, ["Name","skills","experience","degree"]])

        # Downloads
        st.markdown("* Please click the download button below to download the Extracted Resumes Data Report.")

        @st.cache
        def convert_df(df):
            # IMPORTANT: Cache the conversion to prevent computation on every rerun
            return df.to_csv().encode('utf-8')

        csv1 = convert_df(df.loc[:, ["Name","skills","experience","degree"]])
        csv2 = convert_df(df.loc[:, ["Name","Best_Position"]])
        st.download_button(
            label="Click here to download the Extracted Resumes Data Report",
            data=csv1,
            file_name='Extracted Resumes Data Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)

        # Resume Classification - Part 1 - Best Position Prediction
        st.markdown("<h2 style='text-align: center;'>Welcome to the Resume Classifier</h2>", unsafe_allow_html=True)
        st.subheader("3. Here is the Best-Predicted Position for Each Candidate.")
        st.markdown("* Below is the best position for each candidate according to their skills, experience, degree, and designation.")
        # Three-column layout; only the centre column carries content.
        col1, col2, col3 = st.columns(3)
        with col2:
            st.dataframe(data=df.loc[:, ["Name","Best_Position"]])
        st.write("")

        # downloads
        st.markdown("* Please click the download button below to download the Best Position Prediction Report.")
        st.download_button(
            label="Click here to download the Best Position Prediction Report",
            data=csv2,
            # typo fixed: "Podition" -> "Position" in the downloaded file's name
            file_name='Best Position Prediction Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)

        # Resume Classification - Part 2 - All Positions Probability
        st.subheader("4. Here is Each Candidate's Probability for All 25 Job Positions Available.")
        st.markdown("* By examining each candidate's probability for each job position, you can select the best candidates even if they are suitable for the same job position. ")
        # One column per candidate, probabilities sorted high-to-low.
        prob_columns = st.columns(5)
        for column, frame in zip(prob_columns, prob_frames):
            with column:
                st.dataframe(data=frame[frame.iloc[-1].sort_values(ascending=False).index].T, height=914)

        # downloads
        st.markdown("* Please click the download button below to download the All positions Prediction Report.")
        csv3 = convert_df(final_pred)
        st.download_button(
            label="Click here to download the All positions Prediction Report",
            data=csv3,
            file_name='All positions Prediction Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)
# (removed: GitLab web-UI footer text accidentally pasted into the source file)