# Commit 1504eea2 authored by Chamila Dilshan
#
# Front end
#
# parent ec524bb8
import streamlit as st
import numpy as np
import pandas as pd
import nltk
nltk.download('stopwords')
import spacy
import sys
import sklearn
import pickle
import io
from sklearn.feature_extraction.text import TfidfVectorizer
import neattext.functions as nfx
import re
import string
from pyresparser import ResumeParser
from docx import Document
from PIL import Image
import os
# --- Page setup -------------------------------------------------------------
# Use the full browser width for the app.
st.set_page_config(layout="wide")
# Inject custom CSS for the main container and images.  The f-string has no
# placeholders, so the doubled braces {{ }} render as literal CSS braces.
st.markdown(
f"""
<style>
.reportview-container .main .block-container{{
max-width: 100%;
background-color: #F5F5F5;
padding-top: 5rem;
padding-right: 5rem;
padding-left: 5rem;
padding-bottom: 5rem;
}}
img{{
max-width:99%;
margin-bottom:99px;
justify-content: center;
}}
</style>
""",
unsafe_allow_html=True,
)
# Named page sections, rendered in declaration order (top to bottom).
# NOTE(review): only `header` and `input_file` are populated below; the last
# three containers appear to be unused placeholders.
header = st.container()
input_file = st.container()
correlation = st.container()
feature_imp = st.container()
feature_dist = st.container()
def save_uploadedfile(uploadedfile):
    """Persist a Streamlit UploadedFile into the local ``tempDir`` folder.

    Parameters
    ----------
    uploadedfile : streamlit.runtime.uploaded_file_manager.UploadedFile
        Any object exposing ``.name`` (str) and ``.getbuffer()`` (bytes-like).

    Returns
    -------
    The Streamlit success banner produced by ``st.success``.
    """
    # Create the target directory on first use; without this the open() below
    # raises FileNotFoundError when tempDir does not already exist.
    os.makedirs("tempDir", exist_ok=True)
    with open(os.path.join("tempDir", uploadedfile.name), "wb") as f:
        f.write(uploadedfile.getbuffer())
    return st.success("Saved File:{} to tempDir".format(uploadedfile.name))
# Load the trained classifier and its fitted TF-IDF vectorizer.
# Context managers close the file handles (pickle.load(open(...)) leaked them).
# SECURITY NOTE(review): pickle.load executes arbitrary code from the file —
# only load artifacts produced by this project, never untrusted uploads.
with open("resume_screening_model.pickle", "rb") as _model_file:
    loaded_model = pickle.load(_model_file)
with open("resume_screening_vectorizer.pickle", "rb") as _vec_file:
    loaded_vec = pickle.load(_vec_file)
# --- Landing section: title, feature overview, and usage instructions -------
with header:
    st.markdown("<h1 style='text-align: center;'>Welcome To The Smart Labour Turnover Solutions' Resume Screening System.</h1>", unsafe_allow_html=True)
    st.markdown("")
    st.markdown("* Companies receive hundreds of resumes for their job postings, and it is very time-consuming to go through every resume.")
    st.markdown("* Our System is mainly capable of automating the resume screening process using machine learning algorithms and natural language processing (NLP) techniques.")
    st.markdown(""" * Following are the features of our resume screening system.
- Can input five resumes at once,
- Display the best job position for each candidate,
- Display each candidate's probability for all the job positions available,
- All the records can be downloaded in .csv format. """)
    st.markdown("* You can input 5 resumes at once in .pdf, .doc, or .txt formats.")
    st.markdown("* When you upload the resume, you must name the file with the candidate's name. ex: John Wick.pdf")
    st.markdown("* Then our system will parse the resumes using NLP's name entity recognition (NER) techniques and display each candidate's name, skills, and experience.")
    st.markdown("* Based on complete extracted data (skills, experience, degree, designation), our system will choose the best position for each candidate from 25 different job positions listed below.")
    # NOTE(review): assumes position.png is in the app's working directory.
    image = Image.open("position.png")
    st.image(image)
# Resume files Inpuation
with input_file:
    # resumes Upoload
    st.subheader("1. Please Upload All Your Five Resumes in .pdf, .doc, .docx or .txt Formats")
    # One uploader per resume slot; each entry is None until a file is chosen.
    uploaded_files = [st.file_uploader(f"Choose the Resume {i}") for i in range(1, 6)]

    def _clean_resume_text(raw_data):
        """Flatten the parsed-resume dict into lowercase text with no
        punctuation or digits, matching the vectorizer's training input."""
        text = re.sub(r'[^\w\s]', '', str(raw_data))  # remove special characters and punctuations
        text = text.lower()  # lower case conversion
        return re.sub(r"\d+", "", text)  # remove numbers (raw string: \d is a regex escape)

    def _process_resume(uploaded):
        """Parse one uploaded resume and score it with the loaded model.

        Returns a ``(record, probabilities)`` pair: ``record`` is a dict of the
        extracted fields plus ``Name`` and ``Best_Position``; ``probabilities``
        is a one-row DataFrame of per-class probabilities indexed by the
        candidate name (the file name with its extension stripped).
        """
        # splitext handles .pdf/.doc/.txt AND .docx alike — the old
        # file.name[:-4] slice left a stray "x" for .docx uploads.
        candidate_name = os.path.splitext(uploaded.name)[0]
        data = ResumeParser(uploaded).get_extracted_data()
        data = {key: data[key] for key in data.keys() & {'skills', 'designation', 'experience', 'degree'}}
        features = loaded_vec.transform([_clean_resume_text(data)])[0]
        data["Name"] = candidate_name
        data["Best_Position"] = str(loaded_model.predict(features))
        proba = loaded_model.predict_proba(features)
        return data, pd.DataFrame(proba, columns=loaded_model.classes_, index=[candidate_name])

    # Process whichever resumes were actually uploaded.  The original
    # copy-pasted version raised NameError unless Resume 1 (and, for the final
    # report, all five resumes) were present.
    records = []
    prob_frames = []
    for uploaded in uploaded_files:
        if uploaded is not None:
            record, probs = _process_resume(uploaded)
            records.append(record)
            prob_frames.append(probs)

    if records:
        # DataFrame(records) unions the keys across resumes (NaN-filled),
        # replacing the DataFrame.append API removed in pandas 2.0.
        df = pd.DataFrame(records)
        final_pred = pd.concat(prob_frames)

        st.markdown("<h2 style='text-align: center;'>Welcome to the Resume Parser</h2>", unsafe_allow_html=True)
        st.subheader("2. Here is the Most Critical Extracted Information in the Resumes.")
        st.markdown("* You can get an overview of the candidate's profile according to the below-extracted data.")
        st.dataframe(data=df.loc[:, ["Name","skills","experience","degree"]])

        # Downloads
        st.markdown("* Please click the download button below to download the Extracted Resumes Data Report.")

        @st.cache
        def convert_df(df):
            # IMPORTANT: Cache the conversion to prevent computation on every rerun
            return df.to_csv().encode('utf-8')

        csv1 = convert_df(df.loc[:, ["Name","skills","experience","degree"]])
        csv2 = convert_df(df.loc[:, ["Name","Best_Position"]])
        st.download_button(
            label="Click here to download the Extracted Resumes Data Report",
            data=csv1,
            file_name='Extracted Resumes Data Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)

        # Resume Classification - Part 1 - Best Position Prediction
        st.markdown("<h2 style='text-align: center;'>Welcome to the Resume Classifier</h2>", unsafe_allow_html=True)
        st.subheader("3. Here is the Best-Predicted Position for Each Candidate.")
        st.markdown("* Below is the best position for each candidate according to their skills, experience, degree, and designation.")
        # Three-column layout; only the centre column carries content.
        col1, col2, col3 = st.columns(3)
        with col2:
            st.dataframe(data=df.loc[:, ["Name","Best_Position"]])
        st.write("")

        # downloads
        st.markdown("* Please click the download button below to download the Best Position Prediction Report.")
        st.download_button(
            label="Click here to download the Best Position Prediction Report",
            data=csv2,
            # typo fixed: "Podition" -> "Position" in the downloaded file's name
            file_name='Best Position Prediction Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)

        # Resume Classification - Part 2 - All Positions Probability
        st.subheader("4. Here is Each Candidate's Probability for All 25 Job Positions Available.")
        st.markdown("* By examining each candidate's probability for each job position, you can select the best candidates even if they are suitable for the same job position. ")
        # One column per candidate, probabilities sorted high-to-low.
        prob_columns = st.columns(5)
        for column, frame in zip(prob_columns, prob_frames):
            with column:
                st.dataframe(data=frame[frame.iloc[-1].sort_values(ascending=False).index].T, height=914)

        # downloads
        st.markdown("* Please click the download button below to download the All positions Prediction Report.")
        csv3 = convert_df(final_pred)
        st.download_button(
            label="Click here to download the All positions Prediction Report",
            data=csv3,
            file_name='All positions Prediction Report - Smart Labour Turnover Solutions.csv',
            mime='text/csv',)
# (removed: GitLab web-UI footer text accidentally pasted into the source file)