This is the frontend of the labor turnover prediction. This is created...

This is the frontend of the labor turnover prediction. This is created with python and streamlit.
parent 652f5ba7
import streamlit as st
import numpy as np
import pandas as pd
import sys
import sklearn
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import pickle
import io
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
# Page layout: one Streamlit container per section of the app. The containers
# are created here, in display order, and each one is filled in further down
# by its matching `with <name>:` block.
# (Author's note: st.columns splits the page into vertical sections,
# st.container into horizontal ones.)
(
    header,        # title and introduction
    input_file,    # CSV upload, predictions and report downloads
    correlation,   # 3.1 correlation heatmap
    feature_imp,   # 3.2 feature-importance chart
    feature_dist,  # 3.3 histograms for the predicted leavers
) = (st.container() for _ in range(5))
def add_bg_from_url():
    """Inject a CSS rule that gives the Streamlit app a fixed, page-covering background image."""
    # The doubled braces are f-string escapes; the emitted CSS has single braces.
    background_css = f"""
<style>
.stApp {{
background-image: url("https://cdn.wallpapersafari.com/81/70/MwlB8x.jpg");
background-attachment: fixed;
background-size: cover
}}
</style>
"""
    # Raw HTML/CSS is only rendered by st.markdown when explicitly allowed.
    st.markdown(background_css, unsafe_allow_html=True)


# Apply the background before any page content is written.
add_bg_from_url()
with header:
    # Centered page title (raw HTML, so it must be explicitly allowed).
    st.markdown("<h1 style='text-align: center;'>Welcome To The Smart Labour Turnover Solution System</h1>", unsafe_allow_html=True)

    # Introductory bullet points, rendered in the order listed.
    intro_bullets = (
        "* This system is mainly capable of Finding employees with a higher possibility of leaving the company using artificial intelligence (AI).",
        """* After you have uploaded your .csv file, our system will find the patterns in your data using machine learning techniques and present the list of employees, including their names and ID, who are more likely to leave the company""",
        """* Moreover, the system will provide an analytical Overview that will help you deep dive into your data in an easy graphical way. Such as,
- Correlation for the each and every attribute in your dataset.
- Feature importance for the whole dataset.
- Top 15 feature distribution for employee turnover.""",
    )
    for bullet in intro_bullets:
        st.markdown(bullet)
# Numeric/ordinal model inputs, in the order get_OHE() emits them.
_NUMERIC_FEATURES = [
    'Age', 'BusinessTravel', 'DailyRate', 'DistanceFromHome', 'Education', 'EnvironmentSatisfaction',
    'HourlyRate', 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome',
    'MonthlyRate', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',
    'RelationshipSatisfaction', 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
    'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager',
]
# Nominal inputs that are one-hot encoded (Gender and MaritalStatus included,
# so they are NOT ordinal-encoded beforehand).
_CATEGORICAL_FEATURES = ['Department', 'EducationField', 'JobRole', 'MaritalStatus', 'Gender']
# Columns that get standardized: every numeric input except the binary OverTime flag.
_SCALED_FEATURES = [name for name in _NUMERIC_FEATURES if name != 'OverTime']
# Raw columns copied, in this order, into the "original file with predictions" download.
_REPORT_COLUMNS = [
    "Name", "ID", "Age", "BusinessTravel", "DailyRate", "Department", "DistanceFromHome", "Education",
    "EducationField", "EnvironmentSatisfaction", "Gender", "HourlyRate", "JobInvolvement", "JobLevel",
    "JobRole", "JobSatisfaction", "MaritalStatus", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
    "OverTime", "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction", "StockOptionLevel",
    "TotalWorkingYears", "TrainingTimesLastYear", "WorkLifeBalance", "YearsAtCompany",
    "YearsInCurrentRole", "YearsSinceLastPromotion", "YearsWithCurrManager",
]


def get_OHE(test):
    """Return the model input frame: numeric columns followed by one-hot dummies.

    *test* must already contain every numeric feature plus the five
    categorical columns; a missing column raises KeyError from pandas.
    """
    # NOTE(review): drop_first=True derives the dummy columns from the uploaded
    # data, so an upload lacking some category yields fewer columns than the
    # model presumably saw during training -- confirm against the training code.
    dummies = pd.get_dummies(test[_CATEGORICAL_FEATURES], drop_first=True)
    return pd.concat([test[_NUMERIC_FEATURES], dummies], axis=1)


# NOTE: st.cache is deprecated in newer Streamlit releases in favour of
# st.cache_data -- TODO migrate once the minimum Streamlit version allows it.
@st.cache
def convert_df(df):
    """Serialize *df* to UTF-8 encoded CSV bytes for a download button."""
    # Cached so the conversion is not recomputed on every Streamlit rerun.
    return df.to_csv().encode('utf-8')


with input_file:
    st.subheader("1. Please Upload Your Dataset in .csv Format")
    # NOTE: `input` shadows the builtin of the same name; the name is kept
    # because other sections of the script may refer to it.
    input = st.file_uploader("Choose the file")
    if input is not None:
        df = pd.read_csv(input)
        st.text("** Below is your uploaded file. Please cancel the upload and reupload the file if it is not the correct one.** ")
        st.markdown("<h3 style='text-align: center;'>Inputted Dataset</h3>", unsafe_allow_html=True)
        st.write(df)
        st.write(" * The shape of the uploaded file has (rows, columns): ", df.shape)

        st.subheader("2. Here is Your Prediction")
        # Identifier columns carry no predictive signal.
        test = df.drop(columns=['ID', 'Name'])

        # Ordinal encoding. Any BusinessTravel value other than the two named
        # ones (e.g. 'Travel_Rarely') maps to 2.
        test['OverTime'] = test['OverTime'].apply(lambda x: 1 if x == 'Yes' else 0)
        test['BusinessTravel'] = test['BusinessTravel'].apply(
            lambda x: 0 if x == 'Non-Travel' else 1 if x == 'Travel_Frequently' else 2)

        # Correlation matrix of the raw upload. Restricting to numeric/bool
        # columns first keeps this working on pandas >= 2.0, where corr() no
        # longer silently drops string columns and raises instead.
        corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()

        # One-hot encode the nominal columns.
        test = get_OHE(test)

        # Standardize the numeric inputs.
        # NOTE(review): the scaler is fitted on the uploaded data itself rather
        # than reusing statistics from training -- confirm this is intended.
        scaler = StandardScaler()
        test[_SCALED_FEATURES] = scaler.fit_transform(test[_SCALED_FEATURES])

        # Load the trained model. The pickle ships with the app; unpickling
        # executes arbitrary code, so never point this at an untrusted file.
        with open("Best_attrition_model.pickle", "rb") as model_file:
            loaded_model = pickle.load(model_file)

        predictions = loaded_model.predict(test)

        # Short report: who is predicted to leave (1) or stay (0).
        predictions_df = pd.DataFrame(data={"Name": df["Name"], "ID": df["ID"], "Department": df["Department"],
                                            "JobRole": df["JobRole"], "Predictions": predictions})
        leaving = predictions_df.loc[predictions_df['Predictions'] == 1]
        final_leaving = leaving.drop(columns=["Predictions", "Department", "JobRole"])

        # Long report: the raw uploaded values plus the prediction. All columns
        # come from `df` so that no scaled value leaks into the download.
        down_report = df[_REPORT_COLUMNS].copy()
        down_report["Predictions"] = predictions

        st.markdown("<h3 style='text-align: center;'>Below are the people who are most likely to leave the company.</h3>", unsafe_allow_html=True)
        # Three columns with empty outer ones centre the table on the page.
        leaving1, leaving2, leaving3 = st.columns(3)
        with leaving1:
            st.write("")
        with leaving2:
            st.write(final_leaving)
        with leaving3:
            st.write("")
        st.write(" * According to the prediction, Totally ", len(leaving), "people can leave the company.")

        st.markdown("<h3 style='text-align: center;'>Here is the Full report of your company's employee Turnover.</h3>", unsafe_allow_html=True)
        st.markdown("* In the Prediction column, 1 means the employee will leave, and 0 means the employee will stay.")
        st.write(predictions_df)

        st.markdown("* To download the full report, please click the download button below.")
        csv1 = convert_df(predictions_df)
        csv2 = convert_df(down_report)
        st.download_button(
            label="Click here to download the full report",
            data=csv1,
            file_name='Full Report - Employee Turnover.csv',
            mime='text/csv',
        )
        st.markdown("* To download the original file with predictions, please click the download button below.")
        st.download_button(
            label="Click here to download the original file with predictions",
            data=csv2,
            file_name='Original Report with Predictions.csv',
            mime='text/csv',
        )
# The heatmap needs `corr_matrix`, which only exists once a CSV has been
# uploaded, so the whole section is skipped until then.
if input is not None:
    with correlation:
        st.header("3. The Analytical Overview")
        st.markdown("<h3 style='text-align: center;'>3.1 Feature Correlation.</h3>", unsafe_allow_html=True)
        st.markdown("* The correlation matrix below, shows which arrtribute is having a high or low correlation in respect to another attribute")
        fig, ax = plt.subplots(figsize=(20, 20))
        sns.heatmap(corr_matrix, annot=True, ax=ax)
        st.write(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so they do not pile up in pyplot's global registry.
        plt.close(fig)
# The chart needs `test` and `predictions`, which only exist once a CSV has
# been uploaded, so the whole section is skipped until then.
if input is not None:
    with feature_imp:
        st.markdown("<h3 style='text-align: center;'>3.2 Feature Importance.</h3>", unsafe_allow_html=True)
        st.markdown("* Feature Importance shows the most critical attributes for your companies' Employee Turnover. The features are listed here in descending order.")

        # A random forest is fitted on the encoded inputs against the model's
        # own predictions, and its impurity-based importances are plotted.
        # random_state pins the forest so the ranking does not reshuffle each
        # time Streamlit reruns the script.
        rf_gs = RandomForestClassifier(max_depth=None, min_samples_split=2, n_estimators=500, random_state=0)
        rf_gs.fit(test, predictions)
        coef = rf_gs.feature_importances_
        ind = np.argsort(-coef)  # feature indices, most important first

        # Horizontal bar chart drawn on an explicit figure/axes pair rather
        # than through pyplot's implicit "current axes" state.
        positions = np.arange(test.shape[1])
        fi_fig, fi_ax = plt.subplots(figsize=(6, 8))
        fi_ax.set_title("Feature importances")
        fi_ax.barh(positions, coef[ind])
        fi_ax.set_yticks(positions)
        fi_ax.set_yticklabels(test.columns[ind])
        fi_ax.invert_yaxis()  # most important feature at the top
        st.pyplot(fi_fig)
        # Close the figure so reruns do not accumulate open figures.
        plt.close(fi_fig)
# The histograms need `df`, `test` and `predictions`, which only exist once a
# CSV has been uploaded, so the whole section is skipped until then.
if input is not None:
    with feature_dist:
        st.markdown("<h3 style='text-align: center;'>3.3 Top 15 Feature's Values Distribution.</h3>", unsafe_allow_html=True)
        st.markdown("* By exploring the histograms below, you can find out the most common values or value ranges for each attribute and make decisions accordingly to decrease your organization's employee turnover rate. ")

        # The plotted features are a fixed list, not derived from the
        # importance ranking computed elsewhere in the script.
        raw_features = [
            "Age", "MonthlyIncome", "TotalWorkingYears", "MonthlyRate", "DistanceFromHome",
            "YearsAtCompany", "PercentSalaryHike", "YearsWithCurrManager", "EnvironmentSatisfaction",
            "JobInvolvement", "WorkLifeBalance", "NumCompaniesWorked", "YearsSinceLastPromotion",
            "JobSatisfaction",
        ]
        ds_df = df[raw_features].copy()
        # OverTime comes from `test` because the raw column in `df` holds
        # 'Yes'/'No' strings, which cannot be histogrammed.
        ds_df["OverTime"] = test["OverTime"]
        ds_df["Predictions"] = predictions

        # Keep only the employees predicted to leave.
        leaving_ds = ds_df.loc[ds_df['Predictions'] == 1]
        final_leaving_ds = leaving_ds.drop(columns=['Predictions'])

        # DataFrame.hist creates its own figure; fetch it from the returned
        # axes and hand it to Streamlit explicitly, instead of relying on the
        # deprecated global-figure form of st.pyplot().
        hist_axes = final_leaving_ds.hist(bins=30, figsize=(15, 15), color='b')
        hist_fig = np.ravel(hist_axes)[0].figure
        st.pyplot(hist_fig)
        # Close the figure so reruns do not accumulate open figures.
        plt.close(hist_fig)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment