This is the frontend of the labor turnover prediction. This is created...

This is the frontend of the labor turnover prediction. It is created with Python and Streamlit.

This is the frontend of the labor turnover prediction. This is created...
This is the frontend of the labor turnover prediction. It is created with Python and Streamlit.
4a0d42ba · Withanage Malith Pinsara · 652f5ba7 · 4a0d42ba
Commit 4a0d42ba authored Oct 08, 2022 by Withanage Malith Pinsara
Show whitespace changes
Inline Side-by-side

Showing with 211 additions and 0 deletions

main.py main.py +211 -0

No files found.
--- a/main.py
+++ b/main.py
+import streamlit as st
+import numpy as np
+import pandas as pd
+import sys
+import sklearn
+import matplotlib
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.preprocessing import StandardScaler
+import pickle
+import io
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+#columns create sections vertical way
+#containers create sections in horizontal way
+header = st.container()
+input_file = st.container()
+correlation = st.container()
+feature_imp = st.container()
+feature_dist = st.container()
+def add_bg_from_url():
+    st.markdown(
+         f"""
+         <style>
+         .stApp {{
+             background-image: url("https://cdn.wallpapersafari.com/81/70/MwlB8x.jpg");
+             background-attachment: fixed;
+             background-size: cover
+         }}
+         </style>
+         """,
+         unsafe_allow_html=True
+     )
+add_bg_from_url() 
+with header:
+    st.markdown("<h1 style='text-align: center;'>Welcome To The Smart Labour Turnover Solution System</h1>", unsafe_allow_html=True)
+    st.markdown("* This system is mainly capable of Finding employees with a higher possibility of leaving the company using artificial intelligence (AI).")
+    st.markdown("""* After you have uploaded your .csv file, our system will find the patterns in your data using machine learning techniques and present the list of employees, including their names and ID, who are more likely to leave the company""")
+    st.markdown("""* Moreover, the system will provide an analytical Overview that will help you deep dive into your data in an easy graphical way. Such as,
+    - Correlation for the each and every attribute in your dataset.
+    - Feature importance for the whole dataset.
+    - Top 15 feature distribution for employee turnover.""")
+with input_file:
+    st.subheader("1. Please Upload Your Dataset in .csv Format")
+    input = st.file_uploader("Choose the file")
+    if input is not None:
+        df = pd.read_csv(input)
+        st.text("** Below is your uploaded file. Please cancel the upload and reupload the file if it is not the correct one.** ")
+        st.markdown("<h3 style='text-align: center;'>Inputted Dataset</h3>", unsafe_allow_html=True)
+        st.write(df)
+        st.write(" * The shape of the uploaded file has (rows, columns): ", df.shape)
+        st.subheader("2. Here is Your Prediction")    
+        #remove Irrelevant Features
+        test= df.drop( columns= ['ID', 'Name'] , axis = 1)
+        #Encode the data
+        # Ordianl Feature encoding 
+        test['OverTime'] = test['OverTime'].apply(lambda x: 1 if x == 'Yes' else 0)
+        #test['Gender'] = test['Gender'].apply(lambda x: 1 if x == 'Male' else 0)
+        test['BusinessTravel'] = test['BusinessTravel'].apply( lambda x: 0 if x== 'Non-Travel' else 1 if x == 'Travel_Frequently' else 2)
+        #test['MaritalStatus'] = test['MaritalStatus'].apply( lambda x: 0 if x== 'Single' else 1 if x == 'Divorced' else 2)
+        #correlation martrix
+        corr_matrix = df.corr()
+        #Onehot Encoding
+        def get_OHE(test):
+            X_OHE = pd.concat([test[['Age', 'BusinessTravel', 'DailyRate', 'DistanceFromHome', 'Education', 'EnvironmentSatisfaction',
+             'HourlyRate', 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome', 
+             'MonthlyRate', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike', 'PerformanceRating', 
+             'RelationshipSatisfaction', 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance', 
+             'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager']],
+             pd.get_dummies(test[['Department', 'EducationField', 'JobRole', 'MaritalStatus', 'Gender']], drop_first=True)], axis=1)
+            return X_OHE
+        test = get_OHE(test)
+        #Scale Data
+        scaler = StandardScaler()
+        test[['Age', 'BusinessTravel', 'DailyRate', 'DistanceFromHome', 'Education', 'EnvironmentSatisfaction', 'HourlyRate', 
+         'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 
+         'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction', 'StockOptionLevel', 'TotalWorkingYears', 
+         'TrainingTimesLastYear', 'WorkLifeBalance', 'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 
+         'YearsWithCurrManager']] = scaler.fit_transform(test[['Age', 'BusinessTravel', 'DailyRate', 'DistanceFromHome', 'Education', 'EnvironmentSatisfaction', 'HourlyRate', 
+         'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 
+         'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction', 'StockOptionLevel', 'TotalWorkingYears', 
+         'TrainingTimesLastYear', 'WorkLifeBalance', 'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 
+         'YearsWithCurrManager']])
+        #Load the Model
+        loaded_model = pickle.load(open("Best_attrition_model.pickle", "rb"))
+        #do Predictions
+        predictions=loaded_model.predict(test)
+        #Add ID and Name along with prediction and save the pandas dataframe
+        predictions_df=pd.DataFrame(data={"Name":df["Name"], "ID":df["ID"], "Department":df["Department"], "JobRole":df["JobRole"], "Predictions":predictions})
+        leaving = predictions_df.loc[predictions_df['Predictions'] == 1]
+        final_leaving = leaving.drop( columns= ["Predictions", "Department", "JobRole"] , axis = 1)
+        down_report = pd.DataFrame( data={"Name":df["Name"],"ID":df["ID"],"Age":df["Age"],"BusinessTravel":df["BusinessTravel"],"DailyRate":df["DailyRate"],
+        "Department":df["Department"],"DistanceFromHome":df["DistanceFromHome"],"Education":df["Education"], "EducationField":df["EducationField"], 
+        "EnvironmentSatisfaction":df["EnvironmentSatisfaction"], "Gender":df["Gender"],"HourlyRate":df["HourlyRate"], "JobInvolvement":df["JobInvolvement"], 
+        "JobLevel":df["JobLevel"],"JobRole":df["JobRole"], "JobSatisfaction":df["JobSatisfaction"],"MaritalStatus":df["MaritalStatus"], 
+        "MonthlyIncome":df["MonthlyIncome"], "MonthlyRate":df["MonthlyRate"], "NumCompaniesWorked":df["NumCompaniesWorked"], 
+        "OverTime":df["OverTime"],"PercentSalaryHike":test["PercentSalaryHike"],"PerformanceRating":df["PerformanceRating"],
+        "RelationshipSatisfaction":df["RelationshipSatisfaction"],"StockOptionLevel":df["StockOptionLevel"],"TotalWorkingYears":df["TotalWorkingYears"],
+        "TrainingTimesLastYear":df["TrainingTimesLastYear"],"WorkLifeBalance":df["WorkLifeBalance"],"YearsAtCompany":df["YearsAtCompany"],
+        "YearsInCurrentRole":df["YearsInCurrentRole"],"YearsSinceLastPromotion":df["YearsSinceLastPromotion"],"YearsWithCurrManager":df["YearsWithCurrManager"],
+        "Predictions":predictions})
+        leaving1, leaving2, leaving3 = st.columns(3)
+        st.markdown("<h3 style='text-align: center;'>Below are the people who are most likely to leave the company.</h3>", unsafe_allow_html=True)
+        leaving1, leaving2, leaving3 = st.columns(3)
+        with leaving1:
+            st.write("")
+        with leaving2:
+            st.write(final_leaving)
+        with leaving3:
+            st.write("")
+        #st.write(leaving)
+        st.write(" * According to the prediction, Totally ",  len(leaving), "people can leave the company.")
+        st.markdown("<h3 style='text-align: center;'>Here is the Full report of your company's employee Turnover.</h3>", unsafe_allow_html=True)
+        st.markdown("* In the Prediction column, 1 means the employee will leave, and 0 means the employee will stay.")
+       # predictions_df1, predictions_df2, predictions_df3 = st.columns(3)
+       # with predictions_df1:
+        #    st.write("")
+        #with predictions_df2:
+        #    st.write(predictions_df)
+        #with predictions_df3:
+          #  st.write("")
+        st.write(predictions_df)
+        st.markdown("* To download the full report, please click the download button below.")
+        @st.cache
+        def convert_df(df):
+            # IMPORTANT: Cache the conversion to prevent computation on every rerun
+            return df.to_csv().encode('utf-8')
+        csv1 = convert_df(predictions_df)
+        csv2 = convert_df(down_report)
+        st.download_button(
+            label="Click here to download the full report",
+            data=csv1,
+            file_name='Full Report - Employee Turnover.csv',
+            mime='text/csv',)
+        st.markdown("* To download the original file with predictions, please click the download button below.")
+        st.download_button(
+            label="Click here to download the original file with predictions",
+            data=csv2,
+            file_name='Original Report with Predictions.csv',
+            mime='text/csv',)
+        with correlation:
+            st.header("3. The Analytical Overview")
+            st.markdown("<h3 style='text-align: center;'>3.1 Feature Correlation.</h3>", unsafe_allow_html=True)
+            st.markdown("* The correlation matrix below, shows which arrtribute is having a high or low correlation in respect to another attribute")
+            fig, ax = plt.subplots(figsize=(20, 20))
+            sns.heatmap(corr_matrix, annot=True, ax=ax)
+            st.write(fig)
+        with feature_imp:
+            #st.header("3. The Analytical Overview")
+            st.markdown("<h3 style='text-align: center;'>3.2 Feature Importance.</h3>", unsafe_allow_html=True)
+            st.markdown("* Feature Importance shows the most critical attributes for your companies' Employee Turnover. The features are listed here in descending order.")
+            #Feature Importance
+            rf_gs = RandomForestClassifier(max_depth=None, min_samples_split=2, n_estimators=500)
+            rf_gs = rf_gs.fit(test, predictions)
+            coef = rf_gs.feature_importances_
+            ind = np.argsort(-coef)
+            #for i in range(test.shape[1]):
+                #st.write("%d. %s (%f)" % (i + 1 , test.columns[ind[i]], coef[ind[i]]))
+            x = range(test.shape[1])
+            y = coef[ind][:test.shape[1]]
+            #plot the feature importance
+            fi_fig = plt.figure(figsize=(6, 8))
+            plt.title("Feature importances")
+            ax = plt.subplot()
+            plt.barh(x, y)
+            ax.set_yticks(x)
+            ax.set_yticklabels(test.columns[ind])
+            plt.gca().invert_yaxis()
+            st.pyplot(fi_fig)
+        with feature_dist:
+            st.markdown("<h3 style='text-align: center;'>3.3 Top 15 Feature's Values Distribution.</h3>", unsafe_allow_html=True)
+            st.markdown("* By exploring the histograms below, you can find out the most common values or value ranges for each attribute and make decisions accordingly to decrease your organization's employee turnover rate.  ")
+            ds_df=pd.DataFrame(data={"Age":df["Age"], "MonthlyIncome":df["MonthlyIncome"], "TotalWorkingYears":df["TotalWorkingYears"], "MonthlyRate":df["MonthlyRate"],
+             "DistanceFromHome":df["DistanceFromHome"], "YearsAtCompany":df["YearsAtCompany"], "PercentSalaryHike":df["PercentSalaryHike"],
+             "YearsWithCurrManager":df["YearsWithCurrManager"], "EnvironmentSatisfaction":df["EnvironmentSatisfaction"], "JobInvolvement":df["JobInvolvement"], 
+             "WorkLifeBalance":df["WorkLifeBalance"], "NumCompaniesWorked":df["NumCompaniesWorked"], "YearsSinceLastPromotion":df["YearsSinceLastPromotion"], 
+             "JobSatisfaction":df["JobSatisfaction"],"OverTime":test["OverTime"],"Predictions":predictions})
+            leaving_ds = ds_df.loc[ds_df['Predictions'] == 1]
+            final_leaving_ds = leaving_ds.drop( columns= ['Predictions'] , axis = 1)
+            st.set_option('deprecation.showPyplotGlobalUse', False)#disable this warning by disabling t
+            #Histograms 
+            final_leaving_ds.hist(bins = 30, figsize = (15,15), color = 'b')
+            plt.show()
+            st.pyplot()
\ No newline at end of file