Add new file

13b1dad3 · Withanage Malith Pinsara · 808f5f54 · 13b1dad3
Commit 13b1dad3 authored May 11, 2022 by Withanage Malith Pinsara
Hide whitespace changes
Inline Side-by-side

Showing with 44 additions and 0 deletions

employee turnover prediction employee turnover prediction +44 -0

No files found.
--- a/employee turnover prediction
+++ b/employee turnover prediction
+#Features values distribution exploration
+df.hist(bins = 30, figsize = (20,20), color = 'b')
+#Irrelevant Feature Removal
+print("Value distribution of StandardHours column")
+print(df["StandardHours"].value_counts().sort_values(ascending=False))
+print("Value distribution of EmployeeCount column")
+print(df["EmployeeCount"].value_counts().sort_values(ascending=False))
+print("Value distribution of Over18 column")
+print(df["Over18"].value_counts().sort_values(ascending=False))
+
+#these columns ['Over18', 'StandardHours', 'EmployeeCount'] have only 1 value and its same for aal the entries.
+#so we can drop above 3 cloumns from df
+#moreover we also can drop 'EmployeeNumber' as it is just a ID number 
+#drop the columns and check the shape of df
+df.drop( columns= ['Over18', 'StandardHours', 'EmployeeCount', 'EmployeeNumber' ] , axis = 1, inplace=True )
+print("Dataset shape after irrelevant feature removal", df.shape) 
+df.head(10)
+
+#EDA
+#statistical info about the numeric columns
+df.describe()
+
+# Correlation matrix
+corr_matrix = df.corr()
+corr_matrix
+
+### correlation matrix visually using seaborn heat map
+plt.figure(figsize=(20, 20))
+sns.heatmap(corr_matrix, annot=True)
+
+plt.figure(figsize=[20,20])
+plt.subplot(411)
+sns.countplot(x = 'Age', hue = 'Attrition', data = df)
+plt.subplot(412)
+sns.countplot(x = 'MaritalStatus', hue = 'Attrition', data = df)
+
+plt.figure(figsize=[15,5])
+sns.countplot(y = 'JobRole', hue = 'Attrition', data = df)
+
+plt.figure(figsize=[15,10])
+plt.subplot(413)
+sns.countplot(y = 'JobInvolvement', hue = 'Attrition', data = df)
+plt.subplot(414)
+sns.countplot(y = 'JobLevel', hue = 'Attrition', data = df)
\ No newline at end of file