# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
# Load the game results from disk.
dataset = pd.read_csv('All.csv')
# Discard, in place, every row whose game type is 'Focused' or 'Sustained'.
excluded_games = ['Focused', 'Sustained']
dataset.drop(dataset.index[dataset['game'].isin(excluded_games)], inplace=True)
display(dataset)
# Summary statistics of the remaining rows
dataset.describe()
id | child_gender | sequence_of_responses | sequence_of_stimuli | colour | order_of_selection | sequence_of_sides | no_of_clicks | total_correct_responses | correct_responses | commission_errors | omission_errors | child_age | mean_reaction_time | total_duration | diagnosis | percentage_no_of_correct_responses | game | CER | OER | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | [M, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, pink pig, bro... | NaN | NaN | right, right, left, left, left, left, right, l... | NaN | 19 | 18 | 0 | 1 | 4 | 1479 | 57000 | Yes | 94.736842 | Alternating | 0.00 | 1.00 |
1 | 2 | 1 | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, left, left, ... | NaN | 19 | 19 | 0 | 0 | 4 | 1605 | 57000 | No | 100.000000 | Alternating | 0.00 | 0.00 |
2 | 3 | 2 | [M, C, C, C, W, C, C, C, C, W, C, W, C, C, C, ... | red star, white bunny, pink pig, brown dog, re... | NaN | NaN | right, left, left, left, right, left, left, le... | NaN | 19 | 18 | 3 | 1 | 4 | 1404 | 57000 | No | 94.736842 | Alternating | 0.75 | 0.25 |
3 | 4 | 2 | [C, C, C, W, C, C, C, C, W, C, C, C, C, C, W, ... | white bunny, pink pig, brown dog, red star, pa... | NaN | NaN | left, left, left, right, left, left, right, le... | NaN | 19 | 19 | 4 | 0 | 4 | 1782 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 |
4 | 5 | 2 | [C, C, C, W, C, C, C, W, C, W, C, W, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, right, left,... | NaN | 19 | 19 | 6 | 0 | 4 | 1258 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
206 | 207 | 2 | 0 | [C, C, C, C, C, C, C, C, C, C] | bee | yellow | yellow_bee, yellow_bee, yellow_bee, yellow_bee... | 10 | 6 | 6 | 0 | 0 | 5 | 0 | 25719 | No | 100.000000 | Selective | 0.00 | 0.00 |
207 | 208 | 2 | 0 | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | bird | green | yellow_bee, yellow_bee, yellow_bee, yellow_bee... | 17 | 6 | 6 | 0 | 0 | 5 | 0 | 50461 | No | 100.000000 | Selective | 0.00 | 0.00 |
208 | 209 | 2 | 0 | [C, C, C, C, C, C, C, C, C, C, C, C, W] | butterfly | blue | yellow_bee, yellow_bee, yellow_bee, yellow_bee... | 13 | 7 | 7 | 1 | 0 | 5 | 0 | 19886 | No | 100.000000 | Selective | 1.00 | 0.00 |
209 | 210 | 2 | 0 | [C, W, C, C, C, W, C, C, C, C, C, W, W, W] | flower | pink | yellow_bee, yellow_bee, yellow_bee, yellow_bee... | 14 | 6 | 6 | 5 | 0 | 5 | 0 | 42199 | No | 100.000000 | Selective | 1.00 | 0.00 |
210 | 211 | 2 | 1 | [C, C, C] | butterfly | blue | blue_buterfly, blue_buterfly, blue_buterfly, | 3 | 6 | 3 | 0 | 3 | 5 | 0 | 10381 | No | 50.000000 | Selective | 0.00 | 1.00 |
151 rows × 20 columns
id | child_gender | total_correct_responses | correct_responses | commission_errors | omission_errors | child_age | mean_reaction_time | total_duration | percentage_no_of_correct_responses | CER | OER | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 | 151.000000 |
mean | 111.430464 | 1.589404 | 8.635762 | 7.927152 | 0.933775 | 0.708609 | 4.490066 | 347.854305 | 26339.344371 | 91.950074 | 0.241839 | 0.215115 |
std | 63.645216 | 0.493579 | 4.388597 | 4.291230 | 1.871430 | 1.458718 | 0.501565 | 573.407155 | 22521.621230 | 17.416068 | 0.389720 | 0.370503 |
min | 1.000000 | 1.000000 | 6.000000 | 1.000000 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 3000.000000 | 12.500000 | 0.000000 | 0.000000 |
25% | 64.500000 | 1.000000 | 6.000000 | 6.000000 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 10479.000000 | 88.486842 | 0.000000 | 0.000000 |
50% | 102.000000 | 2.000000 | 7.000000 | 6.000000 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 13415.000000 | 100.000000 | 0.000000 | 0.000000 |
75% | 173.500000 | 2.000000 | 8.000000 | 8.000000 | 1.000000 | 1.000000 | 5.000000 | 952.500000 | 57000.000000 | 100.000000 | 0.500000 | 0.366667 |
max | 211.000000 | 2.000000 | 19.000000 | 19.000000 | 11.000000 | 7.000000 | 5.000000 | 1782.000000 | 70000.000000 | 100.000000 | 1.000000 | 1.000000 |
# Move the three feature columns to the end of the frame, one after the
# other, so they finish in the order CER, OER, child_age. All remaining
# columns keep their relative order.
for trailing_name in ('CER', 'OER', 'child_age'):
    cols = [name for name in dataset.columns if name != trailing_name]
    dataset = dataset[cols + [trailing_name]]
dataset.head()
id | child_gender | sequence_of_responses | sequence_of_stimuli | colour | order_of_selection | sequence_of_sides | no_of_clicks | total_correct_responses | correct_responses | commission_errors | omission_errors | mean_reaction_time | total_duration | diagnosis | percentage_no_of_correct_responses | game | CER | OER | child_age | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | [M, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, pink pig, bro... | NaN | NaN | right, right, left, left, left, left, right, l... | NaN | 19 | 18 | 0 | 1 | 1479 | 57000 | Yes | 94.736842 | Alternating | 0.00 | 1.00 | 4 |
1 | 2 | 1 | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, left, left, ... | NaN | 19 | 19 | 0 | 0 | 1605 | 57000 | No | 100.000000 | Alternating | 0.00 | 0.00 | 4 |
2 | 3 | 2 | [M, C, C, C, W, C, C, C, C, W, C, W, C, C, C, ... | red star, white bunny, pink pig, brown dog, re... | NaN | NaN | right, left, left, left, right, left, left, le... | NaN | 19 | 18 | 3 | 1 | 1404 | 57000 | No | 94.736842 | Alternating | 0.75 | 0.25 | 4 |
3 | 4 | 2 | [C, C, C, W, C, C, C, C, W, C, C, C, C, C, W, ... | white bunny, pink pig, brown dog, red star, pa... | NaN | NaN | left, left, left, right, left, left, right, le... | NaN | 19 | 19 | 4 | 0 | 1782 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 | 4 |
4 | 5 | 2 | [C, C, C, W, C, C, C, W, C, W, C, W, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, right, left,... | NaN | 19 | 19 | 6 | 0 | 1258 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 | 4 |
# Feature matrix used for clustering: one row per record, with the columns
# CER, OER and child_age. The columns are selected by name so the result
# does not depend on where they happen to sit in the frame.
feature_columns = ['CER', 'OER', 'child_age']
X = dataset[feature_columns].values
display(X)
array([[0. , 1. , 4. ], [0. , 0. , 4. ], [0.75 , 0.25 , 4. ], [1. , 0. , 4. ], [1. , 0. , 4. ], [0.55555556, 0.44444444, 4. ], [1. , 0. , 4. ], [0. , 1. , 4. ], [0. , 1. , 4. ], [0.25 , 0.75 , 4. ], [0. , 0. , 4. ], [0.5 , 0.5 , 4. ], [0.4 , 0.6 , 4. ], [0.8 , 0.2 , 4. ], [0.33333333, 0.66666667, 4. ], [0.5 , 0.5 , 4. ], [1. , 0. , 4. ], [0.8 , 0.2 , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0.75 , 0.25 , 4. ], [0.66666667, 0.33333333, 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 1. , 4. ], [1. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [1. , 0. , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [1. , 0. , 4. ], [0. , 0. , 4. ], [0. , 0. , 4. ], [0. , 1. , 4. ], [0. , 0. , 4. ], [1. , 0. , 5. ], [0. , 1. , 5. ], [0.5 , 0.5 , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0.8 , 0.2 , 5. ], [0.66666667, 0.33333333, 5. ], [1. , 0. , 5. ], [1. , 0. , 5. ], [0.4 , 0.6 , 5. ], [0.6 , 0.4 , 5. ], [1. , 0. , 5. ], [0.6 , 0.4 , 5. ], [0. , 0. , 5. ], [0.5 , 0.5 , 5. ], [0. , 1. , 5. ], [0.6 , 0.4 , 5. ], [0.54545455, 0.45454545, 5. ], [0. , 0. , 5. ], [0.5 , 0.5 , 5. ], [0. , 1. , 5. ], [1. , 0. , 5. ], [1. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. 
], [1. , 0. , 5. ], [0. , 0. , 5. ], [0. , 1. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [1. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0.5 , 0.5 , 5. ], [0. , 0. , 5. ], [0. , 1. , 5. ], [0. , 0. , 5. ], [1. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 1. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 1. , 5. ], [1. , 0. , 5. ], [0. , 1. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [0. , 0. , 5. ], [1. , 0. , 5. ], [1. , 0. , 5. ], [0. , 1. , 5. ]])
# Standardise the features: learn per-column mean and standard deviation
# from X, then replace X with its scaled version.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
# Summary statistics of the scaled features
pd.DataFrame(data=X).describe()
0 | 1 | 2 | |
---|---|---|---|
count | 1.510000e+02 | 1.510000e+02 | 1.510000e+02 |
mean | 4.043859e-17 | 5.881976e-17 | -1.882232e-16 |
std | 1.003328e+00 | 1.003328e+00 | 1.003328e+00 |
min | -6.226101e-01 | -5.825338e-01 | -9.803259e-01 |
25% | -6.226101e-01 | -5.825338e-01 | -9.803259e-01 |
50% | -6.226101e-01 | -5.825338e-01 | -9.803259e-01 |
75% | 6.646313e-01 | 4.104049e-01 | 1.020069e+00 |
max | 1.951873e+00 | 2.125481e+00 | 1.020069e+00 |
# Elbow method: fit k-means for k = 1..10 and record each fit's inertia
# (within-cluster sum of squares).
wcss = []
for i in range(1, 11):
    # n_init is set explicitly because its default differs between
    # scikit-learn releases; 10 restarts keeps results comparable.
    k_means = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    k_means.fit(X)
    wcss.append(k_means.inertia_)
# Plot the elbow curve: inertia against the number of clusters.
plt.plot(np.arange(1, 11), wcss)
plt.title('The Elbow Method Graph')
plt.xlabel('Number of clusters(k)')
plt.ylabel('wcss_list')
plt.show()
# Fit the final model with four clusters and label every row of X.
# n_init is set explicitly because its default differs between scikit-learn
# releases.
k_means_optimum = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=42)
y = k_means_optimum.fit_predict(X)
print(y)
# Attach the cluster label of each row to the full dataset.
dataset['cluster'] = y
data1 = dataset[dataset.cluster == 0]
data2 = dataset[dataset.cluster == 1]
data3 = dataset[dataset.cluster == 2]
data4 = dataset[dataset.cluster == 3]
# 3-D scatter of the clusters in the feature space that was clustered
# (CER, OER, child_age), shown in original, unscaled units.
kplot = plt.axes(projection='3d')
cluster_styles = (
    (data1, 'blue', 'Cluster 1'),
    (data2, 'red', 'Cluster 2'),
    (data3, 'green', 'Cluster 3'),
    (data4, 'deeppink', 'Cluster 4'),
)
for members, colour, name in cluster_styles:
    kplot.scatter3D(members.CER, members.OER, members.child_age, c=colour, label=name)
kplot.set_xlabel('CER')
kplot.set_ylabel('OER')
kplot.set_zlabel('child_age')
# Without a legend the label= arguments above are never displayed.
kplot.legend()
plt.show()
[3 2 0 0 0 0 0 3 3 3 2 0 3 0 3 0 0 0 0 2 0 0 2 2 2 2 3 0 2 2 2 2 2 2 2 2 2 3 2 3 2 2 3 3 0 3 2 3 3 2 3 2 2 0 2 2 3 2 2 2 2 2 2 2 0 2 2 0 0 2 2 2 0 2 2 3 2 0 3 0 1 1 0 0 0 0 3 0 0 0 1 0 3 0 0 1 0 3 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 3 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 3 1 0 1 1 3 1 1 1 1 3 0 3 1 1 1 1 1 1 0 0 3]
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x2fe65160>
# Compact frame holding only the clustered features plus their labels.
# Columns are picked by name rather than by position, so the selection stays
# correct if the column layout of `dataset` changes. The copy keeps the new
# 'clusters' column from touching `dataset`.
new_df = dataset[['CER', 'OER', 'child_age']].copy()
new_df['clusters'] = y
display(new_df)
CER | OER | child_age | clusters | |
---|---|---|---|---|
0 | 0.00 | 1.00 | 4 | 3 |
1 | 0.00 | 0.00 | 4 | 2 |
2 | 0.75 | 0.25 | 4 | 0 |
3 | 1.00 | 0.00 | 4 | 0 |
4 | 1.00 | 0.00 | 4 | 0 |
... | ... | ... | ... | ... |
206 | 0.00 | 0.00 | 5 | 1 |
207 | 0.00 | 0.00 | 5 | 1 |
208 | 1.00 | 0.00 | 5 | 0 |
209 | 1.00 | 0.00 | 5 | 0 |
210 | 0.00 | 1.00 | 5 | 3 |
151 rows × 4 columns
# Number of rows labelled 0
int((new_df["clusters"] == 0).sum())
42
# Print the smallest and largest CER, then OER, among the rows labelled 0.
cluster_0 = new_df.loc[new_df["clusters"] == 0]
for position, rate in enumerate(("CER", "OER")):
    if position:
        print()  # blank line between the two rates
    minVal = cluster_0[rate].min()
    maxVal = cluster_0[rate].max()
    print(f"{rate} min - ", minVal)
    print(f"{rate} max - ", maxVal)
CER min - 0.5 CER max - 1.0 OER min - 0.0 OER max - 0.5
# Show the rows labelled 0 and draw a box plot of their CER values.
cluster_0 = new_df.loc[new_df["clusters"].eq(0)]
display(cluster_0)
cluster_0.boxplot(column="CER", grid=False)
CER | OER | child_age | clusters | |
---|---|---|---|---|
2 | 0.750000 | 0.250000 | 4 | 0 |
3 | 1.000000 | 0.000000 | 4 | 0 |
4 | 1.000000 | 0.000000 | 4 | 0 |
5 | 0.555556 | 0.444444 | 4 | 0 |
6 | 1.000000 | 0.000000 | 4 | 0 |
11 | 0.500000 | 0.500000 | 4 | 0 |
13 | 0.800000 | 0.200000 | 4 | 0 |
15 | 0.500000 | 0.500000 | 4 | 0 |
16 | 1.000000 | 0.000000 | 4 | 0 |
17 | 0.800000 | 0.200000 | 4 | 0 |
18 | 1.000000 | 0.000000 | 4 | 0 |
20 | 0.750000 | 0.250000 | 4 | 0 |
21 | 0.666667 | 0.333333 | 4 | 0 |
53 | 1.000000 | 0.000000 | 4 | 0 |
70 | 1.000000 | 0.000000 | 4 | 0 |
79 | 1.000000 | 0.000000 | 4 | 0 |
90 | 1.000000 | 0.000000 | 4 | 0 |
93 | 1.000000 | 0.000000 | 4 | 0 |
94 | 1.000000 | 0.000000 | 4 | 0 |
98 | 1.000000 | 0.000000 | 4 | 0 |
111 | 1.000000 | 0.000000 | 5 | 0 |
113 | 0.500000 | 0.500000 | 5 | 0 |
116 | 0.800000 | 0.200000 | 5 | 0 |
117 | 0.666667 | 0.333333 | 5 | 0 |
118 | 1.000000 | 0.000000 | 5 | 0 |
119 | 1.000000 | 0.000000 | 5 | 0 |
121 | 0.600000 | 0.400000 | 5 | 0 |
122 | 1.000000 | 0.000000 | 5 | 0 |
123 | 0.600000 | 0.400000 | 5 | 0 |
125 | 0.500000 | 0.500000 | 5 | 0 |
127 | 0.600000 | 0.400000 | 5 | 0 |
128 | 0.545455 | 0.454545 | 5 | 0 |
130 | 0.500000 | 0.500000 | 5 | 0 |
158 | 1.000000 | 0.000000 | 5 | 0 |
159 | 1.000000 | 0.000000 | 5 | 0 |
171 | 1.000000 | 0.000000 | 5 | 0 |
181 | 1.000000 | 0.000000 | 5 | 0 |
187 | 0.500000 | 0.500000 | 5 | 0 |
191 | 1.000000 | 0.000000 | 5 | 0 |
200 | 1.000000 | 0.000000 | 5 | 0 |
208 | 1.000000 | 0.000000 | 5 | 0 |
209 | 1.000000 | 0.000000 | 5 | 0 |
<AxesSubplot:>
# Box plot of the OER values in cluster_0, drawn without grid lines
cluster_0.boxplot(column="OER", grid=False)
<AxesSubplot:>
# Number of rows labelled 1
int((new_df["clusters"] == 1).sum())
42
# Print the smallest and largest CER, then OER, among the rows labelled 1.
cluster_1 = new_df.loc[new_df["clusters"] == 1]
for position, rate in enumerate(("CER", "OER")):
    if position:
        print()  # blank line between the two rates
    minVal = cluster_1[rate].min()
    maxVal = cluster_1[rate].max()
    print(f"{rate} min - ", minVal)
    print(f"{rate} max - ", maxVal)
CER min - 0.0 CER max - 0.0 OER min - 0.0 OER max - 0.0
# Show the rows labelled 1 and draw a box plot of their CER values.
cluster_1 = new_df.loc[new_df["clusters"].eq(1)]
display(cluster_1)
cluster_1.boxplot(column="CER", grid=False)
CER | OER | child_age | clusters | |
---|---|---|---|---|
114 | 0.0 | 0.0 | 5 | 1 |
115 | 0.0 | 0.0 | 5 | 1 |
124 | 0.0 | 0.0 | 5 | 1 |
129 | 0.0 | 0.0 | 5 | 1 |
160 | 0.0 | 0.0 | 5 | 1 |
161 | 0.0 | 0.0 | 5 | 1 |
162 | 0.0 | 0.0 | 5 | 1 |
163 | 0.0 | 0.0 | 5 | 1 |
164 | 0.0 | 0.0 | 5 | 1 |
165 | 0.0 | 0.0 | 5 | 1 |
166 | 0.0 | 0.0 | 5 | 1 |
167 | 0.0 | 0.0 | 5 | 1 |
168 | 0.0 | 0.0 | 5 | 1 |
169 | 0.0 | 0.0 | 5 | 1 |
170 | 0.0 | 0.0 | 5 | 1 |
172 | 0.0 | 0.0 | 5 | 1 |
174 | 0.0 | 0.0 | 5 | 1 |
175 | 0.0 | 0.0 | 5 | 1 |
176 | 0.0 | 0.0 | 5 | 1 |
177 | 0.0 | 0.0 | 5 | 1 |
178 | 0.0 | 0.0 | 5 | 1 |
179 | 0.0 | 0.0 | 5 | 1 |
180 | 0.0 | 0.0 | 5 | 1 |
182 | 0.0 | 0.0 | 5 | 1 |
183 | 0.0 | 0.0 | 5 | 1 |
184 | 0.0 | 0.0 | 5 | 1 |
185 | 0.0 | 0.0 | 5 | 1 |
186 | 0.0 | 0.0 | 5 | 1 |
188 | 0.0 | 0.0 | 5 | 1 |
190 | 0.0 | 0.0 | 5 | 1 |
192 | 0.0 | 0.0 | 5 | 1 |
193 | 0.0 | 0.0 | 5 | 1 |
195 | 0.0 | 0.0 | 5 | 1 |
196 | 0.0 | 0.0 | 5 | 1 |
197 | 0.0 | 0.0 | 5 | 1 |
198 | 0.0 | 0.0 | 5 | 1 |
202 | 0.0 | 0.0 | 5 | 1 |
203 | 0.0 | 0.0 | 5 | 1 |
204 | 0.0 | 0.0 | 5 | 1 |
205 | 0.0 | 0.0 | 5 | 1 |
206 | 0.0 | 0.0 | 5 | 1 |
207 | 0.0 | 0.0 | 5 | 1 |
<AxesSubplot:>
# Box plot of the OER values in cluster_1, drawn without grid lines
cluster_1.boxplot(column="OER", grid=False)
<AxesSubplot:>
# Number of rows labelled 2
int((new_df["clusters"] == 2).sum())
40
# Print the smallest and largest CER, then OER, among the rows labelled 2.
cluster_2 = new_df.loc[new_df["clusters"] == 2]
for position, rate in enumerate(("CER", "OER")):
    if position:
        print()  # blank line between the two rates
    minVal = cluster_2[rate].min()
    maxVal = cluster_2[rate].max()
    print(f"{rate} min - ", minVal)
    print(f"{rate} max - ", maxVal)
CER min - 0.0 CER max - 0.0 OER min - 0.0 OER max - 0.0
# Show the rows labelled 2 and draw a box plot of their CER values.
cluster_2 = new_df.loc[new_df["clusters"].eq(2)]
display(cluster_2)
cluster_2.boxplot(column="CER", grid=False)
CER | OER | child_age | clusters | |
---|---|---|---|---|
1 | 0.0 | 0.0 | 4 | 2 |
10 | 0.0 | 0.0 | 4 | 2 |
19 | 0.0 | 0.0 | 4 | 2 |
48 | 0.0 | 0.0 | 4 | 2 |
49 | 0.0 | 0.0 | 4 | 2 |
50 | 0.0 | 0.0 | 4 | 2 |
51 | 0.0 | 0.0 | 4 | 2 |
54 | 0.0 | 0.0 | 4 | 2 |
55 | 0.0 | 0.0 | 4 | 2 |
56 | 0.0 | 0.0 | 4 | 2 |
57 | 0.0 | 0.0 | 4 | 2 |
58 | 0.0 | 0.0 | 4 | 2 |
59 | 0.0 | 0.0 | 4 | 2 |
60 | 0.0 | 0.0 | 4 | 2 |
61 | 0.0 | 0.0 | 4 | 2 |
62 | 0.0 | 0.0 | 4 | 2 |
64 | 0.0 | 0.0 | 4 | 2 |
66 | 0.0 | 0.0 | 4 | 2 |
67 | 0.0 | 0.0 | 4 | 2 |
72 | 0.0 | 0.0 | 4 | 2 |
75 | 0.0 | 0.0 | 4 | 2 |
77 | 0.0 | 0.0 | 4 | 2 |
78 | 0.0 | 0.0 | 4 | 2 |
80 | 0.0 | 0.0 | 4 | 2 |
81 | 0.0 | 0.0 | 4 | 2 |
83 | 0.0 | 0.0 | 4 | 2 |
84 | 0.0 | 0.0 | 4 | 2 |
85 | 0.0 | 0.0 | 4 | 2 |
86 | 0.0 | 0.0 | 4 | 2 |
87 | 0.0 | 0.0 | 4 | 2 |
88 | 0.0 | 0.0 | 4 | 2 |
89 | 0.0 | 0.0 | 4 | 2 |
91 | 0.0 | 0.0 | 4 | 2 |
92 | 0.0 | 0.0 | 4 | 2 |
95 | 0.0 | 0.0 | 4 | 2 |
96 | 0.0 | 0.0 | 4 | 2 |
97 | 0.0 | 0.0 | 4 | 2 |
99 | 0.0 | 0.0 | 4 | 2 |
100 | 0.0 | 0.0 | 4 | 2 |
102 | 0.0 | 0.0 | 4 | 2 |
<AxesSubplot:>
# Box plot of the OER values in cluster_2, drawn without grid lines
cluster_2.boxplot(column="OER", grid=False)
<AxesSubplot:>
# Number of rows labelled 3
int((new_df["clusters"] == 3).sum())
27
# Print the smallest and largest CER, then OER, among the rows labelled 3.
# NOTE: the variable is called cluster_4 but holds label 3 (the fourth cluster).
cluster_4 = new_df.loc[new_df["clusters"] == 3]
for position, rate in enumerate(("CER", "OER")):
    if position:
        print()  # blank line between the two rates
    minVal = cluster_4[rate].min()
    maxVal = cluster_4[rate].max()
    print(f"{rate} min - ", minVal)
    print(f"{rate} max - ", maxVal)
CER min - 0.0 CER max - 0.4 OER min - 0.6 OER max - 1.0
# Show the rows labelled 3 and draw a box plot of their CER values.
# NOTE: the variable is called cluster_4 but holds label 3 (the fourth cluster).
cluster_4 = new_df.loc[new_df["clusters"].eq(3)]
display(cluster_4)
cluster_4.boxplot(column="CER", grid=False)
CER | OER | child_age | clusters | |
---|---|---|---|---|
0 | 0.000000 | 1.000000 | 4 | 3 |
7 | 0.000000 | 1.000000 | 4 | 3 |
8 | 0.000000 | 1.000000 | 4 | 3 |
9 | 0.250000 | 0.750000 | 4 | 3 |
12 | 0.400000 | 0.600000 | 4 | 3 |
14 | 0.333333 | 0.666667 | 4 | 3 |
52 | 0.000000 | 1.000000 | 4 | 3 |
63 | 0.000000 | 1.000000 | 4 | 3 |
65 | 0.000000 | 1.000000 | 4 | 3 |
68 | 0.000000 | 1.000000 | 4 | 3 |
69 | 0.000000 | 1.000000 | 4 | 3 |
71 | 0.000000 | 1.000000 | 4 | 3 |
73 | 0.000000 | 1.000000 | 4 | 3 |
74 | 0.000000 | 1.000000 | 4 | 3 |
76 | 0.000000 | 1.000000 | 4 | 3 |
82 | 0.000000 | 1.000000 | 4 | 3 |
101 | 0.000000 | 1.000000 | 4 | 3 |
112 | 0.000000 | 1.000000 | 5 | 3 |
120 | 0.400000 | 0.600000 | 5 | 3 |
126 | 0.000000 | 1.000000 | 5 | 3 |
157 | 0.000000 | 1.000000 | 5 | 3 |
173 | 0.000000 | 1.000000 | 5 | 3 |
189 | 0.000000 | 1.000000 | 5 | 3 |
194 | 0.000000 | 1.000000 | 5 | 3 |
199 | 0.000000 | 1.000000 | 5 | 3 |
201 | 0.000000 | 1.000000 | 5 | 3 |
210 | 0.000000 | 1.000000 | 5 | 3 |
<AxesSubplot:>
# Box plot of the OER values in cluster_4, drawn without grid lines
cluster_4.boxplot(column="OER", grid=False)
<AxesSubplot:>
import matplotlib.pyplot as plt
# One column of OER values per cluster. The four series have different row
# labels, so pandas aligns them on the union of indices and fills the gaps
# with NaN.
oer_by_cluster = {
    "Cluster1": cluster_0['OER'],
    "Cluster2": cluster_1['OER'],
    "Cluster3": cluster_2['OER'],
    "Cluster4": cluster_4['OER'],
}
data = pd.DataFrame(oer_by_cluster)
# Side-by-side box plots, one per cluster
ax = data.plot(kind='box', title='boxplot')
# Render the figure
plt.show()
import matplotlib.pyplot as plt
# One column of CER values per cluster. The four series have different row
# labels, so pandas aligns them on the union of indices and fills the gaps
# with NaN.
cer_by_cluster = {
    "Cluster1": cluster_0['CER'],
    "Cluster2": cluster_1['CER'],
    "Cluster3": cluster_2['CER'],
    "Cluster4": cluster_4['CER'],
}
data = pd.DataFrame(cer_by_cluster)
# Side-by-side box plots, one per cluster
ax = data.plot(kind='box', title='boxplot')
# Render the figure
plt.show()