In [2]:
# importing libraries    
import numpy as np   
import matplotlib.pyplot as plt    
import pandas as pd    
from sklearn.cluster import KMeans  
In [3]:
# Load the raw records and discard every row whose 'game' value is
# 'Focused' or 'Sustained'; the remaining rows keep their original index labels.
raw_records = pd.read_csv('All.csv')
is_excluded_game = (raw_records['game'] == 'Focused') | (raw_records['game'] == 'Sustained')
dataset = raw_records.loc[~is_excluded_game].copy()
display(dataset)
# Summary statistics of the numeric columns
dataset.describe()
id child_gender sequence_of_responses sequence_of_stimuli colour order_of_selection sequence_of_sides no_of_clicks total_correct_responses correct_responses commission_errors omission_errors child_age mean_reaction_time total_duration diagnosis percentage_no_of_correct_responses game CER OER
0 1 2 [M, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... red star, red crab, white bunny, pink pig, bro... NaN NaN right, right, left, left, left, left, right, l... NaN 19 18 0 1 4 1479 57000 Yes 94.736842 Alternating 0.00 1.00
1 2 1 [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... red star, red crab, white bunny, red fish, blu... NaN NaN right, right, left, right, right, left, left, ... NaN 19 19 0 0 4 1605 57000 No 100.000000 Alternating 0.00 0.00
2 3 2 [M, C, C, C, W, C, C, C, C, W, C, W, C, C, C, ... red star, white bunny, pink pig, brown dog, re... NaN NaN right, left, left, left, right, left, left, le... NaN 19 18 3 1 4 1404 57000 No 94.736842 Alternating 0.75 0.25
3 4 2 [C, C, C, W, C, C, C, C, W, C, C, C, C, C, W, ... white bunny, pink pig, brown dog, red star, pa... NaN NaN left, left, left, right, left, left, right, le... NaN 19 19 4 0 4 1782 57000 No 100.000000 Alternating 1.00 0.00
4 5 2 [C, C, C, W, C, C, C, W, C, W, C, W, C, C, C, ... red star, red crab, white bunny, red fish, blu... NaN NaN right, right, left, right, right, right, left,... NaN 19 19 6 0 4 1258 57000 No 100.000000 Alternating 1.00 0.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
206 207 2 0 [C, C, C, C, C, C, C, C, C, C] bee yellow yellow_bee, yellow_bee, yellow_bee, yellow_bee... 10 6 6 0 0 5 0 25719 No 100.000000 Selective 0.00 0.00
207 208 2 0 [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... bird green yellow_bee, yellow_bee, yellow_bee, yellow_bee... 17 6 6 0 0 5 0 50461 No 100.000000 Selective 0.00 0.00
208 209 2 0 [C, C, C, C, C, C, C, C, C, C, C, C, W] butterfly blue yellow_bee, yellow_bee, yellow_bee, yellow_bee... 13 7 7 1 0 5 0 19886 No 100.000000 Selective 1.00 0.00
209 210 2 0 [C, W, C, C, C, W, C, C, C, C, C, W, W, W] flower pink yellow_bee, yellow_bee, yellow_bee, yellow_bee... 14 6 6 5 0 5 0 42199 No 100.000000 Selective 1.00 0.00
210 211 2 1 [C, C, C] butterfly blue blue_buterfly, blue_buterfly, blue_buterfly, 3 6 3 0 3 5 0 10381 No 50.000000 Selective 0.00 1.00

151 rows × 20 columns

Out[3]:
id child_gender total_correct_responses correct_responses commission_errors omission_errors child_age mean_reaction_time total_duration percentage_no_of_correct_responses CER OER
count 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000 151.000000
mean 111.430464 1.589404 8.635762 7.927152 0.933775 0.708609 4.490066 347.854305 26339.344371 91.950074 0.241839 0.215115
std 63.645216 0.493579 4.388597 4.291230 1.871430 1.458718 0.501565 573.407155 22521.621230 17.416068 0.389720 0.370503
min 1.000000 1.000000 6.000000 1.000000 0.000000 0.000000 4.000000 0.000000 3000.000000 12.500000 0.000000 0.000000
25% 64.500000 1.000000 6.000000 6.000000 0.000000 0.000000 4.000000 0.000000 10479.000000 88.486842 0.000000 0.000000
50% 102.000000 2.000000 7.000000 6.000000 0.000000 0.000000 4.000000 0.000000 13415.000000 100.000000 0.000000 0.000000
75% 173.500000 2.000000 8.000000 8.000000 1.000000 1.000000 5.000000 952.500000 57000.000000 100.000000 0.500000 0.366667
max 211.000000 2.000000 19.000000 19.000000 11.000000 7.000000 5.000000 1782.000000 70000.000000 100.000000 1.000000 1.000000
In [4]:
# Move CER, OER and child_age (in that order) to the end of the frame so that
# they become its last three columns.
for trailing in ('CER', 'OER', 'child_age'):
    cols = dataset.columns.tolist()
    cols.remove(trailing)  # raises ValueError if the column is missing
    dataset = dataset[cols + [trailing]]

dataset.head()
Out[4]:
id child_gender sequence_of_responses sequence_of_stimuli colour order_of_selection sequence_of_sides no_of_clicks total_correct_responses correct_responses commission_errors omission_errors mean_reaction_time total_duration diagnosis percentage_no_of_correct_responses game CER OER child_age
0 1 2 [M, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... red star, red crab, white bunny, pink pig, bro... NaN NaN right, right, left, left, left, left, right, l... NaN 19 18 0 1 1479 57000 Yes 94.736842 Alternating 0.00 1.00 4
1 2 1 [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... red star, red crab, white bunny, red fish, blu... NaN NaN right, right, left, right, right, left, left, ... NaN 19 19 0 0 1605 57000 No 100.000000 Alternating 0.00 0.00 4
2 3 2 [M, C, C, C, W, C, C, C, C, W, C, W, C, C, C, ... red star, white bunny, pink pig, brown dog, re... NaN NaN right, left, left, left, right, left, left, le... NaN 19 18 3 1 1404 57000 No 94.736842 Alternating 0.75 0.25 4
3 4 2 [C, C, C, W, C, C, C, C, W, C, C, C, C, C, W, ... white bunny, pink pig, brown dog, red star, pa... NaN NaN left, left, left, right, left, left, right, le... NaN 19 19 4 0 1782 57000 No 100.000000 Alternating 1.00 0.00 4
4 5 2 [C, C, C, W, C, C, C, W, C, W, C, W, C, C, C, ... red star, red crab, white bunny, red fish, blu... NaN NaN right, right, left, right, right, right, left,... NaN 19 19 6 0 1258 57000 No 100.000000 Alternating 1.00 0.00 4
In [5]:
# Feature matrix used for clustering: one row per dataset row, with the
# columns CER, OER and child_age. The columns are selected by name so the
# result does not depend on where they happen to sit in the frame.
FEATURE_COLUMNS = ['CER', 'OER', 'child_age']
X = dataset[FEATURE_COLUMNS].to_numpy()
display(X)
array([[0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.75      , 0.25      , 4.        ],
       [1.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.55555556, 0.44444444, 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.25      , 0.75      , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.5       , 0.5       , 4.        ],
       [0.4       , 0.6       , 4.        ],
       [0.8       , 0.2       , 4.        ],
       [0.33333333, 0.66666667, 4.        ],
       [0.5       , 0.5       , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.8       , 0.2       , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.75      , 0.25      , 4.        ],
       [0.66666667, 0.33333333, 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [1.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [0.        , 1.        , 4.        ],
       [0.        , 0.        , 4.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.5       , 0.5       , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.8       , 0.2       , 5.        ],
       [0.66666667, 0.33333333, 5.        ],
       [1.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.4       , 0.6       , 5.        ],
       [0.6       , 0.4       , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.6       , 0.4       , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.5       , 0.5       , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.6       , 0.4       , 5.        ],
       [0.54545455, 0.45454545, 5.        ],
       [0.        , 0.        , 5.        ],
       [0.5       , 0.5       , 5.        ],
       [0.        , 1.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.5       , 0.5       , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [0.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [1.        , 0.        , 5.        ],
       [0.        , 1.        , 5.        ]])
In [6]:
# Standardize every feature column of X with a StandardScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

# Describe the scaled columns to check the result of the rescaling
pd.DataFrame(data=X).describe()
Out[6]:
0 1 2
count 1.510000e+02 1.510000e+02 1.510000e+02
mean 4.043859e-17 5.881976e-17 -1.882232e-16
std 1.003328e+00 1.003328e+00 1.003328e+00
min -6.226101e-01 -5.825338e-01 -9.803259e-01
25% -6.226101e-01 -5.825338e-01 -9.803259e-01
50% -6.226101e-01 -5.825338e-01 -9.803259e-01
75% 6.646313e-01 4.104049e-01 1.020069e+00
max 1.951873e+00 2.125481e+00 1.020069e+00
In [7]:
# Elbow method: fit k-means for k = 1..10 and record the fitted model's
# inertia_ (within-cluster sum of squares) for each k.
cluster_counts = range(1, 11)
wcss = []
for i in cluster_counts:
    # n_init is passed explicitly because its default value differs between
    # scikit-learn releases; pinning it keeps the curve reproducible.
    k_means = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    k_means.fit(X)
    wcss.append(k_means.inertia_)

# Plot the elbow curve: WCSS against the number of clusters
plt.plot(list(cluster_counts), wcss)
plt.title('The Elbow Method Graph')
plt.xlabel('Number of clusters(k)')
plt.ylabel('WCSS')
plt.show()
In [8]:
# Fit the final model with 4 clusters and obtain one cluster label per row of X.
# n_init is passed explicitly because its default value differs between
# scikit-learn releases; pinning it keeps the labels reproducible.
k_means_optimum = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=42)
y = k_means_optimum.fit_predict(X)
print(y)

# Attach the cluster label of each row as a new 'cluster' column. assign()
# builds a new frame, so nothing is written into a selection of another frame.
dataset = dataset.assign(cluster=y)

# One sub-frame per cluster label (0..3)
data1 = dataset[dataset.cluster == 0]
data2 = dataset[dataset.cluster == 1]
data3 = dataset[dataset.cluster == 2]
data4 = dataset[dataset.cluster == 3]

kplot = plt.axes(projection='3d')
# Black diagonal reference line from (0, 0, 0) to (15, 15, 15)
xline = np.linspace(0, 15, 1000)
yline = np.linspace(0, 15, 1000)
zline = np.linspace(0, 15, 1000)
kplot.plot3D(xline, yline, zline, 'black')
# Three-dimensional scatter of the rows, coloured by cluster.
# NOTE(review): the points are positioned by the raw commission_errors /
# omission_errors columns, not by CER / OER -- confirm this is intended.
for subset, colour, name in (
    (data1, 'blue', 'Cluster 1'),
    (data2, 'red', 'Cluster 2'),
    (data3, 'green', 'Cluster 3'),
    (data4, 'deeppink', 'Cluster 4'),
):
    kplot.scatter3D(subset.commission_errors, subset.omission_errors, subset.child_age, c=colour, label=name)
# Label the axes and draw the legend so the per-cluster labels are visible
kplot.set_xlabel('commission_errors')
kplot.set_ylabel('omission_errors')
kplot.set_zlabel('child_age')
kplot.legend()
plt.show()
[3 2 0 0 0 0 0 3 3 3 2 0 3 0 3 0 0 0 0 2 0 0 2 2 2 2 3 0 2 2 2 2 2 2 2 2 2
 3 2 3 2 2 3 3 0 3 2 3 3 2 3 2 2 0 2 2 3 2 2 2 2 2 2 2 0 2 2 0 0 2 2 2 0 2
 2 3 2 0 3 0 1 1 0 0 0 0 3 0 0 0 1 0 3 0 0 1 0 3 0 0 1 1 1 1 1 1 1 1 1 1 1
 0 1 3 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 3 1 0 1 1 3 1 1 1 1 3 0 3 1 1 1 1 1 1
 0 0 3]
Out[8]:
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x2fe65160>
In [9]:
# Keep only the three clustering features (selected by name rather than by
# position) and attach the cluster label of each row.
new_df = dataset[['CER', 'OER', 'child_age']].copy()
new_df['clusters'] = y
display(new_df)
CER OER child_age clusters
0 0.00 1.00 4 3
1 0.00 0.00 4 2
2 0.75 0.25 4 0
3 1.00 0.00 4 0
4 1.00 0.00 4 0
... ... ... ... ...
206 0.00 0.00 5 1
207 0.00 0.00 5 1
208 1.00 0.00 5 0
209 1.00 0.00 5 0
210 0.00 1.00 5 3

151 rows × 4 columns

Cluster Analysis

Cluster 1 (rows with cluster label 0)

In [10]:
# Number of rows assigned to cluster label 0
new_df.loc[new_df["clusters"] == 0].shape[0]
Out[10]:
42
In [11]:
# Print the smallest and largest CER and OER among the rows labelled 0
cluster_0 = new_df.loc[new_df["clusters"] == 0]

cer_values = cluster_0['CER']
print("CER min - ", cer_values.min())
print("CER max - ", cer_values.max())
print()

oer_values = cluster_0['OER']
print("OER min - ", oer_values.min())
print("OER max - ", oer_values.max())
CER min -  0.5
CER max -  1.0

OER min -  0.0
OER max -  0.5
In [12]:
# Show the rows labelled 0, then box-plot their CER values
cluster_0 = new_df.loc[new_df["clusters"] == 0]
display(cluster_0)
cluster_0.boxplot(grid=False, column=['CER'])
CER OER child_age clusters
2 0.750000 0.250000 4 0
3 1.000000 0.000000 4 0
4 1.000000 0.000000 4 0
5 0.555556 0.444444 4 0
6 1.000000 0.000000 4 0
11 0.500000 0.500000 4 0
13 0.800000 0.200000 4 0
15 0.500000 0.500000 4 0
16 1.000000 0.000000 4 0
17 0.800000 0.200000 4 0
18 1.000000 0.000000 4 0
20 0.750000 0.250000 4 0
21 0.666667 0.333333 4 0
53 1.000000 0.000000 4 0
70 1.000000 0.000000 4 0
79 1.000000 0.000000 4 0
90 1.000000 0.000000 4 0
93 1.000000 0.000000 4 0
94 1.000000 0.000000 4 0
98 1.000000 0.000000 4 0
111 1.000000 0.000000 5 0
113 0.500000 0.500000 5 0
116 0.800000 0.200000 5 0
117 0.666667 0.333333 5 0
118 1.000000 0.000000 5 0
119 1.000000 0.000000 5 0
121 0.600000 0.400000 5 0
122 1.000000 0.000000 5 0
123 0.600000 0.400000 5 0
125 0.500000 0.500000 5 0
127 0.600000 0.400000 5 0
128 0.545455 0.454545 5 0
130 0.500000 0.500000 5 0
158 1.000000 0.000000 5 0
159 1.000000 0.000000 5 0
171 1.000000 0.000000 5 0
181 1.000000 0.000000 5 0
187 0.500000 0.500000 5 0
191 1.000000 0.000000 5 0
200 1.000000 0.000000 5 0
208 1.000000 0.000000 5 0
209 1.000000 0.000000 5 0
Out[12]:
<AxesSubplot:>
In [13]:
# Box plot of the OER values of the rows labelled 0
cluster_0.boxplot(grid=False, column=['OER'])
Out[13]:
<AxesSubplot:>

Cluster 2 (rows with cluster label 1)

In [14]:
# Number of rows assigned to cluster label 1
new_df.loc[new_df["clusters"] == 1].shape[0]
Out[14]:
42
In [15]:
# Print the smallest and largest CER and OER among the rows labelled 1
cluster_1 = new_df.loc[new_df["clusters"] == 1]

cer_values = cluster_1['CER']
print("CER min - ", cer_values.min())
print("CER max - ", cer_values.max())
print()

oer_values = cluster_1['OER']
print("OER min - ", oer_values.min())
print("OER max - ", oer_values.max())
CER min -  0.0
CER max -  0.0

OER min -  0.0
OER max -  0.0
In [16]:
# Show the rows labelled 1, then box-plot their CER values
cluster_1 = new_df.loc[new_df["clusters"] == 1]
display(cluster_1)
cluster_1.boxplot(grid=False, column=['CER'])
CER OER child_age clusters
114 0.0 0.0 5 1
115 0.0 0.0 5 1
124 0.0 0.0 5 1
129 0.0 0.0 5 1
160 0.0 0.0 5 1
161 0.0 0.0 5 1
162 0.0 0.0 5 1
163 0.0 0.0 5 1
164 0.0 0.0 5 1
165 0.0 0.0 5 1
166 0.0 0.0 5 1
167 0.0 0.0 5 1
168 0.0 0.0 5 1
169 0.0 0.0 5 1
170 0.0 0.0 5 1
172 0.0 0.0 5 1
174 0.0 0.0 5 1
175 0.0 0.0 5 1
176 0.0 0.0 5 1
177 0.0 0.0 5 1
178 0.0 0.0 5 1
179 0.0 0.0 5 1
180 0.0 0.0 5 1
182 0.0 0.0 5 1
183 0.0 0.0 5 1
184 0.0 0.0 5 1
185 0.0 0.0 5 1
186 0.0 0.0 5 1
188 0.0 0.0 5 1
190 0.0 0.0 5 1
192 0.0 0.0 5 1
193 0.0 0.0 5 1
195 0.0 0.0 5 1
196 0.0 0.0 5 1
197 0.0 0.0 5 1
198 0.0 0.0 5 1
202 0.0 0.0 5 1
203 0.0 0.0 5 1
204 0.0 0.0 5 1
205 0.0 0.0 5 1
206 0.0 0.0 5 1
207 0.0 0.0 5 1
Out[16]:
<AxesSubplot:>
In [17]:
# Box plot of the OER values of the rows labelled 1
cluster_1.boxplot(grid=False, column=['OER'])
Out[17]:
<AxesSubplot:>

Cluster 3 (rows with cluster label 2)

In [18]:
# Number of rows assigned to cluster label 2
new_df.loc[new_df["clusters"] == 2].shape[0]
Out[18]:
40
In [19]:
# Print the smallest and largest CER and OER among the rows labelled 2
cluster_2 = new_df.loc[new_df["clusters"] == 2]

cer_values = cluster_2['CER']
print("CER min - ", cer_values.min())
print("CER max - ", cer_values.max())
print()

oer_values = cluster_2['OER']
print("OER min - ", oer_values.min())
print("OER max - ", oer_values.max())
CER min -  0.0
CER max -  0.0

OER min -  0.0
OER max -  0.0
In [20]:
# Show the rows labelled 2, then box-plot their CER values
cluster_2 = new_df.loc[new_df["clusters"] == 2]
display(cluster_2)
cluster_2.boxplot(grid=False, column=['CER'])
CER OER child_age clusters
1 0.0 0.0 4 2
10 0.0 0.0 4 2
19 0.0 0.0 4 2
48 0.0 0.0 4 2
49 0.0 0.0 4 2
50 0.0 0.0 4 2
51 0.0 0.0 4 2
54 0.0 0.0 4 2
55 0.0 0.0 4 2
56 0.0 0.0 4 2
57 0.0 0.0 4 2
58 0.0 0.0 4 2
59 0.0 0.0 4 2
60 0.0 0.0 4 2
61 0.0 0.0 4 2
62 0.0 0.0 4 2
64 0.0 0.0 4 2
66 0.0 0.0 4 2
67 0.0 0.0 4 2
72 0.0 0.0 4 2
75 0.0 0.0 4 2
77 0.0 0.0 4 2
78 0.0 0.0 4 2
80 0.0 0.0 4 2
81 0.0 0.0 4 2
83 0.0 0.0 4 2
84 0.0 0.0 4 2
85 0.0 0.0 4 2
86 0.0 0.0 4 2
87 0.0 0.0 4 2
88 0.0 0.0 4 2
89 0.0 0.0 4 2
91 0.0 0.0 4 2
92 0.0 0.0 4 2
95 0.0 0.0 4 2
96 0.0 0.0 4 2
97 0.0 0.0 4 2
99 0.0 0.0 4 2
100 0.0 0.0 4 2
102 0.0 0.0 4 2
Out[20]:
<AxesSubplot:>
In [21]:
# Box plot of the OER values of the rows labelled 2
cluster_2.boxplot(grid=False, column=['OER'])
Out[21]:
<AxesSubplot:>

Cluster 4 (rows with cluster label 3)

In [22]:
# Number of rows assigned to cluster label 3
new_df.loc[new_df["clusters"] == 3].shape[0]
Out[22]:
27
In [23]:
# Print the smallest and largest CER and OER among the rows labelled 3.
# The frame is named cluster_4 (not cluster_3); the combined box-plot cells
# read it under that name.
cluster_4 = new_df.loc[new_df["clusters"] == 3]

cer_values = cluster_4['CER']
print("CER min - ", cer_values.min())
print("CER max - ", cer_values.max())
print()

oer_values = cluster_4['OER']
print("OER min - ", oer_values.min())
print("OER max - ", oer_values.max())
CER min -  0.0
CER max -  0.4

OER min -  0.6
OER max -  1.0
In [24]:
# Show the rows labelled 3 (held in cluster_4), then box-plot their CER values
cluster_4 = new_df.loc[new_df["clusters"] == 3]
display(cluster_4)
cluster_4.boxplot(grid=False, column=['CER'])
CER OER child_age clusters
0 0.000000 1.000000 4 3
7 0.000000 1.000000 4 3
8 0.000000 1.000000 4 3
9 0.250000 0.750000 4 3
12 0.400000 0.600000 4 3
14 0.333333 0.666667 4 3
52 0.000000 1.000000 4 3
63 0.000000 1.000000 4 3
65 0.000000 1.000000 4 3
68 0.000000 1.000000 4 3
69 0.000000 1.000000 4 3
71 0.000000 1.000000 4 3
73 0.000000 1.000000 4 3
74 0.000000 1.000000 4 3
76 0.000000 1.000000 4 3
82 0.000000 1.000000 4 3
101 0.000000 1.000000 4 3
112 0.000000 1.000000 5 3
120 0.400000 0.600000 5 3
126 0.000000 1.000000 5 3
157 0.000000 1.000000 5 3
173 0.000000 1.000000 5 3
189 0.000000 1.000000 5 3
194 0.000000 1.000000 5 3
199 0.000000 1.000000 5 3
201 0.000000 1.000000 5 3
210 0.000000 1.000000 5 3
Out[24]:
<AxesSubplot:>
In [25]:
# Box plot of the OER values of the rows labelled 3 (held in cluster_4)
cluster_4.boxplot(grid=False, column=['OER'])
Out[25]:
<AxesSubplot:>
In [26]:
# Side-by-side box plots of OER for the four clusters.
# Each Series keeps its own row labels, so the frame is aligned on the union
# of those labels and holds NaN where a row does not belong to that cluster.
data = pd.DataFrame({
    "Cluster1": cluster_0['OER'],
    "Cluster2": cluster_1['OER'],
    "Cluster3": cluster_2['OER'],
    "Cluster4": cluster_4['OER'],
})

# The title and y-label name the metric so this figure can be told apart from
# the CER figure.
ax = data.plot(kind='box', title='OER by cluster')
ax.set_ylabel('OER')

# Display the plot
plt.show()
In [27]:
# Side-by-side box plots of CER for the four clusters.
# Each Series keeps its own row labels, so the frame is aligned on the union
# of those labels and holds NaN where a row does not belong to that cluster.
data = pd.DataFrame({
    "Cluster1": cluster_0['CER'],
    "Cluster2": cluster_1['CER'],
    "Cluster3": cluster_2['CER'],
    "Cluster4": cluster_4['CER'],
})

# The title and y-label name the metric so this figure can be told apart from
# the OER figure.
ax = data.plot(kind='box', title='CER by cluster')
ax.set_ylabel('CER')

# Display the plot
plt.show()