# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
from sklearn.cluster import DBSCAN
from numpy import unique
from numpy import where
from matplotlib import pyplot
# Load the raw records, then discard every row whose game is 'Focused' or 'Sustained'
dataset = pd.read_csv('A4.csv')
excluded_games = dataset['game'].isin(['Focused', 'Sustained'])
dataset.drop(index=dataset.index[excluded_games], inplace=True)
display(dataset)
# Summary statistics of the rows that remain
dataset.describe()
id | child_gender | child_age | sequence_of_responses | sequence_of_stimuli | colour | order_of_selection | sequence_of_sides | no_of_clicks | total_correct_responses | correct_responses | commission_errors | omission_errors | mean_reaction_time | total_duration | diagnosis | percentage_no_of_correct_responses | game | CER | OER | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | 4 | [M, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, pink pig, bro... | NaN | NaN | right, right, left, left, left, left, right, l... | NaN | 19 | 18 | 0 | 1 | 1479 | 57000 | Yes | 94.736842 | Alternating | 0.00 | 1.00 |
1 | 2 | 1 | 4 | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, left, left, ... | NaN | 19 | 19 | 0 | 0 | 1605 | 57000 | No | 100.000000 | Alternating | 0.00 | 0.00 |
2 | 3 | 2 | 4 | [M, C, C, C, W, C, C, C, C, W, C, W, C, C, C, ... | red star, white bunny, pink pig, brown dog, re... | NaN | NaN | right, left, left, left, right, left, left, le... | NaN | 19 | 18 | 3 | 1 | 1404 | 57000 | No | 94.736842 | Alternating | 0.75 | 0.25 |
3 | 4 | 2 | 4 | [C, C, C, W, C, C, C, C, W, C, C, C, C, C, W, ... | white bunny, pink pig, brown dog, red star, pa... | NaN | NaN | left, left, left, right, left, left, right, le... | NaN | 19 | 19 | 4 | 0 | 1782 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 |
4 | 5 | 2 | 4 | [C, C, C, W, C, C, C, W, C, W, C, W, C, C, C, ... | red star, red crab, white bunny, red fish, blu... | NaN | NaN | right, right, left, right, right, right, left,... | NaN | 19 | 19 | 6 | 0 | 1258 | 57000 | No | 100.000000 | Alternating | 1.00 | 0.00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
98 | 99 | 1 | 4 | [W, W, C, C, C, C, W, W, C, C, C] | ladybird | red | red_ladybird, red_ladybird, red_ladybird, red_... | NaN | 11.0 | 6 | 6 | 4 | 0 | 0 | 22375 | No | 100.000000 | Selective | 1.00 | 0.00 |
99 | 100 | 1 | 4 | [C, C, C, C, C, C] | ladybird | red | red_ladybird, red_ladybird, red_ladybird, red_... | NaN | 6.0 | 6 | 6 | 0 | 0 | 0 | 11325 | No | 100.000000 | Selective | 0.00 | 0.00 |
100 | 101 | 1 | 4 | [C, C, C, C, C, C] | butterfly | blue | red_ladybird, red_ladybird, red_ladybird, red_... | NaN | 6.0 | 6 | 6 | 0 | 0 | 0 | 14820 | No | 100.000000 | Selective | 0.00 | 0.00 |
101 | 102 | 1 | 4 | [C, C, C, C, C, C, C] | bird | green | red_ladybird, red_ladybird, red_ladybird, red_... | NaN | 7.0 | 8 | 7 | 0 | 1 | 0 | 16869 | No | 87.500000 | Selective | 0.00 | 1.00 |
102 | 103 | 1 | 4 | [C, C, C, C, C, C] | bird | green | red_ladybird, red_ladybird, red_ladybird, red_... | NaN | 6.0 | 6 | 6 | 0 | 0 | 0 | 14130 | No | 100.000000 | Selective | 0.00 | 0.00 |
77 rows × 20 columns
id | child_gender | child_age | no_of_clicks | total_correct_responses | correct_responses | commission_errors | omission_errors | mean_reaction_time | total_duration | percentage_no_of_correct_responses | CER | OER | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 77.000000 | 77.00000 | 77.0 | 55.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 | 77.000000 |
mean | 57.571429 | 1.38961 | 4.0 | 6.781818 | 8.688312 | 7.909091 | 0.896104 | 0.779221 | 373.298701 | 27235.090909 | 91.246786 | 0.237734 | 0.242785 |
std | 32.468782 | 0.49086 | 0.0 | 2.087803 | 4.347594 | 4.240102 | 1.602501 | 1.518512 | 605.455267 | 23804.601968 | 18.103695 | 0.391351 | 0.394733 |
min | 1.000000 | 1.00000 | 4.0 | 1.000000 | 6.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 3000.000000 | 12.500000 | 0.000000 | 0.000000 |
25% | 20.000000 | 1.00000 | 4.0 | 6.000000 | 6.000000 | 6.000000 | 0.000000 | 0.000000 | 0.000000 | 10816.000000 | 87.500000 | 0.000000 | 0.000000 |
50% | 65.000000 | 1.00000 | 4.0 | 6.000000 | 7.000000 | 6.000000 | 0.000000 | 0.000000 | 0.000000 | 14130.000000 | 100.000000 | 0.000000 | 0.000000 |
75% | 84.000000 | 2.00000 | 4.0 | 7.000000 | 8.000000 | 8.000000 | 2.000000 | 1.000000 | 1043.000000 | 57000.000000 | 100.000000 | 0.500000 | 0.444444 |
max | 103.000000 | 2.00000 | 4.0 | 13.000000 | 19.000000 | 19.000000 | 6.000000 | 7.000000 | 1782.000000 | 70000.000000 | 100.000000 | 1.000000 | 1.000000 |
# Feature matrix for clustering: the CER and OER columns (positions 18 and 19
# of the loaded frame, not 11 and 12). They are selected by name so that a
# change in column order cannot silently pick different features.
# NOTE(review): presumably commission/omission error rates -- confirm against
# the data dictionary.
x = dataset[['CER', 'OER']].to_numpy()
display(x)
array([[0. , 1. ], [0. , 0. ], [0.75 , 0.25 ], [1. , 0. ], [1. , 0. ], [0.55555556, 0.44444444], [1. , 0. ], [0. , 1. ], [0. , 1. ], [0.25 , 0.75 ], [0. , 0. ], [0.5 , 0.5 ], [0.4 , 0.6 ], [0.8 , 0.2 ], [0.33333333, 0.66666667], [0.5 , 0.5 ], [1. , 0. ], [0.8 , 0.2 ], [1. , 0. ], [0. , 0. ], [0.75 , 0.25 ], [0.66666667, 0.33333333], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 1. ], [0. , 0. ], [0. , 1. ], [0. , 0. ], [0. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [0. , 0. ], [0. , 1. ], [0. , 1. ], [0. , 0. ], [0. , 1. ], [0. , 0. ], [0. , 0. ], [1. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 1. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [1. , 0. ], [0. , 0. ], [0. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 0. ], [1. , 0. ], [0. , 0. ], [0. , 0. ], [0. , 1. ], [0. , 0. ]])
# Standardise both features: learn the scaling parameters first, then apply them
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x)
x = scaler.transform(x)
# Summary statistics of the scaled features
scaled_features = pd.DataFrame(x)
scaled_features.describe()
0 | 1 | |
---|---|---|
count | 7.700000e+01 | 7.700000e+01 |
mean | -6.920871e-17 | 6.776686e-17 |
std | 1.006557e+00 | 1.006557e+00 |
min | -6.114545e-01 | -6.190950e-01 |
25% | -6.114545e-01 | -6.190950e-01 |
50% | -6.114545e-01 | -6.190950e-01 |
75% | 6.745484e-01 | 5.142260e-01 |
max | 1.960551e+00 | 1.930877e+00 |
# Finding the optimal number of clusters using the elbow method
from sklearn.cluster import KMeans

# WCSS (within-cluster sum of squares, read from the fitted model's
# `inertia_`) for every candidate k; the bend in the curve suggests a good k.
wcss_list = []
cluster_counts = range(1, 11)  # candidate k values 1..10
for i in cluster_counts:
    # Fixed random_state keeps the curve reproducible between runs
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)
mtp.plot(cluster_counts, wcss_list)
mtp.title('The Elbow Method Graph')
mtp.xlabel('Number of clusters(k)')
mtp.ylabel('wcss_list')
mtp.show()
# Fit the final K-means model with three clusters and label every sample
kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42)
y_predict = kmeans.fit_predict(x)
print(y_predict)
# Draw each cluster's samples in its own colour (feature 0 on x, feature 1 on y)
cluster_styles = (('blue', 'Cluster 1'), ('green', 'Cluster 2'), ('red', 'Cluster 3'))
for cluster_id, (colour, legend_label) in enumerate(cluster_styles):
    in_cluster = y_predict == cluster_id
    mtp.scatter(x[in_cluster, 0], x[in_cluster, 1], s=100, c=colour, label=legend_label)
# Overlay the fitted cluster centres as larger yellow markers
centroids = kmeans.cluster_centers_
mtp.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroid')
mtp.title('Clusters of children')
mtp.xlabel('Commission Errors')
mtp.ylabel('Omission Errors')
mtp.legend()
mtp.show()
[1 0 2 2 2 2 2 1 1 1 0 2 1 2 1 2 2 2 2 0 2 2 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 2 1 0 1 1 0 1 0 0 2 0 0 1 0 0 0 0 0 0 0 2 0 0 2 2 0 0 0 2 0 0 1 0]
# Pair each sample's unscaled CER/OER values with its K-means cluster label.
# Columns are selected by name (they sit at positions 18 and 19) so the frame
# does not depend on column order; .copy() keeps the new column off `dataset`.
new_df = dataset[['CER', 'OER']].copy()
new_df['clusters'] = y_predict
display(new_df)
CER | OER | clusters | |
---|---|---|---|
0 | 0.00 | 1.00 | 1 |
1 | 0.00 | 0.00 | 0 |
2 | 0.75 | 0.25 | 2 |
3 | 1.00 | 0.00 | 2 |
4 | 1.00 | 0.00 | 2 |
... | ... | ... | ... |
98 | 1.00 | 0.00 | 2 |
99 | 0.00 | 0.00 | 0 |
100 | 0.00 | 0.00 | 0 |
101 | 0.00 | 1.00 | 1 |
102 | 0.00 | 0.00 | 0 |
77 rows × 3 columns
# Number of samples assigned to cluster 0
int(new_df["clusters"].eq(0).sum())
40
# Report the CER and OER value ranges observed inside cluster 0
cluster_0 = new_df.loc[new_df["clusters"].eq(0)]
cer_values, oer_values = cluster_0['CER'], cluster_0['OER']
minVal, maxVal = cer_values.min(), cer_values.max()
print("CER min - ", minVal)
print("CER max - ", maxVal)
print()
minVal, maxVal = oer_values.min(), oer_values.max()
print("OER min - ", minVal)
print("OER max - ", maxVal)
CER min - 0.0 CER max - 0.0 OER min - 0.0 OER max - 0.0
# Show the members of cluster 0 and the spread of their CER values
cluster_0 = new_df.loc[new_df["clusters"].eq(0)]
display(cluster_0)
cluster_0.boxplot(column='CER', grid=False)
CER | OER | clusters | |
---|---|---|---|
1 | 0.0 | 0.0 | 0 |
10 | 0.0 | 0.0 | 0 |
19 | 0.0 | 0.0 | 0 |
48 | 0.0 | 0.0 | 0 |
49 | 0.0 | 0.0 | 0 |
50 | 0.0 | 0.0 | 0 |
51 | 0.0 | 0.0 | 0 |
54 | 0.0 | 0.0 | 0 |
55 | 0.0 | 0.0 | 0 |
56 | 0.0 | 0.0 | 0 |
57 | 0.0 | 0.0 | 0 |
58 | 0.0 | 0.0 | 0 |
59 | 0.0 | 0.0 | 0 |
60 | 0.0 | 0.0 | 0 |
61 | 0.0 | 0.0 | 0 |
62 | 0.0 | 0.0 | 0 |
64 | 0.0 | 0.0 | 0 |
66 | 0.0 | 0.0 | 0 |
67 | 0.0 | 0.0 | 0 |
72 | 0.0 | 0.0 | 0 |
75 | 0.0 | 0.0 | 0 |
77 | 0.0 | 0.0 | 0 |
78 | 0.0 | 0.0 | 0 |
80 | 0.0 | 0.0 | 0 |
81 | 0.0 | 0.0 | 0 |
83 | 0.0 | 0.0 | 0 |
84 | 0.0 | 0.0 | 0 |
85 | 0.0 | 0.0 | 0 |
86 | 0.0 | 0.0 | 0 |
87 | 0.0 | 0.0 | 0 |
88 | 0.0 | 0.0 | 0 |
89 | 0.0 | 0.0 | 0 |
91 | 0.0 | 0.0 | 0 |
92 | 0.0 | 0.0 | 0 |
95 | 0.0 | 0.0 | 0 |
96 | 0.0 | 0.0 | 0 |
97 | 0.0 | 0.0 | 0 |
99 | 0.0 | 0.0 | 0 |
100 | 0.0 | 0.0 | 0 |
102 | 0.0 | 0.0 | 0 |
<AxesSubplot:>
# Spread of OER values inside cluster 0
cluster_0.boxplot(column='OER', grid=False)
<AxesSubplot:>
# Number of samples assigned to cluster 1
int(new_df["clusters"].eq(1).sum())
17
# Report the CER and OER value ranges observed inside cluster 1
cluster_1 = new_df.loc[new_df["clusters"].eq(1)]
cer_values, oer_values = cluster_1['CER'], cluster_1['OER']
minVal, maxVal = cer_values.min(), cer_values.max()
print("CER min - ", minVal)
print("CER max - ", maxVal)
print()
minVal, maxVal = oer_values.min(), oer_values.max()
print("OER min - ", minVal)
print("OER max - ", maxVal)
CER min - 0.0 CER max - 0.4 OER min - 0.6 OER max - 1.0
# Show the members of cluster 1 and the spread of their CER values
cluster_1 = new_df.loc[new_df["clusters"].eq(1)]
display(cluster_1)
cluster_1.boxplot(column='CER', grid=False)
CER | OER | clusters | |
---|---|---|---|
0 | 0.000000 | 1.000000 | 1 |
7 | 0.000000 | 1.000000 | 1 |
8 | 0.000000 | 1.000000 | 1 |
9 | 0.250000 | 0.750000 | 1 |
12 | 0.400000 | 0.600000 | 1 |
14 | 0.333333 | 0.666667 | 1 |
52 | 0.000000 | 1.000000 | 1 |
63 | 0.000000 | 1.000000 | 1 |
65 | 0.000000 | 1.000000 | 1 |
68 | 0.000000 | 1.000000 | 1 |
69 | 0.000000 | 1.000000 | 1 |
71 | 0.000000 | 1.000000 | 1 |
73 | 0.000000 | 1.000000 | 1 |
74 | 0.000000 | 1.000000 | 1 |
76 | 0.000000 | 1.000000 | 1 |
82 | 0.000000 | 1.000000 | 1 |
101 | 0.000000 | 1.000000 | 1 |
<AxesSubplot:>
# Spread of OER values inside cluster 1
cluster_1.boxplot(column='OER', grid=False)
<AxesSubplot:>
# Number of samples assigned to cluster 2
int(new_df["clusters"].eq(2).sum())
20
# Report the CER and OER value ranges observed inside cluster 2
cluster_2 = new_df.loc[new_df["clusters"].eq(2)]
cer_values, oer_values = cluster_2['CER'], cluster_2['OER']
minVal, maxVal = cer_values.min(), cer_values.max()
print("CER min - ", minVal)
print("CER max - ", maxVal)
print()
minVal, maxVal = oer_values.min(), oer_values.max()
print("OER min - ", minVal)
print("OER max - ", maxVal)
CER min - 0.5 CER max - 1.0 OER min - 0.0 OER max - 0.5
# Show the members of cluster 2 and the spread of their CER values
cluster_2 = new_df.loc[new_df["clusters"].eq(2)]
display(cluster_2)
cluster_2.boxplot(column='CER', grid=False)
CER | OER | clusters | |
---|---|---|---|
2 | 0.750000 | 0.250000 | 2 |
3 | 1.000000 | 0.000000 | 2 |
4 | 1.000000 | 0.000000 | 2 |
5 | 0.555556 | 0.444444 | 2 |
6 | 1.000000 | 0.000000 | 2 |
11 | 0.500000 | 0.500000 | 2 |
13 | 0.800000 | 0.200000 | 2 |
15 | 0.500000 | 0.500000 | 2 |
16 | 1.000000 | 0.000000 | 2 |
17 | 0.800000 | 0.200000 | 2 |
18 | 1.000000 | 0.000000 | 2 |
20 | 0.750000 | 0.250000 | 2 |
21 | 0.666667 | 0.333333 | 2 |
53 | 1.000000 | 0.000000 | 2 |
70 | 1.000000 | 0.000000 | 2 |
79 | 1.000000 | 0.000000 | 2 |
90 | 1.000000 | 0.000000 | 2 |
93 | 1.000000 | 0.000000 | 2 |
94 | 1.000000 | 0.000000 | 2 |
98 | 1.000000 | 0.000000 | 2 |
<AxesSubplot:>
# Spread of OER values inside cluster 2
cluster_2.boxplot(column='OER', grid=False)
<AxesSubplot:>
from matplotlib import pyplot as plt
# One column of CER values per cluster; the frame aligns them on the row index,
# leaving NaN where a row belongs to a different cluster
cer_by_cluster = {
    "Cluster1": cluster_0['CER'],
    "Cluster2": cluster_1['CER'],
    "Cluster3": cluster_2['CER'],
}
data = pd.DataFrame(cer_by_cluster)
# Side-by-side box plots, one box per cluster
ax = data.loc[:, ['Cluster1', 'Cluster2', 'Cluster3']].plot.box(title='boxplot')
# Render the figure
plt.show()
from matplotlib import pyplot as plt
# One column of OER values per cluster; the frame aligns them on the row index,
# leaving NaN where a row belongs to a different cluster
oer_by_cluster = {
    "Cluster1": cluster_0['OER'],
    "Cluster2": cluster_1['OER'],
    "Cluster3": cluster_2['OER'],
}
data = pd.DataFrame(oer_by_cluster)
# Side-by-side box plots, one box per cluster
ax = data.loc[:, ['Cluster1', 'Cluster2', 'Cluster3']].plot.box(title='boxplot')
# Render the figure
plt.show()