Commit 135e2f32 authored by Chamodi Yapa's avatar Chamodi Yapa

ML Model

parent 3d883784
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ML Model Development - Random Forest"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import Necessary Modules"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, explained_variance_score\n",
"from sklearn.model_selection import cross_val_score, train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Read the final dataset from CSV into a DataFrame\n",
"df = pd.read_csv(\"../DataPreprocessing/final_game_dataset.csv\")\n",
"\n",
"# Restore the already-fitted feature and target scalers from disk\n",
"# (they are only used via transform()/inverse_transform() below, never re-fitted)\n",
"scaler_X, scaler_y = (\n",
"    joblib.load(scaler_path)\n",
"    for scaler_path in (\"./fun1/scaler_X.pkl\", \"./fun1/scaler_y.pkl\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Separate the feature columns (X) from the target column (y)\n",
"X = df.drop(columns=[\"improvement_score\"])\n",
"# Series.ravel() is deprecated since pandas 2.2; to_numpy() returns the same 1-D array\n",
"y = df[\"improvement_score\"].to_numpy()\n",
"\n",
"# Scale the dataset with the loaded scalers.\n",
"# The target is reshaped to a single column because the scaler works on 2-D input.\n",
"X_scaled = scaler_X.transform(X)\n",
"y_scaled = scaler_y.transform(y.reshape(-1, 1))"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_scaled,\n",
"    y_scaled,\n",
"    random_state=42,\n",
"    test_size=0.2,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-4 {color: black;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestRegressor(random_state=42)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestRegressor</label><div class=\"sk-toggleable__content\"><pre>RandomForestRegressor(random_state=42)</pre></div></div></div></div></div>"
],
"text/plain": [
"RandomForestRegressor(random_state=42)"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create and train a Random Forest Regressor model\n",
"rf_model = RandomForestRegressor(n_estimators=100, random_state=42)\n",
"rf_model.fit(X_train, y_train.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Make predictions on the test set\n",
"y_pred = rf_model.predict(X_test)\n",
"\n",
"# Inverse transform to get back the original scale\n",
"y_pred_original_scale = scaler_y.inverse_transform(y_pred.reshape(-1, 1))\n",
"y_test_original_scale = scaler_y.inverse_transform(y_test.reshape(-1, 1))\n",
"\n",
"# Plot the predicted vs. actual improvement scores\n",
"plt.scatter(y_test_original_scale, y_pred_original_scale)\n",
"plt.xlabel(\"Actual Improvement Score\")\n",
"plt.ylabel(\"Predicted Improvement Score\")\n",
"plt.title(\"Actual vs. Predicted Improvement Score\")\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate Evaluation Metrics"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training R2 Score: 1.00\n",
"Testing R2 Score: 1.00\n",
"Mean Squared Error: 0.00\n",
"Mean Absolute Error (MAE): 0.00\n",
"Root Mean Squared Error (RMSE): 0.01\n",
"Explained Variance Score: 1.00\n"
]
}
],
"source": [
"# Calculate evaluation metrics\n",
"mse = mean_squared_error(y_test_original_scale, y_pred_original_scale)\n",
"r2 = r2_score(y_test_original_scale, y_pred_original_scale)\n",
"\n",
"# Training accuracy\n",
"y_train_pred = rf_model.predict(X_train)\n",
"train_r2 = r2_score(y_train, y_train_pred)\n",
"\n",
"# Testing accuracy\n",
"test_r2 = r2_score(y_test, y_pred)\n",
"\n",
"print(f\"Training R2 Score: {train_r2:.2f}\")\n",
"print(f\"Testing R2 Score: {test_r2:.2f}\")\n",
"\n",
"\n",
"# Calculate additional evaluation metrics\n",
"mae = mean_absolute_error(y_test_original_scale, y_pred_original_scale)\n",
"rmse = mean_squared_error(y_test_original_scale, y_pred_original_scale, squared=False)\n",
"explained_variance = explained_variance_score(y_test_original_scale, y_pred_original_scale)\n",
"\n",
"# Print evaluation metrics\n",
"print(f\"Mean Squared Error: {mse:.2f}\")\n",
"print(f\"Mean Absolute Error (MAE): {mae:.2f}\")\n",
"print(f\"Root Mean Squared Error (RMSE): {rmse:.2f}\")\n",
"print(f\"Explained Variance Score: {explained_variance:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cross-Validation"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cross-Validation MSE Scores: [9.57677453e-05 9.10708808e-05 1.02907478e-04 1.10507850e-04\n",
" 1.18874434e-04]\n",
"Average MSE: 0.00010382567759524451\n"
]
}
],
"source": [
"from sklearn.model_selection import cross_val_score\n",
"\n",
"# Perform k-fold cross-validation\n",
"cross_val_scores = cross_val_score(rf_model, X_scaled, y_scaled.ravel(), cv=5, scoring='neg_mean_squared_error')\n",
"cross_val_mse = -cross_val_scores # Convert negative MSE to positive\n",
"\n",
"# Print the cross-validation MSE scores\n",
"print(\"Cross-Validation MSE Scores:\", cross_val_mse)\n",
"print(\"Average MSE:\", cross_val_mse.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Saving the Model"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model saved as 'rf_model.pkl'.\n"
]
}
],
"source": [
"import joblib\n",
"\n",
"# Save the trained model to a file\n",
"joblib.dump(rf_model, \"./fun1/rf_model.pkl\")\n",
"\n",
"print(\"Model saved as 'rf_model.pkl'.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metrics and Cross-Validation MSE Scores as Percentages"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training R2 Score: 100.00%\n",
"Testing R2 Score: 99.99%\n",
"Mean Squared Error: 0.02%\n",
"Mean Absolute Error (MAE): 0.40%\n",
"Root Mean Squared Error (RMSE): 1.24%\n",
"Cross-Validation MSE Scores: [0.00957677 0.00910709 0.01029075 0.01105079 0.01188744]\n",
"Average MSE: 0.01%\n"
]
}
],
"source": [
"# Convert R2 scores to percentages\n",
"train_r2_percent = train_r2 * 100\n",
"test_r2_percent = test_r2 * 100\n",
"\n",
"# Convert MSE, MAE, and RMSE to percentages\n",
"mse_percent = mse * 100\n",
"mae_percent = mae * 100\n",
"rmse_percent = rmse * 100\n",
"\n",
"# Convert cross-validation MSE scores to percentages\n",
"cross_val_mse_percent = cross_val_mse * 100\n",
"average_mse_percent = cross_val_mse.mean() * 100\n",
"\n",
"# Print the metrics\n",
"print(f\"Training R2 Score: {train_r2_percent:.2f}%\")\n",
"print(f\"Testing R2 Score: {test_r2_percent:.2f}%\")\n",
"print(f\"Mean Squared Error: {mse_percent:.2f}%\")\n",
"print(f\"Mean Absolute Error (MAE): {mae_percent:.2f}%\")\n",
"print(f\"Root Mean Squared Error (RMSE): {rmse_percent:.2f}%\")\n",
"print(\"Cross-Validation MSE Scores:\", cross_val_mse_percent)\n",
"print(f\"Average MSE: {average_mse_percent:.2f}%\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "learn-joy",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment