Commit 54509182 authored by Chamodi Mandakini's avatar Chamodi Mandakini

tumor prediction

parent dbe569f4
{
"cells": [
{
"cell_type": "markdown",
"id": "d4f44a2e",
"metadata": {
"id": "d4f44a2e"
},
"source": [
"## Title - **Melanoma Tumor Size Prediction**\n",
"## Used Algorithm - **Liner Regression**\n",
"\n",
"\n",
"## Accuracy\n",
"* Accuracy on Isolation Forest : 90%\n",
"* Accuracy on Random Forest : 93%"
]
},
{
"cell_type": "markdown",
"id": "e555e9d8",
"metadata": {
"id": "e555e9d8"
},
"source": [
"### Importing the dependancies"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lr8KJqCxamQm",
"outputId": "3610dc7a-6a5b-489a-af1e-2f9af9f9afbc"
},
"id": "Lr8KJqCxamQm",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from google.colab import files\n",
"files.upload()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 90
},
"id": "x73tDO5fanhd",
"outputId": "20d2a8fa-66ff-4df5-990d-de9d4c1418cf"
},
"id": "x73tDO5fanhd",
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <input type=\"file\" id=\"files-0551e200-59b6-43e1-9f54-d379d1013939\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-0551e200-59b6-43e1-9f54-d379d1013939\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script>// Copyright 2017 Google LLC\n",
"//\n",
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"// you may not use this file except in compliance with the License.\n",
"// You may obtain a copy of the License at\n",
"//\n",
"// http://www.apache.org/licenses/LICENSE-2.0\n",
"//\n",
"// Unless required by applicable law or agreed to in writing, software\n",
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"// See the License for the specific language governing permissions and\n",
"// limitations under the License.\n",
"\n",
"/**\n",
" * @fileoverview Helpers for google.colab Python module.\n",
" */\n",
"(function(scope) {\n",
"function span(text, styleAttributes = {}) {\n",
" const element = document.createElement('span');\n",
" element.textContent = text;\n",
" for (const key of Object.keys(styleAttributes)) {\n",
" element.style[key] = styleAttributes[key];\n",
" }\n",
" return element;\n",
"}\n",
"\n",
"// Max number of bytes which will be uploaded at a time.\n",
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
"\n",
"function _uploadFiles(inputId, outputId) {\n",
" const steps = uploadFilesStep(inputId, outputId);\n",
" const outputElement = document.getElementById(outputId);\n",
" // Cache steps on the outputElement to make it available for the next call\n",
" // to uploadFilesContinue from Python.\n",
" outputElement.steps = steps;\n",
"\n",
" return _uploadFilesContinue(outputId);\n",
"}\n",
"\n",
"// This is roughly an async generator (not supported in the browser yet),\n",
"// where there are multiple asynchronous steps and the Python side is going\n",
"// to poll for completion of each step.\n",
"// This uses a Promise to block the python side on completion of each step,\n",
"// then passes the result of the previous step as the input to the next step.\n",
"function _uploadFilesContinue(outputId) {\n",
" const outputElement = document.getElementById(outputId);\n",
" const steps = outputElement.steps;\n",
"\n",
" const next = steps.next(outputElement.lastPromiseValue);\n",
" return Promise.resolve(next.value.promise).then((value) => {\n",
" // Cache the last promise value to make it available to the next\n",
" // step of the generator.\n",
" outputElement.lastPromiseValue = value;\n",
" return next.value.response;\n",
" });\n",
"}\n",
"\n",
"/**\n",
" * Generator function which is called between each async step of the upload\n",
" * process.\n",
" * @param {string} inputId Element ID of the input file picker element.\n",
" * @param {string} outputId Element ID of the output display.\n",
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
" */\n",
"function* uploadFilesStep(inputId, outputId) {\n",
" const inputElement = document.getElementById(inputId);\n",
" inputElement.disabled = false;\n",
"\n",
" const outputElement = document.getElementById(outputId);\n",
" outputElement.innerHTML = '';\n",
"\n",
" const pickedPromise = new Promise((resolve) => {\n",
" inputElement.addEventListener('change', (e) => {\n",
" resolve(e.target.files);\n",
" });\n",
" });\n",
"\n",
" const cancel = document.createElement('button');\n",
" inputElement.parentElement.appendChild(cancel);\n",
" cancel.textContent = 'Cancel upload';\n",
" const cancelPromise = new Promise((resolve) => {\n",
" cancel.onclick = () => {\n",
" resolve(null);\n",
" };\n",
" });\n",
"\n",
" // Wait for the user to pick the files.\n",
" const files = yield {\n",
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
" response: {\n",
" action: 'starting',\n",
" }\n",
" };\n",
"\n",
" cancel.remove();\n",
"\n",
" // Disable the input element since further picks are not allowed.\n",
" inputElement.disabled = true;\n",
"\n",
" if (!files) {\n",
" return {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
" }\n",
"\n",
" for (const file of files) {\n",
" const li = document.createElement('li');\n",
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
" li.append(span(\n",
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
" `last modified: ${\n",
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
" 'n/a'} - `));\n",
" const percent = span('0% done');\n",
" li.appendChild(percent);\n",
"\n",
" outputElement.appendChild(li);\n",
"\n",
" const fileDataPromise = new Promise((resolve) => {\n",
" const reader = new FileReader();\n",
" reader.onload = (e) => {\n",
" resolve(e.target.result);\n",
" };\n",
" reader.readAsArrayBuffer(file);\n",
" });\n",
" // Wait for the data to be ready.\n",
" let fileData = yield {\n",
" promise: fileDataPromise,\n",
" response: {\n",
" action: 'continue',\n",
" }\n",
" };\n",
"\n",
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
" let position = 0;\n",
" do {\n",
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
" const chunk = new Uint8Array(fileData, position, length);\n",
" position += length;\n",
"\n",
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
" yield {\n",
" response: {\n",
" action: 'append',\n",
" file: file.name,\n",
" data: base64,\n",
" },\n",
" };\n",
"\n",
" let percentDone = fileData.byteLength === 0 ?\n",
" 100 :\n",
" Math.round((position / fileData.byteLength) * 100);\n",
" percent.textContent = `${percentDone}% done`;\n",
"\n",
" } while (position < fileData.byteLength);\n",
" }\n",
"\n",
" // All done.\n",
" yield {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
"}\n",
"\n",
"scope.google = scope.google || {};\n",
"scope.google.colab = scope.google.colab || {};\n",
"scope.google.colab._files = {\n",
" _uploadFiles,\n",
" _uploadFilesContinue,\n",
"};\n",
"})(self);\n",
"</script> "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving kaggle.json to kaggle (1).json\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'kaggle.json': b'{\"username\":\"mishaneperera\",\"key\":\"fb1e9a2fcad19f92a1d3ebd6e8c8165f\"}'}"
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"source": [
"!mkdir -p ~/.kaggle\n",
"!cp kaggle.json ~/.kaggle/"
],
"metadata": {
"id": "g-QxxXqKap_8"
},
"id": "g-QxxXqKap_8",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!chmod 600 ~/.kaggle/kaggle.json"
],
"metadata": {
"id": "HSge_gifar5N"
},
"id": "HSge_gifar5N",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"!unzip '/content/drive/MyDrive/Skin tumor/machine-hack-melanoma-tumor-size-prediction.zip'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1IPVt6ZavPc",
"outputId": "64d364dd-ce34-47dc-c74c-778cadcff92a"
},
"id": "u1IPVt6ZavPc",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Archive: /content/drive/MyDrive/Skin tumor/machine-hack-melanoma-tumor-size-prediction.zip\n",
"replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n",
" inflating: sample_submission.csv \n",
"replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n",
" inflating: test.csv \n",
"replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n",
" inflating: train.csv \n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1123a658",
"metadata": {
"id": "1123a658"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n"
]
},
{
"cell_type": "markdown",
"id": "4e1ec49f",
"metadata": {
"id": "4e1ec49f"
},
"source": [
"### Data collecting process"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "111d33c0",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "111d33c0",
"outputId": "a8a28ca2-0714-4c35-b92d-26ff08273e7e"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" mass_npea size_npear malign_ratio damage_size exposed_area \\\n",
"0 6930.90 2919.02 0.42116 51.8298 9.888294e+05 \n",
"1 15635.70 4879.36 0.31206 223.5500 2.058426e+06 \n",
"2 10376.20 2613.88 0.25191 127.3370 1.434676e+06 \n",
"3 13093.80 4510.06 0.34444 155.4400 1.812195e+06 \n",
"4 7545.21 2882.36 0.38201 85.1237 1.043918e+06 \n",
"\n",
" std_dev_malign err_malign malign_penalty damage_ratio tumor_size \n",
"0 109.487 2758.76 72 39.3620 14.103 \n",
"1 248.881 5952.53 240 22.0253 2.648 \n",
"2 160.093 4635.26 73 29.9963 1.688 \n",
"3 173.015 5273.87 32 28.1354 3.796 \n",
"4 124.414 3263.35 57 35.0200 18.023 "
],
"text/html": [
"\n",
" <div id=\"df-e80b92a4-4456-4112-bb21-72dcd092350c\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mass_npea</th>\n",
" <th>size_npear</th>\n",
" <th>malign_ratio</th>\n",
" <th>damage_size</th>\n",
" <th>exposed_area</th>\n",
" <th>std_dev_malign</th>\n",
" <th>err_malign</th>\n",
" <th>malign_penalty</th>\n",
" <th>damage_ratio</th>\n",
" <th>tumor_size</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6930.90</td>\n",
" <td>2919.02</td>\n",
" <td>0.42116</td>\n",
" <td>51.8298</td>\n",
" <td>9.888294e+05</td>\n",
" <td>109.487</td>\n",
" <td>2758.76</td>\n",
" <td>72</td>\n",
" <td>39.3620</td>\n",
" <td>14.103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>15635.70</td>\n",
" <td>4879.36</td>\n",
" <td>0.31206</td>\n",
" <td>223.5500</td>\n",
" <td>2.058426e+06</td>\n",
" <td>248.881</td>\n",
" <td>5952.53</td>\n",
" <td>240</td>\n",
" <td>22.0253</td>\n",
" <td>2.648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10376.20</td>\n",
" <td>2613.88</td>\n",
" <td>0.25191</td>\n",
" <td>127.3370</td>\n",
" <td>1.434676e+06</td>\n",
" <td>160.093</td>\n",
" <td>4635.26</td>\n",
" <td>73</td>\n",
" <td>29.9963</td>\n",
" <td>1.688</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13093.80</td>\n",
" <td>4510.06</td>\n",
" <td>0.34444</td>\n",
" <td>155.4400</td>\n",
" <td>1.812195e+06</td>\n",
" <td>173.015</td>\n",
" <td>5273.87</td>\n",
" <td>32</td>\n",
" <td>28.1354</td>\n",
" <td>3.796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7545.21</td>\n",
" <td>2882.36</td>\n",
" <td>0.38201</td>\n",
" <td>85.1237</td>\n",
" <td>1.043918e+06</td>\n",
" <td>124.414</td>\n",
" <td>3263.35</td>\n",
" <td>57</td>\n",
" <td>35.0200</td>\n",
" <td>18.023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e80b92a4-4456-4112-bb21-72dcd092350c')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-e80b92a4-4456-4112-bb21-72dcd092350c button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-e80b92a4-4456-4112-bb21-72dcd092350c');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 50
}
],
"source": [
"dataset = pd.read_csv('/content/train.csv')\n",
"dataset.head()"
]
},
{
"cell_type": "markdown",
"id": "5da5cf75",
"metadata": {
"id": "5da5cf75"
},
"source": [
"### Explore the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "740f14ff",
"metadata": {
"id": "740f14ff"
},
"outputs": [],
"source": [
"X = dataset.iloc[:,:-1].values #independent variable array\n",
"y = dataset.iloc[:,1].values #dependent variable vector"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c8d2381",
"metadata": {
"id": "7c8d2381"
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=1/3,random_state=0)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e535ee6f",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"id": "e535ee6f",
"outputId": "8b64e058-1606-4284-82dd-de77f9a70f70"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LinearRegression()"
],
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 54
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"regressor = LinearRegression()\n",
"regressor.fit(X_train,y_train) #actually produces the linear eqn for the data\n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"accuracy=regressor.score(X_train,y_train)\n",
"print(accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5BGEhbFxpjxB",
"outputId": "47340ca9-a7ad-484a-9dc7-61cace4b5c4b"
},
"id": "5BGEhbFxpjxB",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1.0\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d21cf5f8",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "d21cf5f8",
"outputId": "19c58cf8-d471-46fa-8380-6827494e6042"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([3564.2 , 2081.8 , 3567.24, ..., 2344.44, 4253.25, 2881.7 ])"
]
},
"metadata": {},
"execution_count": 55
}
],
"source": [
"y_pred = regressor.predict(X_test) \n",
"y_pred"
]
},
{
"cell_type": "markdown",
"id": "f0e0e01a",
"metadata": {
"id": "f0e0e01a"
},
"source": [
"### Checking the distribution of target variable\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf5c2e23",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 853
},
"id": "cf5c2e23",
"outputId": "21f84128-1406-40a8-cec7-31ad93d9474b"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1200x900 with 2 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"import seaborn as sns\n",
"# Correlation matrix\n",
"corrmat = dataset.corr()\n",
"fig = plt.figure(figsize = (12, 9))\n",
"\n",
"sns.heatmap(corrmat, vmax = .8, square = True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "38ffa726",
"metadata": {
"id": "38ffa726"
},
"source": [
"### Splitting the features and target"
]
},
{
"cell_type": "markdown",
"id": "5df8ea16",
"metadata": {
"id": "5df8ea16"
},
"source": [
"### Saving the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02faf4e6",
"metadata": {
"id": "02faf4e6"
},
"outputs": [],
"source": [
"import pickle\n",
"filename = 'finalized_model_skinT_prediction_RF.sav'\n",
"pickle.dump(regressor, open(filename, 'wb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "896159c4",
"metadata": {
"id": "896159c4"
},
"outputs": [],
"source": [
"with open('finalized_model_skinT_prediction_RF.sav', 'rb') as f:\n",
" model = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03d78684",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "03d78684",
"outputId": "39dc528d-b12e-4716-e42b-40879e4f7698"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[4510.06]\n"
]
}
],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"#input_data1 = (70,1.02,0,0,1,44,1.2,145,5,14.8,8406,4.71,0) #0\n",
"input_data2 = (13093.8,4510.06,0.34444,155.44,1812195.1833,173.015,5273.87,32,28.1354) #1\n",
"\n",
"#change input data to numpy array\n",
"input_data_as_numpy_array = np.asarray(input_data2)\n",
"\n",
"#reshape the numpy array as we are predicting for only on instance\n",
"\n",
"input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
"\n",
"prediction = model.predict(input_data_reshaped)\n",
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41a76901",
"metadata": {
"id": "41a76901"
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"vscode": {
"interpreter": {
"hash": "d9a6414fa631c028c434667d182c0b79dc634ffcd06f52fc304061a2c0b9ef26"
}
},
"colab": {
"provenance": [],
"collapsed_sections": [
"e8dd7a8f",
"5821ec3c"
]
}
},
"nbformat": 4,
"nbformat_minor": 5
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment