Commit 47673403 authored by W.P.S.M wickramage's avatar W.P.S.M wickramage

Upload New File

parent ce96f4bb
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('BusTravelData.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" <th>Travel Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1/1/2019</td>\n",
" <td>6</td>\n",
" <td>Sunday</td>\n",
" <td>No</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>25</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1/2/2019</td>\n",
" <td>7</td>\n",
" <td>Sunday</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>27</td>\n",
" <td>14.7</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1/3/2019</td>\n",
" <td>8</td>\n",
" <td>Sunday</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>28</td>\n",
" <td>14.7</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1/4/2019</td>\n",
" <td>9</td>\n",
" <td>Sunday</td>\n",
" <td>No</td>\n",
" <td>7</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1/5/2019</td>\n",
" <td>10</td>\n",
" <td>Sunday</td>\n",
" <td>No</td>\n",
" <td>6</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Time Day Special Congestion Drving Speed Stops Weather \\\n",
"0 1/1/2019 6 Sunday No 5 40 14 25 \n",
"1 1/2/2019 7 Sunday No 8 40 14 27 \n",
"2 1/3/2019 8 Sunday No 9 40 14 28 \n",
"3 1/4/2019 9 Sunday No 7 40 14 29 \n",
"4 1/5/2019 10 Sunday No 6 40 14 29 \n",
"\n",
" Distance Travel Time \n",
"0 14.7 56 \n",
"1 14.7 55 \n",
"2 14.7 55 \n",
"3 14.7 56 \n",
"4 14.7 56 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" <th>Travel Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>595</th>\n",
" <td>8/18/2020</td>\n",
" <td>6</td>\n",
" <td>Monday</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>8/19/2020</td>\n",
" <td>6</td>\n",
" <td>Monday</td>\n",
" <td>No</td>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597</th>\n",
" <td>8/20/2020</td>\n",
" <td>12</td>\n",
" <td>Monday</td>\n",
" <td>No</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598</th>\n",
" <td>8/21/2020</td>\n",
" <td>12</td>\n",
" <td>Monday</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>599</th>\n",
" <td>8/22/2020</td>\n",
" <td>12</td>\n",
" <td>Monday</td>\n",
" <td>No</td>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Time Day Special Congestion Drving Speed Stops \\\n",
"595 8/18/2020 6 Monday No 9 40 12 \n",
"596 8/19/2020 6 Monday No 3 40 12 \n",
"597 8/20/2020 12 Monday No 5 40 12 \n",
"598 8/21/2020 12 Monday No 9 40 12 \n",
"599 8/22/2020 12 Monday No 3 40 12 \n",
"\n",
" Weather Distance Travel Time \n",
"595 25 12.1 49 \n",
"596 25 12.1 40 \n",
"597 25 12.1 43 \n",
"598 25 12.1 49 \n",
"599 25 12.1 40 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(600, 10)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of rows 600\n",
"Number of columns 10\n"
]
}
],
"source": [
"print(\"Number of rows\",data.shape[0])\n",
"print(\"Number of columns\",data.shape[1])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 600 entries, 0 to 599\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Date 600 non-null object \n",
" 1 Time 600 non-null int64 \n",
" 2 Day 600 non-null object \n",
" 3 Special 600 non-null object \n",
" 4 Congestion 600 non-null int64 \n",
" 5 Drving Speed 600 non-null int64 \n",
" 6 Stops 600 non-null int64 \n",
" 7 Weather 600 non-null int64 \n",
" 8 Distance 600 non-null float64\n",
" 9 Travel Time 600 non-null int64 \n",
"dtypes: float64(1), int64(6), object(3)\n",
"memory usage: 47.0+ KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" <th>Travel Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>600.000000</td>\n",
" <td>600.000000</td>\n",
" <td>600.0</td>\n",
" <td>600.000000</td>\n",
" <td>600.000000</td>\n",
" <td>600.00000</td>\n",
" <td>600.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>12.546667</td>\n",
" <td>6.253333</td>\n",
" <td>40.0</td>\n",
" <td>12.000000</td>\n",
" <td>27.000000</td>\n",
" <td>12.40400</td>\n",
" <td>49.786667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>5.184524</td>\n",
" <td>2.035369</td>\n",
" <td>0.0</td>\n",
" <td>4.033251</td>\n",
" <td>1.961227</td>\n",
" <td>4.05381</td>\n",
" <td>17.968470</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>6.000000</td>\n",
" <td>2.000000</td>\n",
" <td>40.0</td>\n",
" <td>0.000000</td>\n",
" <td>25.000000</td>\n",
" <td>1.20000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.000000</td>\n",
" <td>5.000000</td>\n",
" <td>40.0</td>\n",
" <td>12.000000</td>\n",
" <td>25.000000</td>\n",
" <td>12.10000</td>\n",
" <td>43.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>12.000000</td>\n",
" <td>6.000000</td>\n",
" <td>40.0</td>\n",
" <td>14.000000</td>\n",
" <td>27.000000</td>\n",
" <td>14.70000</td>\n",
" <td>56.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>17.000000</td>\n",
" <td>8.000000</td>\n",
" <td>40.0</td>\n",
" <td>14.000000</td>\n",
" <td>29.000000</td>\n",
" <td>14.70000</td>\n",
" <td>56.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>22.000000</td>\n",
" <td>9.000000</td>\n",
" <td>40.0</td>\n",
" <td>14.000000</td>\n",
" <td>30.000000</td>\n",
" <td>14.70000</td>\n",
" <td>75.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time Congestion Drving Speed Stops Weather \\\n",
"count 600.000000 600.000000 600.0 600.000000 600.000000 \n",
"mean 12.546667 6.253333 40.0 12.000000 27.000000 \n",
"std 5.184524 2.035369 0.0 4.033251 1.961227 \n",
"min 6.000000 2.000000 40.0 0.000000 25.000000 \n",
"25% 7.000000 5.000000 40.0 12.000000 25.000000 \n",
"50% 12.000000 6.000000 40.0 14.000000 27.000000 \n",
"75% 17.000000 8.000000 40.0 14.000000 29.000000 \n",
"max 22.000000 9.000000 40.0 14.000000 30.000000 \n",
"\n",
" Distance Travel Time \n",
"count 600.00000 600.000000 \n",
"mean 12.40400 49.786667 \n",
"std 4.05381 17.968470 \n",
"min 1.20000 3.000000 \n",
"25% 12.10000 43.000000 \n",
"50% 14.70000 56.000000 \n",
"75% 14.70000 56.000000 \n",
"max 14.70000 75.000000 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Sunday', 'Monday', 'Tuesday'], dtype=object)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Day'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"595 1\n",
"596 1\n",
"597 1\n",
"598 1\n",
"599 1\n",
"Name: Day, Length: 600, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Day'].map({'Sunday':0,'Monday':1,'Tuesday':2})"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"data['Day'] = data['Day'].map({'Sunday':0,'Monday':1,'Tuesday':2})"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" <th>Travel Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1/1/2019</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>25</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1/2/2019</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>27</td>\n",
" <td>14.7</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1/3/2019</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>28</td>\n",
" <td>14.7</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1/4/2019</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>7</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1/5/2019</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>6</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>14.7</td>\n",
" <td>56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Time Day Special Congestion Drving Speed Stops Weather \\\n",
"0 1/1/2019 6 0 No 5 40 14 25 \n",
"1 1/2/2019 7 0 No 8 40 14 27 \n",
"2 1/3/2019 8 0 No 9 40 14 28 \n",
"3 1/4/2019 9 0 No 7 40 14 29 \n",
"4 1/5/2019 10 0 No 6 40 14 29 \n",
"\n",
" Distance Travel Time \n",
"0 14.7 56 \n",
"1 14.7 55 \n",
"2 14.7 55 \n",
"3 14.7 56 \n",
"4 14.7 56 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"data['Special'] = data['Special'].map({'No':0,'Yes':1})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" <th>Travel Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>595</th>\n",
" <td>8/18/2020</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>8/19/2020</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597</th>\n",
" <td>8/20/2020</td>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598</th>\n",
" <td>8/21/2020</td>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>599</th>\n",
" <td>8/22/2020</td>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>40</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>12.1</td>\n",
" <td>40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Time Day Special Congestion Drving Speed Stops Weather \\\n",
"595 8/18/2020 6 1 0 9 40 12 25 \n",
"596 8/19/2020 6 1 0 3 40 12 25 \n",
"597 8/20/2020 12 1 0 5 40 12 25 \n",
"598 8/21/2020 12 1 0 9 40 12 25 \n",
"599 8/22/2020 12 1 0 3 40 12 25 \n",
"\n",
" Distance Travel Time \n",
"595 12.1 49 \n",
"596 12.1 40 \n",
"597 12.1 43 \n",
"598 12.1 49 \n",
"599 12.1 40 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([25, 27, 28, 29, 30, 26], dtype=int64)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Weather'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Date', 'Time', 'Day', 'Special', 'Congestion', 'Drving Speed', 'Stops',\n",
" 'Weather', 'Distance', 'Travel Time'],\n",
" dtype='object')"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"X = data.drop(['Date','Travel Time'],axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"y = data['Travel Time']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Train/test split\n",
"\n",
"1. split data into two parts \n",
"\n",
" training data set\n",
" testing data set\n",
"\n",
"2. Train the models on training set\n",
"\n",
"3. Test the models on testing data set"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"145 49\n",
"9 56\n",
"375 56\n",
"523 49\n",
"188 56\n",
" ..\n",
"71 40\n",
"106 73\n",
"270 56\n",
"435 34\n",
"102 56\n",
"Name: Travel Time, Length: 480, dtype: int64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.svm import SVR\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.neural_network import MLPClassifier\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Shiv\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MLPClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MLPClassifier</label><div class=\"sk-toggleable__content\"><pre>MLPClassifier()</pre></div></div></div></div></div>"
],
"text/plain": [
"MLPClassifier()"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Baseline: ordinary least-squares regression fitted on the training split.\n",
"lr = LinearRegression()\n",
"lr.fit(X_train, y_train)\n",
"\n",
"# The default max_iter=200 stopped before the optimiser converged (see the\n",
"# ConvergenceWarning this cell emitted), so allow more iterations.\n",
"# random_state pins the weight initialisation so the fit is repeatable,\n",
"# in line with the seeded train/test split above.\n",
"# NOTE(review): 'Travel Time' is numeric, yet a classifier treats every distinct\n",
"# value as its own class -- MLPRegressor may be the better fit; confirm intent.\n",
"NN = MLPClassifier(max_iter=1000, random_state=42)\n",
"NN.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"y_pred1 = lr.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"df1 = pd.DataFrame({'Actual':y_test, 'Lr Results':y_pred1})"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual</th>\n",
" <th>Lr Results</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>55</td>\n",
" <td>56.187653</td>\n",
" </tr>\n",
" <tr>\n",
" <th>419</th>\n",
" <td>56</td>\n",
" <td>55.732541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565</th>\n",
" <td>56</td>\n",
" <td>53.591764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>55</td>\n",
" <td>56.690085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>181</th>\n",
" <td>73</td>\n",
" <td>66.133304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td>56</td>\n",
" <td>54.599261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>340</th>\n",
" <td>56</td>\n",
" <td>53.591764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>49</td>\n",
" <td>52.322530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>494</th>\n",
" <td>56</td>\n",
" <td>55.732541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>439</th>\n",
" <td>43</td>\n",
" <td>38.662629</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Actual Lr Results\n",
"110 55 56.187653\n",
"419 56 55.732541\n",
"565 56 53.591764\n",
"77 55 56.690085\n",
"181 73 66.133304\n",
".. ... ...\n",
"399 56 54.599261\n",
"340 56 53.591764\n",
"148 49 52.322530\n",
"494 56 55.732541\n",
"439 43 38.662629\n",
"\n",
"[120 rows x 2 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual</th>\n",
" <th>NN Results</th>\n",
" <th>LR Results</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>55</td>\n",
" <td>55</td>\n",
" <td>56.187653</td>\n",
" </tr>\n",
" <tr>\n",
" <th>419</th>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>55.732541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565</th>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>53.591764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>55</td>\n",
" <td>55</td>\n",
" <td>56.690085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>181</th>\n",
" <td>73</td>\n",
" <td>73</td>\n",
" <td>66.133304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>54.599261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>340</th>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>53.591764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>52.322530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>494</th>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>55.732541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>439</th>\n",
" <td>43</td>\n",
" <td>43</td>\n",
" <td>38.662629</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Actual NN Results LR Results\n",
"110 55 55 56.187653\n",
"419 56 56 55.732541\n",
"565 56 56 53.591764\n",
"77 55 55 56.690085\n",
"181 73 73 66.133304\n",
".. ... ... ...\n",
"399 56 56 54.599261\n",
"340 56 56 53.591764\n",
"148 49 49 52.322530\n",
"494 56 56 55.732541\n",
"439 43 43 38.662629\n",
"\n",
"[120 rows x 3 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred2 = NN.predict(X_test)\n",
"df2 = pd.DataFrame({'Actual':y_test, 'NN Results':y_pred2, 'LR Results':y_pred1})\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x265e22fe0e0>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compare the first 11 test-set targets with the linear-regression predictions.\n",
"# df1 keeps the shuffled row labels produced by train_test_split, so the raw\n",
"# values are plotted against sample position; plotting the Series directly\n",
"# would use those labels as x coordinates and draw the line back and forth.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(df1['Actual'].iloc[0:11].to_numpy(), label='Actual')\n",
"ax.plot(df1['Lr Results'].iloc[0:11].to_numpy(), label='Lr Results')\n",
"ax.set_xlabel('Test sample')\n",
"ax.set_ylabel('Travel Time')\n",
"ax.set_title('Actual vs. linear regression (first 11 test samples)')\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\tTime\tDay\tRush\tSpecial\tCongestion\tDrving Speed\tStops\tWeather\tDistance\tTravel Time"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x265e2510ca0>"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compare the first 11 test-set targets with the neural-network predictions.\n",
"# Both frames keep the shuffled row labels produced by train_test_split, so the\n",
"# raw values are plotted against sample position; plotting the Series directly\n",
"# would use those labels as x coordinates and draw the line back and forth.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(df1['Actual'].iloc[0:11].to_numpy(), label='Actual')\n",
"ax.plot(df2['NN Results'].iloc[0:11].to_numpy(), label='NN Results')\n",
"ax.set_xlabel('Test sample')\n",
"ax.set_ylabel('Travel Time')\n",
"ax.set_title('Actual vs. neural network (first 11 test samples)')\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Prediction with Current parameters (Predict Time with trained Model)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>40</td>\n",
" <td>14</td>\n",
" <td>23</td>\n",
" <td>14.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time Day Special Congestion Drving Speed Stops Weather Distance\n",
"0 6 1 0 8 40 14 23 14.7"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One hand-entered journey. The keys must match the feature columns in X.\n",
"# It is stored under its own name so that the `data` DataFrame loaded from the\n",
"# CSV is not overwritten and the earlier cells can still be re-run.\n",
"sample_features = {\n",
"    'Time': 6,\n",
"    'Day': 1,\n",
"    'Special': 0,\n",
"    'Congestion': 8,\n",
"    'Drving Speed': 40,\n",
"    'Stops': 14,\n",
"    'Weather': 23,\n",
"    'Distance': 14.7,\n",
"}\n",
"# index=[0] is required because every value is a scalar.\n",
"df = pd.DataFrame(sample_features, index=[0])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"linear regression [61.09983093]\n",
"Neural Network [55]\n",
"Actual Travel Time :- 58\n"
]
}
],
"source": [
"new_pred = lr.predict(df)\n",
"NN_pred = NN.predict(df)\n",
"print('linear regression',new_pred)\n",
"print('Neural Network',NN_pred)\n",
"print('Actual Travel Time :- 58')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing With Current Data"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>Day</th>\n",
" <th>Special</th>\n",
" <th>Congestion</th>\n",
" <th>Drving Speed</th>\n",
" <th>Stops</th>\n",
" <th>Weather</th>\n",
" <th>Distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>20</td>\n",
" <td>24</td>\n",
" <td>14.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time Day Special Congestion Drving Speed Stops Weather Distance\n",
"0 17 1 0 5 40 20 24 14.7"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A second hand-entered journey. The keys must match the feature columns in X.\n",
"# It is stored under its own name so that the `data` DataFrame loaded from the\n",
"# CSV is not overwritten and the earlier cells can still be re-run.\n",
"current_features = {\n",
"    'Time': 17,\n",
"    'Day': 1,\n",
"    'Special': 0,\n",
"    'Congestion': 5,\n",
"    'Drving Speed': 40,\n",
"    'Stops': 20,\n",
"    'Weather': 24,\n",
"    'Distance': 14.7,\n",
"}\n",
"# index=[0] is required because every value is a scalar.\n",
"ddf = pd.DataFrame(current_features, index=[0])\n",
"ddf"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Travel Time Prediction :- 56 Min\n"
]
}
],
"source": [
"# Neural-network prediction for the single-row frame `ddf`.\n",
"# predict() returns an array, so it is unpacked into the printed message.\n",
"resultsNN = NN.predict(ddf)\n",
"print('Travel Time Prediction :-', *resultsNN, 'Min')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def GetPrediction(time,date,special,congestion,dspeed,stops,weather,distance):\n",
" data = {'Time':time,\n",
" 'Day':date,\n",
" 'Special':special,\n",
" 'Congestion':congestion,\n",
" 'Drving Speed':dspeed,\n",
" 'Stops':stops,\n",
" 'Weather':weather,\n",
" 'Distance':distance\n",
" }\n",
" ddf = pd.DataFrame(data,index=[0])\n",
" resultsNN = NN.predict(ddf)\n",
" resultsNN\n",
" print('Travel Time Prediction :-',*resultsNN,'Min')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Travel Time Prediction :- 55 Min\n"
]
}
],
"source": [
"def GetDateCode(date):\n",
"    \"\"\"Return the integer code for a weekday name.\n",
"\n",
"    'Sunday' through 'Saturday' map to 0 through 6. Any other value\n",
"    (different spelling or capitalisation included) returns 9.\n",
"    \"\"\"\n",
"    day_codes = {\n",
"        'Sunday': 0,\n",
"        'Monday': 1,\n",
"        'Tuesday': 2,\n",
"        'Wednesday': 3,\n",
"        'Thursday': 4,\n",
"        'Friday': 5,\n",
"        'Saturday': 6,\n",
"    }\n",
"    return day_codes.get(date, 9)\n",
"\n",
"\n",
"time = 6  # this time comes from semins data/member of route planning\n",
"day = 'Friday'  # day name comes from the UI\n",
"# NOTE(review): the training data only contained Sunday, Monday and Tuesday\n",
"# (codes 0-2), so other day codes are outside what the model was fitted on.\n",
"date = GetDateCode(day)\n",
"special = 0  # special day or not: 1/0\n",
"Congestion = 8  # depends on the departure time: heavy traffic = 9 / no traffic = 0\n",
"drivingspeedAVG = 40  # this comes from GPS data\n",
"stops = 14  # total bus stops/halts on the journey; comes from the Seminas data/member of route planning\n",
"weather = 23  # weather in the travel area\n",
"distance = 14.7  # route distance, same scale as the 'Distance' column\n",
"\n",
"# Pass the variables declared above rather than repeating literals, so the\n",
"# documented inputs are the ones the model actually receives.\n",
"GetPrediction(time, date, special, Congestion, drivingspeedAVG, stops, weather, distance)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "26de051ba29f2982a8de78e945f0abaf191376122a1563185a90213a26c5da77"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment