Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
22_23-J 56
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
22_23-J 56
22_23-J 56
Commits
96b63182
Commit
96b63182
authored
Feb 17, 2023
by
IT19954806 Devindi L.A.P.S
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added importing libraries and read data set file
parent
bc4c172e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
109 additions
and
0 deletions
+109
-0
Libraries and Read data set
Libraries and Read data set
+109
-0
No files found.
Libraries and Read data set
0 → 100644
View file @
96b63182
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, LSTM
from keras.models import Sequential
from prophet.plot import plot_plotly, plot_components_plotly
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from xgboost import plot_importance, plot_tree
from xgboost import XGBRegressor
import logging, sys
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import plotly.express as px
import time
import xgboost as xgb
# Silence all logging output: sys.maxsize is above every standard logging level.
logging.disable(sys.maxsize)
# IPython/Jupyter magic (not plain Python): show matplotlib figures inline.
%matplotlib inline
# Load the export records from a CSV file in the working directory.
# NOTE(review): parse_dates=True targets the index only, and no index_col is
# given, so 'Date' presumably stays as text here; it is converted explicitly
# with pd.to_datetime further down — confirm this is intended.
data = pd.read_csv('data_set.csv', parse_dates=True)
# Dumping the head to confirm it reads fine
data.head()
# Output all the different tea types and weight classes
print('Tea types:', data['Type'].unique())
print('Total weight classes:', data['Weight'].unique())
# Bar chart of exported quantity over time. Bars are coloured by quantity, and
# the tea type and weight class of each record are shown on hover.
fig = px.bar(
    data,
    x='Date',
    y='Quantity',
    hover_data=['Type', 'Weight'],
    color='Quantity',
    # `labels` is keyed by column name, so it must name the plotted column
    # ('Quantity') for the friendlier axis/legend title to be applied.
    labels={'Quantity': 'Total exports'},
    height=400,
)
fig.show()
# Only the total exported quantity matters from here on, so the per-record
# descriptive columns are discarded.
store_exports = data.drop(columns=['Type', 'Weight', 'Price'])
# Parse the raw Date values and bucket every record into its calendar month,
# because the forecast is made per month rather than per day or per year.
store_exports['Date'] = pd.to_datetime(data['Date']).dt.to_period('M')
# Sum everything that falls in the same month, then turn the monthly period
# back into a timestamp.
monthly_exports = store_exports.groupby('Date').sum().reset_index()
monthly_exports['Date'] = monthly_exports['Date'].dt.to_timestamp()
# Line chart of the monthly totals, for a first look at the overall pattern.
plt.figure(figsize=(15, 5))
plt.plot(monthly_exports['Date'], monthly_exports['Quantity'])
plt.title("Monthly Exports")
plt.xlabel('Date')
plt.ylabel('Exports')
plt.show()
# Work with the month-over-month change in quantity instead of the raw level,
# so that the series fed to the models is stationary. The first month has no
# predecessor, so its difference is NaN and that row is dropped.
monthly_exports = monthly_exports.assign(
    quantity_diff=monthly_exports['Quantity'].diff()
).dropna()
monthly_exports.head(10)
# Line chart of the month-over-month differences computed above.
plt.figure(figsize=(15, 5))
plt.plot(monthly_exports['Date'], monthly_exports['quantity_diff'])
plt.title("Monthly Export Diff")
plt.xlabel('Date')
plt.ylabel('Exports')
plt.show()
# Preparing a supervised data set to feed into the model: each row keeps the
# current month's difference and gains the differences of the 12 preceding
# months as lag columns month_1 .. month_12.
supverised_data = monthly_exports.drop(['Date', 'Quantity'], axis=1)
for i in range(1, 13):
    col_name = 'month_' + str(i)
    supverised_data[col_name] = supverised_data['quantity_diff'].shift(i)
# Drop the leading rows that lack a full 12-month history. This runs once,
# after all lag columns exist; doing it inside the loop would discard extra
# rows on every iteration.
supverised_data = supverised_data.dropna().reset_index(drop=True)
# Hold out the final 12 rows as the test set; every earlier row is used for
# training.
test_data = supverised_data.iloc[-12:]
train_data = supverised_data.iloc[:-12]
print('Train Data Shape:', train_data.shape)
print('Test Data Shape:', test_data.shape)
# Scale every column into [-1, 1] because the values can be negative. The
# scaler is fitted on the training rows only and then applied unchanged to
# both the training and the test rows.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

# Column 0 is the target and the remaining columns are the model inputs
# (presumably quantity_diff followed by the month_* lags — verify column order).
x_train, x_test = train_data[:, 1:], test_data[:, 1:]
# Flatten the single-column targets into 1-D arrays.
y_train = train_data[:, 0:1].ravel()
y_test = test_data[:, 0:1].ravel()

print('x_train Shape:', x_train.shape)
print('y_train Shape:', y_train.shape)
print('x_test Shape:', x_test.shape)
print('y_test Shape:', y_test.shape)
# Start a prediction data frame holding the dates of the last 12 months; the
# predicted export quantities of each trained algorithm are meant to be merged
# into it.
export_dates = monthly_exports['Date'].iloc[-12:].reset_index(drop=True)
predict_df = pd.DataFrame(export_dates)
# Actual monthly export quantities for the last 13 months, as a plain list.
act_exports = monthly_exports['Quantity'].iloc[-13:].to_list()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment