Added importing libraries and read data set file

96b63182 · IT19954806 Devindi L.A.P.S · bc4c172e · 96b63182
Commit 96b63182 authored Feb 17, 2023 by IT19954806 Devindi L.A.P.S
Hide whitespace changes
Inline Side-by-side

Showing with 109 additions and 0 deletions

Libraries and Read data set Libraries and Read data set +109 -0

No files found.
--- a/Libraries and Read data set
+++ b/Libraries and Read data set
+from keras.callbacks import EarlyStopping, ModelCheckpoint
+from keras.layers import Dense, LSTM
+from keras.models import Sequential
+from prophet.plot import plot_plotly, plot_components_plotly
+from prophet import Prophet
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from sklearn.metrics import r2_score
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler
+from xgboost import plot_importance, plot_tree
+from xgboost import XGBRegressor
+import logging, sys
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+import plotly.express as px
+import time
+import xgboost as xgb
+logging.disable(sys.maxsize)
+%matplotlib inline
+
+data = pd.read_csv('data_set.csv', parse_dates=True)
+# Dumping the head to confirm it reads fine
+data.head()
+
+# Output all the different tea types and weight classes
+print('Tea types:', data['Type'].unique())
+print('Total weight classes:', data['Weight'].unique())
+
+# A graph showing total tea exports throughout the past 6 years and the contribution of each tea type
+fig = px.bar(data, x='Date', y='Quantity',
+    hover_data=['Type', 'Weight'], color='Quantity',
+    labels={'pop':'Total exports'}, height=400)
+fig.show()
+
+# We only need the total tea exports so we drop all other columns
+store_exports = data.drop(['Type','Weight', 'Price'], axis=1)
+# Convert Date from object type to datetime type
+store_exports['Date'] = pd.to_datetime(data['Date'])
+
+# Since we are predicting exports for month instead of dates or year,
+# we are converting our dates to intervals of months
+store_exports['Date'] = store_exports['Date'].dt.to_period('M')
+monthly_exports = store_exports.groupby('Date').sum().reset_index()
+
+monthly_exports['Date'] = monthly_exports['Date'].dt.to_timestamp()
+
+# Plot the graph to get a rough idea of the pattern
+plt.figure(figsize=(15,5))
+plt.plot(monthly_exports['Date'], monthly_exports['Quantity'])
+plt.ylabel('Exports')
+plt.xlabel('Date')
+plt.title("Monthly Exports")
+plt.show()
+
+# The export levels are steady over the years of time with slight downhill during covid times
+# Since it doesn't have a trend, we are making it a time series stationary
+monthly_exports['quantity_diff'] = monthly_exports['Quantity'].diff()
+monthly_exports = monthly_exports.dropna()
+monthly_exports.head(10)
+
+# Plotting the diff of exports in a graph
+plt.figure(figsize=(15,5))
+plt.plot(monthly_exports['Date'], monthly_exports['quantity_diff'])
+plt.ylabel('Exports')
+plt.xlabel('Date')
+plt.title("Monthly Export Diff")
+plt.show()
+
+# Preparing a supervised data set to feel into the model
+supverised_data = monthly_exports.drop(['Date','Quantity'], axis=1)
+
+for i in range(1,13):
+    col_name = 'month_' + str(i)
+    supverised_data[col_name] = supverised_data['quantity_diff'].shift(i)
+supverised_data = supverised_data.dropna().reset_index(drop=True)
+
+# Splitting the data into train and test data
+train_data = supverised_data[:-12]
+test_data = supverised_data[-12:]
+print('Train Data Shape:', train_data.shape)
+print('Test Data Shape:', test_data.shape)
+
+# Set MinMaxScaler to -1 and 1 because there are - values
+scaler = MinMaxScaler(feature_range=(-1,1))
+scaler.fit(train_data)
+train_data = scaler.transform(train_data)
+test_data = scaler.transform(test_data)
+
+x_train, y_train = train_data[:,1:], train_data[:,0:1]
+x_test, y_test = test_data[:,1:], test_data[:,0:1]
+y_train = y_train.ravel()
+y_test = y_test.ravel()
+print('x_train Shape:', x_train.shape)
+print('y_train Shape:', y_train.shape)
+print('x_test Shape:', x_test.shape)
+print('y_test Shape:', y_test.shape)
+
+# Making a prediction data frame to merge the predicted export quantities of all the trained algorithms
+export_dates = monthly_exports['Date'][-12:].reset_index(drop=True)
+predict_df = pd.DataFrame(export_dates)
+
+# Extract actual monthly export values of the last 13 months
+act_exports = monthly_exports['Quantity'][-13:].to_list()
\ No newline at end of file