Added library imports and reading of the data set file

parent bc4c172e
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, LSTM
from keras.models import Sequential
from prophet.plot import plot_plotly, plot_components_plotly
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from xgboost import plot_importance, plot_tree
from xgboost import XGBRegressor
import logging, sys
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import plotly.express as px
import time
import xgboost as xgb
# Silence the logging module: disable() suppresses every record at or below
# the given level, and sys.maxsize is far above all standard levels.
logging.disable(sys.maxsize)
# IPython/Jupyter magic (not plain Python): render matplotlib figures inline
# in the notebook output.
%matplotlib inline
# Load the raw export records.  `parse_dates=True` only asks pandas to parse
# the index, which does nothing here because no index column is set; naming
# the 'Date' column explicitly makes it actually load as datetime.
data = pd.read_csv('data_set.csv', parse_dates=['Date'])
# Dumping the head to confirm it reads fine
data.head()
# Output all the distinct tea types and weight classes
print('Tea types:', data['Type'].unique())
print('Total weight classes:', data['Weight'].unique())
# Bar chart of exported quantity per date (the past 6 years, per the original
# note).  Each row's tea type and weight class are shown on hover.
fig = px.bar(
    data,
    x='Date',
    y='Quantity',
    hover_data=['Type', 'Weight'],
    color='Quantity',
    # The label mapping is keyed by column name; the previous 'pop' key
    # matched no column used in this call, so the label was never applied.
    labels={'Quantity': 'Total exports'},
    height=400,
)
fig.show()
# Only the exported quantity per date is needed from here on, so the
# descriptive columns are discarded.
store_exports = data.drop(columns=['Type', 'Weight', 'Price'])
# Parse 'Date' to datetime and collapse every date to its calendar month,
# because the forecast is made per month rather than per day or per year.
store_exports['Date'] = pd.to_datetime(data['Date']).dt.to_period('M')
# Sum all rows that fall in the same month, then convert the month periods
# back to timestamps so they can be plotted on a date axis.
monthly_exports = (
    store_exports
    .groupby('Date')
    .sum()
    .reset_index()
)
monthly_exports['Date'] = monthly_exports['Date'].dt.to_timestamp()
# Line chart of the monthly totals to get a rough idea of the pattern
exports_fig, exports_ax = plt.subplots(figsize=(15, 5))
exports_ax.plot(monthly_exports['Date'], monthly_exports['Quantity'])
exports_ax.set_xlabel('Date')
exports_ax.set_ylabel('Exports')
exports_ax.set_title("Monthly Exports")
plt.show()
# Per the original note, export levels are steady over the years with a slight
# dip during the COVID period.  The series is first-differenced (each month
# minus the previous one) as the step intended to make it stationary.
monthly_exports = monthly_exports.assign(
    quantity_diff=monthly_exports['Quantity'].diff()
).dropna()  # the first row has no predecessor, so its diff is NaN
monthly_exports.head(10)
# Line chart of the month-over-month differences
diff_fig, diff_ax = plt.subplots(figsize=(15, 5))
diff_ax.plot(monthly_exports['Date'], monthly_exports['quantity_diff'])
diff_ax.set_xlabel('Date')
diff_ax.set_ylabel('Exports')
diff_ax.set_title("Monthly Export Diff")
plt.show()
# Preparing a supervised data set to feed into the model: each row keeps the
# current difference plus the differences from the 12 preceding rows.
supverised_data = monthly_exports.drop(columns=['Date', 'Quantity'])
for lag in range(1, 13):
    # month_<lag> holds quantity_diff shifted down by <lag> rows
    supverised_data['month_' + str(lag)] = supverised_data['quantity_diff'].shift(lag)
# Shifting leaves NaN in the leading rows that lack a full lag history;
# drop those rows and renumber the index from zero.
supverised_data = supverised_data.dropna().reset_index(drop=True)
# Hold out the final 12 rows as the test set and train on everything before
train_data, test_data = supverised_data.iloc[:-12], supverised_data.iloc[-12:]
print('Train Data Shape:', train_data.shape)
print('Test Data Shape:', test_data.shape)
# Scale every column to the range [-1, 1] because the values can be negative.
# The scaler is fitted on the training rows only and then applied to both sets.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_data)
train_data, test_data = scaler.transform(train_data), scaler.transform(test_data)
# Column 0 is the target; the remaining columns are the input features.
x_train, x_test = train_data[:, 1:], test_data[:, 1:]
# Flatten the single target column to 1-D
y_train = train_data[:, :1].ravel()
y_test = test_data[:, :1].ravel()
print('x_train Shape:', x_train.shape)
print('y_train Shape:', y_train.shape)
print('x_test Shape:', x_test.shape)
print('y_test Shape:', y_test.shape)
# Data frame that will collect the predicted export quantities of all the
# trained algorithms; it starts with just the dates of the last 12 rows.
export_dates = monthly_exports['Date'].iloc[-12:].reset_index(drop=True)
predict_df = pd.DataFrame(export_dates)
# Actual monthly export values of the last 13 months
act_exports = monthly_exports['Quantity'].iloc[-13:].tolist()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment