Commit 1dde7a00 authored by Shivanthi Fernando

Changes in Customer Retention

parent 4d87df32
This diff is collapsed.
This diff is collapsed.
...@@ -654,18 +654,28 @@ def Customer_Frequency(request): ...@@ -654,18 +654,28 @@ def Customer_Frequency(request):
# Loading the data # Loading the data
df = pd.read_csv('Data/CustomerDatasetNew_4.csv') df = pd.read_csv('Data/CustomerDatasetNew_4.csv')
# Data pre processing
# Drop the missing data in dataframe # Drop the missing data in dataframe
df_data = df.dropna() df_data = df.dropna()
# Convert the date field, InvoiceDate to datetime object # Convert the date field, InvoiceDate to datetime object
df_data.InvoiceDate = pd.to_datetime(df_data.InvoiceDate) df_data.InvoiceDate = pd.to_datetime(df_data.InvoiceDate)
ctm_bhvr_dt = df_data[(df_data.InvoiceDate < pd.Timestamp(2021, 12, 1)) # dataset End date # Get least recent date
& (df_data.InvoiceDate >= pd.Timestamp(2021, 5, 1))].reset_index( least_recent_date = pd.Timestamp((df_data.InvoiceDate).iloc[0])
updated_least_recent_date = least_recent_date.strftime('%Y,%d,%m')
# Get most recent date
most_recent_date = pd.Timestamp((df_data.InvoiceDate).iloc[-1])
updated_most_recent_date = most_recent_date.strftime('%Y,%d,%m')
ctm_bhvr_dt = df_data[(df_data.InvoiceDate < pd.Timestamp(updated_most_recent_date)) # dataset End date
& (df_data.InvoiceDate >= pd.Timestamp(updated_least_recent_date))].reset_index(
drop=True) # dataset start date drop=True) # dataset start date
ctm_next_quarter = df_data[(df_data.InvoiceDate < pd.Timestamp(2022, 3, 1)) # predict date Till ctm_next_quarter = df_data[(df_data.InvoiceDate < pd.Timestamp(2022, 12, 1)) # predict date Till
& (df_data.InvoiceDate >= pd.Timestamp(2021, 12, 1))].reset_index( & (df_data.InvoiceDate >= pd.Timestamp(updated_most_recent_date))].reset_index(
drop=True) # dataset End date drop=True) # dataset End date
# Get the distinct customers in the dataframe ctm_bhvr_dt # Get the distinct customers in the dataframe ctm_bhvr_dt
...@@ -674,44 +684,47 @@ def Customer_Frequency(request): ...@@ -674,44 +684,47 @@ def Customer_Frequency(request):
# Rename the column to CustomerID. # Rename the column to CustomerID.
ctm_dt.columns = ['CustomerID'] ctm_dt.columns = ['CustomerID']
# Create a dataframe with CustomerID and customers first purchase # Create a dataframe with CustomerID and customers first service
ctm_1st_purchase_in_next_quarter = ctm_next_quarter.groupby('CustomerID').InvoiceDate.min().reset_index() ctm_1st_service_in_next_quarter = ctm_next_quarter.groupby('CustomerID').InvoiceDate.min().reset_index()
ctm_1st_purchase_in_next_quarter.columns = ['CustomerID', 'MinPurchaseDate'] ctm_1st_service_in_next_quarter.columns = ['CustomerID', 'MinServiceDate']
# Create a dataframe with CustomerID and customers last purchase # Create a dataframe with CustomerID and customers last service
ctm_last_purchase_bhvr_dt = ctm_bhvr_dt.groupby('CustomerID').InvoiceDate.max().reset_index() ctm_last_service_bhvr_dt = ctm_bhvr_dt.groupby('CustomerID').InvoiceDate.max().reset_index()
ctm_last_purchase_bhvr_dt.columns = ['CustomerID', 'MaxPurchaseDate'] ctm_last_service_bhvr_dt.columns = ['CustomerID', 'MaxServiceDate']
# Merge two dataframes # Merge two dataframes
ctm_purchase_dates = pd.merge(ctm_last_purchase_bhvr_dt, ctm_1st_purchase_in_next_quarter, on='CustomerID', ctm_service_dates = pd.merge(ctm_last_service_bhvr_dt, ctm_1st_service_in_next_quarter, on='CustomerID',
how='left') how='left')
# Get the difference in days from MinPurchaseDate and MaxPurchaseDate for each customer # Get the difference in days from MinServiceDate and MaxServiceDate for each customer
ctm_purchase_dates['NextPurchaseDay'] = ( ctm_service_dates['NextServiceDay'] = (
ctm_purchase_dates['MinPurchaseDate'] - ctm_purchase_dates['MaxPurchaseDate']).dt.days ctm_service_dates['MinServiceDate'] - ctm_service_dates['MaxServiceDate']).dt.days
# Update the dataframe ctm_dt by merging it with the NextPurchaseDay column of the dataframe ctm_purchase_dates # Update the dataframe ctm_dt by merging it with the NextServiceDay column of the dataframe ctm_service_dates
ctm_dt = pd.merge(ctm_dt, ctm_purchase_dates[['CustomerID', 'NextPurchaseDay']], on='CustomerID', ctm_dt = pd.merge(ctm_dt, ctm_service_dates[['CustomerID', 'NextServiceDay']], on='CustomerID',
how='left') how='left')
# Fill all missing values in the dataset ctm_dt with the number 9999
# Fill all missing values in the dataset ctm_dt with 'Will not return' value
ctm_dt = ctm_dt.fillna('Will not return') ctm_dt = ctm_dt.fillna('Will not return')
# Feature Engineering # Feature Engineering
# RFM Model
# Recency # Recency
# Get the maximum purchase date of each customer and create a dataframe with it together with the customer's id. # Get the maximum service date of each customer and create a dataframe with it together with the customer's id.
ctm_max_purchase = ctm_bhvr_dt.groupby('CustomerID').InvoiceDate.max().reset_index() ctm_max_service = ctm_bhvr_dt.groupby('CustomerID').InvoiceDate.max().reset_index()
ctm_max_purchase.columns = ['CustomerID', 'MaxPurchaseDate'] ctm_max_service.columns = ['CustomerID', 'MaxServiceDate']
# Find the recency of each customer in days # Find the recency of each customer in days
ctm_max_purchase['Recency'] = ( ctm_max_service['Recency'] = (
ctm_max_purchase['MaxPurchaseDate'].max() - ctm_max_purchase['MaxPurchaseDate']).dt.days ctm_max_service['MaxServiceDate'].max() - ctm_max_service['MaxServiceDate']).dt.days
# Find the recency in days # Find the recency in days
ctm_max_purchase['Recency'] = ( ctm_max_service['Recency'] = (
ctm_max_purchase['MaxPurchaseDate'].max() - ctm_max_purchase['MaxPurchaseDate']).dt.days ctm_max_service['MaxServiceDate'].max() - ctm_max_service['MaxServiceDate']).dt.days
# Merge the dataframes ctm_dt and ctm_max_purchase[['CustomerID', 'Recency']] on the CustomerID column. # Merge the dataframes ctm_dt and ctm_max_service[['CustomerID', 'Recency']] on the CustomerID column.
ctm_dt = pd.merge(ctm_dt, ctm_max_purchase[['CustomerID', 'Recency']], on='CustomerID') ctm_dt = pd.merge(ctm_dt, ctm_max_service[['CustomerID', 'Recency']], on='CustomerID')
Recency = ctm_dt.head() Recency = ctm_dt.head()
# Frequency # Frequency
...@@ -722,7 +735,7 @@ def Customer_Frequency(request): ...@@ -722,7 +735,7 @@ def Customer_Frequency(request):
# Update the dataframe ctm_dt by merging it with the dataframe ctm_frequency # Update the dataframe ctm_dt by merging it with the dataframe ctm_frequency
ctm_dt = pd.merge(ctm_dt, ctm_frequency, on='CustomerID') ctm_dt = pd.merge(ctm_dt, ctm_frequency, on='CustomerID')
Frequency = ctm_dt.head() Frequency = ctm_dt.head()
print(Frequency) #print(Frequency)
df_data['Revenue'] = df_data.ServicePrice df_data['Revenue'] = df_data.ServicePrice
# df_data['Revenue'] = df_data.UnitPrice * df_data.Quantity # df_data['Revenue'] = df_data.UnitPrice * df_data.Quantity
...@@ -736,7 +749,7 @@ def Customer_Frequency(request): ...@@ -736,7 +749,7 @@ def Customer_Frequency(request):
pntge = (singlecustoemrvalue / totalALlcustomervalue) * 100 pntge = (singlecustoemrvalue / totalALlcustomervalue) * 100
sss = pd.DataFrame(pntge, columns=['persontageImpact']) sss = pd.DataFrame(pntge, columns=['persontageImpact'])
xxx = sss.head() xxx = sss.head()
print(xxx) #print(xxx)
ctm_dt = pd.merge(ctm_dt, sss, on='CustomerID') ctm_dt = pd.merge(ctm_dt, sss, on='CustomerID')
Frequency = ctm_dt.head() Frequency = ctm_dt.head()
df2 = pd.read_csv('Data/CustomerDatasetNew_4.csv') df2 = pd.read_csv('Data/CustomerDatasetNew_4.csv')
...@@ -746,7 +759,7 @@ def Customer_Frequency(request): ...@@ -746,7 +759,7 @@ def Customer_Frequency(request):
df0 = df0.groupby('CustomerID')[ df0 = df0.groupby('CustomerID')[
'ServicePrice', 'ServiceQualityRating', 'ResponseTimeRating', 'CustomerServiceRating'].sum() 'ServicePrice', 'ServiceQualityRating', 'ResponseTimeRating', 'CustomerServiceRating'].sum()
df0 = pd.merge(df0, Frequency, on='CustomerID') df0 = pd.merge(df0, Frequency, on='CustomerID')
print(df0) #print(df0)
x = df0[['ServicePrice', 'ServiceQualityRating', 'ResponseTimeRating', 'CustomerServiceRating']] x = df0[['ServicePrice', 'ServiceQualityRating', 'ResponseTimeRating', 'CustomerServiceRating']]
y = df0[['Frequency']] y = df0[['Frequency']]
# split dataset for train and test # split dataset for train and test
...@@ -760,7 +773,7 @@ def Customer_Frequency(request): ...@@ -760,7 +773,7 @@ def Customer_Frequency(request):
customerid = myForm.cleaned_data['customerid'] customerid = myForm.cleaned_data['customerid']
dfz = ctm_dt[(ctm_dt["CustomerID"] == customerid)] dfz = ctm_dt[(ctm_dt["CustomerID"] == customerid)]
Frequency = dfz.head(1) Frequency = dfz.head(1)
print(Frequency) #print(Frequency)
df = Frequency df = Frequency
json_records = df.reset_index().to_json(orient='records') json_records = df.reset_index().to_json(orient='records')
arr = json.loads(json_records) arr = json.loads(json_records)
......
No preview for this file type
...@@ -86,7 +86,7 @@ ...@@ -86,7 +86,7 @@
<tr> <tr>
<th>Customer ID </th> <th>Customer ID </th>
<th>Customer Name </th> <th>Customer Name </th>
<th>Next Purchase In </th> <th>Next Service In </th>
<th>Purchased Frequency </th> <th>Purchased Frequency </th>
<th>Impact on Profitability </th> <th>Impact on Profitability </th>
</tr> </tr>
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
<tr> <tr>
<td>{{i.CustomerID}} </td> <td>{{i.CustomerID}} </td>
<td>{{i.CustomerName}} </td> <td>{{i.CustomerName}} </td>
<td>{{i.NextPurchaseDay}} </td> <td>{{i.NextServiceDay}} </td>
<td>{{i.Frequency}} </td> <td>{{i.Frequency}} </td>
<td>{{i.persontageImpact}} % </td> <td>{{i.persontageImpact}} % </td>
</tr> </tr>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment