In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.offline as pyo
In [ ]:
pyo.init_notebook_mode(connected=True)
In [ ]:
def convert_to_rgb_str(rgb_tuple):
    """Convert an RGB tuple from Seaborn palette with values in [0, 1] to an RGB string for Plotly."""
    return f"rgb({int(255*rgb_tuple[0])}, {int(255*rgb_tuple[1])}, {int(255*rgb_tuple[2])})"
In [ ]:
# Load the data from your XLSX file
df = pd.read_excel('./applications_analytics_randomized.xlsx', engine='openpyxl')

# Convert dates from string to datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')

# Set Date as the index of the dataframe
df.set_index('Date', inplace=True)

# Create a column for the week number of the year
df['Week_Number'] = df.index.to_series().dt.isocalendar().week

# Group by the week number and sum the number of applications
df_weekly = df.groupby('Week_Number').sum()

# Ensure all weeks are present
latest_week = df['Week_Number'][df['Week_Number'].last_valid_index()]

df_weekly = df_weekly.reindex(np.arange(1, latest_week+1), fill_value=0)

# Reset index before creating the table
df = df.reset_index()

# Create a new dataframe for the table
df_table = df.groupby('Week_Number').agg(
    {'Date': [np.min, np.max]}) # Get the first and last date for each week
df_table.columns = ['Start Date', 'End Date'] # Rename columns

# Create a column for the month of the year
df['Month'] = df['Date'].dt.month

# Group by the month and sum the number of applications
df_monthly = df.groupby('Month').sum(numeric_only=True)
df_monthly = df_monthly.drop(['Week_Number'], axis=1)

latest_month = df_monthly['Number of Applications'].last_valid_index()

# Ensure all months are present
df_monthly = df_monthly.reindex(np.arange(1, latest_month+1), fill_value=0)
In [ ]:
df.head()
Out[ ]:
Date Number of Applications Week_Number Month
0 2022-04-18 6 16 4
1 2022-04-21 9 16 4
2 2022-04-23 4 16 4
3 2022-05-07 3 18 5
4 2022-05-07 2 18 5
In [ ]:
sns_palette = sns.color_palette("bright",2)
In [ ]:
# Compute trendline (assuming you've already done this above)
z = np.polyfit(df_monthly.index, df_monthly['Number of Applications'], 1)
p = np.poly1d(z)

# Create a line trace for "Number of Applications"
trace1 = go.Scatter(x=df_monthly.index, 
                    y=df_monthly['Number of Applications'], 
                    mode='lines',
                    name='Number of Applications',
                    line=dict(color=convert_to_rgb_str(sns_palette[0])))

# Create a line trace for the trendline
trace2 = go.Scatter(x=df_monthly.index, 
                    y=p(df_monthly.index), 
                    mode='lines',
                    name=f'Trend: y={z[0]:.2f}x + {z[1]:.2f}',
                    line=dict(color=convert_to_rgb_str(sns_palette[1]), dash='dash'))

# Layout configuration
layout = go.Layout(title='Number of Applications by Month',
                   xaxis=dict(title='Month Number'),
                   yaxis=dict(title='Number of Applications'))

# Combine traces and layout, and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
In [ ]:
df_monthly
Out[ ]:
Number of Applications
Month
1 0
2 0
3 0
4 19
5 15
6 20
7 19
8 35
9 4
10 32
In [ ]:
df_monthly.describe().round(2)
Out[ ]:
Number of Applications
count 10.00
mean 14.40
std 13.07
min 0.00
25% 1.00
50% 17.00
75% 19.75
max 35.00
In [ ]:
df_monthly['Number of Applications'].sum()
Out[ ]:
144
In [ ]:
df_monthly['Number of Applications'].rolling(window=3).mean().tail().round(2)
Out[ ]:
Month
6     18.00
7     18.00
8     24.67
9     19.33
10    23.67
Name: Number of Applications, dtype: float64
In [ ]:
# Compute trendline (assuming you've already done this above)
z = np.polyfit(df_weekly.index, df_weekly['Number of Applications'], 1)
p = np.poly1d(z)

# Create a line trace for "Number of Applications"
trace1 = go.Scatter(x=df_weekly.index, 
                    y=df_weekly['Number of Applications'], 
                    mode='lines',
                    name='Number of Applications',
                    line=dict(color=convert_to_rgb_str(sns_palette[0])))

# Create a line trace for the trendline
trace2 = go.Scatter(x=df_weekly.index, 
                    y=p(df_weekly.index), 
                    mode='lines',
                    name=f'Trend: y={z[0]:.2f}x + {z[1]:.2f}',
                    line=dict(color=convert_to_rgb_str(sns_palette[1]), dash='dash'))

# Layout configuration
layout = go.Layout(title='Number of Applications by Week',
                   xaxis=dict(title='Week Number'),
                   yaxis=dict(title='Number of Applications'))

# Combine traces and layout, and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
In [ ]:
df_weekly
Out[ ]:
Number of Applications
Week_Number
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 19
17 0
18 5
19 10
20 0
21 0
22 0
23 7
24 2
25 11
26 0
27 0
28 0
29 9
30 10
31 18
32 9
33 7
34 1
35 0
36 2
37 0
38 2
39 0
40 18
41 13
42 1
In [ ]:
df_weekly.describe().round(2)
Out[ ]:
Number of Applications
count 42.00
mean 3.43
std 5.64
min 0.00
25% 0.00
50% 0.00
75% 6.50
max 19.00
In [ ]:
df_weekly['Number of Applications'].rolling(window=4).mean().tail().round(2)
Out[ ]:
Week_Number
38    1.00
39    1.00
40    5.00
41    8.25
42    8.00
Name: Number of Applications, dtype: float64