import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)
def convert_to_rgb_str(rgb_tuple):
"""Convert an RGB tuple from Seaborn palette with values in [0, 1] to an RGB string for Plotly."""
return f"rgb({int(255*rgb_tuple[0])}, {int(255*rgb_tuple[1])}, {int(255*rgb_tuple[2])})"
# Load the data from your XLSX file
df = pd.read_excel('./applications_analytics_randomized.xlsx', engine='openpyxl')
# Convert dates from string to datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
# Set Date as the index of the dataframe
df.set_index('Date', inplace=True)
# Create a column for the week number of the year
df['Week_Number'] = df.index.to_series().dt.isocalendar().week
# Group by the week number and sum the number of applications
df_weekly = df.groupby('Week_Number').sum()
# Ensure all weeks are present
latest_week = df['Week_Number'][df['Week_Number'].last_valid_index()]
df_weekly = df_weekly.reindex(np.arange(1, latest_week+1), fill_value=0)
# Reset index before creating the table
df = df.reset_index()
# Create a new dataframe for the table
df_table = df.groupby('Week_Number').agg(
{'Date': [np.min, np.max]}) # Get the first and last date for each week
df_table.columns = ['Start Date', 'End Date'] # Rename columns
# Create a column for the month of the year
df['Month'] = df['Date'].dt.month
# Group by the month and sum the number of applications
df_monthly = df.groupby('Month').sum(numeric_only=True)
df_monthly = df_monthly.drop(['Week_Number'], axis=1)
latest_month = df_monthly['Number of Applications'].last_valid_index()
# Ensure all months are present
df_monthly = df_monthly.reindex(np.arange(1, latest_month+1), fill_value=0)
df.head()
Date | Number of Applications | Week_Number | Month | |
---|---|---|---|---|
0 | 2022-04-18 | 6 | 16 | 4 |
1 | 2022-04-21 | 9 | 16 | 4 |
2 | 2022-04-23 | 4 | 16 | 4 |
3 | 2022-05-07 | 3 | 18 | 5 |
4 | 2022-05-07 | 2 | 18 | 5 |
sns_palette = sns.color_palette("bright",2)
# Compute trendline (assuming you've already done this above)
z = np.polyfit(df_monthly.index, df_monthly['Number of Applications'], 1)
p = np.poly1d(z)
# Create a line trace for "Number of Applications"
trace1 = go.Scatter(x=df_monthly.index,
y=df_monthly['Number of Applications'],
mode='lines',
name='Number of Applications',
line=dict(color=convert_to_rgb_str(sns_palette[0])))
# Create a line trace for the trendline
trace2 = go.Scatter(x=df_monthly.index,
y=p(df_monthly.index),
mode='lines',
name=f'Trend: y={z[0]:.2f}x + {z[1]:.2f}',
line=dict(color=convert_to_rgb_str(sns_palette[1]), dash='dash'))
# Layout configuration
layout = go.Layout(title='Number of Applications by Month',
xaxis=dict(title='Month Number'),
yaxis=dict(title='Number of Applications'))
# Combine traces and layout, and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
df_monthly
Number of Applications | |
---|---|
Month | |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 19 |
5 | 15 |
6 | 20 |
7 | 19 |
8 | 35 |
9 | 4 |
10 | 32 |
df_monthly.describe().round(2)
Number of Applications | |
---|---|
count | 10.00 |
mean | 14.40 |
std | 13.07 |
min | 0.00 |
25% | 1.00 |
50% | 17.00 |
75% | 19.75 |
max | 35.00 |
df_monthly['Number of Applications'].sum()
144
df_monthly['Number of Applications'].rolling(window=3).mean().tail().round(2)
Month 6 18.00 7 18.00 8 24.67 9 19.33 10 23.67 Name: Number of Applications, dtype: float64
# Compute trendline (assuming you've already done this above)
z = np.polyfit(df_weekly.index, df_weekly['Number of Applications'], 1)
p = np.poly1d(z)
# Create a line trace for "Number of Applications"
trace1 = go.Scatter(x=df_weekly.index,
y=df_weekly['Number of Applications'],
mode='lines',
name='Number of Applications',
line=dict(color=convert_to_rgb_str(sns_palette[0])))
# Create a line trace for the trendline
trace2 = go.Scatter(x=df_weekly.index,
y=p(df_weekly.index),
mode='lines',
name=f'Trend: y={z[0]:.2f}x + {z[1]:.2f}',
line=dict(color=convert_to_rgb_str(sns_palette[1]), dash='dash'))
# Layout configuration
layout = go.Layout(title='Number of Applications by Week',
xaxis=dict(title='Week Number'),
yaxis=dict(title='Number of Applications'))
# Combine traces and layout, and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()
df_weekly
Number of Applications | |
---|---|
Week_Number | |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
5 | 0 |
6 | 0 |
7 | 0 |
8 | 0 |
9 | 0 |
10 | 0 |
11 | 0 |
12 | 0 |
13 | 0 |
14 | 0 |
15 | 0 |
16 | 19 |
17 | 0 |
18 | 5 |
19 | 10 |
20 | 0 |
21 | 0 |
22 | 0 |
23 | 7 |
24 | 2 |
25 | 11 |
26 | 0 |
27 | 0 |
28 | 0 |
29 | 9 |
30 | 10 |
31 | 18 |
32 | 9 |
33 | 7 |
34 | 1 |
35 | 0 |
36 | 2 |
37 | 0 |
38 | 2 |
39 | 0 |
40 | 18 |
41 | 13 |
42 | 1 |
df_weekly.describe().round(2)
Number of Applications | |
---|---|
count | 42.00 |
mean | 3.43 |
std | 5.64 |
min | 0.00 |
25% | 0.00 |
50% | 0.00 |
75% | 6.50 |
max | 19.00 |
df_weekly['Number of Applications'].rolling(window=4).mean().tail().round(2)
Week_Number 38 1.00 39 1.00 40 5.00 41 8.25 42 8.00 Name: Number of Applications, dtype: float64