Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions frontend/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_installations():
logger.warning(
"Error from backend: %s: %s",
installations_response.status_code,
installations_response.text
installations_response.text,
)
except (ConnectTimeout, ReadTimeout, rConnectionError):
logger.error("Timeout when connecting to backend.")
Expand All @@ -89,6 +89,8 @@ def get_installations():
df[InstallationSchema.OS] = df[InstallationSchema.OS].str.replace(r"(Raspbian |Debian )", "Debian ", regex=True)
# collapse the "Armbian..." part of every os name to "Armbian (all)"
df[InstallationSchema.OS] = df[InstallationSchema.OS].str.replace(r"Armbian.*", "Armbian (all)", regex=True)
# the os may be an empty string; drop those rows
df = df[df[InstallationSchema.OS] != ""]
return df


Expand All @@ -99,7 +101,7 @@ def filter_dataframe(
machines: list,
recipes: list,
only_one_day: bool,
dates: tuple[datetime.date, datetime.date]
dates: tuple[datetime.date, datetime.date],
):
"""Apply the sidebar filter option to the data."""
filtered_df = df.loc[
Expand All @@ -110,8 +112,7 @@ def filter_dataframe(
& (df[CocktailSchema.receivedate] <= pd.Timestamp(dates[1]) + pd.Timedelta(days=1))
]
if only_one_day:
filtering = filtered_df[CocktailSchema.receivedate] >= (
datetime.datetime.now() - datetime.timedelta(hours=24)) # type: ignore
filtering = filtered_df[CocktailSchema.receivedate] >= (datetime.datetime.now() - datetime.timedelta(hours=24)) # type: ignore
filtered_df = filtered_df[filtering]
return filtered_df

Expand Down Expand Up @@ -161,8 +162,12 @@ def cocktail_count(df: pd.DataFrame, limit_recipe: int, country_split: bool) ->
return cocktails.iloc[:limit_recipe]
# When splitting by country, generate a temporary rank per cocktail name
# so the listing can be ordered by it (the rank depends on the total count)
name_order = df.groupby([CocktailSchema.cocktail_name])[
CocktailSchema.volume].count().sort_values().index.to_list()[-limit_recipe:]
name_order = (
df.groupby([CocktailSchema.cocktail_name])[CocktailSchema.volume]
.count()
.sort_values()
.index.to_list()[-limit_recipe:]
)
sorter_index = dict(zip(name_order, range(len(name_order))))
cocktails["Rank"] = cocktails[CocktailSchema.cocktail_name].map(sorter_index)
cocktails.sort_values(["Rank", CocktailSchema.cocktail_count], ascending=False, inplace=True)
Expand Down Expand Up @@ -223,8 +228,7 @@ def serving_aggregation(df: pd.DataFrame, machine_split: bool, min_count: int):
@st.cache_data(ttl=300)
def aggregate_installations(df: pd.DataFrame):
return (
df
.groupby([InstallationSchema.OS])[InstallationSchema.RECEIVEDATE]
df.groupby([InstallationSchema.OS])[InstallationSchema.RECEIVEDATE]
.count()
.reset_index()
.rename(
Expand All @@ -241,13 +245,12 @@ def cumulate_installations(raw_df: pd.DataFrame, os_split: bool = False):
"""Group the installations by week and return the count."""
df = raw_df.copy(deep=True)
df["counter"] = 1
grouping = [pd.Grouper(key=InstallationSchema.RECEIVEDATE, freq='w')]
grouping = [pd.Grouper(key=InstallationSchema.RECEIVEDATE, freq="w")]
# also need to group by os if needed
if os_split:
grouping.insert(0, InstallationSchema.OS) # type: ignore
df = (
df
.groupby(grouping)
df.groupby(grouping)
.count()
.reset_index()
.rename(
Expand Down Expand Up @@ -275,7 +278,7 @@ def cumulate_installations(raw_df: pd.DataFrame, os_split: bool = False):
df,
index=InstallationSchema.RECEIVEDATE,
columns=InstallationSchema.OS,
values=InstallationSchema.INSTALLATIONS_COUNT
values=InstallationSchema.INSTALLATIONS_COUNT,
)
.sort_index()
.ffill()
Expand Down