Skip to content

Commit

Permalink
Merge pull request #729 from MoralCode/collectiondate
Browse files (browse the repository at this point in the history)
Add Collection date to OSSF scorecard
  • Loading branch information
cdolfi authored Oct 11, 2024
2 parents 630e5a6 + bf3b214 commit 434d1c4
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 12 deletions.
6 changes: 4 additions & 2 deletions 8Knot/cache_manager/db_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,8 @@ def _create_application_tables() -> None:
CREATE UNLOGGED TABLE IF NOT EXISTS ossf_score_query(
repo_id int,
name text,
score float4
score float4,
data_collection_date timestamp
)
"""
)
Expand All @@ -323,7 +324,8 @@ def _create_application_tables() -> None:
stars_count int,
code_of_conduct_file text,
security_issue_file text,
security_audit_file text
security_audit_file text,
data_collection_date timestamp
)
"""
)
Expand Down
30 changes: 27 additions & 3 deletions 8Knot/pages/repo_overview/visualizations/ossf_scorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import cache_manager.cache_facade as cf
from pages.utils.job_utils import nodata_graph
import time
from datetime import datetime

PAGE = "repo_info"
VIZ_ID = "ossf-scorecard"
Expand Down Expand Up @@ -40,6 +41,16 @@
[
dbc.Row(
[
dbc.Col(
dbc.Row(
[
dbc.Label(
["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")],
className="mr-2",
)
]
),
),
dbc.Col(
dbc.Button(
"Scorecard Info",
Expand Down Expand Up @@ -76,7 +87,7 @@ def toggle_popover(n, is_open):

# callback for ossf scorecard
@callback(
Output(f"{PAGE}-{VIZ_ID}", "children"),
[Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")],
[
Input("repo-info-selection", "value"),
],
Expand All @@ -100,16 +111,29 @@ def ossf_scorecard(repo):
# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)
return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True), dbc.Label("No data")

# repo id not needed for table
df.drop(["repo_id"], axis=1, inplace=True)

# get all values from the data_collection_date column
updated_times = pd.to_datetime(df["data_collection_date"])

# we don't need to display this column for every entry
df.drop(["data_collection_date"], axis=1, inplace=True)

df.loc[df.name == "OSSF_SCORECARD_AGGREGATE_SCORE", "name"] = "Aggregate Score"
df.sort_values("name", ascending=True, inplace=True)
df.rename(columns={"name": "Check Type", "score": "Score"}, inplace=True)

table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)

unique_updated_times = updated_times.drop_duplicates().to_numpy().flatten()

if len(unique_updated_times) > 1:
logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE")

updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y")

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return table
return table, dbc.Label(updated_date)
21 changes: 16 additions & 5 deletions 8Knot/pages/repo_overview/visualizations/repo_general_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import cache_manager.cache_facade as cf
from pages.utils.job_utils import nodata_graph
import time
from datetime import datetime

PAGE = "repo_info"
VIZ_ID = "repo-general-info"
Expand All @@ -31,6 +32,7 @@
dcc.Loading(
html.Div(id=f"{PAGE}-{VIZ_ID}"),
),
dbc.Row([dbc.Label(["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2")]),
]
)
],
Expand All @@ -51,7 +53,7 @@ def toggle_popover(n, is_open):

# callback for repo general info
@callback(
Output(f"{PAGE}-{VIZ_ID}", "children"),
[Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")],
[
Input("repo-info-selection", "value"),
],
Expand All @@ -68,18 +70,27 @@ def repo_general_info(repo):
# test if there is data
if df_repo_files.empty and df_repo_info.empty and df_releases.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True)
return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True), dbc.Label("No data")

df = process_data(df_repo_files, df_repo_info, df_releases)
df, last_updated = process_data(df_repo_files, df_repo_info, df_releases)

table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return table
return table, last_updated


def process_data(df_repo_files, df_repo_info, df_releases):

updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"])

unique_updated_times = updated_times_repo_info.drop_duplicates().to_numpy().flatten()

if len(unique_updated_times) > 1:
logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE")

updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y")

# convert to datetime objects rather than strings
df_releases["release_published_at"] = pd.to_datetime(df_releases["release_published_at"], utc=True)

Expand Down Expand Up @@ -164,7 +175,7 @@ def process_data(df_repo_files, df_repo_info, df_releases):
}
)

return df
return df, dbc.Label(updated_date)


def multi_query_helper(repos):
Expand Down
3 changes: 2 additions & 1 deletion 8Knot/queries/ossf_score_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def ossf_score_query(self, repos):
SELECT
repo_id as id,
name,
score
score,
data_collection_date
FROM
repo_deps_scorecard
WHERE
Expand Down
3 changes: 2 additions & 1 deletion 8Knot/queries/repo_info_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def repo_info_query(self, repos):
stars_count,
code_of_conduct_file,
security_issue_file,
security_audit_file
security_audit_file,
data_collection_date
FROM
repo_info ri
WHERE
Expand Down

0 comments on commit 434d1c4

Please sign in to comment.