Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,4 @@ if it detects any missing docstrings or type hints in files that you have modifi
These will *not* block any pull request, but exist primarily as advisory comments to encourage good coding standards.

Note that `python_checks.yml` will only function on pull requests made from within the repo, not from a forked repo.

Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Update URL Status Materialized View

Revision ID: 9d57b3b79d35
Revises: 7fc6502f1fa3
Create Date: 2025-10-18 15:17:23.653448

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_10_18_1517-9d57b3b79d35_update_url_status_materialized_view.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_10_18_1517-9d57b3b79d35_update_url_status_materialized_view.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_10_18_1517-9d57b3b79d35_update_url_status_materialized_view.py:11:1: F401 'sqlalchemy as sa' imported but unused


# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the revision
# it revises; no branch labels or dependencies on other revisions are declared.
revision: str = '9d57b3b79d35'
down_revision: Union[str, None] = '7fc6502f1fa3'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Rebuild ``url_status_mat_view`` with the updated status set.

    Drops the materialized view if it exists, then recreates it. The view
    joins ``urls`` against the validation, submission, HTML, web-metadata,
    screenshot and task-error tables and labels every resulting row with a
    ``status`` ('Accepted', 'Awaiting Submission', 'Submitted',
    'Community Labeling', 'Error' or 'Intake') plus a numeric ``code`` for
    that status. The first matching ``when`` branch wins, so the order of
    the branches is significant.
    """
    op.execute("DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view")
    op.execute("""
    CREATE MATERIALIZED VIEW url_status_mat_view as
    with
    urls_with_relevant_errors as (
        -- DISTINCT guards against url_task_error holding several matching
        -- rows for one URL (e.g. one per task type); without it the left
        -- join below would repeat that URL once per error row.
        select distinct
            ute.url_id
        from
            url_task_error ute
        where
            ute.task_type in (
                'Screenshot',
                'HTML',
                'URL Probe'
            )
    )
    , status_text as (
        select
            u.id as url_id,
            case
                when (
                    -- Validated as not relevant, individual record, or not found
                    fuv.type in ('not relevant', 'individual record', 'not found')
                ) Then 'Accepted'
                when (
                    (fuv.type = 'data source' and uds.url_id is null)
                    OR
                    (fuv.type = 'meta url' and udmu.url_id is null)
                ) Then 'Awaiting Submission'
                when (
                    (fuv.type = 'data source' and uds.url_id is not null)
                    OR
                    (fuv.type = 'meta url' and udmu.url_id is not null)
                ) Then 'Submitted'
                when (
                    -- Has compressed HTML
                    uch.url_id is not null
                    AND
                    -- Has web metadata
                    uwm.url_id is not null
                    AND
                    -- Has screenshot
                    us.url_id is not null
                ) THEN 'Community Labeling'
                when uwre.url_id is not null then 'Error'
                ELSE 'Intake'
            END as status

        from
            urls u
            left join urls_with_relevant_errors uwre
                on u.id = uwre.url_id
            left join url_screenshot us
                on u.id = us.url_id
            left join url_compressed_html uch
                on u.id = uch.url_id
            left join url_web_metadata uwm
                on u.id = uwm.url_id
            left join flag_url_validated fuv
                on u.id = fuv.url_id
            left join url_ds_meta_url udmu
                on u.id = udmu.url_id
            left join url_data_source uds
                on u.id = uds.url_id
    )
    select
        url_id,
        status,
        CASE status
            WHEN 'Intake' THEN 100
            WHEN 'Error' THEN 110
            WHEN 'Community Labeling' THEN 200
            WHEN 'Accepted' THEN 300
            WHEN 'Awaiting Submission' THEN 380
            WHEN 'Submitted' THEN 390
            ELSE -1
        END as code
    from status_text
    """)


def downgrade() -> None:
    """Do nothing: this migration is not reversed.

    The earlier definition of ``url_status_mat_view`` is not restored here,
    so after a downgrade the view is left exactly as ``upgrade()`` built it.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ async def run(
).where(
URLStatusMatView.status.not_in(
[
URLStatusViewEnum.SUBMITTED_PIPELINE_COMPLETE.value,
URLStatusViewEnum.SUBMITTED.value,
URLStatusViewEnum.ACCEPTED.value,
URLStatusViewEnum.AWAITING_SUBMISSION.value,
]
)
).order_by(
Expand Down
3 changes: 2 additions & 1 deletion src/db/models/views/url_status/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
class URLStatusViewEnum(Enum):
INTAKE = "Intake"
ACCEPTED = "Accepted"
SUBMITTED_PIPELINE_COMPLETE = "Submitted/Pipeline Complete"
AWAITING_SUBMISSION = "Awaiting Submission"
SUBMITTED = "Submitted"
ERROR = "Error"
COMMUNITY_LABELING = "Community Labeling"