|
| 1 | +"""Update URL Status Materialized View |
| 2 | +
|
| 3 | +Revision ID: 9d57b3b79d35 |
| 4 | +Revises: 7fc6502f1fa3 |
| 5 | +Create Date: 2025-10-18 15:17:23.653448 |
| 6 | +
|
| 7 | +""" |
| 8 | +from typing import Sequence, Union |
| 9 | + |
| 10 | +from alembic import op |
| 11 | +import sqlalchemy as sa |
| 12 | + |
| 13 | + |
| 14 | +# revision identifiers, used by Alembic. |
| 15 | +revision: str = '9d57b3b79d35' |
| 16 | +down_revision: Union[str, None] = '7fc6502f1fa3' |
| 17 | +branch_labels: Union[str, Sequence[str], None] = None |
| 18 | +depends_on: Union[str, Sequence[str], None] = None |
| 19 | + |
| 20 | + |
def upgrade() -> None:
    """Drop and recreate the ``url_status_mat_view`` materialized view.

    The view exposes ``url_id``, a textual ``status`` and a numeric ``code``
    for rows of ``urls``. The status is the first matching rule, in order:

    1. ``Accepted`` - validated as 'not relevant', 'individual record' or
       'not found'.
    2. ``Awaiting Submission`` - validated as 'data source' / 'meta url'
       with no matching ``url_data_source`` / ``url_ds_meta_url`` row.
    3. ``Submitted`` - validated as 'data source' / 'meta url' and the
       matching row exists.
    4. ``Community Labeling`` - compressed HTML, web metadata and a
       screenshot are all present.
    5. ``Error`` - a ``url_task_error`` row exists for the 'Screenshot',
       'HTML' or 'URL Probe' task type.
    6. ``Intake`` - none of the above.

    Codes: Intake=100, Error=110, Community Labeling=200, Accepted=300,
    Awaiting Submission=380, Submitted=390, anything else=-1.
    """
    # The view is dropped first because its definition is being replaced
    # wholesale by the CREATE statement below.
    op.execute("DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view")
    op.execute("""
        CREATE MATERIALIZED VIEW url_status_mat_view as
        with
        urls_with_relevant_errors as (
            -- DISTINCT: a URL can have error rows for more than one of the
            -- task types below; without it the left join further down
            -- would emit that URL once per matching error row.
            select distinct
                ute.url_id
            from
                url_task_error ute
            where
                ute.task_type in (
                    'Screenshot',
                    'HTML',
                    'URL Probe'
                )
        )
        , status_text as (
            -- NOTE(review): the remaining joins are assumed to match at most
            -- one row per URL; confirm against the table constraints.
            select
                u.id as url_id,
                case
                    when (
                        -- Validated as not relevant, individual record, or not found
                        fuv.type in ('not relevant', 'individual record', 'not found')
                    ) Then 'Accepted'
                    when (
                        (fuv.type = 'data source' and uds.url_id is null)
                        OR
                        (fuv.type = 'meta url' and udmu.url_id is null)
                    ) Then 'Awaiting Submission'
                    when (
                        (fuv.type = 'data source' and uds.url_id is not null)
                        OR
                        (fuv.type = 'meta url' and udmu.url_id is not null)
                    ) Then 'Submitted'
                    when (
                        -- Has compressed HTML
                        uch.url_id is not null
                        AND
                        -- Has web metadata
                        uwm.url_id is not null
                        AND
                        -- Has screenshot
                        us.url_id is not null
                    ) THEN 'Community Labeling'
                    when uwre.url_id is not null then 'Error'
                    ELSE 'Intake'
                END as status

            from
                urls u
                left join urls_with_relevant_errors uwre
                    on u.id = uwre.url_id
                left join url_screenshot us
                    on u.id = us.url_id
                left join url_compressed_html uch
                    on u.id = uch.url_id
                left join url_web_metadata uwm
                    on u.id = uwm.url_id
                left join flag_url_validated fuv
                    on u.id = fuv.url_id
                left join url_ds_meta_url udmu
                    on u.id = udmu.url_id
                left join url_data_source uds
                    on u.id = uds.url_id
        )
        select
            url_id,
            status,
            CASE status
                WHEN 'Intake' THEN 100
                WHEN 'Error' THEN 110
                WHEN 'Community Labeling' THEN 200
                WHEN 'Accepted' THEN 300
                WHEN 'Awaiting Submission' THEN 380
                WHEN 'Submitted' THEN 390
                ELSE -1
            END as code
        from status_text
    """)
| 101 | + |
| 102 | + |
def downgrade() -> None:
    """Perform no schema changes; this revision defines no downgrade steps."""
    # NOTE(review): the view created by upgrade() is left in place on
    # downgrade - confirm that is intended.
    return None
0 commit comments