From 13fecbfd8611028523ede1ae3e43775a491a90d0 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Fri, 5 Dec 2025 13:04:27 -0500 Subject: [PATCH 1/4] drop deprecated default dataset --- .github/workflows/ingest_single.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ingest_single.yml b/.github/workflows/ingest_single.yml index f4734d0f4d..c714bf4ff8 100644 --- a/.github/workflows/ingest_single.yml +++ b/.github/workflows/ingest_single.yml @@ -7,7 +7,7 @@ on: dataset: description: "Name of the dataset (required)" required: true - default: dcp_mappluto + default: dcp_mappluto_wi latest: type: boolean description: "Tag this version as latest (optional)" From 566a645bdd6d7fdbbd142a0b91f1842bc0d9ec16 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 4 Dec 2025 14:33:45 -0500 Subject: [PATCH 2/4] drop unused code --- products/pluto/pluto_build/04_archive.sh | 9 --------- 1 file changed, 9 deletions(-) diff --git a/products/pluto/pluto_build/04_archive.sh b/products/pluto/pluto_build/04_archive.sh index d8d564a4d0..16dc892f06 100755 --- a/products/pluto/pluto_build/04_archive.sh +++ b/products/pluto/pluto_build/04_archive.sh @@ -24,12 +24,3 @@ run_sql_file \ sql/export_mappluto_gdb.sql\ -v TABLE='mappluto_unclipped_gdb'\ -v GEOM='geom_2263' - -run_sql_command " - DROP TABLE IF EXISTS mappluto_sample; - SELECT * INTO mappluto_sample FROM mappluto_unclipped_gdb limit 5; - ALTER TABLE mappluto_sample ALTER COLUMN \"Borough\" SET NOT NULL; - ALTER TABLE mappluto_sample ALTER COLUMN \"Block\" SET NOT NULL; - ALTER TABLE mappluto_sample ALTER COLUMN \"Lot\" SET NOT NULL; - ALTER TABLE mappluto_sample ALTER COLUMN \"BBL\" SET NOT NULL; - ALTER TABLE mappluto_sample ALTER COLUMN \"BoroCode\" SET NOT NULL;" From 33c05586c3de38c04220cfc39aa3747bd2fdca49 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 4 Dec 2025 16:51:29 -0500 Subject: [PATCH 3/4] consolidate bash scripts --- .github/workflows/pluto_build.yml | 11 +---- .../pluto/pluto_build/01_load_local_csvs.sh | 5 --- products/pluto/pluto_build/02_build.sh | 41 +++++++++++++++++++ products/pluto/pluto_build/03_corrections.sh | 20 --------- products/pluto/pluto_build/04_archive.sh | 26 ------------ 5 files changed, 42 insertions(+), 61 deletions(-) delete mode 100755 products/pluto/pluto_build/01_load_local_csvs.sh delete mode 100755 products/pluto/pluto_build/03_corrections.sh delete mode 100755 products/pluto/pluto_build/04_archive.sh diff --git a/.github/workflows/pluto_build.yml b/.github/workflows/pluto_build.yml index 0b36ebd559..b50544103e 100644 --- a/.github/workflows/pluto_build.yml +++ b/.github/workflows/pluto_build.yml @@ -83,18 +83,9 @@ jobs: run: python -m dcpy.lifecycle.builds.load load --recipe-path ${{ inputs.recipe_file }}.lock.yml - - name: Load Local Data - run: ./01_load_local_csvs.sh - - - name: building ... + - name: Build run: ./02_build.sh - - name: apply corrections - run: ./03_corrections.sh - - - name: Archive - run: ./04_archive.sh - - name: QAQC run: ./05_qaqc.sh diff --git a/products/pluto/pluto_build/01_load_local_csvs.sh b/products/pluto/pluto_build/01_load_local_csvs.sh deleted file mode 100755 index 49e4ec0cb3..0000000000 --- a/products/pluto/pluto_build/01_load_local_csvs.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -source ./bash/config.sh -set_error_traps - -run_sql_file sql/_create.sql diff --git a/products/pluto/pluto_build/02_build.sh b/products/pluto/pluto_build/02_build.sh index 5657da5a6e..35bb8b217e 100755 --- a/products/pluto/pluto_build/02_build.sh +++ b/products/pluto/pluto_build/02_build.sh @@ -3,6 +3,7 @@ source ./bash/config.sh set_error_traps echo "Starting to build PLUTO ..." +run_sql_file sql/_create.sql run_sql_file sql/preprocessing.sql run_sql_file sql/create_pts.sql run_sql_file sql/create_rpad_geo.sql @@ -108,4 +109,44 @@ run_sql_file sql/plutomapid_1.sql run_sql_file sql/plutomapid_2.sql run_sql_file sql/shorelineclip.sql +run_sql_file sql/corr_create.sql + +echo "Applying corrections to PLUTO" +run_sql_file sql/corr_lotarea.sql +run_sql_file sql/corr_template.sql -v FIELD=yearbuilt +run_sql_file sql/corr_template.sql -v FIELD=ownername +run_sql_file sql/corr_ownername_punctuation.sql +run_sql_file sql/corr_template.sql -v FIELD=cd +run_sql_file sql/corr_template.sql -v FIELD=numfloors +run_sql_file sql/corr_template.sql -v FIELD=numbldgs +run_sql_file sql/corr_template.sql -v FIELD=unitsres +run_sql_file sql/corr_template.sql -v FIELD=unitstotal +run_sql_file sql/corr_inwoodrezoning.sql +run_sql_file sql/corr_template.sql -v FIELD=bct2020 +run_sql_file sql/corr_template.sql -v FIELD=address +run_sql_file sql/remove_unitlots.sql + +echo "Creating export tables" +run_sql_file sql/export.sql + +run_sql_file \ + sql/export_mappluto_shp.sql\ + -v TABLE='mappluto'\ + -v GEOM='clipped_2263' + +run_sql_file \ + sql/export_mappluto_shp.sql\ + -v TABLE='mappluto_unclipped'\ + -v GEOM='geom_2263' + +run_sql_file \ + sql/export_mappluto_gdb.sql\ + -v TABLE='mappluto_gdb'\ + -v GEOM='clipped_2263' + +run_sql_file \ + sql/export_mappluto_gdb.sql\ + -v TABLE='mappluto_unclipped_gdb'\ + -v GEOM='geom_2263' + echo 'Done' diff --git a/products/pluto/pluto_build/03_corrections.sh b/products/pluto/pluto_build/03_corrections.sh deleted file mode 100755 index 364c896649..0000000000 --- a/products/pluto/pluto_build/03_corrections.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -source ./bash/config.sh -set_error_traps - -run_sql_file sql/corr_create.sql - -echo "Applying corrections to PLUTO" -run_sql_file sql/corr_lotarea.sql -run_sql_file sql/corr_template.sql -v FIELD=yearbuilt -run_sql_file sql/corr_template.sql -v FIELD=ownername -run_sql_file sql/corr_ownername_punctuation.sql -run_sql_file sql/corr_template.sql -v FIELD=cd -run_sql_file sql/corr_template.sql -v FIELD=numfloors -run_sql_file sql/corr_template.sql -v FIELD=numbldgs -run_sql_file sql/corr_template.sql -v FIELD=unitsres -run_sql_file sql/corr_template.sql -v FIELD=unitstotal -run_sql_file sql/corr_inwoodrezoning.sql -run_sql_file sql/corr_template.sql -v FIELD=bct2020 -run_sql_file sql/corr_template.sql -v FIELD=address -run_sql_file sql/remove_unitlots.sql diff --git a/products/pluto/pluto_build/04_archive.sh b/products/pluto/pluto_build/04_archive.sh deleted file mode 100755 index 16dc892f06..0000000000 --- a/products/pluto/pluto_build/04_archive.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -source ./bash/config.sh -set_error_traps - -echo 'Create Export' -run_sql_file sql/export.sql - -run_sql_file \ - sql/export_mappluto_shp.sql\ - -v TABLE='mappluto'\ - -v GEOM='clipped_2263' - -run_sql_file \ - sql/export_mappluto_shp.sql\ - -v TABLE='mappluto_unclipped'\ - -v GEOM='geom_2263' - -run_sql_file \ - sql/export_mappluto_gdb.sql\ - -v TABLE='mappluto_gdb'\ - -v GEOM='clipped_2263' - -run_sql_file \ - sql/export_mappluto_gdb.sql\ - -v TABLE='mappluto_unclipped_gdb'\ - -v GEOM='geom_2263' From db3debe74dfee02882349abd0bbf69b9cc2d1f27 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Wed, 17 Dec 2025 09:47:03 -0500 Subject: [PATCH 4/4] add valid geometries test --- products/pluto/models/_sources.yml | 1 + .../tests/assert_all_geometry_is_valid.sql | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 products/pluto/tests/assert_all_geometry_is_valid.sql diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index 385c929c97..ff8d905618 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -57,6 +57,7 @@ sources: records per a condominium. - name: previous_pluto + - name: pluto_geom - name: export_pluto description: final PLUTO table - name: dcp_zoningdistricts diff --git a/products/pluto/tests/assert_all_geometry_is_valid.sql b/products/pluto/tests/assert_all_geometry_is_valid.sql new file mode 100644 index 0000000000..651f6ea9b0 --- /dev/null +++ b/products/pluto/tests/assert_all_geometry_is_valid.sql @@ -0,0 +1,56 @@ +{{ + config( + tags = ['de_check', 'minor', 'major'], + meta = { + 'description': ''' + This test checks for any invalid lot geometries + ''', + 'next_steps': 'Contact DE and GIS to investigate' + } + ) +}} + +WITH pluto_geom AS ( + SELECT + bbl, + geom_2263, + geom_4326, + clipped_2263, + clipped_4326 + FROM {{ source('build_sources', 'pluto_geom') }} +), + +-- ST_IsValidDetail returns a row containing: +-- a boolean (valid) stating if a geometry is valid +-- a varchar (reason) stating why it is invalid +-- a geometry (location) pointing out where it is invalid +-- https://postgis.net/docs/ST_IsValidDetail.html +validity_details AS ( + SELECT + bbl, + ST_ISVALIDDETAIL(geom_2263) AS validity_details_geom_2263, + ST_ISVALIDDETAIL(geom_4326) AS validity_details_geom_4326, + ST_ISVALIDDETAIL(clipped_2263) AS validity_details_clipped_2263, + ST_ISVALIDDETAIL(clipped_4326) AS validity_details_clipped_4326 + FROM pluto_geom +), + +check_all_geoms AS ( + SELECT + *, + ( + false + = ANY( + ARRAY[ + (validity_details_geom_2263).valid, + (validity_details_geom_4326).valid, + (validity_details_clipped_2263).valid, + (validity_details_clipped_4326).valid + ] + ) + ) AS any_geoms_invalid + FROM validity_details +) + +SELECT * FROM check_all_geoms +WHERE any_geoms_invalid