Fix dev import process #851

Merged · 3 commits · Mar 4, 2025

2 changes: 1 addition & 1 deletion .gitignore
@@ -175,4 +175,4 @@ loadenv.sh

# Spreadsheet data
spreadsheet_data

data/
32 changes: 28 additions & 4 deletions README.md
@@ -109,16 +109,17 @@ You'll probably want an admin account
python src/manage.py createsuperuser
```

And if you have access to it, you can use `import_spreadsheet_dump.sh` to populate
And if you have access to it, you can use the `import_datadump.sh` script to populate
your database.

> [!WARNING]
> This is _real member data_. DO NOT share this database with anyone under any
> circumstances.

```sh
cp -R <path_to_data_dump> ./spreadsheet_data/
./scripts/import_spreadsheet_dump.sh
mkdir data/
cp <path_to_data_dump>/full_dump.sql data/
./scripts/import_datadump.sh
```
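
Note that the import script drops and re-creates the `meshapi_*` tables from the dump, so any existing local data in those tables will be replaced.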

If you want to do work with celery, you'll need to run a worker as well as a beat.
@@ -318,6 +319,29 @@ ADMIN_MAP_BASE_URL=http://localhost:3000

Follow this PR: https://github.com/nycmeshnet/meshdb/pull/617/files

### Making Exports for New Devs

To make importable data exports for new devs, first obtain a local copy of the data you want to
share (see Backups below). Then:

Run the scramble script to obfuscate PII:
```sh
python src/manage.py scramble_members
```

Clear the data from the historical tables (so that we don't leak the data we just scrambled via a diff):
```sh
scripts/clear_history_tables.sh
```

> [!WARNING]
> Be sure to spot-check the data to confirm the scramble process worked as expected.
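
For example, you might eyeball a few member rows directly in postgres. This is a minimal sketch assuming the `meshdb-postgres-1` container name used elsewhere in this README:

```sh
# Spot check: no real names or emails should appear after scrambling
echo 'SELECT * FROM meshapi_member LIMIT 10;' \
  | docker exec -i meshdb-postgres-1 psql -U meshdb -d meshdb
```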

Finally, create an importable datadump with:
```sh
scripts/create_importable_datadump.sh
```

The file will be written to `data/full_dump.sql`; share it with the new devs.

### Backups

**The Proper Way**
@@ -351,7 +375,7 @@ $ echo 'drop database meshdb; create database meshdb;' | docker exec -i meshdb-p

4. Restore the backup
```
root@eefdc57a46c2:/opt/meshdb# python manage.py dbrestore -i default-bd0acc253775-2024-03-31-163520.psql.bin
root@eefdc57a46c2:/opt/meshdb# python manage.py dbrestore -i default-bd0acc253775-2024-03-31-163520.psql.bin --database default
```
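
(The `--database default` flag tells `dbrestore` which configured Django database alias to restore into.)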

**The Quick 'n Dirty Way**
23 changes: 23 additions & 0 deletions scripts/clear_history_tables.sh
@@ -0,0 +1,23 @@
#!/bin/bash

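# psql wrapper targeting the dev compose postgres container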
DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb -d meshdb"
tables=(
"meshapi_historicalaccesspoint"
"meshapi_historicalbuilding"
"meshapi_historicalbuilding_nodes"
"meshapi_historicaldevice"
"meshapi_historicalinstall"
"meshapi_historicallink"
"meshapi_historicallos"
"meshapi_historicalmember"
"meshapi_historicalnode"
"meshapi_historicalsector"
)

set -ex

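# TRUNCATE ... CASCADE also clears rows in any tables that reference these via foreign keys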
for table_name in "${tables[@]}"
do
echo "TRUNCATE ${table_name} CASCADE;" | $DOCKER_PG_COMMAND
done

36 changes: 36 additions & 0 deletions scripts/create_importable_datadump.sh
@@ -0,0 +1,36 @@
#!/bin/bash

DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 pg_dump -U meshdb"
DATA_DIR="./data/"
tables=(
"meshapi_accesspoint"
"meshapi_building"
"meshapi_building_nodes"
"meshapi_device"
"meshapi_historicalaccesspoint"
"meshapi_historicalbuilding"
"meshapi_historicalbuilding_nodes"
"meshapi_historicaldevice"
"meshapi_historicalinstall"
"meshapi_historicallink"
"meshapi_historicallos"
"meshapi_historicalmember"
"meshapi_historicalnode"
"meshapi_historicalsector"
"meshapi_install"
"meshapi_link"
"meshapi_los"
"meshapi_member"
"meshapi_node"
"meshapi_sector"
)
set -ex

# Make sure the data directory exists.
if [ ! -d "$DATA_DIR" ]; then
echo "$DATA_DIR missing!"
exit 1
fi

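# ${tables[@]/#/-t } prefixes each table name with "-t ", limiting pg_dump to just these tables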
$DOCKER_PG_COMMAND -d meshdb ${tables[@]/#/-t } > "$DATA_DIR/full_dump.sql"

36 changes: 9 additions & 27 deletions scripts/import_spreadsheet_dump.sh → scripts/import_datadump.sh
@@ -1,7 +1,7 @@
#!/bin/bash

DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb"
DATA_DIR="./spreadsheet_data/"
DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb -d meshdb"
DATA_DIR="./data/"
tables=(
"meshapi_los"
"meshapi_link"
@@ -32,40 +32,22 @@ if [ ! -d "$DATA_DIR" ]; then
exit 1
fi

for table_name in "${tables[@]}"
do
if [ ! -e "spreadsheet_data/$table_name.sql" ]; then
echo "$table_name.sql is missing!"
exit 1
fi
done

# Don't need to create them.
# XXX (willnilges): Do we want to have an option to dump the tables?
#for table_name in "${tables[@]}"
#do
# docker exec -i meshdb_postgres_1 pg_dump -U meshdb --table="$table_name" > "$table_name.sql"
#done
if [ ! -e "$DATA_DIR/full_dump.sql" ]; then
echo "full_dump.sql is missing!"
exit 1
fi

num_tables=${#tables[@]}

# Yeet
# XXX (willnilges): Would it be better to use manage.py?
for ((i = num_tables - 1; i >= 0; i--));
do
$DOCKER_PG_COMMAND -c "DROP TABLE IF EXISTS ${tables[i]} CASCADE"
done


# Import the new data
for table_name in "${tables[@]}"
do
cat "spreadsheet_data/$table_name.sql" | $DOCKER_PG_COMMAND
done
cat "$DATA_DIR/full_dump.sql" | $DOCKER_PG_COMMAND


# Fix the auto numbering sequence for installs
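# (the dump may not restore the sequence position, so advance it past the highest imported install_number)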
max_install_number=$(($(${DOCKER_PG_COMMAND} -c "SELECT MAX(install_number) FROM meshapi_install" -At) + 1))
${DOCKER_PG_COMMAND} -c "ALTER SEQUENCE meshapi_install_install_number_seq RESTART WITH ${max_install_number}"