diff --git a/.gitignore b/.gitignore
index a49099ff..41e74f6d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -175,4 +175,4 @@ loadenv.sh
 
 # Spreadsheet data
 spreadsheet_data
-
+data/
diff --git a/README.md b/README.md
index 54d77ff8..492ad9df 100644
--- a/README.md
+++ b/README.md
@@ -109,7 +109,7 @@ You'll probably want an admin account
 python src/manage.py createsuperuser
 ```
 
-And if you have access to it, you can use `import_spreadsheet_dump.sh` to populate
+And if you have access to it, you can use the `import_datadump.sh` script to populate
 your database.
 
 > [!WARNING]
@@ -117,8 +117,9 @@ your database.
 > circumstances.
 
 ```sh
-cp -R ./spreadsheet_data/
-./scripts/import_spreadsheet_dump.sh
+mkdir data/
+cp /path/to/full_dump.sql data/
+./scripts/import_datadump.sh
 ```
 
 If you want to do work with celery, you'll need to run a worker as well as a beat.
@@ -318,6 +319,30 @@ ADMIN_MAP_BASE_URL=http://localhost:3000
 
 Follow this PR: https://github.com/nycmeshnet/meshdb/pull/617/files
 
+### Making Exports for New Devs
+
+To make importable data exports for new devs, first obtain a local copy of the data you want to
+share (see Backups below). Then:
+
+Run the scramble script to obfuscate PII:
+```sh
+python src/manage.py scramble_members
+```
+
+Clear the data from the historical tables (so that we don't leak the data we just scrambled via a diff):
+```sh
+scripts/clear_history_tables.sh
+```
+> [!WARNING]
+> Be sure that you spot check the data to make sure the scramble process worked as expected.
+
+Finally, create an importable datadump with:
+```sh
+scripts/create_importable_datadump.sh
+```
+
+The file will be written to `data/full_dump.sql`; share this with the new devs.
+
 ### Backups
 
 **The Proper Way**
@@ -351,7 +376,7 @@ $ echo 'drop database meshdb; create database meshdb;' | docker exec -i meshdb-p
 4. 
Restore the backup
 
 ```
-root@eefdc57a46c2:/opt/meshdb# python manage.py dbrestore -i default-bd0acc253775-2024-03-31-163520.psql.bin
+root@eefdc57a46c2:/opt/meshdb# python manage.py dbrestore -i default-bd0acc253775-2024-03-31-163520.psql.bin --database default
 ```
 
 **The Quick 'n Dirty Way**
diff --git a/scripts/clear_history_tables.sh b/scripts/clear_history_tables.sh
new file mode 100755
index 00000000..d31c2503
--- /dev/null
+++ b/scripts/clear_history_tables.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb -d meshdb"
+tables=(
+"meshapi_historicalaccesspoint"
+"meshapi_historicalbuilding"
+"meshapi_historicalbuilding_nodes"
+"meshapi_historicaldevice"
+"meshapi_historicalinstall"
+"meshapi_historicallink"
+"meshapi_historicallos"
+"meshapi_historicalmember"
+"meshapi_historicalnode"
+"meshapi_historicalsector"
+)
+
+set -ex
+
+for table_name in "${tables[@]}"
+do
+    echo "TRUNCATE ${table_name} CASCADE;" | $DOCKER_PG_COMMAND
+done
+
diff --git a/scripts/create_importable_datadump.sh b/scripts/create_importable_datadump.sh
new file mode 100755
index 00000000..1627bef0
--- /dev/null
+++ b/scripts/create_importable_datadump.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 pg_dump -U meshdb"
+DATA_DIR="./data/"
+tables=(
+"meshapi_accesspoint"
+"meshapi_building"
+"meshapi_building_nodes"
+"meshapi_device"
+"meshapi_historicalaccesspoint"
+"meshapi_historicalbuilding"
+"meshapi_historicalbuilding_nodes"
+"meshapi_historicaldevice"
+"meshapi_historicalinstall"
+"meshapi_historicallink"
+"meshapi_historicallos"
+"meshapi_historicalmember"
+"meshapi_historicalnode"
+"meshapi_historicalsector"
+"meshapi_install"
+"meshapi_link"
+"meshapi_los"
+"meshapi_member"
+"meshapi_node"
+"meshapi_sector"
+)
+set -ex
+
+# Make sure our files exist.
+if [ ! -d "$DATA_DIR" ]; then
+    echo "$DATA_DIR missing!"
+    exit 1
+fi
+
+docker exec -i meshdb-postgres-1 pg_dump -U meshdb -d meshdb ${tables[@]/#/-t } > "$DATA_DIR/full_dump.sql"
+
diff --git a/scripts/import_spreadsheet_dump.sh b/scripts/import_datadump.sh
similarity index 64%
rename from scripts/import_spreadsheet_dump.sh
rename to scripts/import_datadump.sh
index 05a11602..3f17f9d6 100755
--- a/scripts/import_spreadsheet_dump.sh
+++ b/scripts/import_datadump.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb"
-DATA_DIR="./spreadsheet_data/"
+DOCKER_PG_COMMAND="docker exec -i meshdb-postgres-1 psql -U meshdb -d meshdb"
+DATA_DIR="./data/"
 tables=(
 "meshapi_los"
 "meshapi_link"
@@ -32,40 +32,22 @@ if [ ! -d "$DATA_DIR" ]; then
     exit 1
 fi
 
-for table_name in "${tables[@]}"
-do
-    if [ ! -e "spreadsheet_data/$table_name.sql" ]; then
-        echo "$table_name.sql is missing!"
-        exit 1
-    fi
-done
-
-# Don't need to create them.
-# XXX (willnilges): Do we want to have an option to dump the tables?
-#for table_name in "${tables[@]}"
-#do
-#    docker exec -i meshdb_postgres_1 pg_dump -U meshdb --table="$table_name" > "$table_name.sql"
-#done
+if [ ! -e "$DATA_DIR/full_dump.sql" ]; then
+    echo "full_dump.sql is missing!"
+    exit 1
+fi
 
 num_tables=${#tables[@]}
-
-# Yeet
-# XXX (willnilges): Would it be better to use manage.py?
 for ((i = num_tables - 1; i >= 0; i--)); do
     $DOCKER_PG_COMMAND -c "DROP TABLE IF EXISTS ${tables[i]} CASCADE"
 done
+
 # Import the new data
-for table_name in "${tables[@]}"
-do
-    cat "spreadsheet_data/$table_name.sql" | $DOCKER_PG_COMMAND
-done
+cat "$DATA_DIR/full_dump.sql" | $DOCKER_PG_COMMAND
+
 # Fix the auto numbering sequence for installs
 max_install_number=$(($(${DOCKER_PG_COMMAND} -c "SELECT MAX(install_number) FROM meshapi_install" -At) + 1))
 ${DOCKER_PG_COMMAND} -c "ALTER SEQUENCE meshapi_install_install_number_seq RESTART WITH ${max_install_number}"
-
-
-