diff --git a/flights/flights-bundle-template/README.md b/flights/flights-bundle-template/README.md index c09d9fd..6174940 100644 --- a/flights/flights-bundle-template/README.md +++ b/flights/flights-bundle-template/README.md @@ -3,6 +3,7 @@ Template on top of the [flights_simple](../flights-simple) project to enable the following deployment options: 1. Wheels or relative imports for the project's Python modules 2. Serverless compute or classic compute for workflows +3. Liquibase example setup ## Resources A subset of [flights_simple](../flights-simple) resources are currently demonstrated at the [template resources dir](template/resources/). @@ -49,3 +50,20 @@ A subset of [flights_simple](../flights-simple) resources are currently demonstr ``` $ databricks bundle run flights_notebook --profile ``` + +## Liquibase +If Liquibase is enabled, the template creates a GH action under `./.github/workflows/flights_liquibase.yml` which +- Downloads the Liquibase CLI + required JDBC driver and jars +- Runs a liquibase connection test and updates the changelog +- Runs local tests +- Deploys the bundle in target env and runs a job + +### GH setup +Liquibase requires the following variables set in GitHub: +- `DATABRICKS_HOST`: e.g. `dbc-eae35cd0-9e95.cloud.databricks.com` +- `DATABRICKS_JDBC_URL`: DWH url e.g. `jdbc:databricks://dbc-eae35cd0-9e95.cloud.databricks.com:443/default;transportMode=http;ssl=1;AuthMech=3;httpPath=/sql/1.0/warehouses/863bcc7ff8fea25b;` +- `LIQUIBASE_CATALOG_NAME`: a catalog name e.g. `main` +- `LIQUIBASE_SCHEMA_NAME`: a schema name e.g. 
`liquibase_test` + +Plus the secret: +- `DATABRICKS_TOKEN_TST`: Databricks token for deployment environment diff --git a/flights/flights-bundle-template/databricks_template_schema.json b/flights/flights-bundle-template/databricks_template_schema.json index 379f641..5bf888e 100644 --- a/flights/flights-bundle-template/databricks_template_schema.json +++ b/flights/flights-bundle-template/databricks_template_schema.json @@ -16,19 +16,25 @@ "type": "boolean", "default": false, "description": "Do you want the Databricks workflows to run on serverless?", - "order": 2 + "order": 3 }, "catalog": { "type": "string", "default": "main", "description": "Name of your UC catalog for your workflows/DLT?", - "order": 2 + "order": 4 }, "database": { "type": "string", "default": "flights", "description": "Name of your UC database for your workflows/DLT?", - "order": 2 + "order": 5 + }, + "include_liquibase": { + "type": "boolean", + "default": false, + "description": "Do you want to use liquibase for DataOps data evolution?", + "order": 6 } }, "success_message": "\nYour bundle '{{.project_name}}' has been created." 
diff --git a/flights/flights-bundle-template/template/.github/workflows/flights_liquibase.yml.tmpl b/flights/flights-bundle-template/template/.github/workflows/flights_liquibase.yml.tmpl new file mode 100644 index 0000000..8fd2483 --- /dev/null +++ b/flights/flights-bundle-template/template/.github/workflows/flights_liquibase.yml.tmpl @@ -0,0 +1,127 @@ +{{ if .include_liquibase }} +name: Deploy flights_project to TEST with liquibase + +concurrency: 1 + +on: + workflow_dispatch: + + pull_request: + types: + - opened + - synchronize + branches: + - dev + paths: + - "**/*.yml" + - "**/*.py" + + push: + branches: + - test/liquibase* + paths: + - "**/*.yml" + - "**/*.py" + - "**/*.xml" + - "**/*.txt" + +jobs: + deploy: + name: "Test and deploy bundle" + runs-on: ubuntu-latest + environment: test + env: + DATABRICKS_HOST: {{ "${{ vars.DATABRICKS_HOST }}" }} + DATABRICKS_TOKEN: {{ "${{ secrets.DATABRICKS_TOKEN_TST }}" }} + DATABRICKS_BUNDLE_ENV: dev # should be TST, 'dev' is set for demo purposes only + defaults: + run: + working-directory: . 
+ + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + #---------------------------------------------- + # Set up Java (Temurin JDK 8) + #---------------------------------------------- + - name: Set up Java 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + + #---------------------------------------------- + # Download Liquibase CLI and make executable + #---------------------------------------------- + - name: Download Liquibase CLI + run: | + LIQUIBASE_VERSION=4.33.0 + wget https://github.com/liquibase/liquibase/releases/download/v${LIQUIBASE_VERSION}/liquibase-${LIQUIBASE_VERSION}.tar.gz + mkdir liquibase_download + tar -xzf liquibase-${LIQUIBASE_VERSION}.tar.gz -C liquibase_download + + #---------------------------------------------- + # Download Databricks JDBC and extension JARs + #---------------------------------------------- + - name: Download Databricks JDBC Driver and Liquibase Extension + run: | + # Download Databricks JDBC driver + wget https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.7.3/DatabricksJDBC42-2.7.3.1010.zip + unzip DatabricksJDBC42-2.7.3.1010.zip + mv DatabricksJDBC-2.7.3.1010/DatabricksJDBC42.jar liquibase_download/lib + + # Download Liquibase-Databricks extension + wget https://github.com/liquibase/liquibase-databricks/releases/download/v1.4.2/liquibase-databricks-1.4.2.jar + mv liquibase-databricks-1.4.2.jar liquibase_download/lib + + #---------------------------------------------- + # Run Liquibase connection test & update + #---------------------------------------------- + - name: Run Liquibase connection test & update + env: + LIQUIBASE_COMMAND_URL: {{ "${{ vars.DATABRICKS_JDBC_URL }}" }} + LIQUIBASE_COMMAND_USERNAME: "token" + LIQUIBASE_COMMAND_PASSWORD: {{ "${{ secrets.DATABRICKS_TOKEN_TST }}" }} + LIQUIBASE_COMMAND_DEFAULT_CATALOG_NAME: {{ "${{ vars.LIQUIBASE_CATALOG_NAME 
}}" }} + LIQUIBASE_COMMAND_DEFAULT_SCHEMA_NAME: {{ "${{ vars.LIQUIBASE_SCHEMA_NAME }}" }} + run: | + liquibase_download/liquibase update --changelog-file=./liquibase/root.changelog.xml --log-level INFO + + #---------------------------------------------- + # Install dependencies and package + #---------------------------------------------- + - run: python -m pip install --upgrade pip + - run: pip install -r ./requirements.txt -r ./dev-requirements.txt + - run: pip install -e . + + #---------------------------------------------- + # run test suite + #---------------------------------------------- + - name: Run unit tests + env: + DATABRICKS_HOST: {{ "${{ vars.DATABRICKS_HOST }}" }} + DATABRICKS_TOKEN: {{ "${{ secrets.DATABRICKS_TOKEN_TST }}" }} + DATABRICKS_SERVERLESS_COMPUTE_ID: "auto" + run: | + pytest ./tests/ + + - uses: databricks/setup-cli@main + + #---------------------------------------------- + # run job (in staging) + #---------------------------------------------- + - name: Run job (in staging) + env: + DATABRICKS_HOST: {{ "${{ vars.DATABRICKS_HOST }}" }} + DATABRICKS_TOKEN: {{ "${{ secrets.DATABRICKS_TOKEN_TST }}" }} + DATABRICKS_BUNDLE_ENV: dev # should be TST, 'dev' is set for demo purposes only + BUNDLE_VAR_liquibase_catalog: {{ "${{ vars.LIQUIBASE_CATALOG_NAME }}" }} + BUNDLE_VAR_liquibase_schema: {{ "${{ vars.LIQUIBASE_SCHEMA_NAME }}" }} + run: | + databricks bundle deploy; databricks bundle run flights_notebook; databricks bundle destroy --auto-approve +{{ end }} diff --git a/flights/flights-bundle-template/template/databricks.yml.tmpl b/flights/flights-bundle-template/template/databricks.yml.tmpl index 375a482..02c28ab 100644 --- a/flights/flights-bundle-template/template/databricks.yml.tmpl +++ b/flights/flights-bundle-template/template/databricks.yml.tmpl @@ -21,10 +21,14 @@ include: - resources/dlt/*.yml variables: + {{ if .include_liquibase }} + liquibase_catalog: + liquibase_schema: + {{end}} catalog: - default: {{.catalog}} + default: {{ if 
.include_liquibase }}${var.liquibase_catalog}{{else}}{{.catalog}}{{end}} database: - default: ${resources.schemas.project_schema.name} + default: {{ if .include_liquibase }}${var.liquibase_schema}{{else}}${resources.schemas.project_schema.name}{{end}} flights_dlt_schema: default: ${resources.schemas.project_schema.name}_dlt flights_test_schema: @@ -62,7 +66,7 @@ targets: resources: schemas: project_schema: - name: {{.database}} + name: {{ if .include_liquibase }}${var.database}{{else}}{{.database}}{{end}} catalog_name: ${var.catalog} comment: "Schema for flight data" diff --git a/flights/flights-bundle-template/template/dev-requirements.txt b/flights/flights-bundle-template/template/dev-requirements.txt new file mode 100644 index 0000000..f39f467 --- /dev/null +++ b/flights/flights-bundle-template/template/dev-requirements.txt @@ -0,0 +1,4 @@ +wheel +pytest +databricks-sdk[notebook]>=0.46.0 +pandas==2.2.3 diff --git a/flights/flights-bundle-template/template/liquibase/changelogs/changelog.xml.tmpl b/flights/flights-bundle-template/template/liquibase/changelogs/changelog.xml.tmpl new file mode 100644 index 0000000..d7753b3 --- /dev/null +++ b/flights/flights-bundle-template/template/liquibase/changelogs/changelog.xml.tmpl @@ -0,0 +1,65 @@ +{{ if .include_liquibase }} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{{ end }} \ No newline at end of file diff --git a/flights/flights-bundle-template/template/liquibase/root.changelog.xml.tmpl b/flights/flights-bundle-template/template/liquibase/root.changelog.xml.tmpl new file mode 100644 index 0000000..9411d2f --- /dev/null +++ b/flights/flights-bundle-template/template/liquibase/root.changelog.xml.tmpl @@ -0,0 +1,25 @@ +{{ if .include_liquibase }} + + + + + + + + + + + +{{ end }} \ No newline at end of file diff --git a/flights/flights-bundle-template/template/requirements.txt b/flights/flights-bundle-template/template/requirements.txt new file mode 100644 index 0000000..fc9bc0e --- /dev/null +++ 
b/flights/flights-bundle-template/template/requirements.txt @@ -0,0 +1 @@ +databricks-connect==17.2.* diff --git a/flights/flights-bundle-template/template/src/flights_main_notebook.py.tmpl b/flights/flights-bundle-template/template/src/flights_main_notebook.py.tmpl index 7bafe1d..3674f4b 100644 --- a/flights/flights-bundle-template/template/src/flights_main_notebook.py.tmpl +++ b/flights/flights-bundle-template/template/src/flights_main_notebook.py.tmpl @@ -57,5 +57,5 @@ df_transformed = ( # COMMAND ---------- -df_transformed.write.format("delta").mode("append").saveAsTable(raw_table_name) +df_transformed.write.format("delta").mode("append").option("mergeSchema", "true").saveAsTable(raw_table_name) print(f"Succesfully wrote data to {raw_table_name}") diff --git a/flights/flights-bundle-template/template/update_layout.tmpl b/flights/flights-bundle-template/template/update_layout.tmpl new file mode 100644 index 0000000..df59cbb --- /dev/null +++ b/flights/flights-bundle-template/template/update_layout.tmpl @@ -0,0 +1,5 @@ +{{ if (ne .include_liquibase true) }} + {{ skip `liquibase.properties` }} + {{ skip `liquibase` }} + {{ skip `.github/workflows/flights_liquibase.yml` }} +{{ end }} \ No newline at end of file