diff --git a/docs/examples/druid/quickstart/deep-storage-config.yaml b/docs/examples/druid/quickstart/deep-storage-config.yaml index 09389a6d3..361259582 100644 --- a/docs/examples/druid/quickstart/deep-storage-config.yaml +++ b/docs/examples/druid/quickstart/deep-storage-config.yaml @@ -10,6 +10,7 @@ stringData: druid.s3.accessKey: "minio" druid.s3.secretKey: "minio123" druid.s3.protocol: "http" + druid.s3.enablePathStyleAccess: "true" druid.s3.endpoint.signingRegion: "us-east-1" druid.s3.endpoint.url: "http://myminio-hl.demo.svc.cluster.local:9000/" diff --git a/docs/guides/druid/_index.md b/docs/guides/druid/_index.md index f84d02cbe..bd01b5770 100644 --- a/docs/guides/druid/_index.md +++ b/docs/guides/druid/_index.md @@ -2,7 +2,7 @@ title: Druid menu: docs_{{ .version }}: - identifier: dr-druid-guides + identifier: guides-druid name: Druid parent: guides weight: 10 diff --git a/docs/guides/druid/backup/_index.md b/docs/guides/druid/backup/_index.md new file mode 100644 index 000000000..31146d6c1 --- /dev/null +++ b/docs/guides/druid/backup/_index.md @@ -0,0 +1,10 @@ +--- +title: Backup & Restore Druid | KubeStash +menu: + docs_{{ .version }}: + identifier: guides-druid-backup + name: Backup & Restore + parent: guides-druid + weight: 50 +menu_name: docs_{{ .version }} +--- \ No newline at end of file diff --git a/docs/guides/druid/backup/application-level/examples/backupconfiguration.yaml b/docs/guides/druid/backup/application-level/examples/backupconfiguration.yaml new file mode 100644 index 000000000..542c015f7 --- /dev/null +++ b/docs/guides/druid/backup/application-level/examples/backupconfiguration.yaml @@ -0,0 +1,37 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: 
demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: manifest-backup + - name: mysql-metadata-storage-backup diff --git a/docs/guides/druid/backup/application-level/examples/backupstorage.yaml b/docs/guides/druid/backup/application-level/examples/backupstorage.yaml new file mode 100644 index 000000000..c09038c6b --- /dev/null +++ b/docs/guides/druid/backup/application-level/examples/backupstorage.yaml @@ -0,0 +1,17 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: druid + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete \ No newline at end of file diff --git a/docs/guides/druid/backup/application-level/examples/restoresession.yaml b/docs/guides/druid/backup/application-level/examples/restoresession.yaml new file mode 100644 index 000000000..2f17e07e5 --- /dev/null +++ b/docs/guides/druid/backup/application-level/examples/restoresession.yaml @@ -0,0 +1,22 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + manifestOptions: + druid: + restoreNamespace: dev + db: true + dbName: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-restore + - name: manifest-restore diff --git a/docs/guides/druid/backup/application-level/examples/retentionpolicy.yaml b/docs/guides/druid/backup/application-level/examples/retentionpolicy.yaml new file mode 100644 index 
000000000..459156286 --- /dev/null +++ b/docs/guides/druid/backup/application-level/examples/retentionpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All \ No newline at end of file diff --git a/docs/guides/druid/backup/application-level/examples/sample-druid.yaml b/docs/guides/druid/backup/application-level/examples/sample-druid.yaml new file mode 100644 index 000000000..d92c784f0 --- /dev/null +++ b/docs/guides/druid/backup/application-level/examples/sample-druid.yaml @@ -0,0 +1,15 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-1.png b/docs/guides/druid/backup/application-level/images/druid-ui-1.png new file mode 100644 index 000000000..c5d985f8c Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-1.png differ diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-2.png b/docs/guides/druid/backup/application-level/images/druid-ui-2.png new file mode 100644 index 000000000..5cd4988e6 Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-2.png differ diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-3.png b/docs/guides/druid/backup/application-level/images/druid-ui-3.png new file mode 100644 index 000000000..4ed44c6c9 Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-3.png differ diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-4.png 
b/docs/guides/druid/backup/application-level/images/druid-ui-4.png new file mode 100644 index 000000000..f11920449 Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-4.png differ diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-5.png b/docs/guides/druid/backup/application-level/images/druid-ui-5.png new file mode 100644 index 000000000..065fadf4e Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-5.png differ diff --git a/docs/guides/druid/backup/application-level/images/druid-ui-6.png b/docs/guides/druid/backup/application-level/images/druid-ui-6.png new file mode 100644 index 000000000..2e78eef06 Binary files /dev/null and b/docs/guides/druid/backup/application-level/images/druid-ui-6.png differ diff --git a/docs/guides/druid/backup/application-level/index.md b/docs/guides/druid/backup/application-level/index.md new file mode 100644 index 000000000..d5d3150f1 --- /dev/null +++ b/docs/guides/druid/backup/application-level/index.md @@ -0,0 +1,749 @@ +--- +title: Application Level Backup & Restore Druid | KubeStash +description: Application Level Backup and Restore using KubeStash +menu: + docs_{{ .version }}: + identifier: guides-druid-backup-application-level + name: Application Level Backup + parent: guides-druid-backup + weight: 20 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Application Level Backup and Restore Druid database using KubeStash + +KubeStash offers application-level backup and restore functionality for `Druid` databases. It captures both manifest and logical data backups of any `Druid` database in a single snapshot. During the restore process, KubeStash first applies the `Druid` manifest to the cluster and then restores the data into it. + +This guide will give you how you can take application-level backup and restore your `Druid` databases using `Kubestash`. 
+ +## Before You Begin + +- At first, you need to have a Kubernetes cluster, and the `kubectl` command-line tool must be configured to communicate with your cluster. If you do not already have a cluster, you can create one by using `Minikube` or `Kind`. +- Install `KubeDB` in your cluster following the steps [here](/docs/setup/README.md). +- Install `KubeStash` in your cluster following the steps [here](https://kubestash.com/docs/latest/setup/install/kubestash). +- Install KubeStash `kubectl` plugin following the steps [here](https://kubestash.com/docs/latest/setup/install/kubectl-plugin/). +- If you are not familiar with how KubeStash backup and restore Druid databases, please check the following guide [here](/docs/guides/druid/backup/overview/index.md). + +You should be familiar with the following `KubeStash` concepts: + +- [BackupStorage](https://kubestash.com/docs/latest/concepts/crds/backupstorage/) +- [BackupConfiguration](https://kubestash.com/docs/latest/concepts/crds/backupconfiguration/) +- [BackupSession](https://kubestash.com/docs/latest/concepts/crds/backupsession/) +- [RestoreSession](https://kubestash.com/docs/latest/concepts/crds/restoresession/) +- [Addon](https://kubestash.com/docs/latest/concepts/crds/addon/) +- [Function](https://kubestash.com/docs/latest/concepts/crds/function/) +- [Task](https://kubestash.com/docs/latest/concepts/crds/addon/#task-specification) + +To keep everything isolated, we are going to use a separate namespace called `demo` throughout this tutorial. + +```bash +$ kubectl create ns demo +namespace/demo created +``` + +> **Note:** YAML files used in this tutorial are stored in [docs/guides/druid/backup/application-level/examples](docs/guides/druid/backup/application-level/examples) directory of [kubedb/docs](https://github.com/kubedb/docs) repository. + +## Backup Druid + +KubeStash supports backups for `Druid` instances for various Cluster setups. 
In this demonstration, we'll focus on a `Druid` database with 5 types of nodes (coordinators, historicals, brokers, middlemanagers and routers). The backup and restore process is similar for other cluster setups as well. + +This section will demonstrate how to take application-level backup of a `Druid` database. Here, we are going to deploy a `Druid` database using KubeDB. Then, we are going to back up the database at the application level to a `GCS` bucket. Finally, we will restore the entire `Druid` database. + +### Deploy Sample Druid Database + +Let's deploy a sample `Druid` database and insert some data into it. + +**Create Druid CR:** + +Below is the YAML of a sample `Druid` CR that we are going to create for this tutorial: + +```yaml +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut +``` + +Here, +- `.spec.topology` specifies the clustering configuration of Druid. +- `.spec.topology.routers` specifies that 1 replica of the routers node will get provisioned alongside the essential nodes. + +Create the above `Druid` CR, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/sample-druid.yaml +druid.kubedb.com/sample-druid created +``` + +KubeDB will deploy a Druid database according to the above specification. It will also create the necessary Secrets and Services to access the database. + +Let's check if the database is ready to use, + +```bash +$ kubectl get druids.kubedb.com -n demo +NAME TYPE VERSION STATUS AGE +sample-druid kubedb.com/v1alpha2 30.0.0 Ready 4m14s +``` + +The database is `Ready`.
Verify that KubeDB has created a `Secret` and a `Service` for this database using the following commands, + +```bash +$ kubectl get secret -n demo -l=app.kubernetes.io/instance=sample-druid +NAME TYPE DATA AGE +sample-druid-admin-cred kubernetes.io/basic-auth 2 2m34s +sample-druid-config Opaque 11 2m34s + +$ kubectl get service -n demo -l=app.kubernetes.io/instance=sample-druid +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +sample-druid-brokers ClusterIP 10.128.135.115 8082/TCP 2m53s +sample-druid-coordinators ClusterIP 10.128.16.222 8081/TCP 2m53s +sample-druid-pods ClusterIP None 8081/TCP,8090/TCP,8083/TCP,8091/TCP,8082/TCP,8888/TCP 2m53s +sample-druid-routers ClusterIP 10.128.191.186 8888/TCP 2m53s +``` + +Here, we have to use service `sample-druid-routers` and secret `sample-druid-admin-cred` to connect with the database. `KubeDB` creates an [AppBinding](/docs/guides/druid/concepts/appbinding.md) CR that holds the necessary information to connect with the database. + +**Verify Internal Dependencies:** + +```bash +kubectl get mysql,zk -n demo +NAME VERSION STATUS AGE +mysql.kubedb.com/sample-druid-mysql-metadata 8.0.35 Ready 6m31s + +NAME TYPE VERSION STATUS AGE +zookeeper.kubedb.com/sample-druid-zk kubedb.com/v1alpha2 3.7.2 Ready 6m31s +``` +We can see that KubeDB has deployed a `MySQL` and a `ZooKeeper` instance as [External dependencies](https://druid.apache.org/docs/latest/design/architecture/#external-dependencies) of the `Druid` cluster. 
+ +**Verify AppBinding:** + +Verify that the `AppBinding` has been created successfully using the following command, + +```bash +$ kubectl get appbindings -n demo +NAME TYPE VERSION AGE +sample-druid kubedb.com/druid 30.0.0 4m7s +sample-druid-mysql-metadata kubedb.com/mysql 8.0.35 6m31s +sample-druid-zk kubedb.com/zookeeper 3.7.2 6m34s +``` + +Here `sample-druid` is the `AppBinding` of Druid, while `sample-druid-mysql-metadata` and `sample-druid-zk` are the `AppBinding` of `MySQL` and `ZooKeeper` instances that `KubeDB` has deployed as the [External dependencies](https://druid.apache.org/docs/latest/design/architecture/#external-dependencies) of `Druid` + +Let's check the YAML of the `AppBinding` of druid, + +```bash +$ kubectl get appbindings -n demo sample-druid -o yaml +``` + +```yaml +apiVersion: appcatalog.appscode.com/v1alpha1 +kind: AppBinding +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"kubedb.com/v1alpha2","kind":"Druid","metadata":{"annotations":{},"name":"sample-druid","namespace":"demo"},"spec":{"deepStorage":{"configSecret":{"name":"deep-storage-config"},"type":"s3"},"deletionPolicy":"WipeOut","topology":{"routers":{"replicas":1}},"version":"30.0.0"}} + creationTimestamp: "2024-09-19T13:02:20Z" + generation: 1 + labels: + app.kubernetes.io/component: database + app.kubernetes.io/instance: sample-druid + app.kubernetes.io/managed-by: kubedb.com + app.kubernetes.io/name: druids.kubedb.com + name: sample-druid + namespace: demo + ownerReferences: + - apiVersion: kubedb.com/v1alpha2 + blockOwnerDeletion: true + controller: true + kind: Druid + name: sample-druid + uid: cdbc2414-0dd1-4573-9532-e96b9094a443 + resourceVersion: "1610820" + uid: 8430d22d-e715-454a-8a83-e30e40cbeb14 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo + clientConfig: + service: + name: sample-druid-pods + port: 8888 + scheme: http + url: 
http://sample-druid-coordinators-0.sample-druid-pods.demo.svc.cluster.local:8081,http://sample-druid-overlords-0.sample-druid-pods.demo.svc.cluster.local:8090,http://sample-druid-middlemanagers-0.sample-druid-pods.demo.svc.cluster.local:8091,http://sample-druid-historicals-0.sample-druid-pods.demo.svc.cluster.local:8083,http://sample-druid-brokers-0.sample-druid-pods.demo.svc.cluster.local:8082,http://sample-druid-routers-0.sample-druid-pods.demo.svc.cluster.local:8888 + secret: + name: sample-druid-admin-cred + type: kubedb.com/druid + version: 30.0.0 +``` + +KubeStash uses the `AppBinding` CR to connect with the target database. It requires the following three fields to be set in AppBinding's `.spec` section. + +- `.spec.clientConfig.service.name` specifies the name of the Service that connects to the database. +- `.spec.secret` specifies the name of the Secret that holds necessary credentials to access the database. +- `.spec.type` specifies the type of the app that this AppBinding is pointing to. KubeDB generated AppBinding follows the following format: `<app group>/<resource kind>`. + +**Insert Sample Data:** + +We can access the [web console](https://druid.apache.org/docs/latest/operations/web-console) of Druid database from any browser by port-forwarding the routers. Let’s port-forward the port `8888` to local machine: +```bash +kubectl port-forward -n demo svc/sample-druid-routers 8888 +Forwarding from 127.0.0.1:8888 -> 8888 +Forwarding from [::1]:8888 -> 8888 +``` + +Now hit the `http://localhost:8888` from any browser, and you will be prompted to provide the credential of the druid database. By following the steps discussed below, you can get the credential generated by the KubeDB operator for your Druid database.
+ +**Connection information:** + +- Username: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.username}' | base64 -d + admin + ``` + +- Password: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.password}' | base64 -d + DqG5E63NtklAkxqC + ``` + +After providing the credentials correctly, you should be able to access the web console like shown below. + +

+  lifecycle +

+ +Now select the `Load Data` option and then select `Batch - classic` from the drop-down menu. +

+  lifecycle +

+ +Select `Example data` and click `Load example` to insert the example `Wikipedia Edits` datasource. + +

+  lifecycle +

+ +After clicking `Next` multiple times, click `Submit` + +

+  lifecycle +

+ +Within a minute status of the ingestion task should become `SUCCESS` +

+  lifecycle +

+ +Now, we are ready to back up the database. + +### Prepare Backend + +We are going to store our backed up data into a GCS bucket. We have to create a Secret with necessary credentials and a `BackupStorage` CR to use this backend. If you want to use a different backend, please read the respective backend configuration doc from [here](https://kubestash.com/docs/latest/guides/backends/overview/). + +**Create Secret:** + +Let's create a secret called `gcs-secret` with access credentials to our desired GCS bucket, + +```bash +$ echo -n '<your-project-id>' > GOOGLE_PROJECT_ID +$ cat /path/to/downloaded-sa-key.json > GOOGLE_SERVICE_ACCOUNT_JSON_KEY +$ kubectl create secret generic -n demo gcs-secret \ + --from-file=./GOOGLE_PROJECT_ID \ + --from-file=./GOOGLE_SERVICE_ACCOUNT_JSON_KEY +secret/gcs-secret created +``` + +**Create BackupStorage:** + +Now, create a `BackupStorage` using this secret. Below is the YAML of the `BackupStorage` CR we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: druid + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete +``` + +Let's create the BackupStorage we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/backupstorage.yaml +backupstorage.storage.kubestash.com/gcs-storage created +``` + +Now, we are ready to back up our database to our desired backend. + +**Create RetentionPolicy:** + +Now, let's create a `RetentionPolicy` to specify how the old Snapshots should be cleaned up.
+ +Below is the YAML of the `RetentionPolicy` object that we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All +``` + +Let’s create the above `RetentionPolicy`, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/retentionpolicy.yaml +retentionpolicy.storage.kubestash.com/demo-retention created +``` + +### Backup + +We have to create a `BackupConfiguration` targeting the `sample-druid` Druid database. Then, KubeStash will create a `CronJob` for each session to take periodic backup of that database. + +At first, we need to create a secret with a Restic password for backup data encryption. + +**Create Secret:** + +Let's create a secret called `encrypt-secret` with the Restic password, + +```bash +$ echo -n 'changeit' > RESTIC_PASSWORD +$ kubectl create secret generic -n demo encrypt-secret \ + --from-file=./RESTIC_PASSWORD +secret "encrypt-secret" created +``` + +**Create BackupConfiguration:** + +Below is the YAML for `BackupConfiguration` CR to take application-level backup of the `sample-druid` database that we have deployed earlier, + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid +
encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: manifest-backup + - name: mysql-metadata-storage-backup +``` + +- `.spec.sessions[*].scheduler.schedule` specifies that we want to backup at `5 minutes` interval. +- `.spec.target` refers to the targeted `sample-druid` Druid database that we created earlier. +- `.spec.sessions[*].addon.tasks[*].name` specifies that both the `manifest-backup` and `mysql-metadata-storage-backup` tasks will be executed. + +> Note: To create `BackupConfiguration` for druid with `PostgreSQL` as metadata storage, update the `spec.sessions[*].addon.tasks[*].name` from `mysql-metadata-storage-backup` to `postgres-metadata-storage-backup`. +> Note: When we backup a `Druid`, KubeStash operator will also take backup of the dependent `MySQL` and `ZooKeeper` clusters. +> Note: When we backup a `Druid` where `spec.metadataStorage.externallyManaged` is `false`, KubeStash operator will also take backup of the metadata storage. + +Let's create the `BackupConfiguration` CR that we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/backupconfiguration.yaml +backupconfiguration.core.kubestash.com/sample-druid-backup created +``` + +**Verify Backup Setup Successful** + +If everything goes well, the phase of the `BackupConfiguration` should be `Ready`. The `Ready` phase indicates that the backup setup is successful.
Let's verify the `Phase` of the BackupConfiguration, + +```bash +$ kubectl get backupconfiguration -n demo +NAME PHASE PAUSED AGE +sample-druid-backup Ready 2m50s +``` + +Additionally, we can verify that the `Repository` specified in the `BackupConfiguration` has been created using the following command, + +```bash +$ kubectl get repo -n demo +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +gcs-druid-repo 0 0 B Ready 3m +``` + +KubeStash keeps the backup for `Repository` YAMLs. If we navigate to the GCS bucket, we will see the `Repository` YAML stored in the `demo/druid` directory. + +**Verify CronJob:** + +It will also create a `CronJob` with the schedule specified in `spec.sessions[*].scheduler.schedule` field of `BackupConfiguration` CR. + +Verify that the `CronJob` has been created using the following command, + +```bash +$ kubectl get cronjob -n demo +NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE +trigger-sample-druid-backup-frequent-backup */5 * * * * 0 2m45s 3m25s +``` + +**Verify BackupSession:** + +KubeStash triggers an instant backup as soon as the `BackupConfiguration` is ready. After that, backups are scheduled according to the specified schedule. + +Run the following command to watch `BackupSession` CR, + +```bash +$ kubectl get backupsession -n demo -w + +NAME INVOKER-TYPE INVOKER-NAME PHASE DURATION AGE +sample-druid-backup-frequent-backup-1724065200 BackupConfiguration sample-druid-backup Succeeded 7m22s +``` + +We can see from the above output that the backup session has succeeded. Now, we are going to verify whether the backed up data has been stored in the backend. + +**Verify Backup:** + +Once a backup is complete, KubeStash will update the respective `Repository` CR to reflect the backup. 
Check that the repository `gcs-druid-repo` has been updated by the following command, + +```bash +$ kubectl get repository -n demo gcs-druid-repo +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +gcs-druid-repo true 4 664.979 KiB Ready 2m55s 4h56m +``` + +At this moment we have one `Snapshot`. Run the following command to check the respective `Snapshot` which represents the state of a backup run for an application. + +```bash +$ kubectl get snapshots -n demo -l=kubestash.com/repo-name=gcs-druid-repo +NAME REPOSITORY SESSION SNAPSHOT-TIME DELETION-POLICY PHASE AGE +gcs-druid-repo-sample-druid-backup-frequent-backup-1726830540 gcs-druid-repo frequent-backup 2024-09-20T11:09:00Z Delete Succeeded 3m13s +``` + +> Note: KubeStash creates a `Snapshot` with the following labels: +> - `kubestash.com/app-ref-kind: <target-kind>` +> - `kubestash.com/app-ref-name: <target-name>` +> - `kubestash.com/app-ref-namespace: <target-namespace>` +> - `kubestash.com/repo-name: <repository-name>` +> +> These labels can be used to watch only the `Snapshot`s related to our target Database or `Repository`. + +If we check the YAML of the `Snapshot`, we can find the information about the backed up components of the Database.
+ +```bash +$ kubectl get snapshots -n demo gcs-druid-repo-sample-druid-backup-frequent-backup-1726830540 -oyaml +``` + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: Snapshot +metadata: + annotations: + kubedb.com/db-version: 30.0.0 + creationTimestamp: "2024-09-20T11:09:00Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + kubestash.com/app-ref-kind: Druid + kubestash.com/app-ref-name: sample-druid + kubestash.com/app-ref-namespace: demo + kubestash.com/repo-name: gcs-druid-repo + name: gcs-druid-repo-sample-druid-backup-frequent-backup-1726830540 + namespace: demo + ownerReferences: + - apiVersion: storage.kubestash.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: Repository + name: gcs-druid-repo + uid: d894aad3-ac0d-4c8f-b165-9f9f1085ef3a + resourceVersion: "1720138" + uid: 348fe907-9207-4a71-953c-6cafa80ba3f7 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo + backupSession: sample-druid-backup-frequent-backup-1726830540 + deletionPolicy: Delete + repository: gcs-druid-repo + session: frequent-backup + snapshotID: 01J87HXY4439P70MKGWS8RZM7E + type: FullBackup + version: v1 +status: + components: + dump: + driver: Restic + duration: 10.312603282s + integrity: true + path: repository/v1/frequent-backup/dump + phase: Succeeded + resticStats: + - hostPath: dumpfile.sql + id: 647a7123a66423a81fa21ac77128e46587ddae3e9c9426537a30ad1c9a8e1843 + size: 3.807 MiB + uploaded: 3.807 MiB + size: 652.853 KiB + manifest: + driver: Restic + duration: 10.457007184s + integrity: true + path: repository/v1/frequent-backup/manifest + phase: Succeeded + resticStats: + - hostPath: /kubestash-tmp/manifest + id: 069ad1c6dae59fd086aa9771289fc4dad6d076afbc11180e3b1cd8083cd01691 + size: 13.599 KiB + uploaded: 4.268 KiB + size: 12.127 KiB + conditions: + - lastTransitionTime: "2024-09-20T11:09:00Z" + message: Recent snapshot list updated successfully + reason:
SuccessfullyUpdatedRecentSnapshotList + status: "True" + type: RecentSnapshotListUpdated + - lastTransitionTime: "2024-09-20T11:10:07Z" + message: Metadata uploaded to backend successfully + reason: SuccessfullyUploadedSnapshotMetadata + status: "True" + type: SnapshotMetadataUploaded + integrity: true + phase: Succeeded + size: 664.979 KiB + snapshotTime: "2024-09-20T11:09:00Z" + totalComponents: 2 +``` + +> KubeStash uses the `mysqldump`/`pg_dump` command to take backups of the metadata storage of the target Druid databases. Therefore, the component name for `logical backups` is set as `dump`. +> KubeStash sets the component name as `manifest` for the `manifest backup` of Druid databases. + +Now, if we navigate to the GCS bucket, we will see the backed up data stored in the `demo/druid/repository/v1/frequent-backup/dump` directory. KubeStash also keeps the backup for `Snapshot` YAMLs, which can be found in the `demo/druid/snapshots` directory. + +> Note: KubeStash stores all dumped data encrypted in the backup directory, meaning it remains unreadable until decrypted. + +## Restore + + +In this section, we are going to restore the entire database from the backup that we have taken in the previous section. + +#### Delete Druid: +First, let's delete the `sample-druid` database that we created and backed up, using the command below: +```bash +kubectl delete -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/sample-druid.yaml +druid.kubedb.com "sample-druid" deleted +``` + +For this tutorial, we will restore the database in a separate namespace called `dev`. +Now, create the namespace by running the following command: + +```bash +$ kubectl create ns dev +namespace/dev created +``` + +#### Create RestoreSession: + +We need to create a RestoreSession CR. + +Below is the content of the YAML file of the `RestoreSession` CR that we are going to create to restore the entire database.
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + manifestOptions: + druid: + restoreNamespace: dev + db: true + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-restore + - name: manifest-restore +``` + +Here, + +- `.spec.manifestOptions.druid.db` specifies whether to restore the DB manifest or not. +- `.spec.dataSource.repository` specifies the Repository object that holds the backed up data. +- `.spec.dataSource.snapshot` specifies to restore from latest `Snapshot`. +- `.spec.addon.tasks[*]` specifies that both the `mysql-metadata-storage-restore` and `manifest-restore` tasks will be executed. + +> Note: When we restore a `Druid` with `spec.metadataStorage.externallyManaged` set to `false` (which is `false` by default), then KubeStash operator will also restore the metadataStorage automatically. +> Note: Similarly, if `spec.zooKeeper.externallyManaged` is `false` (which is also `false` by default) then KubeStash operator will also restore the zookeeper instance automatically. +> Note: For externally managed metadata storage and zookeeper however, user needs to specify it in `spec.manifestOptions.mySQL`/`spec.manifestOptions.postgres`/`spec.manifestOptions.zooKeeper` to restore those. + +Let's create the `RestoreSession` object we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/application-level/examples/restoresession.yaml +restoresession.core.kubestash.com/restore-sample-druid created +``` + +Once you have created the `RestoreSession` object, KubeStash will create a restore Job. Run the following command to watch the phase of the `RestoreSession` object, + +```bash +$ watch kubectl get restoresession -n demo +Every 2.0s: kubectl get restores...
AppsCode-PC-03: Wed Aug 21 10:44:05 2024 + +NAME REPOSITORY FAILURE-POLICY PHASE DURATION AGE +restore-sample-druid gcs-druid-repo Succeeded 3s 53s +``` +The `Succeeded` phase means that the restore process has been completed successfully. + +#### Verify Restored Druid Manifest: + +In this section, we will verify whether the desired `Druid` database manifest has been successfully applied to the cluster. + +```bash +$ kubectl get druids.kubedb.com -n dev +NAME VERSION STATUS AGE +restored-druid 30.0.0 Ready 39m +``` + +The output confirms that the `Druid` database has been successfully created with the same configuration as it had at the time of backup. + +Verify the dependencies have been restored: +```bash +$ kubectl get mysql,zk -n dev +NAME VERSION STATUS AGE +mysql.kubedb.com/restored-druid-mysql-metadata 8.0.35 Ready 2m52s + +NAME TYPE VERSION STATUS AGE +zookeeper.kubedb.com/restored-druid-zk kubedb.com/v1alpha2 3.7.2 Ready 2m42s +``` + +The output confirms that the `MySQL` and `ZooKeeper` databases have been successfully created with the same configuration as they had at the time of backup. + +#### Verify Restored Data: + +In this section, we are going to verify whether the desired data has been restored successfully. We are going to connect to the database server and check whether the datasource we ingested earlier in the original database is restored. + +At first, check if the database has gone into `Ready` state by the following command, + +```bash +$ kubectl get druid -n dev restored-druid +NAME VERSION STATUS AGE +restored-druid 30.0.0 Ready 34m +``` + +Now, let's verify if our datasource `wikipedia` exists or not. For that, first find out the database `Services` by the following command, + +Now access the [web console](https://druid.apache.org/docs/latest/operations/web-console) of Druid database from any browser by port-forwarding the routers.
Let’s port-forward the port `8888` to local machine: +```bash +$ kubectl get svc -n dev --selector="app.kubernetes.io/instance=restored-druid" +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +restored-druid-brokers ClusterIP 10.128.74.54 8082/TCP 10m +restored-druid-coordinators ClusterIP 10.128.30.124 8081/TCP 10m +restored-druid-pods ClusterIP None 8081/TCP,8090/TCP,8083/TCP,8091/TCP,8082/TCP,8888/TCP 10m +restored-druid-routers ClusterIP 10.128.228.193 8888/TCP 10m +``` +```bash +kubectl port-forward -n dev svc/restored-druid-routers 8888 +Forwarding from 127.0.0.1:8888 -> 8888 +Forwarding from [::1]:8888 -> 8888 +``` + +Then hit the `http://localhost:8888` from any browser, and you will be prompted to provide the credential of the druid database. By following the steps discussed below, you can get the credential generated by the KubeDB operator for your Druid database. +**Connection information:** +- Username: + + ```bash + $ kubectl get secret -n dev restored-druid-admin-cred -o jsonpath='{.data.username}' | base64 -d + admin + ``` + +- Password: + + ```bash + $ kubectl get secret -n dev restored-druid-admin-cred -o jsonpath='{.data.password}' | base64 -d + DqG5E63NtklAkxqC + ``` +After providing the credentials correctly, you should be able to access the web console as shown below. Now if you go to the `Datasources` section, you will see that our ingested datasource `wikipedia` exists in the list. +

+  lifecycle +

+ +So, from the above screenshot, we can see that the `wikipedia` datasource we have ingested earlier in the original database and now, it is restored successfully. + +## Cleanup + +To cleanup the Kubernetes resources created by this tutorial, run: + +```bash +kubectl delete backupconfigurations.core.kubestash.com -n demo sample-druid-backup +kubectl delete backupstorage -n demo gcs-storage +kubectl delete secret -n demo gcs-secret +kubectl delete secret -n demo encrypt-secret +kubectl delete retentionpolicies.storage.kubestash.com -n demo demo-retention +kubectl delete restoresessions.core.kubestash.com -n demo restore-sample-druid +kubectl delete druid -n demo sample-druid +kubectl delete druid -n dev restored-druid +``` \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/backupstorage.yaml b/docs/guides/druid/backup/auto-backup/examples/backupstorage.yaml new file mode 100644 index 000000000..f5478706b --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/backupstorage.yaml @@ -0,0 +1,17 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: blueprint + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/customize-backupblueprint.yaml b/docs/guides/druid/backup/auto-backup/examples/customize-backupblueprint.yaml new file mode 100644 index 000000000..acd54b8a5 --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/customize-backupblueprint.yaml @@ -0,0 +1,41 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupBlueprint +metadata: + name: druid-customize-backup-blueprint + namespace: demo +spec: + usagePolicy: + allowedNamespaces: + from: All + backupConfigurationTemplate: + deletionPolicy: OnDelete + # ============== 
Blueprint for Backends of BackupConfiguration ================= + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + # ============== Blueprint for Sessions of BackupConfiguration ================= + sessions: + - name: frequent-backup + sessionHistoryLimit: 3 + scheduler: + schedule: ${schedule} + jobTemplate: + backoffLimit: 1 + repositories: + - name: ${repoName} + backend: gcs-backend + directory: ${namespace}/${targetName} + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup + params: + databases: ${targetedDatabases} \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/deep-storage-config.yaml b/docs/guides/druid/backup/auto-backup/examples/deep-storage-config.yaml new file mode 100644 index 000000000..84e309aee --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/deep-storage-config.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Secret +metadata: + name: deep-storage-config + namespace: demo +stringData: + druid.storage.type: "s3" + druid.storage.bucket: "druid" + druid.storage.baseKey: "druid/segments" + druid.s3.accessKey: "minio" + druid.s3.secretKey: "minio123" + druid.s3.protocol: "http" + druid.s3.enablePathStyleAccess: "true" + druid.s3.endpoint.signingRegion: "us-east-1" + druid.s3.endpoint.url: "http://myminio-hl.demo.svc.cluster.local:9000/" diff --git a/docs/guides/druid/backup/auto-backup/examples/default-backupblueprint.yaml b/docs/guides/druid/backup/auto-backup/examples/default-backupblueprint.yaml new file mode 100644 index 000000000..595398b67 --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/default-backupblueprint.yaml @@ -0,0 +1,37 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupBlueprint +metadata: + name: druid-default-backup-blueprint + namespace: demo +spec: + usagePolicy: + 
allowedNamespaces: + from: All + backupConfigurationTemplate: + deletionPolicy: OnDelete + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + sessionHistoryLimit: 3 + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: default-blueprint + backend: gcs-backend + directory: /default-blueprint + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/retentionpolicy.yaml b/docs/guides/druid/backup/auto-backup/examples/retentionpolicy.yaml new file mode 100644 index 000000000..459156286 --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/retentionpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/sample-druid-2.yaml b/docs/guides/druid/backup/auto-backup/examples/sample-druid-2.yaml new file mode 100644 index 000000000..0dccaf9a5 --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/sample-druid-2.yaml @@ -0,0 +1,23 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid-2 + namespace: demo + annotations: + blueprint.kubestash.com/name: druid-customize-backup-blueprint + blueprint.kubestash.com/namespace: demo + variables.kubestash.com/schedule: "*/10 * * * *" + variables.kubestash.com/repoName: customize-blueprint + variables.kubestash.com/namespace: demo + variables.kubestash.com/targetName: sample-druid-2 + 
variables.kubestash.com/targetedDatabases: druid +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/examples/sample-druid.yaml b/docs/guides/druid/backup/auto-backup/examples/sample-druid.yaml new file mode 100644 index 000000000..08131374e --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/examples/sample-druid.yaml @@ -0,0 +1,18 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo + annotations: + blueprint.kubestash.com/name: druid-default-backup-blueprint + blueprint.kubestash.com/namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/auto-backup/index.md b/docs/guides/druid/backup/auto-backup/index.md new file mode 100644 index 000000000..a21e6f8c7 --- /dev/null +++ b/docs/guides/druid/backup/auto-backup/index.md @@ -0,0 +1,820 @@ +--- +title: Druid Auto-Backup | KubeStash +description: Backup Druid database using KubeStash +menu: + docs_{{ .version }}: + identifier: guides-druid-backup-auto-backup + name: Auto Backup + parent: guides-druid-backup + weight: 20 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Backup Druid using KubeStash Auto-Backup + +KubeStash can automatically be configured to backup any `Druid` databases in your cluster. KubeStash enables cluster administrators to deploy backup `blueprints` ahead of time so database owners can easily backup any `Druid` database with a few annotations. + +In this tutorial, we are going to show how you can configure a backup blueprint for `Druid` databases in your cluster and backup them with a few annotations. 
+ +## Before You Begin + +- At first, you need to have a Kubernetes cluster, and the `kubectl` command-line tool must be configured to communicate with your cluster. If you do not already have a cluster, you can create one by using `Minikube` or `Kind`. +- Install `KubeDB` in your cluster following the steps [here](/docs/setup/README.md). +- Install `KubeStash` in your cluster following the steps [here](https://kubestash.com/docs/latest/setup/install/kubestash). +- Install KubeStash `kubectl` plugin following the steps [here](https://kubestash.com/docs/latest/setup/install/kubectl-plugin/). +- If you are not familiar with how KubeStash backup and restore Druid databases, please check the following guide [here](/docs/guides/druid/backup/overview/index.md). + +You should be familiar with the following `KubeStash` concepts: + +- [BackupStorage](https://kubestash.com/docs/latest/concepts/crds/backupstorage/) +- [BackupConfiguration](https://kubestash.com/docs/latest/concepts/crds/backupconfiguration/) +- [BackupSession](https://kubestash.com/docs/latest/concepts/crds/backupsession/) +- [RestoreSession](https://kubestash.com/docs/latest/concepts/crds/restoresession/) +- [Addon](https://kubestash.com/docs/latest/concepts/crds/addon/) +- [Function](https://kubestash.com/docs/latest/concepts/crds/function/) +- [Task](https://kubestash.com/docs/latest/concepts/crds/addon/#task-specification) + +To keep everything isolated, we are going to use a separate namespace called `demo` throughout this tutorial. + +```bash +$ kubectl create ns demo +namespace/demo created +``` + +### Prepare Backend + +We are going to store our backed up data into a GCS bucket. We have to create a Secret with necessary credentials and a `BackupStorage` CR to use this backend. If you want to use a different backend, please read the respective backend configuration doc from [here](https://kubestash.com/docs/latest/guides/backends/overview/). 
+ +**Create Secret:** + +Let's create a secret called `gcs-secret` with access credentials to our desired GCS bucket, + +```bash +$ echo -n '' > GOOGLE_PROJECT_ID +$ cat /path/to/downloaded-sa-key.json > GOOGLE_SERVICE_ACCOUNT_JSON_KEY +$ kubectl create secret generic -n demo gcs-secret \ + --from-file=./GOOGLE_PROJECT_ID \ + --from-file=./GOOGLE_SERVICE_ACCOUNT_JSON_KEY +secret/gcs-secret created +``` + +**Create BackupStorage:** + +Now, create a `BackupStorage` using this secret. Below is the YAML of `BackupStorage` CR we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: blueprint + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete +``` + +Let's create the BackupStorage we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/backupstorage.yaml +backupstorage.storage.kubestash.com/gcs-storage created +``` + +**Create RetentionPolicy:** + +Now, let's create a `RetentionPolicy` to specify how the old Snapshots should be cleaned up. 
+ +Below is the YAML of the `RetentionPolicy` object that we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All +``` + +Let’s create the above `RetentionPolicy`, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/retentionpolicy.yaml +retentionpolicy.storage.kubestash.com/demo-retention created +``` + +**Create Secret:** + +We also need to create a secret with a `Restic` password for backup data encryption. + +Let's create a secret called `encrypt-secret` with the Restic password, + +```bash +$ echo -n 'changeit' > RESTIC_PASSWORD +$ kubectl create secret generic -n demo encrypt-secret \ + --from-file=./RESTIC_PASSWORD +secret "encrypt-secret" created +``` + +## Auto-backup with default configurations + +In this section, we are going to backup a `Druid` database of `demo` namespace. We are going to use the default configurations which will be specified in the `BackupBlueprint` CR. + +**Prepare Backup Blueprint** + +A `BackupBlueprint` allows you to specify a template for the `Repository`, `Session` or `Variables` of `BackupConfiguration` in a Kubernetes native way. + +Now, we have to create a `BackupBlueprint` CR with a blueprint for `BackupConfiguration` object.
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupBlueprint +metadata: + name: druid-default-backup-blueprint + namespace: demo +spec: + usagePolicy: + allowedNamespaces: + from: All + backupConfigurationTemplate: + deletionPolicy: OnDelete + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + sessionHistoryLimit: 3 + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: default-blueprint + backend: gcs-backend + directory: /default-blueprint + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup +``` + +Here, + +- `.spec.backupConfigurationTemplate.backends[*].storageRef` refers our earlier created `gcs-storage` backupStorage. +- `.spec.backupConfigurationTemplate.sessions[*].schedule` specifies that we want to backup the database at `5 minutes` interval. + +Let's create the `BackupBlueprint` we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/default-backupblueprint.yaml +backupblueprint.core.kubestash.com/druid-default-backup-blueprint created +``` + +Now, we are ready to backup our `Druid` databases using few annotations. + +### Deploy Sample Druid Database + +## Get External Dependencies Ready + +### Deep Storage + +One of the external dependency of Druid is deep storage where the segments are stored. It is a storage mechanism that Apache Druid does not provide. **Amazon S3**, **Google Cloud Storage**, or **Azure Blob Storage**, **S3-compatible storage** (like **Minio**), or **HDFS** are generally convenient options for deep storage. 
+ +In this tutorial, we will run a `minio-server` as deep storage in our local `kind` cluster using `minio-operator` and create a bucket named `druid` in it, which the deployed druid database will use. + +```bash + +$ helm repo add minio https://operator.min.io/ +$ helm repo update minio +$ helm upgrade --install --namespace "minio-operator" --create-namespace "minio-operator" minio/operator --set operator.replicaCount=1 + +$ helm upgrade --install --namespace "demo" --create-namespace druid-minio minio/tenant \ +--set tenant.pools[0].servers=1 \ +--set tenant.pools[0].volumesPerServer=1 \ +--set tenant.pools[0].size=1Gi \ +--set tenant.certificate.requestAutoCert=false \ +--set tenant.buckets[0].name="druid" \ +--set tenant.pools[0].name="default" + +``` + +Now we need to create a `Secret` named `deep-storage-config`. It contains the necessary connection information using which the druid database will connect to the deep storage. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: deep-storage-config + namespace: demo +stringData: + druid.storage.type: "s3" + druid.storage.bucket: "druid" + druid.storage.baseKey: "druid/segments" + druid.s3.accessKey: "minio" + druid.s3.secretKey: "minio123" + druid.s3.protocol: "http" + druid.s3.enablePathStyleAccess: "true" + druid.s3.endpoint.signingRegion: "us-east-1" + druid.s3.endpoint.url: "http://myminio-hl.demo.svc.cluster.local:9000/" +``` + +Let’s create the `deep-storage-config` Secret shown above: + +```bash +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/deep-storage-config.yaml +secret/deep-storage-config created +``` + +Let's deploy a sample `Druid` database and insert some data into it. 
+ +**Create Druid CR:** + +Below is the YAML of a sample `Druid` CR that we are going to create for this tutorial: + +```yaml +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo + annotations: + blueprint.kubestash.com/name: druid-default-backup-blueprint + blueprint.kubestash.com/namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut +``` +Here, + +- `.metadata.annotations.blueprint.kubestash.com/name: druid-default-backup-blueprint` specifies the name of the `BackupBlueprint` that will be used for the backup. +- `.metadata.annotations.blueprint.kubestash.com/namespace: demo` specifies the name of the `namespace` where the `BackupBlueprint` resides. + +Create the above `Druid` CR, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/sample-druid.yaml +druid.kubedb.com/sample-druid created +``` + +**Verify BackupConfiguration** + +If everything goes well, KubeStash should create a `BackupConfiguration` for our Druid in demo namespace and the phase of that `BackupConfiguration` should be `Ready`. Verify the `BackupConfiguration` object by the following command, + +```bash +$ kubectl get backupconfiguration -n demo +NAME PHASE PAUSED AGE +appbinding-sample-druid Ready 8m48s +``` + +Now, let’s check the YAML of the `BackupConfiguration`.
+ +```bash +$ kubectl get backupconfiguration -n demo appbinding-sample-druid -o yaml +``` + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + creationTimestamp: "2024-09-19T10:30:46Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + app.kubernetes.io/managed-by: kubestash.com + kubestash.com/invoker-name: druid-default-backup-blueprint + kubestash.com/invoker-namespace: demo + name: appbinding-sample-druid + namespace: demo + resourceVersion: "1594861" + uid: 8c5a21cd-780b-4b67-b95a-d6338d038dd4 +spec: + backends: + - name: gcs-backend + retentionPolicy: + name: demo-retention + namespace: demo + storageRef: + name: gcs-storage + namespace: demo + sessions: + - addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup + name: frequent-backup + repositories: + - backend: gcs-backend + directory: /default-blueprint + encryptionSecret: + name: encrypt-secret + namespace: demo + name: default-blueprint + scheduler: + jobTemplate: + backoffLimit: 1 + template: + controller: {} + metadata: {} + spec: + resources: {} + schedule: '*/5 * * * *' + sessionHistoryLimit: 3 + target: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo +status: + backends: + - name: gcs-backend + ready: true + retentionPolicy: + found: true + ref: + name: demo-retention + namespace: demo + storage: + phase: Ready + ref: + name: gcs-storage + namespace: demo +``` + +Notice the `spec.backends`, `spec.sessions` and `spec.target` sections, KubeStash automatically resolved those info from the `BackupBluePrint` and created above `BackupConfiguration`. + +**Verify BackupSession:** + +KubeStash triggers an instant backup as soon as the `BackupConfiguration` is ready. After that, backups are scheduled according to the specified schedule. 
+ +```bash +$ kubectl get backupsession -n demo -w + +NAME INVOKER-TYPE INVOKER-NAME PHASE DURATION AGE +appbinding-sample-druid-frequent-backup-1726741846 BackupConfiguration appbinding-sample-druid Succeeded 28s 10m +appbinding-sample-druid-frequent-backup-1726742101 BackupConfiguration appbinding-sample-druid Succeeded 35s 6m37s +appbinding-sample-druid-frequent-backup-1726742400 BackupConfiguration appbinding-sample-druid Succeeded 29s 98s +``` + +We can see from the above output that the backup session has succeeded. Now, we are going to verify whether the backed up data has been stored in the backend. + +**Verify Backup:** + +Once a backup is complete, KubeStash will update the respective `Repository` CR to reflect the backup. Check that the repository `default-blueprint` has been updated by the following command, + +```bash +$ kubectl get repository -n demo default-blueprint +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +default-blueprint true 3 1.757 MiB Ready 2m23s 11m +``` + +At this moment we have one `Snapshot`. Run the following command to check the respective `Snapshot` which represents the state of a backup run for an application. 
+ +```bash +$ kubectl get snapshots -n demo -l=kubestash.com/repo-name=default-blueprint +NAME REPOSITORY SESSION SNAPSHOT-TIME DELETION-POLICY PHASE AGE +default-blueprint-appbinding-samruid-frequent-backup-1726741846 default-blueprint frequent-backup 2024-09-19T10:30:56Z Delete Succeeded 11m +default-blueprint-appbinding-samruid-frequent-backup-1726742101 default-blueprint frequent-backup 2024-09-19T10:35:01Z Delete Succeeded 7m49s +default-blueprint-appbinding-samruid-frequent-backup-1726742400 default-blueprint frequent-backup 2024-09-19T10:40:00Z Delete Succeeded 2m50s +``` + +> Note: KubeStash creates a `Snapshot` with the following labels: +> - `kubedb.com/db-version: ` +> - `kubestash.com/app-ref-kind: ` +> - `kubestash.com/app-ref-name: ` +> - `kubestash.com/app-ref-namespace: ` +> - `kubestash.com/repo-name: ` +> +> These labels can be used to watch only the `Snapshot`s related to our target Database or `Repository`. + +If we check the YAML of the `Snapshot`, we can find the information about the backed up components of the Database. 
+ +```bash +$ kubectl get snapshots -n demo default-blueprint-appbinding-samruid-frequent-backup-1726741846 -oyaml +``` + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: Snapshot +metadata: + creationTimestamp: "2024-09-19T10:30:56Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + kubedb.com/db-version: 30.0.0 + kubestash.com/app-ref-kind: Druid + kubestash.com/app-ref-name: sample-druid + kubestash.com/app-ref-namespace: demo + kubestash.com/repo-name: default-blueprint + name: default-blueprint-appbinding-samruid-frequent-backup-1726741846 + namespace: demo + ownerReferences: + - apiVersion: storage.kubestash.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: Repository + name: default-blueprint + uid: 7ced6866-349b-48c0-821d-d1ecfee1c80e + resourceVersion: "1594964" + uid: 8ec9bb0c-590c-47b8-944b-22af92d62470 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo + backupSession: appbinding-sample-druid-frequent-backup-1726741846 + deletionPolicy: Delete + repository: default-blueprint + session: frequent-backup + snapshotID: 01J84XBGGY0JKG7JKTRCGV3HYM + type: FullBackup + version: v1 +status: + components: + dump: + driver: Restic + duration: 9.614587405s + integrity: true + path: repository/v1/frequent-backup/dump + phase: Succeeded + resticStats: + - hostPath: dumpfile.sql + id: 8f2b5f5d8a7a18304917e2d4c5a3636f8927085b15c652c35d5fca4a9988515d + size: 3.750 MiB + uploaded: 3.751 MiB + size: 674.017 KiB +``` + +> KubeStash uses the `mysqldump`/`postgresdump` command to take backups of metadata storage of target Druid databases. Therefore, the component name for `logical backups` is set as `dump`. + +Now, if we navigate to the GCS bucket, we will see the backed up data stored in the `/blueprint/default-blueprint/repository/v1/frequent-backup/dump` directory. 
KubeStash also keeps the backup for `Snapshot` YAMLs, which can be found in the `blueprint/default-blueprint/snapshots` directory. + +> Note: KubeStash stores all dumped data encrypted in the backup directory, meaning it remains unreadable until decrypted. + +## Auto-backup with custom configurations + +In this section, we are going to backup a `Druid` database of `demo` namespace. We are going to use the custom configurations which will be specified in the `BackupBlueprint` CR. + +**Prepare Backup Blueprint** + +A `BackupBlueprint` allows you to specify a template for the `Repository`, `Session` or `Variables` of `BackupConfiguration` in a Kubernetes native way. + +Now, we have to create a `BackupBlueprint` CR with a blueprint for `BackupConfiguration` object. + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupBlueprint +metadata: + name: druid-customize-backup-blueprint + namespace: demo +spec: + usagePolicy: + allowedNamespaces: + from: All + backupConfigurationTemplate: + deletionPolicy: OnDelete + # ============== Blueprint for Backends of BackupConfiguration ================= + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + # ============== Blueprint for Sessions of BackupConfiguration ================= + sessions: + - name: frequent-backup + sessionHistoryLimit: 3 + scheduler: + schedule: ${schedule} + jobTemplate: + backoffLimit: 1 + repositories: + - name: ${repoName} + backend: gcs-backend + directory: ${namespace}/${targetName} + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup + params: + databases: ${targetedDatabases} +``` + +Note that we have used some variables (format: `${}`) in different fields. KubeStash will substitute these variables with values from the respective target’s annotations. You’re free to use any variables you like.
+ +Here, + +- `.spec.backupConfigurationTemplate.backends[*].storageRef` refers to our earlier created `gcs-storage` backupStorage. +- `.spec.backupConfigurationTemplate.sessions[*]`: + - `.schedule` defines `${schedule}` variable, which determines the time interval for the backup. + - `.repositories[*].name` defines the `${repoName}` variable, which specifies the name of the backup `Repository`. + - `.repositories[*].directory` defines two variables, `${namespace}` and `${targetName}`, which are used to determine the path where the backup will be stored. + - `.addon.tasks[*].params.databases` defines `${targetedDatabases}` variable, which identifies the list of databases to backup. + +> Note: To create `BackupBlueprint` for druid with `PostgreSQL` as metadata storage just update `spec.backupConfigurationTemplate.sessions[*].addon.tasks[*].name` to `postgres-metadata-storage-backup` + +Let's create the `BackupBlueprint` we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/customize-backupblueprint.yaml +backupblueprint.core.kubestash.com/druid-customize-backup-blueprint created +``` + +Now, we are ready to backup our `Druid` databases using few annotations. You can check available auto-backup annotations for a database from [here](https://kubestash.com/docs/latest/concepts/crds/backupblueprint/). + +**Create Database** + +Before proceeding to creating a new `Druid` database, let us clean up the resources of the previous step: +```bash +kubectl delete backupblueprints.core.kubestash.com -n demo druid-default-backup-blueprint +kubectl delete druid -n demo sample-druid +``` + +Now, we are going to create a new `Druid` CR in demo namespace.
Below is the YAML of the Druid object that we are going to create, + +```yaml +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid-2 + namespace: demo + annotations: + blueprint.kubestash.com/name: druid-customize-backup-blueprint + blueprint.kubestash.com/namespace: demo + variables.kubestash.com/schedule: "*/10 * * * *" + variables.kubestash.com/repoName: customize-blueprint + variables.kubestash.com/namespace: demo + variables.kubestash.com/targetName: sample-druid-2 + variables.kubestash.com/targetedDatabases: druid +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut +``` + +Notice the `metadata.annotations` field, where we have defined the annotations related to the automatic backup configuration. Specifically, we've set the `BackupBlueprint` name as `druid-customize-backup-blueprint` and the namespace as `demo`. We have also provided values for the blueprint template variables, such as the backup `schedule`, `repoName`, `namespace`, `targetName`, and `targetedDatabases`. These annotations will be used to create a `BackupConfiguration` for this `Druid` database. + +Let's create the `Druid` we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/auto-backup/examples/sample-druid-2.yaml +druid.kubedb.com/sample-druid-2 created +``` + +**Verify BackupConfiguration** + +If everything goes well, KubeStash should create a `BackupConfiguration` for our Druid in demo namespace and the phase of that `BackupConfiguration` should be `Ready`. Verify the `BackupConfiguration` object by the following command, + +```bash +$ kubectl get backupconfiguration -n demo +NAME PHASE PAUSED AGE +appbinding-sample-druid-2 Ready 2m50s +``` + +Now, let’s check the YAML of the `BackupConfiguration`.
+ +```bash +$ kubectl get backupconfiguration -n demo appbinding-sample-druid-2 -o yaml +``` + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + creationTimestamp: "2024-09-19T11:00:56Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + app.kubernetes.io/managed-by: kubestash.com + kubestash.com/invoker-name: druid-customize-backup-blueprint + kubestash.com/invoker-namespace: demo + name: appbinding-sample-druid-2 + namespace: demo + resourceVersion: "1599083" + uid: 1c979902-33cd-4212-ae6d-ea4e4198bcaf +spec: + backends: + - name: gcs-backend + retentionPolicy: + name: demo-retention + namespace: demo + storageRef: + name: gcs-storage + namespace: demo + sessions: + - addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup + params: + databases: druid + name: frequent-backup + repositories: + - backend: gcs-backend + directory: demo/sample-druid-2 + encryptionSecret: + name: encrypt-secret + namespace: demo + name: customize-blueprint + scheduler: + jobTemplate: + backoffLimit: 1 + template: + controller: {} + metadata: {} + spec: + resources: {} + schedule: '*/10 * * * *' + sessionHistoryLimit: 3 + target: + apiGroup: kubedb.com + kind: Druid + name: sample-druid-2 + namespace: demo +status: + backends: + - name: gcs-backend + ready: true + retentionPolicy: + found: true + ref: + name: demo-retention + namespace: demo + storage: + phase: Ready + ref: + name: gcs-storage + namespace: demo +``` + +Notice the `spec.backends`, `spec.sessions` and `spec.target` sections, KubeStash automatically resolved those info from the `BackupBluePrint` and created above `BackupConfiguration`. + +**Verify BackupSession:** + +KubeStash triggers an instant backup as soon as the `BackupConfiguration` is ready. After that, backups are scheduled according to the specified schedule. 
+ +```bash +$ kubectl get backupsession -n demo -w + +NAME INVOKER-TYPE INVOKER-NAME PHASE DURATION AGE +appbinding-sample-druid-2-frequent-backup-1726743656 BackupConfiguration appbinding-sample-druid-2 Succeeded 30s 2m32s +``` + +We can see from the above output that the backup session has succeeded. Now, we are going to verify whether the backed up data has been stored in the backend. + +**Verify Backup:** + +Once a backup is complete, KubeStash will update the respective `Repository` CR to reflect the backup. Check that the repository `customize-blueprint` has been updated by the following command, + +```bash +$ kubectl get repository -n demo customize-blueprint +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +customize-blueprint true 1 806 B Ready 8m27s 9m18s +``` + +At this moment we have one `Snapshot`. Run the following command to check the respective `Snapshot` which represents the state of a backup run for an application. + +```bash +$ kubectl get snapshots -n demo -l=kubestash.com/repo-name=customize-blueprint +NAME REPOSITORY SESSION SNAPSHOT-TIME DELETION-POLICY PHASE AGE +customize-blueprint-appbinding-sid-2-frequent-backup-1726743656 customize-blueprint frequent-backup 2024-09-19T11:01:06Z Delete Succeeded 2m56s +``` + +> Note: KubeStash creates a `Snapshot` with the following labels: +> - `kubedb.com/db-version: ` +> - `kubestash.com/app-ref-kind: ` +> - `kubestash.com/app-ref-name: ` +> - `kubestash.com/app-ref-namespace: ` +> - `kubestash.com/repo-name: ` +> +> These labels can be used to watch only the `Snapshot`s related to our target Database or `Repository`. + +If we check the YAML of the `Snapshot`, we can find the information about the backed up components of the Database. 
+ +```bash +$ kubectl get snapshots -n demo customize-blueprint-appbinding-sid-2-frequent-backup-1726743656 -oyaml +``` + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: Snapshot +metadata: + creationTimestamp: "2024-09-19T11:01:06Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + kubedb.com/db-version: 30.0.0 + kubestash.com/app-ref-kind: Druid + kubestash.com/app-ref-name: sample-druid-2 + kubestash.com/app-ref-namespace: demo + kubestash.com/repo-name: customize-blueprint + name: customize-blueprint-appbinding-sid-2-frequent-backup-1726743656 + namespace: demo + ownerReferences: + - apiVersion: storage.kubestash.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: Repository + name: customize-blueprint + uid: 5eaccae6-046c-4c6a-9b76-087d040f001a + resourceVersion: "1599190" + uid: 014c050d-0e91-43eb-b60a-36eefbd4b048 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid-2 + namespace: demo + backupSession: appbinding-sample-druid-2-frequent-backup-1726743656 + deletionPolicy: Delete + repository: customize-blueprint + session: frequent-backup + snapshotID: 01J84Z2R6R64FH8E7QYNNZGC1S + type: FullBackup + version: v1 +status: + components: + dump: + driver: Restic + duration: 9.132887467s + integrity: true + path: repository/v1/frequent-backup/dump + phase: Succeeded + resticStats: + - hostPath: dumpfile.sql + id: a1061e74f1ad398a9fe85bcbae34f540f2437a97061fd26c5b3e6bde3b5b7642 + size: 10.859 KiB + uploaded: 11.152 KiB + size: 2.127 KiB +``` + +> KubeStash uses the `mysqldump`/`pg_dump` command to take backups of the metadata storage of the target Druid databases. Therefore, the component name for `logical backups` is set as `dump`. + +Now, if we navigate to the GCS bucket, we will see the backed up data stored in the `/blueprint/custom-blueprint/repository/v1/frequent-backup/dump` directory.
KubeStash also keeps the backup for `Snapshot` YAMLs, which can be found in the `blueprint/custom-blueprint/snapshots` directory. + +> Note: KubeStash stores all dumped data encrypted in the backup directory, meaning it remains unreadable until decrypted. + +## Cleanup + +To clean up the resources created by this tutorial, run the following commands, + +```bash +kubectl delete backupblueprints.core.kubestash.com -n demo druid-default-backup-blueprint +kubectl delete backupblueprints.core.kubestash.com -n demo druid-customize-backup-blueprint +kubectl delete backupstorage -n demo gcs-storage +kubectl delete secret -n demo gcs-secret +kubectl delete secret -n demo encrypt-secret +kubectl delete retentionpolicies.storage.kubestash.com -n demo demo-retention +kubectl delete druid -n demo sample-druid +kubectl delete druid -n demo sample-druid-2 +``` \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/backup/multiple-backends.yaml b/docs/guides/druid/backup/customization/examples/backup/multiple-backends.yaml new file mode 100644 index 000000000..9225337cd --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/backup/multiple-backends.yaml @@ -0,0 +1,49 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + - name: s3-backend + storageRef: + namespace: demo + name: s3-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + - name: s3-druid-repo +
backend: s3-backend + directory: /druid-copy + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/backup/resources-limit.yaml b/docs/guides/druid/backup/customization/examples/backup/resources-limit.yaml new file mode 100644 index 000000000..004177590 --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/backup/resources-limit.yaml @@ -0,0 +1,45 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + resources: + requests: + cpu: "200m" + memory: "1Gi" + limits: + cpu: "200m" + memory: "1Gi" + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/backup/specific-user.yaml b/docs/guides/druid/backup/customization/examples/backup/specific-user.yaml new file mode 100644 index 000000000..6efdd6bba --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/backup/specific-user.yaml @@ -0,0 +1,41 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: 
gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + securityContext: + runAsUser: 0 + runAsGroup: 0 + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/common/gcs-backupstorage.yaml b/docs/guides/druid/backup/customization/examples/common/gcs-backupstorage.yaml new file mode 100644 index 000000000..6ab3df02a --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/common/gcs-backupstorage.yaml @@ -0,0 +1,17 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: demo + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/common/retentionpolicy.yaml b/docs/guides/druid/backup/customization/examples/common/retentionpolicy.yaml new file mode 100644 index 000000000..459156286 --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/common/retentionpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/common/s3-backupstorage.yaml b/docs/guides/druid/backup/customization/examples/common/s3-backupstorage.yaml new file mode 100644 
index 000000000..a0f1c3ade --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/common/s3-backupstorage.yaml @@ -0,0 +1,19 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: s3-storage + namespace: demo +spec: + storage: + provider: s3 + s3: + bucket: kubestash + region: us-east-1 + endpoint: us-east-1.linodeobjects.com + secretName: s3-secret + prefix: demo + usagePolicy: + allowedNamespaces: + from: All + default: false + deletionPolicy: Delete \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/common/sample-druid.yaml b/docs/guides/druid/backup/customization/examples/common/sample-druid.yaml new file mode 100644 index 000000000..d92c784f0 --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/common/sample-druid.yaml @@ -0,0 +1,15 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/restore/resources-limit.yaml b/docs/guides/druid/backup/customization/examples/restore/resources-limit.yaml new file mode 100644 index 000000000..b6d14866d --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/restore/resources-limit.yaml @@ -0,0 +1,30 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + resources: + requests: + cpu: "200m" + memory: "1Gi" + limits: + cpu: "200m" + memory: "1Gi" + tasks: + - name: mysql-metadata-storage-backup \ 
No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/restore/specific-snapshot.yaml b/docs/guides/druid/backup/customization/examples/restore/specific-snapshot.yaml new file mode 100644 index 000000000..a6fef758f --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/restore/specific-snapshot.yaml @@ -0,0 +1,21 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: gcs-druid-repo-sample-druid-backup-frequent-backup-1725258600 + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/examples/restore/specific-user.yaml b/docs/guides/druid/backup/customization/examples/restore/specific-user.yaml new file mode 100644 index 000000000..16673eed1 --- /dev/null +++ b/docs/guides/druid/backup/customization/examples/restore/specific-user.yaml @@ -0,0 +1,26 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + securityContext: + runAsUser: 0 + runAsGroup: 0 + tasks: + - name: mysql-metadata-storage-backup \ No newline at end of file diff --git a/docs/guides/druid/backup/customization/index.md b/docs/guides/druid/backup/customization/index.md new file mode 100644 index 000000000..8a19703b3 --- /dev/null +++ b/docs/guides/druid/backup/customization/index.md @@ -0,0 +1,294 @@ +--- +title: Backup & Restore Druid | KubeStash 
+description: Customizing Druid Backup and Restore process with KubeStash +menu: + docs_{{ .version }}: + identifier: guides-druid-backup-customization + name: Logical Backup + parent: guides-druid-backup + weight: 20 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Customizing Backup and Restore Process + +KubeStash provides rich customization support for the backup and restore process to meet the requirements of various cluster configurations. This guide will show you some examples of these customizations. + +## Customizing Backup Process + +In this section, we are going to show you how to customize the backup process. Here, we are going to show some examples of using multiple backends for the backup process, running the backup process as a specific user, etc. + +### Using multiple backends + +You can configure multiple backends within a single `BackupConfiguration`. To back up the same data to different backends, such as S3 and GCS, declare each backend in the `.spec.backends` section. Then, reference these backends in the `.spec.sessions[*].repositories` section.
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + - name: s3-backend + storageRef: + namespace: demo + name: s3-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + - name: s3-druid-repo + backend: s3-backend + directory: /druid-copy + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup +``` + +### Running backup job as a specific user + +If your cluster requires running the backup job as a specific user, you can provide `securityContext` under `addon.jobTemplate.spec.securityContext` section. The below example shows how you can run the backup job as the `root` user. 
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + securityContext: + runAsUser: 0 + runAsGroup: 0 + tasks: + - name: mysql-metadata-storage-backup +``` + +### Specifying Memory/CPU limit/request for the backup job + +If you want to specify the Memory/CPU limit/request for your backup job, you can specify `resources` field under `addon.jobTemplate.spec` section. + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + resources: + requests: + cpu: "200m" + memory: "1Gi" + limits: + cpu: "200m" + memory: "1Gi" + tasks: + - name: mysql-metadata-storage-backup +``` + +> You can configure additional runtime settings for backup jobs within the `addon.jobTemplate.spec` sections. 
For further details, please refer to the [reference](https://kubestash.com/docs/latest/concepts/crds/backupconfiguration/#podtemplate-spec). + +## Customizing Restore Process + +KubeStash also uses `druid` during the restore process. In this section, we are going to show how you can pass arguments to the restore process, restore a specific snapshot, run the restore job as a specific user, etc. + +### Restore specific snapshot + +You can also restore a specific snapshot. At first, list the available snapshots as below, + +```bash +➤ kubectl get snapshots.storage.kubestash.com -n demo -l=kubestash.com/repo-name=gcs-druid-repo +NAME REPOSITORY SESSION SNAPSHOT-TIME DELETION-POLICY PHASE AGE +gcs-druid-repo-sample-druid-backup-frequent-backup-1725257849 gcs-druid-repo frequent-backup 2024-09-02T06:18:01Z Delete Succeeded 15m +gcs-druid-repo-sample-druid-backup-frequent-backup-1725258000 gcs-druid-repo frequent-backup 2024-09-02T06:20:00Z Delete Succeeded 13m +gcs-druid-repo-sample-druid-backup-frequent-backup-1725258300 gcs-druid-repo frequent-backup 2024-09-02T06:25:00Z Delete Succeeded 8m34s +gcs-druid-repo-sample-druid-backup-frequent-backup-1725258600 gcs-druid-repo frequent-backup 2024-09-02T06:30:00Z Delete Succeeded 3m34s +``` + +The below example shows how you can pass a specific snapshot name in the `.spec.dataSource` section.
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: gcs-druid-repo-sample-druid-backup-frequent-backup-1725258600 + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-restore +``` + + +### Running restore job as a specific user + +Similar to the backup process, you can provide `securityContext` under the `addon.jobTemplate.spec` section to run the restore job as a specific user. + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + securityContext: + runAsUser: 0 + runAsGroup: 0 + tasks: + - name: mysql-metadata-storage-restore +``` + +### Specifying Memory/CPU limit/request for the restore job + +Similar to the backup process, you can also provide the `resources` field under the `addon.jobTemplate.spec` section to limit the Memory/CPU for your restore job.
+ +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: restored-druid + dataSource: + repository: gcs-druid-repo + snapshot: latest + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + jobTemplate: + spec: + resources: + requests: + cpu: "200m" + memory: "1Gi" + limits: + cpu: "200m" + memory: "1Gi" + tasks: + - name: mysql-metadata-storage-restore +``` + +> You can configure additional runtime settings for restore jobs within the `addon.jobTemplate.spec` section. For further details, please refer to the [reference](https://kubestash.com/docs/latest/concepts/crds/restoresession/#podtemplate-spec). \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/examples/backupconfiguration.yaml b/docs/guides/druid/backup/logical/examples/backupconfiguration.yaml new file mode 100644 index 000000000..1740910cd --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/backupconfiguration.yaml @@ -0,0 +1,36 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup diff --git a/docs/guides/druid/backup/logical/examples/backupstorage.yaml b/docs/guides/druid/backup/logical/examples/backupstorage.yaml new file mode 100644 index
000000000..c09038c6b --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/backupstorage.yaml @@ -0,0 +1,17 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: druid + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/examples/deep-storage-config.yaml b/docs/guides/druid/backup/logical/examples/deep-storage-config.yaml new file mode 100644 index 000000000..361259582 --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/deep-storage-config.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Secret +metadata: + name: deep-storage-config + namespace: demo +stringData: + druid.storage.type: "s3" + druid.storage.bucket: "druid" + druid.storage.baseKey: "druid/segments" + druid.s3.accessKey: "minio" + druid.s3.secretKey: "minio123" + druid.s3.protocol: "http" + druid.s3.enablePathStyleAccess: "true" + druid.s3.endpoint.signingRegion: "us-east-1" + druid.s3.endpoint.url: "http://myminio-hl.demo.svc.cluster.local:9000/" + diff --git a/docs/guides/druid/backup/logical/examples/restored-druid.yaml b/docs/guides/druid/backup/logical/examples/restored-druid.yaml new file mode 100644 index 000000000..fc6bca512 --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/restored-druid.yaml @@ -0,0 +1,17 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: restored-druid + namespace: demo +spec: + init: + waitForInitialRestore: true + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/examples/restoresession.yaml b/docs/guides/druid/backup/logical/examples/restoresession.yaml new file mode 100644 index 
000000000..16c384028 --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/restoresession.yaml @@ -0,0 +1,21 @@ +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + name: restored-druid + namespace: demo + dataSource: + snapshot: latest + repository: gcs-druid-repo + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-restore \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/examples/retentionpolicy.yaml b/docs/guides/druid/backup/logical/examples/retentionpolicy.yaml new file mode 100644 index 000000000..459156286 --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/retentionpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/examples/sample-druid.yaml b/docs/guides/druid/backup/logical/examples/sample-druid.yaml new file mode 100644 index 000000000..d92c784f0 --- /dev/null +++ b/docs/guides/druid/backup/logical/examples/sample-druid.yaml @@ -0,0 +1,15 @@ +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut \ No newline at end of file diff --git a/docs/guides/druid/backup/logical/images/druid-ui-1.png b/docs/guides/druid/backup/logical/images/druid-ui-1.png new file mode 100644 index 000000000..c5d985f8c Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-1.png differ diff --git 
a/docs/guides/druid/backup/logical/images/druid-ui-2.png b/docs/guides/druid/backup/logical/images/druid-ui-2.png new file mode 100644 index 000000000..5cd4988e6 Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-2.png differ diff --git a/docs/guides/druid/backup/logical/images/druid-ui-3.png b/docs/guides/druid/backup/logical/images/druid-ui-3.png new file mode 100644 index 000000000..4ed44c6c9 Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-3.png differ diff --git a/docs/guides/druid/backup/logical/images/druid-ui-4.png b/docs/guides/druid/backup/logical/images/druid-ui-4.png new file mode 100644 index 000000000..f11920449 Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-4.png differ diff --git a/docs/guides/druid/backup/logical/images/druid-ui-5.png b/docs/guides/druid/backup/logical/images/druid-ui-5.png new file mode 100644 index 000000000..065fadf4e Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-5.png differ diff --git a/docs/guides/druid/backup/logical/images/druid-ui-6.png b/docs/guides/druid/backup/logical/images/druid-ui-6.png new file mode 100644 index 000000000..2e78eef06 Binary files /dev/null and b/docs/guides/druid/backup/logical/images/druid-ui-6.png differ diff --git a/docs/guides/druid/backup/logical/index.md b/docs/guides/druid/backup/logical/index.md new file mode 100644 index 000000000..1d36b2f65 --- /dev/null +++ b/docs/guides/druid/backup/logical/index.md @@ -0,0 +1,753 @@ +--- +title: Backup & Restore Druid | KubeStash +description: Backup Druid database using KubeStash +menu: + docs_{{ .version }}: + identifier: guides-druid-backup-logical + name: Logical Backup + parent: guides-druid-backup + weight: 20 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Backup and Restore Druid database using KubeStash + +KubeStash allows you to backup and restore `Druid` databases. 
Specifically, backing up the external metadata storage of `Druid` (`MySQL` or `PostgreSQL`) is sufficient to restore `Druid` to its previous state, provided the deep storage is kept intact. KubeStash makes managing your `Druid` backups and restorations more straightforward and efficient. + +This guide will show you how to back up and restore your `Druid` databases using `KubeStash`. + +## Before You Begin + +- At first, you need to have a Kubernetes cluster, and the `kubectl` command-line tool must be configured to communicate with your cluster. If you do not already have a cluster, you can create one by using `Minikube` or `Kind`. +- Install `KubeDB` in your cluster following the steps [here](/docs/setup/README.md). +- Install `KubeStash` in your cluster following the steps [here](https://kubestash.com/docs/latest/setup/install/kubestash). +- Install KubeStash `kubectl` plugin following the steps [here](https://kubestash.com/docs/latest/setup/install/kubectl-plugin/). +- If you are not familiar with how KubeStash backs up and restores Druid databases, please check the following guide [here](/docs/guides/druid/backup/overview/index.md). + +You should be familiar with the following `KubeStash` concepts: + +- [BackupStorage](https://kubestash.com/docs/latest/concepts/crds/backupstorage/) +- [BackupConfiguration](https://kubestash.com/docs/latest/concepts/crds/backupconfiguration/) +- [BackupSession](https://kubestash.com/docs/latest/concepts/crds/backupsession/) +- [RestoreSession](https://kubestash.com/docs/latest/concepts/crds/restoresession/) +- [Addon](https://kubestash.com/docs/latest/concepts/crds/addon/) +- [Function](https://kubestash.com/docs/latest/concepts/crds/function/) +- [Task](https://kubestash.com/docs/latest/concepts/crds/addon/#task-specification) + +To keep everything isolated, we are going to use a separate namespace called `demo` throughout this tutorial.
+ +```bash +$ kubectl create ns demo +namespace/demo created +``` + +> **Note:** YAML files used in this tutorial are stored in the [docs/guides/druid/backup/logical/examples](docs/guides/druid/backup/logical/examples) directory of the [kubedb/docs](https://github.com/kubedb/docs) repository. + +## Backup Druid + +KubeStash supports backups for `Druid` instances with both types of metadata storage (`MySQL` and `PostgreSQL`). In this demonstration, we'll focus on a `Druid` database with a MySQL cluster. The backup and restore process is similar for `Druid` with `PostgreSQL` as metadata storage as well. + +This section will demonstrate how to back up a `Druid` database. Here, we are going to deploy a `Druid` database using KubeDB. Then, we are going to back up this database into a `GCS` bucket. Finally, we are going to restore the backed up data into another `Druid` database. + +### Deploy Sample Druid Database + +## Get External Dependencies Ready + +### Deep Storage + +One of the external dependencies of Druid is deep storage, where the segments are stored. It is a storage mechanism that Apache Druid does not provide. **Amazon S3**, **Google Cloud Storage**, **Azure Blob Storage**, **S3-compatible storage** (like **Minio**), or **HDFS** are generally convenient options for deep storage. + +In this tutorial, we will run a `minio-server` as deep storage in our local `kind` cluster using `minio-operator` and create a bucket named `druid` in it, which the deployed druid database will use.
+ +```bash + +$ helm repo add minio https://operator.min.io/ +$ helm repo update minio +$ helm upgrade --install --namespace "minio-operator" --create-namespace "minio-operator" minio/operator --set operator.replicaCount=1 + +$ helm upgrade --install --namespace "demo" --create-namespace druid-minio minio/tenant \ +--set tenant.pools[0].servers=1 \ +--set tenant.pools[0].volumesPerServer=1 \ +--set tenant.pools[0].size=1Gi \ +--set tenant.certificate.requestAutoCert=false \ +--set tenant.buckets[0].name="druid" \ +--set tenant.pools[0].name="default" + +``` + +Now we need to create a `Secret` named `deep-storage-config`. It contains the necessary connection information using which the druid database will connect to the deep storage. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: deep-storage-config + namespace: demo +stringData: + druid.storage.type: "s3" + druid.storage.bucket: "druid" + druid.storage.baseKey: "druid/segments" + druid.s3.accessKey: "minio" + druid.s3.secretKey: "minio123" + druid.s3.protocol: "http" + druid.s3.enablePathStyleAccess: "true" + druid.s3.endpoint.signingRegion: "us-east-1" + druid.s3.endpoint.url: "http://myminio-hl.demo.svc.cluster.local:9000/" +``` + +Let’s create the `deep-storage-config` Secret shown above: + +```bash +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/deep-storage-config.yaml +secret/deep-storage-config created +``` + +Let's deploy a sample `Druid` database and insert some data into it. 
+ +**Create Druid CR:** + +Below is the YAML of a sample `Druid` CR that we are going to create for this tutorial: + +```yaml +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: + name: sample-druid + namespace: demo +spec: + version: 30.0.0 + deepStorage: + type: s3 + configSecret: + name: deep-storage-config + topology: + routers: + replicas: 1 + deletionPolicy: WipeOut +``` + +Create the above `Druid` CR, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/sample-druid.yaml +druid.kubedb.com/sample-druid created +``` + +KubeDB will deploy a Druid database according to the above specification. It will also create the necessary `Secrets` and `Services` to access the database along with `MySQL` and `ZooKeeper` instances as druid dependencies. + +Let's check if the database is ready to use, + +```bash +$ kubectl get druids.kubedb.com -n demo +NAME TYPE VERSION STATUS AGE +sample-druid kubedb.com/v1alpha2 30.0.0 Ready 113s +``` + +The database is `Ready`. Verify that KubeDB has created the necessary `Secrets` and `Services` to access the database along with `MySQL` and `ZooKeeper` instances for this database using the following commands, + +```bash +$ kubectl get secret -n demo -l=app.kubernetes.io/instance=sample-druid +NAME TYPE DATA AGE +sample-druid-admin-cred kubernetes.io/basic-auth 2 48s + +$ kubectl get service -n demo -l=app.kubernetes.io/instance=sample-druid +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +sample-druid-brokers ClusterIP 10.128.189.77 8082/TCP 72s +sample-druid-coordinators ClusterIP 10.128.175.228 8081/TCP 72s +sample-druid-pods ClusterIP None 8081/TCP,8090/TCP,8083/TCP,8091/TCP,8082/TCP,8888/TCP 72s +sample-druid-routers ClusterIP 10.128.95.51 8888/TCP 72s +``` + +Here, we have to use service `sample-druid-routers` and secret `sample-druid-admin-cred` to connect with the database.
`KubeDB` creates an [AppBinding](/docs/guides/druid/concepts/appbinding/index.md) CR that holds the necessary information to connect with the database. + +**Verify AppBinding:** + +Verify that the `AppBinding` has been created successfully using the following command, + +```bash +$ kubectl get appbindings -n demo +NAME TYPE VERSION AGE +sample-druid kubedb.com/druid 30.0.0 2m26s +sample-druid-mysql-metadata kubedb.com/mysql 8.0.35 5m40s +sample-druid-zk kubedb.com/zookeeper 3.7.2 5m43s +``` + +Let's check the YAML of the above `AppBinding`, + +```bash +$ kubectl get appbindings -n demo sample-druid -o yaml +``` + +```yaml +apiVersion: appcatalog.appscode.com/v1alpha1 +kind: AppBinding +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"kubedb.com/v1alpha2","kind":"Druid","metadata":{"annotations":{},"name":"sample-druid","namespace":"demo"},"spec":{"deepStorage":{"configSecret":{"name":"deep-storage-config"},"type":"s3"},"deletionPolicy":"WipeOut","topology":{"routers":{"replicas":1}},"version":"30.0.0"}} + creationTimestamp: "2024-09-17T12:17:27Z" + generation: 1 + labels: + app.kubernetes.io/component: database + app.kubernetes.io/instance: sample-druid + app.kubernetes.io/managed-by: kubedb.com + app.kubernetes.io/name: druids.kubedb.com + name: sample-druid + namespace: demo + ownerReferences: + - apiVersion: kubedb.com/v1alpha2 + blockOwnerDeletion: true + controller: true + kind: Druid + name: sample-druid + uid: aab70ef0-ff00-437d-be91-68438513552e + resourceVersion: "1372134" + uid: a45b6562-aa0b-4dba-8e6b-139cfc33beb6 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo + clientConfig: + service: + name: sample-druid-pods + port: 8888 + scheme: http + url: 
http://sample-druid-coordinators-0.sample-druid-pods.demo.svc.cluster.local:8081,http://sample-druid-overlords-0.sample-druid-pods.demo.svc.cluster.local:8090,http://sample-druid-middlemanagers-0.sample-druid-pods.demo.svc.cluster.local:8091,http://sample-druid-historicals-0.sample-druid-pods.demo.svc.cluster.local:8083,http://sample-druid-brokers-0.sample-druid-pods.demo.svc.cluster.local:8082,http://sample-druid-routers-0.sample-druid-pods.demo.svc.cluster.local:8888 + secret: + name: sample-druid-admin-cred + type: kubedb.com/druid + version: 30.0.0 +``` + +KubeStash uses the `AppBinding` CR to connect with the target database. It requires the following fields to be set in the AppBinding's `.spec` section. + +- `.spec.clientConfig.service.name` specifies the name of the Service that connects to the database. +- `.spec.secret` specifies the name of the Secret that holds necessary credentials to access the database. +- `.spec.type` specifies the type of the app that this AppBinding is pointing to. KubeDB generated AppBinding follows the following format: `<app group>/<resource kind>` (for example, `kubedb.com/druid`). + +**Insert Sample Data:** + +We can access the [web console](https://druid.apache.org/docs/latest/operations/web-console) of Druid database from any browser by port-forwarding the routers. Let’s port-forward the port `8888` to local machine: +```bash +kubectl port-forward -n demo svc/sample-druid-routers 8888 +Forwarding from 127.0.0.1:8888 -> 8888 +Forwarding from [::1]:8888 -> 8888 +``` + +Now hit the `http://localhost:8888` from any browser, and you will be prompted to provide the credential of the druid database. By following the steps discussed below, you can get the credential generated by the KubeDB operator for your Druid database.
+ +**Connection information:** + +- Username: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.username}' | base64 -d + admin + ``` + +- Password: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.password}' | base64 -d + DqG5E63NtklAkxqC + ``` + +After providing the credentials correctly, you should be able to access the web console like shown below. + +

+  lifecycle +

+ +Now select the `Load Data` option and then select `Batch - classic` from the drop-down menu. +

+  lifecycle +

+ +Select `Example data` and click `Load example` to insert the example `Wikipedia Edits` datasource. + +

+  lifecycle +

+ +After clicking `Next` multiple times, click `Submit`. + +

+  lifecycle +

+ +Within a minute, the status of the ingestion task should become `SUCCESS`. +

+  lifecycle +

+ +Now, we are ready to backup the database. + +### Prepare Backend + +We are going to store our backed up data into a GCS bucket. We have to create a Secret with necessary credentials and a `BackupStorage` CR to use this backend. If you want to use a different backend, please read the respective backend configuration doc from [here](https://kubestash.com/docs/latest/guides/backends/overview/). + +**Create Secret:** + +Let's create a secret called `gcs-secret` with access credentials to our desired GCS bucket, + +```bash +$ echo -n '' > GOOGLE_PROJECT_ID +$ cat /path/to/downloaded-sa-key.json > GOOGLE_SERVICE_ACCOUNT_JSON_KEY +$ kubectl create secret generic -n demo gcs-secret \ + --from-file=./GOOGLE_PROJECT_ID \ + --from-file=./GOOGLE_SERVICE_ACCOUNT_JSON_KEY +secret/gcs-secret created +``` + +**Create BackupStorage:** + +Now, create a `BackupStorage` using this secret. Below is the YAML of `BackupStorage` CR we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: BackupStorage +metadata: + name: gcs-storage + namespace: demo +spec: + storage: + provider: gcs + gcs: + bucket: kubestash-qa + prefix: demo + secretName: gcs-secret + usagePolicy: + allowedNamespaces: + from: All + default: true + deletionPolicy: Delete +``` + +Let's create the BackupStorage we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/backupstorage.yaml +backupstorage.storage.kubestash.com/gcs-storage created +``` + +Now, we are ready to backup our database to our desired backend. + +**Create RetentionPolicy:** + +Now, let's create a `RetentionPolicy` to specify how the old Snapshots should be cleaned up. 
+ +Below is the YAML of the `RetentionPolicy` object that we are going to create, + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: RetentionPolicy +metadata: + name: demo-retention + namespace: demo +spec: + default: true + failedSnapshots: + last: 2 + maxRetentionPeriod: 2mo + successfulSnapshots: + last: 5 + usagePolicy: + allowedNamespaces: + from: All +``` + +Let’s create the above `RetentionPolicy`, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/retentionpolicy.yaml +retentionpolicy.storage.kubestash.com/demo-retention created +``` + +### Backup + +We have to create a `BackupConfiguration` targeting respective `sample-druid` Druid database. Then, KubeStash will create a `CronJob` for each session to take periodic backup of that database. + +At first, we need to create a secret with a Restic password for backup data encryption. + +**Create Secret:** + +Let's create a secret called `encrypt-secret` with the Restic password, + +```bash +$ echo -n 'changeit' > RESTIC_PASSWORD +$ kubectl create secret generic -n demo encrypt-secret \ + --from-file=./RESTIC_PASSWORD \ +secret "encrypt-secret" created +``` + +**Create BackupConfiguration:** + +Below is the YAML for `BackupConfiguration` CR to backup the `sample-druid` database that we have deployed earlier, + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: BackupConfiguration +metadata: + name: sample-druid-backup + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + namespace: demo + name: sample-druid + backends: + - name: gcs-backend + storageRef: + namespace: demo + name: gcs-storage + retentionPolicy: + name: demo-retention + namespace: demo + sessions: + - name: frequent-backup + scheduler: + schedule: "*/5 * * * *" + jobTemplate: + backoffLimit: 1 + repositories: + - name: gcs-druid-repo + backend: gcs-backend + directory: /druid + encryptionSecret: + name: encrypt-secret + 
namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-backup +``` +- `.spec.sessions[*].schedule` specifies that we want to backup the database at `5 minutes` interval. +- `.spec.target` refers to the targeted `sample-druid` Druid database that we created earlier. + +> Note: To create `BackupConfiguration` for druid with `PostgreSQL` as metadata storage just update `spec.sessions[*].addon.tasks.name` to `pg-metadata-storage-backup` + +Let's create the `BackupConfiguration` CR that we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/backupconfiguration.yaml +backupconfiguration.core.kubestash.com/sample-druid-backup created +``` + +**Verify Backup Setup Successful** + +If everything goes well, the phase of the `BackupConfiguration` should be `Ready`. The `Ready` phase indicates that the backup setup is successful. Let's verify the `Phase` of the BackupConfiguration, + +```bash +$ kubectl get backupconfiguration -n demo +NAME PHASE PAUSED AGE +sample-druid-backup Ready 2m50s +``` + +Additionally, we can verify that the `Repository` specified in the `BackupConfiguration` has been created using the following command, + +```bash +$ kubectl get repo -n demo +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +gcs-druid-repo true 1 712.822 KiB Ready 5m 4m +``` + +KubeStash keeps the backup for `Repository` YAMLs. If we navigate to the GCS bucket, we will see the `Repository` YAML stored in the `demo/druid` directory. + +**Verify CronJob:** + +It will also create a `CronJob` with the schedule specified in `spec.sessions[*].scheduler.schedule` field of `BackupConfiguration` CR. 
+ +Verify that the `CronJob` has been created using the following command, + +```bash +$ kubectl get cronjob -n demo +NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE +trigger-sample-druid-backup-frequent-backup */5 * * * * 0 2m45s 3m25s +``` + +**Verify BackupSession:** + +KubeStash triggers an instant backup as soon as the `BackupConfiguration` is ready. After that, backups are scheduled according to the specified schedule. + +```bash +$ kubectl get backupsession -n demo -w + +NAME INVOKER-TYPE INVOKER-NAME PHASE DURATION AGE +sample-druid-backup-frequent-backup-1726656835 BackupConfiguration sample-druid-backup Succeeded 7m22s +``` + +We can see from the above output that the backup session has succeeded. Now, we are going to verify whether the backed up data has been stored in the backend. + +**Verify Backup:** + +Once a backup is complete, KubeStash will update the respective `Repository` CR to reflect the backup. Check that the repository `gcs-druid-repo` has been updated by the following command, + +```bash +$ kubectl get repository -n demo gcs-druid-repo +NAME INTEGRITY SNAPSHOT-COUNT SIZE PHASE LAST-SUCCESSFUL-BACKUP AGE +gcs-druid-repo true 1 806 B Ready 8m27s 9m18s +``` + +At this moment we have one `Snapshot`. Run the following command to check the respective `Snapshot` which represents the state of a backup run for an application. + +```bash +$ kubectl get snapshots -n demo -l=kubestash.com/repo-name=gcs-druid-repo +NAME REPOSITORY SESSION SNAPSHOT-TIME DELETION-POLICY PHASE AGE +gcs-druid-repo-sample-druid-backup-frequent-backup-1726656835 gcs-druid-repo frequent-backup 2024-09-18T10:54:07Z Delete Succeeded 11m +``` + +> Note: KubeStash creates a `Snapshot` with the following labels: +> - `kubestash.com/app-ref-kind: <target-kind>` +> - `kubestash.com/app-ref-name: <target-name>` +> - `kubestash.com/app-ref-namespace: <target-namespace>` +> - `kubestash.com/repo-name: <repository-name>` +> +> These labels can be used to watch only the `Snapshot`s related to our target Database or `Repository`.
+ +If we check the YAML of the `Snapshot`, we can find the information about the backed up components of the Database. + +```bash +$ kubectl get snapshots -n demo gcs-druid-repo-sample-druid-backup-frequent-backup-1726656835 -oyaml +``` + +```yaml +apiVersion: storage.kubestash.com/v1alpha1 +kind: Snapshot +metadata: + creationTimestamp: "2024-09-18T10:54:07Z" + finalizers: + - kubestash.com/cleanup + generation: 1 + labels: + kubedb.com/db-version: 30.0.0 + kubestash.com/app-ref-kind: Druid + kubestash.com/app-ref-name: sample-druid + kubestash.com/app-ref-namespace: demo + kubestash.com/repo-name: gcs-druid-repo + name: gcs-druid-repo-sample-druid-backup-frequent-backup-1726656835 + namespace: demo + ownerReferences: + - apiVersion: storage.kubestash.com/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: Repository + name: gcs-druid-repo + uid: 7656c292-4d59-4503-8462-5601823fc531 + resourceVersion: "1477854" + uid: 9a3bbb73-ae71-4fb4-a99b-72af62a95011 +spec: + appRef: + apiGroup: kubedb.com + kind: Druid + name: sample-druid + namespace: demo + backupSession: sample-druid-backup-frequent-backup-1726656835 + deletionPolicy: Delete + repository: gcs-druid-repo + session: frequent-backup + snapshotID: 01J82C980JHJ869SQYMGCH3S44 + type: FullBackup + version: v1 +status: + components: + dump: + driver: Restic + duration: 6.897377973s + integrity: true + path: repository/v1/frequent-backup/dump + phase: Succeeded + resticStats: + - hostPath: dumpfile.sql + id: d10ab158ce2667d03b08cb35573a6f049a2cef9ef2e96be847caed6660bbb904 + size: 4.322 MiB + uploaded: 4.323 MiB + size: 712.824 KiB + ... +``` + +> KubeStash uses the `mysqldump`/`pg_dump` command to take backups of the target metadata storage of Druid databases. Therefore, the component name for logical backups is set as `dump`.
+ +Now, if we navigate to the GCS bucket, we will see the backed up data stored in the `demo/druid/repository/v1/frequent-backup/dump` directory. KubeStash also keeps the backup for `Snapshot` YAMLs, which can be found in the `demo/druid/snapshots` directory. + +> Note: KubeStash stores all dumped data encrypted in the backup directory, meaning it remains unreadable until decrypted. + +## Restore + +In this section, we are going to restore the database from the backup we have taken in the previous section. We are going to deploy a new database and initialize it from the backup. + +#### Deploy Restored Database: + +Now, we have to deploy the restored database similarly as we have deployed the original `sample-druid` database. However, this time there will be the following differences: + +- We are going to specify the `.spec.init.waitForInitialRestore` field that tells KubeDB to wait for the first restore to complete before marking this database as ready to use. + +Below is the YAML for the `Druid` CR we are going to deploy to initialize from backup, + +```yaml +apiVersion: kubedb.com/v1alpha2 +kind: Druid +metadata: +  name: restored-druid +  namespace: demo +spec: +  init: +    waitForInitialRestore: true +  version: 30.0.0 +  deepStorage: +    type: s3 +    configSecret: +      name: deep-storage-config +  topology: +    routers: +      replicas: 1 +  deletionPolicy: WipeOut +``` + +Let's create the above database, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/restored-druid.yaml +druid.kubedb.com/restored-druid created +``` + +If you check the database status, you will see it is stuck in `Provisioning` state. + +```bash +$ kubectl get druid -n demo restored-druid +NAME TYPE VERSION STATUS AGE +restored-druid kubedb.com/v1alpha2 30.0.0 Provisioning 22s +``` + +#### Create RestoreSession: + +Now, we need to create a `RestoreSession` CR pointing to the targeted `Druid` database.
+ +Below is the content of the YAML file of the `RestoreSession` object that we are going to create to restore backed up data into the newly created database provisioned by Druid object named `restored-druid`. + +```yaml +apiVersion: core.kubestash.com/v1alpha1 +kind: RestoreSession +metadata: + name: restore-sample-druid + namespace: demo +spec: + target: + apiGroup: kubedb.com + kind: Druid + name: restored-druid + namespace: demo + dataSource: + snapshot: latest + repository: gcs-druid-repo + encryptionSecret: + name: encrypt-secret + namespace: demo + addon: + name: druid-addon + tasks: + - name: mysql-metadata-storage-restore +``` + +Here, + +- `.spec.target` refers to the newly created `restored-druid` Druid object to where we want to restore backup data. +- `.spec.dataSource.repository` specifies the Repository object that holds the backed up data. +- `.spec.dataSource.snapshot` specifies to restore from latest `Snapshot`. + +> Note: To create `RestoreSession` for druid with `PostgreSQL` as metadata storage just update `spec.addon.tasks.name` to `postgres-metadata-storage-restore` + +Let's create the `RestoreSession` object we have shown above, + +```bash +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/druid/backup/logical/examples/restoresession.yaml +restoresession.core.kubestash.com/restore-sample-druid created +``` + +Once you have created the `RestoreSession` object, KubeStash will create a restore Job. Run the following command to watch the phase of the `RestoreSession` object, + +```bash +$ watch kubectl get restoresession -n demo +Every 2.0s: kubectl get restores... AppsCode-PC-03: Wed Aug 21 10:44:05 2024 + +NAME REPOSITORY FAILURE-POLICY PHASE DURATION AGE +restore-sample-druid gcs-druid-repo Succeeded 3s 53s +``` + +The `Succeeded` phase means that the restore process has been completed successfully.
+ + +#### Verify Restored Data: + +In this section, we are going to verify whether the desired data has been restored successfully. We are going to connect to the restored database and check whether the datasource we ingested earlier in the original database is restored. + +At first, check if the database has gone into `Ready` state by the following command, + +```bash +$ kubectl get druid -n demo restored-druid +NAME TYPE VERSION STATUS AGE +restored-druid kubedb.com/v1alpha2 30.0.0 Ready 34m +``` + +Now, let's verify if our datasource `wikipedia` exists or not. For that, first find out the database `Services` by the following command, + +Now access the [web console](https://druid.apache.org/docs/latest/operations/web-console) of Druid database from any browser by port-forwarding the routers. Let’s port-forward the port `8888` to local machine: +```bash +$ kubectl get svc -n demo --selector="app.kubernetes.io/instance=restored-druid" +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +restored-druid-brokers ClusterIP 10.128.74.54 8082/TCP 10m +restored-druid-coordinators ClusterIP 10.128.30.124 8081/TCP 10m +restored-druid-pods ClusterIP None 8081/TCP,8090/TCP,8083/TCP,8091/TCP,8082/TCP,8888/TCP 10m +restored-druid-routers ClusterIP 10.128.228.193 8888/TCP 10m +``` +```bash +kubectl port-forward -n demo svc/restored-druid-routers 8888 +Forwarding from 127.0.0.1:8888 -> 8888 +Forwarding from [::1]:8888 -> 8888 +``` + +Then hit the `http://localhost:8888` from any browser, and you will be prompted to provide the credential of the druid database. By following the steps discussed below, you can get the credential generated by the KubeDB operator for your Druid database.
+**Connection information:** +- Username: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.username}' | base64 -d + admin + ``` + +- Password: + + ```bash + $ kubectl get secret -n demo sample-druid-admin-cred -o jsonpath='{.data.password}' | base64 -d + DqG5E63NtklAkxqC + ``` +After providing the credentials correctly, you should be able to access the web console like shown below. Now if you go to the `Datasources` section, you will see that our ingested datasource `wikipedia` exists in the list. +

+  lifecycle +

+ +So, from the above screenshot, we can see that the `wikipedia` datasource we have ingested earlier in the original database and now, it is restored successfully. + +## Cleanup + +To cleanup the Kubernetes resources created by this tutorial, run: + +```bash +kubectl delete backupconfigurations.core.kubestash.com -n demo sample-druid-backup +kubectl delete restoresessions.core.kubestash.com -n demo restore-sample-druid +kubectl delete retentionpolicies.storage.kubestash.com -n demo demo-retention +kubectl delete backupstorage -n demo gcs-storage +kubectl delete secret -n demo gcs-secret +kubectl delete secret -n demo encrypt-secret +kubectl delete druid -n demo restored-druid +kubectl delete druid -n demo sample-druid +``` \ No newline at end of file diff --git a/docs/guides/druid/backup/overview/images/backup_overview.svg b/docs/guides/druid/backup/overview/images/backup_overview.svg new file mode 100644 index 000000000..c9fb1141d --- /dev/null +++ b/docs/guides/druid/backup/overview/images/backup_overview.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/guides/druid/backup/overview/images/kubedb_plus_kubestash.svg b/docs/guides/druid/backup/overview/images/kubedb_plus_kubestash.svg new file mode 100644 index 000000000..380d92d96 --- /dev/null +++ b/docs/guides/druid/backup/overview/images/kubedb_plus_kubestash.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/guides/druid/backup/overview/images/restore_overview.svg b/docs/guides/druid/backup/overview/images/restore_overview.svg new file mode 100644 index 000000000..f3da12dfc --- /dev/null +++ b/docs/guides/druid/backup/overview/images/restore_overview.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/guides/druid/backup/overview/index.md b/docs/guides/druid/backup/overview/index.md new file mode 100644 index 000000000..0b210025a --- /dev/null +++ b/docs/guides/druid/backup/overview/index.md @@ -0,0 +1,97 @@ +--- +title: Backup & Restore Druid 
Overview +menu: + docs_{{ .version }}: + identifier: guides-druid-backup-overview + name: Overview + parent: guides-druid-backup + weight: 10 +menu_name: docs_{{ .version }} +--- + +> New to KubeDB? Please start [here](/docs/README.md). + +{{< notice type="warning" message="Please install [KubeStash](https://kubestash.com/docs/latest/setup/install/kubestash/) to try this feature. Database backup with KubeStash is already included in the KubeDB license. So, you don't need a separate license for KubeStash." >}} + +# Druid Backup & Restore Overview + +KubeDB also uses [KubeStash](https://kubestash.com) to backup and restore databases. KubeStash by AppsCode is a cloud native data backup and recovery solution for Kubernetes workloads and databases. KubeStash utilizes [restic](https://github.com/restic/restic) to securely backup stateful applications to any cloud or on-prem storage backends (for example, S3, GCS, Azure Blob storage, Minio, NetApp, Dell EMC etc.). + +
+  KubeDB + KubeStash +
Fig: Backup KubeDB Databases Using KubeStash
+
+ +## How Backup Works + +The following diagram shows how KubeStash takes backup of a `Druid` database. Open the image in a new tab to see the enlarged version. + +
+  Druid Backup Overview +
Fig: Druid Backup Overview
+
+ +The backup process consists of the following steps: + +1. At first, a user creates a `Secret`. This secret holds the credentials to access the backend where the backed up data will be stored. + +2. Then, she creates a `BackupStorage` custom resource that specifies the backend information, along with the `Secret` containing the credentials needed to access the backend. + +3. KubeStash operator watches for `BackupStorage` custom resources. When it finds a `BackupStorage` object, it initializes the `BackupStorage` by uploading the `metadata.yaml` file to the specified backend. + +4. Next, she creates a `BackupConfiguration` custom resource that specifies the target database, addon information (including backup tasks), backup schedules, storage backends for storing the backup data, and other additional settings. + +5. KubeStash operator watches for `BackupConfiguration` objects. + +6. Once the KubeStash operator finds a `BackupConfiguration` object, it creates a `Repository` with the information specified in the `BackupConfiguration`. + +7. KubeStash operator watches for `Repository` custom resources. When it finds the `Repository` object, it initializes the `Repository` by uploading the `repository.yaml` file into the `spec.sessions[*].repositories[*].directory` path specified in `BackupConfiguration`. + +8. Then, it creates a `CronJob` for each session with the schedule specified in `BackupConfiguration` to trigger backup periodically. + +9. KubeStash operator triggers an instant backup as soon as the `BackupConfiguration` is ready. Backups are otherwise triggered by the `CronJob` based on the specified schedule. + +10. KubeStash operator watches for `BackupSession` custom resources. + +11. When it finds a `BackupSession` object, it creates a `Snapshot` custom resource for each `Repository` specified in the `BackupConfiguration`. + +12. Then it resolves the respective `Addon` and `Function` and prepares a backup `Job` definition. + +13.
Then, it creates the `Job` to backup the targeted `Druid` database. + +14. The backup `Job` reads necessary information (e.g. auth secret, port) to connect with the database from the `AppBinding` CR. It also reads backend information and access credentials from `BackupStorage` CR, Storage `Secret` and `Repository` path respectively. + +15. Then, the `Job` dumps the targeted `Druid` database and uploads the output to the backend. KubeStash pipes the output of dump command to uploading process. Hence, backup `Job` does not require a large volume to hold the entire dump output. + +16. After the backup process is completed, the backup `Job` updates the `status.components[dump]` field of the `Snapshot` resources with backup information of the target `Druid` database. + +## How Restore Process Works + +The following diagram shows how KubeStash restores backed up data into a `Druid` database. Open the image in a new tab to see the enlarged version. + +
+  Database Restore Overview +
Fig: Druid Restore Process Overview
+
+ +The restore process consists of the following steps: + +1. At first, a user creates a `Druid` database where the data will be restored or the user can use the same `Druid` database. + +2. Then, she creates a `RestoreSession` custom resource that specifies the target database where the backed-up data will be restored, addon information (including restore tasks), the target snapshot to be restored, the `Repository` containing that snapshot, and other additional settings. + +3. KubeStash operator watches for `RestoreSession` custom resources. + +4. When it finds a `RestoreSession` custom resource, it resolves the respective `Addon` and `Function` and prepares a restore `Job` definition. + +5. Then, it creates the `Job` to restore the target. + +6. The `Job` reads necessary information to connect with the database from respective `AppBinding` CR. It also reads backend information and access credentials from `Repository` CR and storage `Secret` respectively. + +7. Then, the `Job` downloads the backed up data from the backend and injects into the desired database. KubeStash pipes the downloaded data to the respective database tool to inject into the database. Hence, restore `Job` does not require a large volume to download entire backup data inside it. + +8. Finally, when the restore process is completed, the `Job` updates the `status.components[*]` field of the `RestoreSession` with restore information of the target database. + +## Next Steps + +- Backup a `Druid` database using KubeStash by following the guides from [here](/docs/guides/druid/backup/logical/index.md). 
diff --git a/docs/guides/druid/concepts/_index.md b/docs/guides/druid/concepts/_index.md index d2cd071f0..67c3be774 100755 --- a/docs/guides/druid/concepts/_index.md +++ b/docs/guides/druid/concepts/_index.md @@ -2,9 +2,9 @@ title: Druid Concepts menu: docs_{{ .version }}: - identifier: dr-concepts-druid + identifier: guides-druid-concepts name: Concepts - parent: dr-druid-guides + parent: guides-druid weight: 20 menu_name: docs_{{ .version }} --- diff --git a/docs/guides/druid/concepts/appbinding.md b/docs/guides/druid/concepts/appbinding.md index 42582c057..2b42174a1 100644 --- a/docs/guides/druid/concepts/appbinding.md +++ b/docs/guides/druid/concepts/appbinding.md @@ -2,9 +2,9 @@ title: AppBinding CRD menu: docs_{{ .version }}: - identifier: dr-appbinding-concepts + identifier: guides-druid-concepts-appbinding name: AppBinding - parent: dr-concepts-druid + parent: guides-druid-concepts weight: 20 menu_name: docs_{{ .version }} section_menu_id: guides diff --git a/docs/guides/druid/concepts/catalog.md b/docs/guides/druid/concepts/catalog.md index 5fb90a23e..57ef475dc 100644 --- a/docs/guides/druid/concepts/catalog.md +++ b/docs/guides/druid/concepts/catalog.md @@ -2,9 +2,9 @@ title: DruidVersion CRD menu: docs_{{ .version }}: - identifier: dr-catalog-concepts + identifier: guides-druid-concepts-catalog name: DruidVersion - parent: dr-concepts-druid + parent: guides-druid-concepts weight: 15 menu_name: docs_{{ .version }} section_menu_id: guides diff --git a/docs/guides/druid/concepts/druid.md b/docs/guides/druid/concepts/druid.md index a4fb307f9..41824e7cf 100644 --- a/docs/guides/druid/concepts/druid.md +++ b/docs/guides/druid/concepts/druid.md @@ -2,9 +2,9 @@ title: Druid CRD menu: docs_{{ .version }}: - identifier: dr-druid-concepts + identifier: guides-druid-concepts-druid name: Druid - parent: dr-concepts-druid + parent: guides-druid-concepts weight: 10 menu_name: docs_{{ .version }} section_menu_id: guides diff --git 
a/docs/guides/druid/quickstart/_index.md b/docs/guides/druid/quickstart/_index.md index 7f083cbbe..c99d5aad2 100644 --- a/docs/guides/druid/quickstart/_index.md +++ b/docs/guides/druid/quickstart/_index.md @@ -2,9 +2,9 @@ title: Druid Quickstart menu: docs_{{ .version }}: - identifier: dr-quickstart-druid + identifier: guides-druid-quickstart name: Quickstart - parent: dr-druid-guides + parent: guides-druid weight: 15 menu_name: docs_{{ .version }} --- diff --git a/docs/guides/druid/quickstart/overview/index.md b/docs/guides/druid/quickstart/overview/index.md index 2ac45392d..ee080604c 100644 --- a/docs/guides/druid/quickstart/overview/index.md +++ b/docs/guides/druid/quickstart/overview/index.md @@ -2,9 +2,9 @@ title: Druid Quickstart menu: docs_{{ .version }}: - identifier: dr-quickstart-quickstart + identifier: guides-druid-quickstart-overview name: Overview - parent: dr-quickstart-druid + parent: guides-druid-quickstart weight: 10 menu_name: docs_{{ .version }} section_menu_id: guides diff --git a/docs/guides/mysql/autoscaler/compute/cluster/index.md b/docs/guides/mysql/autoscaler/compute/cluster/index.md index 632d4f199..5f05af044 100644 --- a/docs/guides/mysql/autoscaler/compute/cluster/index.md +++ b/docs/guides/mysql/autoscaler/compute/cluster/index.md @@ -81,7 +81,7 @@ spec: Let's create the `MySQL` CRO we have shown above, ```bash -$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/autoscaler/compute/cluster/examples/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/autoscaler/compute/cluster/examples/sample-druid.yaml mysql.kubedb.com/sample-mysql created ``` diff --git a/docs/guides/mysql/autoscaler/storage/cluster/index.md b/docs/guides/mysql/autoscaler/storage/cluster/index.md index 8fb7466c3..c1fb35a72 100644 --- a/docs/guides/mysql/autoscaler/storage/cluster/index.md +++ b/docs/guides/mysql/autoscaler/storage/cluster/index.md @@ 
-87,7 +87,7 @@ spec: Let's create the `MySQL` CRO we have shown above, ```bash -$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/autoscaler/storage/cluster/examples/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/autoscaler/storage/cluster/examples/sample-druid.yaml mysql.kubedb.com/sample-mysql created ``` diff --git a/docs/guides/mysql/backup/auto-backup/index.md b/docs/guides/mysql/backup/auto-backup/index.md index 45962d044..f75b6a08d 100644 --- a/docs/guides/mysql/backup/auto-backup/index.md +++ b/docs/guides/mysql/backup/auto-backup/index.md @@ -153,7 +153,7 @@ Notice the `annotations` section. We are pointing to the `BackupBlueprint` that Let's create the above MySQL CRO, ```bash -❯ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/backup/auto-backup/examples/sample-mysql.yaml +❯ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/backup/auto-backup/examples/sample-druid.yaml mysql.kubedb.com/sample-mysql created ``` diff --git a/docs/guides/mysql/backup/standalone/index.md b/docs/guides/mysql/backup/standalone/index.md index 1a1fc084c..c64e62338 100644 --- a/docs/guides/mysql/backup/standalone/index.md +++ b/docs/guides/mysql/backup/standalone/index.md @@ -72,7 +72,7 @@ spec: Create the above `MySQL` CRD, ```bash -$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/backup/standalone/examples/sample-mysql.yaml +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/backup/standalone/examples/sample-druid.yaml mysql.kubedb.com/sample-mysql created ``` @@ -462,7 +462,7 @@ spec: Let's create the above database, ```bash -$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" 
>}}/docs/guides/mysql/backup/standalone/examples/restored-mysql.yaml +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/mysql/backup/standalone/examples/restored-druid.yaml mysql.kubedb.com/restored-mysql created ``` diff --git a/docs/guides/proxysql/autoscaler/compute/cluster/index.md b/docs/guides/proxysql/autoscaler/compute/cluster/index.md index 21c3e5cb9..ff34a6762 100644 --- a/docs/guides/proxysql/autoscaler/compute/cluster/index.md +++ b/docs/guides/proxysql/autoscaler/compute/cluster/index.md @@ -63,7 +63,7 @@ spec: ``` ```bash -$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/autoscaler/cluster/examples/sample-mysql.yaml +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/autoscaler/cluster/examples/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/clustering/proxysql-cluster/index.md b/docs/guides/proxysql/clustering/proxysql-cluster/index.md index addf81539..de56356c5 100644 --- a/docs/guides/proxysql/clustering/proxysql-cluster/index.md +++ b/docs/guides/proxysql/clustering/proxysql-cluster/index.md @@ -60,7 +60,7 @@ spec: ``` ```bash -$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/clustering/proxysql-cluster/examples/sample-mysql.yaml +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/clustering/proxysql-cluster/examples/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/reconfigure/cluster/index.md b/docs/guides/proxysql/reconfigure/cluster/index.md index 7021bf7ae..00f08466d 100644 --- a/docs/guides/proxysql/reconfigure/cluster/index.md +++ b/docs/guides/proxysql/reconfigure/cluster/index.md @@ -62,7 +62,7 @@ spec: ``` ```bash -$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" 
>}}/docs/guides/proxysql/reconfigure/cluster/examples/sample-mysql.yaml +$ kubectl apply -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/reconfigure/cluster/examples/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/scaling/horizontal-scaling/cluster/index.md b/docs/guides/proxysql/scaling/horizontal-scaling/cluster/index.md index c0a015c62..f1f43fd22 100644 --- a/docs/guides/proxysql/scaling/horizontal-scaling/cluster/index.md +++ b/docs/guides/proxysql/scaling/horizontal-scaling/cluster/index.md @@ -61,7 +61,7 @@ spec: ``` ```bash -$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/scaling/horizontal-scaling/cluster/example/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/scaling/horizontal-scaling/cluster/example/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/scaling/vertical-scaling/cluster/index.md b/docs/guides/proxysql/scaling/vertical-scaling/cluster/index.md index c2c4e1320..4a3de9969 100644 --- a/docs/guides/proxysql/scaling/vertical-scaling/cluster/index.md +++ b/docs/guides/proxysql/scaling/vertical-scaling/cluster/index.md @@ -60,7 +60,7 @@ spec: ``` ```bash -$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/scaling/vertical-scaling/cluster/example/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/scaling/vertical-scaling/cluster/example/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/tls/configure/index.md b/docs/guides/proxysql/tls/configure/index.md index e9f8f54f0..d767d8c21 100644 --- a/docs/guides/proxysql/tls/configure/index.md +++ b/docs/guides/proxysql/tls/configure/index.md @@ -64,7 +64,7 @@ spec: ``` ```bash -$ 
kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/tls/configure/examples/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/tls/configure/examples/sample-druid.yaml mysql.kubedb.com/mysql-server created ``` diff --git a/docs/guides/proxysql/update-version/cluster/index.md b/docs/guides/proxysql/update-version/cluster/index.md index 07c296dca..2500d7cd3 100644 --- a/docs/guides/proxysql/update-version/cluster/index.md +++ b/docs/guides/proxysql/update-version/cluster/index.md @@ -60,7 +60,7 @@ spec: ``` ```bash -$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/update-version/cluster/examples/sample-mysql.yaml +$ kubectl create -f https://github.com/kubedb/docs/raw/{{< param "info.version" >}}/docs/guides/proxysql/update-version/cluster/examples/sample-druid.yaml mysql.kubedb.com/mysql-server created ```