diff --git a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts index b1a43aa1cbd2c..5c358d9fd3270 100644 --- a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts +++ b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts @@ -1147,14 +1147,39 @@ export const database: NavMenuConstant = { items: [ { name: 'Overview', url: '/guides/database/replication' }, { - name: 'Setting up replication', - url: '/guides/database/replication/setting-up-replication' as `/${string}`, + name: 'ETL Replication', + url: '/guides/database/replication/etl-replication-setup' as `/${string}`, + items: [ + { + name: 'Setting up', + url: '/guides/database/replication/etl-replication-setup' as `/${string}`, + }, + { + name: 'Destinations', + url: '/guides/database/replication/etl-destinations' as `/${string}`, + }, + { + name: 'Monitoring', + url: '/guides/database/replication/etl-replication-monitoring' as `/${string}`, + }, + { name: 'FAQ', url: '/guides/database/replication/etl-replication-faq' }, + ], }, { - name: 'Monitoring replication', - url: '/guides/database/replication/monitoring-replication' as `/${string}`, + name: 'Manual Replication', + url: '/guides/database/replication/manual-replication-setup' as `/${string}`, + items: [ + { + name: 'Setting up', + url: '/guides/database/replication/manual-replication-setup' as `/${string}`, + }, + { + name: 'Monitoring', + url: '/guides/database/replication/manual-replication-monitoring' as `/${string}`, + }, + { name: 'FAQ', url: '/guides/database/replication/manual-replication-faq' }, + ], }, - { name: 'FAQ', url: '/guides/database/replication/faq' }, ], }, { diff --git a/apps/docs/content/guides/database/replication.mdx b/apps/docs/content/guides/database/replication.mdx index 10793ab2abb02..d2375d88e731c 100644 --- a/apps/docs/content/guides/database/replication.mdx +++ b/apps/docs/content/guides/database/replication.mdx @@ -1,39 +1,55 @@ --- -title: 'Replication and change data capture' -description: 'An introduction to logical replication and change data capture' +id: 'replication' +title: 'Database Replication' +description: 'Replicate your database to external destinations using ETL or manual replication.' +subtitle: 'An introduction to database replication and change data capture.' +sidebar_label: 'Overview' --- Replication is the process of copying changes from your database to another location. It's also referred to as change data capture (CDC): capturing all the changes that occur to your data. ## Use cases -You might use replication for: +You might use database replication for: - **Analytics and Data Warehousing**: Replicate your operational database to analytics platforms for complex analysis without impacting your application's performance. - **Data Integration**: Keep your data synchronized across different systems and services in your tech stack. - **Backup and Disaster Recovery**: Maintain up-to-date copies of your data in different locations. -- **Read Scaling**: Distribute read operations across multiple database instances to improve performance. -## Replication in Postgres +## Replication methods -Postgres comes with built-in support for replication via publications and replication slots. Refer to the [Concepts and terms](#concepts-and-terms) section to learn how replication works. 
+Supabase supports two methods for replicating your database to external destinations: -## Setting up and monitoring replication in Supabase +### ETL replication -- [Setting up replication](/docs/guides/database/replication/setting-up-replication) -- [Monitoring replication](/docs/guides/database/replication/monitoring-replication) + - - -If you want to set up a read replica, see [Read Replicas](/docs/guides/platform/read-replicas) instead. If you want to sync your data in real time to a client such as a browser or mobile app, see [Realtime](/docs/guides/realtime) instead. For configuring replication to an ETL destination, use the [Dashboard](/dashboard/project/_/database/replication). +ETL Replication is currently in private alpha. Access is limited and features may change. +Use Supabase ETL to automatically replicate data to supported systems. + +- [Set up ETL Replication](/docs/guides/database/replication/etl-replication-setup) + +### Manual replication + +Configure your own replication using external tools and Postgres's native logical replication. This gives you full control over the replication process and allows you to use any tool that supports Postgres logical replication. + +- [Set up Manual Replication](/docs/guides/database/replication/manual-replication-setup) + +## Related features + +Choose the data syncing method based on your use case: + +- For realtime features and syncing data to clients (browsers, mobile apps), see [Realtime](/docs/guides/realtime) +- For deploying read-only databases across multiple regions, see [Read Replicas](/docs/guides/platform/read-replicas) + ## Concepts and terms ### Write-Ahead Log (WAL) -Postgres uses a system called the Write-Ahead Log (WAL) to manage changes to the database. As you make changes, they are appended to the WAL (which is a series of files (also called "segments"), where the file size can be specified). Once one segment is full, Postgres will start appending to a new segment. After a period of time, a checkpoint occurs and Postgres synchronizes the WAL with your database. Once the checkpoint is complete, then the WAL files can be removed from disk and free up space. +Postgres uses a system called the Write-Ahead Log (WAL) to manage changes to the database. As you make changes, they are appended to the WAL, which is a series of files (also called "segments") where the file size can be specified. Once one segment is full, Postgres will start appending to a new segment. After a period of time, a checkpoint occurs and Postgres synchronizes the WAL with your database. Once the checkpoint is complete, then the WAL files can be removed from disk and free up space. ### Logical replication and WAL diff --git a/apps/docs/content/guides/database/replication/etl-bigquery.mdx b/apps/docs/content/guides/database/replication/etl-bigquery.mdx new file mode 100644 index 0000000000000..1547272f1e9e5 --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-bigquery.mdx @@ -0,0 +1,126 @@ +--- +id: 'etl-bigquery' +title: 'ETL to BigQuery' +description: 'Replicate your Supabase database to Google BigQuery using ETL Replication.' +subtitle: 'Stream data changes to BigQuery in real-time.' +sidebar_label: 'BigQuery' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. + + + +BigQuery is Google's fully managed data warehouse. ETL Replication allows you to automatically sync your Supabase database tables to BigQuery for analytics and reporting. 
+ + + +This page covers BigQuery-specific configuration. For complete setup instructions including publications, general settings, and pipeline management, see the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup). + + + +### Setup + +Setting up BigQuery replication requires preparing your GCP resources, then configuring BigQuery as an ETL destination. + +#### Step 1: Prepare GCP resources + +Before configuring BigQuery as a destination, set up the following in Google Cloud Platform: + +1. **Google Cloud Platform (GCP) account**: [Sign up for GCP](https://cloud.google.com/gcp) if you don't have one +2. **BigQuery dataset**: Create a [BigQuery dataset](https://cloud.google.com/bigquery/docs/datasets-intro) in your GCP project +3. **GCP service account key**: Create a [service account](https://cloud.google.com/iam/docs/keys-create-delete) with the **BigQuery Data Editor** role and download the JSON key file + +#### Step 2: Add BigQuery as an ETL destination + +After preparing your GCP resources, configure BigQuery as an ETL destination: + +1. Navigate to [Database](/dashboard/project/_/database/etl) → **ETL Replication** in your Supabase Dashboard +2. Click **Add destination** +3. Configure the destination: + + BigQuery Configuration Settings + + - **Destination type**: Select **BigQuery** + - **Project ID**: Your BigQuery project identifier (found in the GCP Console) + - **Dataset ID**: The name of your BigQuery dataset (without the project ID) + + + + In the GCP Console, the dataset is shown as `project-id.dataset-id`. Enter only the part after the dot. For example, if you see `my-project.my_dataset`, enter `my_dataset`. + + + + - **Service Account Key**: Your GCP service account key in JSON format. The service account must have the following permissions: + - `bigquery.datasets.get` + - `bigquery.tables.create` + - `bigquery.tables.get` + - `bigquery.tables.getData` + - `bigquery.tables.update` + - `bigquery.tables.updateData` + +4. Complete the remaining configuration following the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup) + +### How it works + +Once configured, ETL Replication to BigQuery: + +1. Captures changes from your Postgres database (INSERT, UPDATE, DELETE operations) +2. Batches changes for optimal performance +3. Creates BigQuery tables automatically to match your Postgres schema +4. Streams data to BigQuery with CDC metadata + + + +Due to ingestion latency in BigQuery's streaming API, there may be a delay (typically seconds to minutes) in data appearing. This is normal and expected for BigQuery's architecture. 
+ + + +#### BigQuery CDC format + +BigQuery tables include additional columns for change tracking: + +- `_change_type`: The type of change (`INSERT`, `UPDATE`, `DELETE`) +- `_commit_timestamp`: When the change was committed in Postgres +- `_stream_id`: Internal identifier for the replication stream + +### Querying replicated data + +Once replication is running, you can query your data in BigQuery: + +```sql +-- Query the replicated table +SELECT * FROM `your-project.your_dataset.users` +WHERE created_at > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY); + +-- View CDC changes +SELECT + _change_type, + _commit_timestamp, + id, + name, + email +FROM `your-project.your_dataset.users` +ORDER BY _commit_timestamp DESC +LIMIT 100; +``` + +### Limitations + +BigQuery-specific limitations: + +- **Ingestion latency**: BigQuery's streaming API has inherent latency (typically seconds to minutes) +- **Row size**: Limited to 10 MB per row due to BigQuery Storage Write API constraints + +For general ETL Replication limitations that apply to all destinations, see the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup#limitations). + +### Next steps + +- [Set up ETL Replication](/docs/guides/database/replication/etl-replication-setup) +- [Monitor ETL Replication](/docs/guides/database/replication/etl-replication-monitoring) +- [View ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq) diff --git a/apps/docs/content/guides/database/replication/etl-destinations.mdx b/apps/docs/content/guides/database/replication/etl-destinations.mdx new file mode 100644 index 0000000000000..e958d51d4f748 --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-destinations.mdx @@ -0,0 +1,32 @@ +--- +id: 'etl-destinations' +title: 'ETL Destinations' +description: 'Choose where to replicate your database with ETL Replication.' +subtitle: 'Available destinations for ETL Replication.' +sidebar_label: 'Destinations' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. + + + +ETL Replication supports multiple destination types for syncing your database. Choose the destination that best fits your analytics and integration needs. + + + +Some destinations may not be available for all users. Additional destinations are planned for the future, but we don't have public timelines to share at this time. + + + +### Available destinations + +| Destination | Description | Configuration | +| ------------------------------- | ---------------------------------------------- | -------------------------------------------------------------------- | +| **Iceberg (Analytics Buckets)** | Apache Iceberg tables in S3-compatible storage | [Configure Iceberg →](/docs/guides/database/replication/etl-iceberg) | + +### Next steps + +- [Set up ETL Replication](/docs/guides/database/replication/etl-replication-setup) +- [Monitor ETL Replication](/docs/guides/database/replication/etl-replication-monitoring) diff --git a/apps/docs/content/guides/database/replication/etl-iceberg.mdx b/apps/docs/content/guides/database/replication/etl-iceberg.mdx new file mode 100644 index 0000000000000..5db6f621b66de --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-iceberg.mdx @@ -0,0 +1,88 @@ +--- +id: 'etl-iceberg' +title: 'ETL to Iceberg (Analytics Buckets)' +description: 'Replicate your Supabase database to Iceberg format using Analytics Buckets.' +subtitle: 'Stream data to Analytics Buckets.' 
+sidebar_label: 'Iceberg' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. + + + + + +Iceberg replication is currently incomplete. It provides an append-only log listing all your data changes with an additional column explaining the type of operation (INSERT, UPDATE, DELETE). + + + +Apache Iceberg is an open table format for analytic datasets. ETL Replication to Iceberg uses Supabase [Analytics Buckets](/docs/guides/storage/analytics) to store your replicated data. + + + +This page covers Iceberg-specific configuration. For complete setup instructions including publications, general settings, and pipeline management, see the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup). + + + +### Setup + +Setting up Iceberg replication requires two steps: creating an Analytics Bucket, then configuring it as an ETL destination. + +#### Step 1: Create an Analytics bucket + +First, create an Analytics Bucket to store your replicated data: + +1. Navigate to [Storage](/dashboard/project/_/storage/buckets) → **Analytics** in your Supabase Dashboard +2. Click **New bucket** + + Create New Analytics Bucket + +#### Step 2: Add Iceberg as an ETL destination + +After clicking **New bucket**, fill in the bucket details and copy the credentials: + +1. Fill in the bucket details: + + Analytics Bucket Details + + - **Name**: A unique name for your bucket + - **Region**: Select the region where your data will be stored + +2. Click **Create bucket** +3. **Copy the credentials** displayed after bucket creation (Catalog Token, S3 Access Key ID, S3 Secret Access Key). You'll need these in the next steps. +4. Navigate to [Database](/dashboard/project/_/database/etl) → **ETL Replication** in your Supabase Dashboard +5. Click **Add destination** +6. Configure the destination: + - **Destination type**: Select **Iceberg (Analytics Bucket)** + - **Bucket**: The name of your Analytics Bucket from Step 1 + - **Namespace**: The schema name where your tables will be replicated (e.g., `public`) + - **Catalog Token**: Authentication token for accessing the Iceberg catalog (copied in Step 3) + - **S3 Access Key ID**: Access key for S3-compatible storage (copied in Step 3) + - **S3 Secret Access Key**: Secret key for S3-compatible storage (copied in Step 3) +7. Complete the remaining configuration following the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup) + +For more information about Analytics Buckets, see the [Analytics Buckets documentation](/docs/guides/storage/analytics). + +### Limitations + +Iceberg-specific limitations: + +- **Append-only log**: Currently provides an append-only log format rather than a full table representation + +For general ETL Replication limitations that apply to all destinations, see the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup#limitations). 
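
Because the current output is an append-only change log rather than a maintained table, consumers usually derive the latest row state themselves. The query below is a minimal sketch of that pattern for any engine that can read the Iceberg table and supports window functions (for example DuckDB, Spark SQL, or Trino); the `users` table, the `id` primary key, and the `_change_type` / `_commit_timestamp` column names are illustrative assumptions borrowed from the BigQuery CDC format and may differ from what your pipeline actually writes.

```sql
-- Sketch: collapse the append-only change log into the current row state.
-- Assumes a primary key column `id` and CDC columns `_change_type` and
-- `_commit_timestamp`; adjust names to match what your pipeline produces.
with ranked as (
  select
    *,
    row_number() over (
      partition by id
      order by _commit_timestamp desc
    ) as rn
  from public.users
)
select *
from ranked
where rn = 1
  and _change_type <> 'DELETE';
```

The same pattern also acts as deduplication if rows are replayed after a pipeline restart, since only the most recent version of each key is kept.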
+ +### Next steps + +- [Set up ETL Replication](/docs/guides/database/replication/etl-replication-setup) +- [Monitor ETL Replication](/docs/guides/database/replication/etl-replication-monitoring) +- [View ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq) diff --git a/apps/docs/content/guides/database/replication/etl-replication-faq.mdx b/apps/docs/content/guides/database/replication/etl-replication-faq.mdx new file mode 100644 index 0000000000000..e76c1377c5c5a --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-replication-faq.mdx @@ -0,0 +1,100 @@ +--- +id: 'etl-replication-faq' +title: 'ETL Replication FAQ' +description: 'Frequently asked questions about ETL replication.' +subtitle: 'Common questions and answers about ETL replication.' +sidebar_label: 'FAQ' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. + + + +## What destinations are supported? + +ETL Replication currently supports Iceberg (Analytics Buckets). See the [ETL Destinations guide](/docs/guides/database/replication/etl-destinations) for details. + +## Why is a table not being replicated? + +Common reasons: + +- **Missing primary key**: Tables must have a primary key to be replicated +- **Not in publication**: Ensure the table is included in your publication +- **Unsupported data types**: Tables with custom data types are not supported +- **Partitioned tables**: Not currently supported + +Check your publication settings and verify your table meets the requirements. + +## Why aren't publication changes reflected after adding or removing tables? + +After modifying your publication, you must restart the ETL pipeline for changes to take effect. See [Adding or removing tables](/docs/guides/database/replication/etl-replication-setup#adding-or-removing-tables) for instructions. + +## Why is a pipeline in failed state? + +Pipeline failures occur during the streaming phase when an error happens while replicating live data. This prevents data loss. To recover: + +1. Check the error message by hovering over the **Failed** status +2. Click **View status** for detailed information +3. Fix the underlying issue (e.g., schema mismatches, destination connectivity) +4. Restart the pipeline + +See [Handling errors](/docs/guides/database/replication/etl-replication-monitoring#handling-errors) for more details. + +## Why is a table in error state? + +Table errors occur during the copy phase. To recover, click **View status**, find the affected table, and reset the table state. This will restart the table copy from the beginning. + +## How to verify replication is working + +Check the [Database](/dashboard/project/_/database/etl) → **ETL Replication** page: + +1. Verify your pipeline shows **Running** status +2. Click **View status** to check table states +3. Ensure all tables show **Live** state (actively replicating) +4. Monitor replication lag metrics + +See the [ETL Replication Monitoring guide](/docs/guides/database/replication/etl-replication-monitoring) for comprehensive monitoring instructions. + +## What are the main limitations? + +Key limitations to be aware of: + +- **Primary keys**: Required on all tables +- **Custom data types**: Not supported +- **Schema changes**: Not automatically handled + +Destination-specific limitations may also apply. See the [Iceberg](/docs/guides/database/replication/etl-iceberg#limitations) destination page for details. 
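
Because missing primary keys are the most common reason a table is silently skipped, it is worth checking your publication directly in Postgres before digging further. The query below is a small sketch that lists published tables without a primary key; it assumes a publication named `pub_users_orders` from the setup guide, so substitute your own publication name.

```sql
-- List tables in a publication that have no primary key.
-- Replace 'pub_users_orders' with your publication name.
select pt.schemaname, pt.tablename
from pg_publication_tables pt
join pg_class c
  on c.relname = pt.tablename
join pg_namespace n
  on n.oid = c.relnamespace
 and n.nspname = pt.schemaname
where pt.pubname = 'pub_users_orders'
  and not exists (
    select 1
    from pg_constraint con
    where con.conrelid = c.oid
      and con.contype = 'p' -- 'p' = primary key constraint
  );
```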
+ +## How to stop or pause replication + +You can manage your pipeline using the actions menu in the destinations list. See [Managing your pipeline](/docs/guides/database/replication/etl-replication-setup#managing-your-pipeline) for details on available actions. + +Note: Stopping replication will cause changes to queue up in the WAL. + +## Can data duplicates occur during pipeline operations? + +Yes, data duplicates can occur in certain scenarios when stopping a pipeline. + +When you stop a pipeline (for restarts or updates), ETL Replication tries to finish processing any transactions that are currently being sent to your destination. It waits up to a few minutes to allow these in-progress transactions to complete cleanly before stopping. + +However, if a transaction in your database takes longer than this waiting period to complete, the pipeline will stop before that entire transaction has been fully processed. When the pipeline starts again, it must restart the incomplete transaction from the beginning to maintain transaction boundaries, which results in some data being sent twice to your destination. + +**Understanding transaction boundaries**: A transaction is a group of database changes that happen together (for example, all changes within a `BEGIN...COMMIT` block). ETL Replication must process entire transactions - it cannot process part of a transaction, stop, and then continue from the middle. This means if a transaction is interrupted, the whole transaction must be replayed when the pipeline resumes. + +**Example scenario**: Suppose you have a batch operation that updates 10,000 rows within a single transaction. If this operation takes 10 minutes to complete and you stop the pipeline after 5 minutes (when 5,000 rows have been processed), the pipeline cannot resume from row 5,001. Instead, when it restarts, it must reprocess all 10,000 rows from the beginning, resulting in the first 5,000 rows being sent to your destination twice. + +**Important**: We are not currently planning to implement automatic deduplication. If your use case requires guaranteed exactly-once delivery, you should implement deduplication logic in your downstream systems based on primary keys or other unique identifiers. + +## Where to find replication logs + +Navigate to [Logs](/dashboard/project/_/logs/explorer) → **ETL Replication** to see all pipeline logs. Logs contain diagnostic information. If you're experiencing issues, contact support with your error details. + +## How to get help + +If you need assistance: + +1. Check the [ETL Replication Setup guide](/docs/guides/database/replication/etl-replication-setup) and [ETL Replication Monitoring guide](/docs/guides/database/replication/etl-replication-monitoring) +2. Review this FAQ for common issues +3. Contact support with your error details and logs diff --git a/apps/docs/content/guides/database/replication/etl-replication-monitoring.mdx b/apps/docs/content/guides/database/replication/etl-replication-monitoring.mdx new file mode 100644 index 0000000000000..f524d326db4e7 --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-replication-monitoring.mdx @@ -0,0 +1,180 @@ +--- +id: 'etl-replication-monitoring' +title: 'ETL Replication Monitoring' +description: 'Monitor the status and health of your ETL replication pipelines.' +subtitle: 'Track replication status, view logs, and troubleshoot issues.' +sidebar_label: 'Monitoring' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. 
+ + + +After setting up ETL replication, you can monitor the status and health of your replication pipelines directly from the Supabase Dashboard. The pipeline is the active process that continuously replicates changes from your database to your destination. + +### Viewing pipeline status + +To monitor your ETL replication pipelines: + +1. Navigate to the [Database](/dashboard/project/_/database/etl) section in your Supabase Dashboard +2. Select the **ETL Replication** tab +3. You'll see a list of all your destinations with their pipeline status + +ETL Destinations List + +#### Pipeline states + +Each destination shows its pipeline in one of these states: + +| State | Description | +| ------------ | ------------------------------------------------------------------------------- | +| **Stopped** | Pipeline is not running | +| **Starting** | Pipeline is being started | +| **Running** | Pipeline is actively replicating data | +| **Stopping** | Pipeline is being stopped | +| **Failed** | Pipeline has encountered an error (hover over the status to view error details) | + +### Viewing detailed pipeline metrics + +For detailed information about a specific pipeline, click **View status** on the destination. This opens the pipeline status page where you can monitor replication performance and table states. + +Pipeline Status View + +#### Replication lag metrics + +The status page shows replication lag metrics that help you determine how fast your pipeline is replicating data. These metrics are loaded directly from Postgres itself. + +#### Table states + +The pipeline status page also shows the state of individual tables being replicated. Each table can be in one of these states: + +| State | Description | +| ----------- | ---------------------------------------------------------------------- | +| **Queue** | Table is getting ready to be copied | +| **Copying** | Initial snapshot of the table is being copied | +| **Copied** | Table snapshot is complete and getting ready for real-time replication | +| **Live** | Table is now replicating data in near real-time | +| **Error** | Table has experienced an error during replication | + +### Handling errors + +Errors can occur at two levels: per table or per pipeline. + +#### Table errors + +Table errors occur during the copy phase and affect individual tables. These errors can be retried without stopping the entire pipeline. + +Table Error Details + +**Viewing table error details:** + +1. Click **View status** on your destination +2. Check the table states section to identify tables in **Error** state +3. Review the error message for that specific table + +**Recovering from table errors:** + +When a table encounters an error during the copy phase, you can reset the table state (for some errors). This will restart the table copy from the beginning. + +#### Pipeline errors + +Pipeline errors occur during the streaming phase (Live state) and affect the entire pipeline. When streaming data, if an error occurs, the entire pipeline will stop and enter a **Failed** state. This prevents data loss by ensuring no changes are skipped. + +Pipeline Error Details + +**Viewing pipeline error details:** + +1. Hover over the **Failed** status in the destinations list to see a quick error summary +2. Click **View status** for comprehensive error information +3. Navigate to [Logs](/dashboard/project/_/logs/explorer) → **ETL Replication** for detailed error logs + +**Recovering from pipeline errors:** + +To recover from a pipeline error, you'll need to: + +1. 
Investigate the root cause using the error details and logs +2. Fix the underlying issue (e.g., destination connectivity, schema compatibility) +3. Restart the pipeline from the destinations list + +### Viewing logs + +To see detailed logs for all your ETL replication pipelines: + +1. Navigate to [Logs](/dashboard/project/_/logs/explorer) in your Supabase Dashboard +2. Select **ETL Replication** from the log source filter +3. You'll see all logs from your replication pipelines + +ETL Replication Logs + + + +Logs contain diagnostic information that may be too technical for most users. If you're experiencing issues with ETL replication, reaching out to support with your error details is recommended. + + + +### Common monitoring scenarios + +#### Checking if replication is healthy + +1. Navigate to [Database](/dashboard/project/_/database/etl) → **ETL Replication** +2. Verify your destination shows **Running** status +3. Click **View status** to check replication lag and table states +4. Ensure all tables show **Live** state + +#### Investigating errors + +If you see a **Failed** status: + +1. Hover over the status to see the error summary +2. Click **View status** to see detailed error information +3. Check table states to identify which tables are affected +4. Navigate to [Logs](/dashboard/project/_/logs/explorer) → **ETL Replication** for full error details +5. For table errors, attempt to reset the affected tables + +#### Monitoring performance + +To ensure optimal performance: + +1. Regularly check replication lag metrics in the pipeline status view +2. Monitor table states to ensure tables are staying in **Live** state +3. Review logs for warnings or performance issues +4. If lag is consistently high, consider adjusting your publication or batch wait time settings + +### Troubleshooting + +If you notice issues with your ETL replication: + +1. **Check pipeline state**: Ensure the pipeline is in **Running** state +2. **Review table states**: Identify tables in **Error** state +3. **Check logs**: Navigate to Logs → ETL Replication for detailed error information +4. **Verify publication**: Ensure your publication is properly configured +5. **Monitor replication lag**: High lag may indicate performance issues + +For more troubleshooting tips, see the [ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq). + +### Next steps + +- [Set up ETL Replication](/docs/guides/database/replication/etl-replication-setup) +- [View ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq) diff --git a/apps/docs/content/guides/database/replication/etl-replication-setup.mdx b/apps/docs/content/guides/database/replication/etl-replication-setup.mdx new file mode 100644 index 0000000000000..34b3d1ea4cbf9 --- /dev/null +++ b/apps/docs/content/guides/database/replication/etl-replication-setup.mdx @@ -0,0 +1,260 @@ +--- +id: 'etl-replication-setup' +title: 'ETL Replication Setup' +description: 'Set up ETL replication to automatically sync your database to external destinations.' +subtitle: 'Configure publications and destinations for ETL replication.' +sidebar_label: 'Setting up' +--- + + + +ETL Replication is currently in private alpha. Access is limited and features may change. + + + +ETL Replication requires two main components: a **publication** (source) and a **destination**. Follow these steps to set up your replication pipeline. + + + +If you already have a publication set up, you can skip to [Step 2: Enable ETL Replication](#step-2-enable-etl-replication). 
+ + + +### Step 1: Create a publication + +A publication defines which tables and change types will be replicated. You need to create a publication using SQL. + +#### Creating a publication + +The following SQL examples assume you have `users` and `orders` tables in your database. + +##### Publication for specific tables + +```sql +-- Create publication for both tables +create publication pub_users_orders +for table users, orders; +``` + +This publication will track all changes (INSERT, UPDATE, DELETE) for both the `users` and `orders` tables. + +##### Publication for all tables in a schema + +```sql +-- Create a publication for all tables in the public schema +create publication pub_all_public for tables in schema public; +``` + +This will track changes for all existing and future tables in the `public` schema. + +##### Publication for all tables + +```sql +-- Create a publication for all tables +create publication pub_all_tables for all tables; +``` + +This will track changes for all tables in your database. + +#### Advanced publication options + +##### Selecting specific columns + +You can replicate only a subset of columns from a table: + +```sql +-- Replicate only specific columns from the users table +create publication pub_users_subset +for table users (id, email, created_at); +``` + +This will only replicate the `id`, `email`, and `created_at` columns from the `users` table. + +##### Filtering rows with a predicate + +You can filter which rows to replicate using a WHERE clause: + +```sql +-- Only replicate active users +create publication pub_active_users +for table users where (status = 'active'); + +-- Only replicate recent orders +create publication pub_recent_orders +for table orders where (created_at > '2024-01-01'); +``` + +#### Viewing publications in the Dashboard + +After creating a publication via SQL, you can view it in the Supabase Dashboard: + +1. Navigate to **Database** → [Publications](/dashboard/project/_/database/publications) in your Supabase Dashboard +2. You'll see all your publications listed with their tables + +### Step 2: Enable ETL replication + +Before adding destinations, you need to enable ETL replication for your project: + +1. Navigate to the [Database](/dashboard/project/_/database/etl) section in your Supabase Dashboard +2. Select the **ETL Replication** tab +3. Click **Enable replication** to activate ETL replication for your project + +Enable ETL Replication + +### Step 3: Add a destination + +Once replication is enabled and you have a publication, you can add a destination. The destination is where your replicated data will be stored, while the pipeline is the active process that continuously replicates changes from your database to that destination. + +#### Available destinations + +For a complete list of available destinations and how to choose the right one for your needs, see [ETL Destinations](/docs/guides/database/replication/etl-destinations). + +#### Configuration + +1. In the ETL Replication tab, click **Add destination** +2. Configure the destination settings: + + **General Settings:** + + - **Destination name**: A name to identify this destination (e.g., "Analytics Warehouse") + - **Publication**: The publication to replicate data from (created in Step 1) + - **Destination type**: Choose from available destination types + + **Destination-specific settings:** + Each destination type requires different configuration. 
See the [ETL Destinations guide](/docs/guides/database/replication/etl-destinations) for configuration details specific to your chosen destination. + +Add ETL Destination + +3. Configure **Advanced Settings** (optional): + + - **Batch wait time (milliseconds)**: How long to wait for more changes before sending a batch. We recommend leaving this at the default value for optimal performance. Setting this too low can result in too much traffic and less efficient batching. + +4. Click **Create and start** to begin replication + +### Step 4: Monitor your pipeline + +After creating a destination, the pipeline will start and appear in the destinations list. You can monitor the pipeline's status and performance from the Dashboard. + +ETL Destinations List + +For comprehensive monitoring instructions including pipeline states, metrics, and logs, see the [ETL Replication Monitoring guide](/docs/guides/database/replication/etl-replication-monitoring). + +### Managing your pipeline + +You can manage your pipeline from the destinations list using the actions menu. + +Pipeline Actions + +Available actions: + +- **Start**: Begin replication for a stopped pipeline +- **Stop**: Pause replication (changes will queue up in the WAL) +- **Restart**: Stop and start the pipeline (required after publication changes) +- **Edit destination**: Modify destination settings like credentials or advanced options +- **Delete**: Remove the destination and permanently stop replication + +### Adding or removing tables + +If you need to modify which tables are replicated after your ETL pipeline is already running, follow these steps: + + + +If your publication uses `FOR ALL TABLES` or `FOR TABLES IN SCHEMA`, new tables in that scope are automatically included in the publication. However, you still **must restart the ETL pipeline** for the changes to take effect. + + + +#### Adding tables to replication + +1. Add the table to your publication using SQL: + + ```sql + -- Add a single table to an existing publication + alter publication pub_users_orders add table products; + + -- Or add multiple tables at once + alter publication pub_users_orders add table products, categories; + ``` + +2. **Restart the ETL pipeline** using the actions menu (see [Managing your pipeline](#managing-your-pipeline)) for the changes to take effect. + +#### Removing tables from replication + +1. Remove the table from your publication using SQL: + + ```sql + -- Remove a single table from a publication + alter publication pub_users_orders drop table orders; + + -- Or remove multiple tables at once + alter publication pub_users_orders drop table orders, products; + ``` + +2. **Restart the ETL pipeline** using the actions menu (see [Managing your pipeline](#managing-your-pipeline)) for the changes to take effect. + +### How it works + +Once configured, ETL Replication: + +1. **Captures** changes from your database using the publication +2. **Loads** the data to your destination in near real-time batches + +Changes are sent in batches to optimize performance and reduce costs. The batch size and timing can be adjusted using the advanced settings. + + + +ETL Replication currently performs data extraction and loading only, without transformation. Your data is replicated as-is to the destination. 
+ + + +### Troubleshooting + +If you encounter issues during setup: + +- **Publication not appearing**: Ensure you created the publication via SQL and refresh the dashboard +- **Tables not showing in publication**: Verify your tables have primary keys (required for replication) +- **Pipeline failed to start**: Check the error message in the status view for specific details +- **No data being replicated**: Verify your publication includes the correct tables and event types + +For more troubleshooting help, see the [ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq). + +### Limitations + +ETL Replication has the following limitations that apply to all destinations: + +- **Primary keys required**: Tables must have primary keys +- **Custom data types**: Not supported +- **Schema changes**: Not automatically handled +- **No data transformation**: Data is replicated as-is without transformation +- **Data duplicates**: Duplicates can occur when stopping a pipeline if your database has transactions that take longer than a few minutes to complete. See [Can data duplicates occur?](/docs/guides/database/replication/etl-replication-faq#can-data-duplicates-occur-during-pipeline-operations) for details + +Destination-specific limitations may also apply. See the [Iceberg](/docs/guides/database/replication/etl-iceberg#limitations) destination page for details. + +### Future work + +ETL Replication is actively being developed. Planned improvements include: + +- **DDL support**: Automatic handling of schema changes (ALTER TABLE, ADD COLUMN, etc.) +- **Additional destinations**: Support for more data warehouses and analytics platforms + +We don't have public timelines for these features, but they represent our roadmap for making ETL Replication more robust and flexible. + +### Next steps + +- [Monitor ETL Replication](/docs/guides/database/replication/etl-replication-monitoring) +- [View ETL Replication FAQ](/docs/guides/database/replication/etl-replication-faq) diff --git a/apps/docs/content/guides/database/replication/faq.mdx b/apps/docs/content/guides/database/replication/manual-replication-faq.mdx similarity index 73% rename from apps/docs/content/guides/database/replication/faq.mdx rename to apps/docs/content/guides/database/replication/manual-replication-faq.mdx index 93427562d689c..d909e2b629468 100644 --- a/apps/docs/content/guides/database/replication/faq.mdx +++ b/apps/docs/content/guides/database/replication/manual-replication-faq.mdx @@ -1,21 +1,22 @@ --- id: 'replication-faq' -title: 'FAQs' -description: 'Considerations and FAQs when setting up replication' -sidebar_label: 'FAQs' +title: 'Replication FAQ' +description: 'Frequently asked questions about manual database replication.' +subtitle: 'Common questions and considerations when setting up manual replication.' +sidebar_label: 'FAQ' --- -# Which connection string should be used? +## Which connection string should be used? Always use the direct connection string for logical replication. Connections through a pooler, such as Supavisor, will not work. -# The tool in use does not support IPv6 +## The tool in use does not support IPv6 You can enable the [IPv4 add-on](/docs/guides/platform/ipv4-address) for your project. -# What is XMIN and should it be used? +## What is XMIN and should it be used? Xmin is a different form of replication from logical replication and should only be used if logical replication is not available for your database (i.e. older versions of Postgres). 
@@ -23,15 +24,15 @@ Xmin performs replication by checking the [xmin system column](https://www.postg It does not capture deletion of data and is **not recommended**, particularly for larger databases. -# Can replication be configured in the Dashboard? +## Can replication be configured in the Dashboard? You can view [publications](/dashboard/project/default/database/publications) in the Dashboard but all steps to configure replication must be done using the [SQL Editor](/dashboard/project/default/sql/new) or a CLI tool of your choice. -# How to configure database settings for replication? +## How to configure database settings for replication? Yes. Using the Supabase CLI, you can [configure database settings](/docs/guides/database/custom-postgres-config#cli-configurable-settings) to optimize them for your replication needs. These values can vary depending on the activity of your database size and activity. -# What are some important configuration options? +## What are some important configuration options? Some of the more important options to be aware of are: diff --git a/apps/docs/content/guides/database/replication/monitoring-replication.mdx b/apps/docs/content/guides/database/replication/manual-replication-monitoring.mdx similarity index 88% rename from apps/docs/content/guides/database/replication/monitoring-replication.mdx rename to apps/docs/content/guides/database/replication/manual-replication-monitoring.mdx index 300ca96689822..0047bc80136e4 100644 --- a/apps/docs/content/guides/database/replication/monitoring-replication.mdx +++ b/apps/docs/content/guides/database/replication/manual-replication-monitoring.mdx @@ -1,5 +1,9 @@ --- -title: 'Monitoring replication' +id: 'monitoring-replication' +title: 'Monitoring Replication' +description: 'Monitor replication lag and status for manual replication setups.' +subtitle: 'Track replication health and performance.' +sidebar_label: 'Monitoring' --- Monitoring replication lag is important and there are 3 ways to do this: @@ -14,9 +18,9 @@ Monitoring replication lag is important and there are 3 ways to do this: - pg_stat_replication_replay_lag - lag to replay WAL files from the source DB on the target DB (throttled by disk or high activity) - pg_stat_replication_send_lag - lag in sending WAL files from the source DB (a high lag means that the publisher is not being asked to send new WAL files OR a network issues) -## Primary +### Primary -### Replication status and lag +#### Replication status and lag The `pg_stat_replication` table shows the status of any replicas connected to the primary database. @@ -25,7 +29,7 @@ select pid, application_name, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, from pg_stat_replication; ``` -### Replication slot status +#### Replication slot status A replication slot can be in one of three states: @@ -39,7 +43,7 @@ The state can be checked using the `pg_replication_slots` table: select slot_name, active, state from pg_replication_slots; ``` -### WAL size +#### WAL size The WAL size can be checked using the `pg_ls_waldir()` function: @@ -47,15 +51,15 @@ The WAL size can be checked using the `pg_ls_waldir()` function: select * from pg_ls_waldir(); ``` -### Check LSN +#### Check LSN ```sql select pg_current_wal_lsn(); ``` -## Subscriber +### Subscriber -### Subscription status +#### Subscription status The `pg_subscription` table shows the status of any subscriptions on a replica and the `pg_subscription_rel` table shows the status of each table within a subscription. 
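
Alongside those catalogs, the `pg_stat_subscription` view reports live progress for each subscription worker on the subscriber. A minimal sketch for checking how far WAL has been received and applied is shown below; the columns are standard Postgres, but verify them against your Postgres version.

```sql
-- Sketch: check subscription progress on the subscriber.
select
  subname,
  received_lsn,
  latest_end_lsn,
  last_msg_receipt_time,
  latest_end_time
from pg_stat_subscription;
```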
@@ -87,7 +91,7 @@ ORDER BY table_name; ``` -### Check LSN +#### Check LSN ```sql select pg_last_wal_replay_lsn(); diff --git a/apps/docs/content/guides/database/replication/setting-up-replication.mdx b/apps/docs/content/guides/database/replication/manual-replication-setup.mdx similarity index 80% rename from apps/docs/content/guides/database/replication/setting-up-replication.mdx rename to apps/docs/content/guides/database/replication/manual-replication-setup.mdx index 5258bc2d18eed..eeccf0a383579 100644 --- a/apps/docs/content/guides/database/replication/setting-up-replication.mdx +++ b/apps/docs/content/guides/database/replication/manual-replication-setup.mdx @@ -1,11 +1,20 @@ --- -id: 'setting-up-replication' -title: 'Setting up replication and CDC with Supabase' -description: 'Performing Extract Transform Load (ETL) with Supabase' -sidebar_label: 'Setting up replication and CDC' +id: 'manual-replication-setup' +title: 'Manual Replication Setup' +description: 'Configure your own replication using external tools and Postgres logical replication.' +subtitle: 'Set up replication with Airbyte, Estuary, Fivetran, and other tools.' +sidebar_label: 'Setting up' --- -## Prerequisites +This guide covers setting up **manual replication** using external tools. If you prefer a simpler, managed solution, see [ETL Replication](/docs/guides/database/replication/etl-replication-setup) instead. + + + +This guide is for replicating data to external systems using your own tools. For deploying read-only databases across multiple regions, see [Read Replicas](/docs/guides/platform/read-replicas) instead. + + + +### Prerequisites To set up replication, the following is recommended: @@ -39,7 +48,7 @@ You can follow those steps with the following modifications: 1. Use the `postgres` user 2. Select `logical replication` as the replication method (`xmin` is possible, but not recommended) -## Troubleshooting +### Troubleshooting Airbyte has a known [issue](https://discuss.airbyte.io/t/postgres-source-replication-slot-safe-wal-size-only-reset-when-a-change-occurs/3263/7) where it does not clear WAL files on each successful sync. The recommended workaround is to have a `heartbeat` table that you write changes to once an hour.> diff --git a/apps/docs/content/guides/getting-started/features.mdx b/apps/docs/content/guides/getting-started/features.mdx index 33cf6c0345bbc..a5d10213bff00 100644 --- a/apps/docs/content/guides/getting-started/features.mdx +++ b/apps/docs/content/guides/getting-started/features.mdx @@ -32,6 +32,10 @@ Send database changes to any external service using Webhooks. [Docs](/docs/guide Encrypt sensitive data and store secrets using our Postgres extension, Supabase Vault. [Docs](/docs/guides/database/vault). +### ETL replication + +Automatically replicate your database to external destinations like data warehouses and analytics platforms. [Docs](/docs/guides/database/replication/etl-replication). + ## Platform ### Database backups @@ -195,59 +199,60 @@ Features in Beta are tested by an external penetration tester for security issue In addition to the Beta requirements, features in GA are covered by the [uptime SLA](/sla). 
-| Product | Feature | Stage | Available on self-hosted | -| -------------- | -------------------------- | -------------- | ------------------------------------------- | -| Database | Postgres | `GA` | ✅ | -| Database | Vector Database | `GA` | ✅ | -| Database | Auto-generated Rest API | `GA` | ✅ | -| Database | Auto-generated GraphQL API | `GA` | ✅ | -| Database | Webhooks | `beta` | ✅ | -| Database | Vault | `public alpha` | ✅ | -| Platform | | `GA` | ✅ | -| Platform | Point-in-Time Recovery | `GA` | 🚧 [wal-g](https://github.com/wal-g/wal-g) | -| Platform | Custom Domains | `GA` | N/A | -| Platform | Network Restrictions | `GA` | N/A | -| Platform | SSL enforcement | `GA` | N/A | -| Platform | Branching | `beta` | N/A | -| Platform | Terraform Provider | `public alpha` | N/A | -| Platform | Read Replicas | `GA` | N/A | -| Platform | Log Drains | `public alpha` | ✅ | -| Platform | MCP | `public alpha` | ✅ | -| Studio | | `GA` | ✅ | -| Studio | SSO | `GA` | ✅ | -| Studio | Column Privileges | `public alpha` | ✅ | -| Realtime | Postgres Changes | `GA` | ✅ | -| Realtime | Broadcast | `GA` | ✅ | -| Realtime | Presence | `GA` | ✅ | -| Realtime | Broadcast Authorization | `public beta` | ✅ | -| Realtime | Presence Authorization | `public beta` | ✅ | -| Realtime | Broadcast from Database | `public beta` | ✅ | -| Storage | | `GA` | ✅ | -| Storage | CDN | `GA` | 🚧 [Cloudflare](https://www.cloudflare.com) | -| Storage | Smart CDN | `GA` | 🚧 [Cloudflare](https://www.cloudflare.com) | -| Storage | Image Transformations | `GA` | ✅ | -| Storage | Resumable Uploads | `GA` | ✅ | -| Storage | S3 compatibility | `GA` | ✅ | -| Edge Functions | | `GA` | ✅ | -| Edge Functions | Regional Invocations | `GA` | ✅ | -| Edge Functions | NPM compatibility | `GA` | ✅ | -| Auth | | `GA` | ✅ | -| Auth | Email login | `GA` | ✅ | -| Auth | Social login | `GA` | ✅ | -| Auth | Phone login | `GA` | ✅ | -| Auth | Passwordless login | `GA` | ✅ | -| Auth | SSO with SAML | `GA` | ✅ | -| Auth | Authorization via RLS | `GA` | ✅ | -| Auth | CAPTCHA protection | `GA` | ✅ | -| Auth | Server-side Auth | `beta` | ✅ | -| Auth | Third-Party Auth | `GA` | ✅ | -| Auth | Hooks | `beta` | ✅ | -| CLI | | `GA` | ✅ Works with self-hosted | -| Management API | | `GA` | N/A | -| Client Library | JavaScript | `GA` | N/A | -| Client Library | Flutter | `GA` | N/A | -| Client Library | Swift | `GA` | N/A | -| Client Library | Python | `beta` | N/A | +| Product | Feature | Stage | Available on self-hosted | +| -------------- | -------------------------- | --------------- | ------------------------------------------- | +| Database | Postgres | `GA` | ✅ | +| Database | Vector Database | `GA` | ✅ | +| Database | Auto-generated Rest API | `GA` | ✅ | +| Database | Auto-generated GraphQL API | `GA` | ✅ | +| Database | Webhooks | `beta` | ✅ | +| Database | Vault | `public alpha` | ✅ | +| Database | ETL Replication | `private alpha` | N/A | +| Platform | | `GA` | ✅ | +| Platform | Point-in-Time Recovery | `GA` | 🚧 [wal-g](https://github.com/wal-g/wal-g) | +| Platform | Custom Domains | `GA` | N/A | +| Platform | Network Restrictions | `GA` | N/A | +| Platform | SSL enforcement | `GA` | N/A | +| Platform | Branching | `beta` | N/A | +| Platform | Terraform Provider | `public alpha` | N/A | +| Platform | Read Replicas | `GA` | N/A | +| Platform | Log Drains | `public alpha` | ✅ | +| Platform | MCP | `public alpha` | ✅ | +| Studio | | `GA` | ✅ | +| Studio | SSO | `GA` | ✅ | +| Studio | Column Privileges | `public alpha` | ✅ | +| Realtime | Postgres Changes | 
`GA` | ✅ |
+| Realtime | Broadcast | `GA` | ✅ |
+| Realtime | Presence | `GA` | ✅ |
+| Realtime | Broadcast Authorization | `public beta` | ✅ |
+| Realtime | Presence Authorization | `public beta` | ✅ |
+| Realtime | Broadcast from Database | `public beta` | ✅ |
+| Storage | | `GA` | ✅ |
+| Storage | CDN | `GA` | 🚧 [Cloudflare](https://www.cloudflare.com) |
+| Storage | Smart CDN | `GA` | 🚧 [Cloudflare](https://www.cloudflare.com) |
+| Storage | Image Transformations | `GA` | ✅ |
+| Storage | Resumable Uploads | `GA` | ✅ |
+| Storage | S3 compatibility | `GA` | ✅ |
+| Edge Functions | | `GA` | ✅ |
+| Edge Functions | Regional Invocations | `GA` | ✅ |
+| Edge Functions | NPM compatibility | `GA` | ✅ |
+| Auth | | `GA` | ✅ |
+| Auth | Email login | `GA` | ✅ |
+| Auth | Social login | `GA` | ✅ |
+| Auth | Phone login | `GA` | ✅ |
+| Auth | Passwordless login | `GA` | ✅ |
+| Auth | SSO with SAML | `GA` | ✅ |
+| Auth | Authorization via RLS | `GA` | ✅ |
+| Auth | CAPTCHA protection | `GA` | ✅ |
+| Auth | Server-side Auth | `beta` | ✅ |
+| Auth | Third-Party Auth | `GA` | ✅ |
+| Auth | Hooks | `beta` | ✅ |
+| CLI | | `GA` | ✅ Works with self-hosted |
+| Management API | | `GA` | N/A |
+| Client Library | JavaScript | `GA` | N/A |
+| Client Library | Flutter | `GA` | N/A |
+| Client Library | Swift | `GA` | N/A |
+| Client Library | Python | `beta` | N/A |
 - ✅ = Fully Available
 - 🚧 = Available, but requires external tools or configuration
diff --git a/apps/docs/public/humans.txt b/apps/docs/public/humans.txt
index dce66077a6be6..d8c124d5ed31a 100644
--- a/apps/docs/public/humans.txt
+++ b/apps/docs/public/humans.txt
@@ -4,6 +4,7 @@ Supabase is 100% remote.
 Aaron Byrne
 Adam Mokan
+Akash Manimaran
 Alaister Young
 Alan De Los Santos
 Aleksi Immonen
@@ -79,6 +80,7 @@ Jean-Paul Argudo
 Jeff Smick
 Jenny Kibiri
 Jess Shears
+Jim Brodeur
 Jim Chanco Jr
 Joakim Ahrlin
 John Pena
@@ -98,6 +100,7 @@ Katerina Skroumpelou
 Kemal Y
 Kevin Brolly
 Kevin Grüneberg
+Kostas Botsas
 Krishna Sai Vandavasi
 Lakshan Perera
 Laura C
@@ -110,10 +113,12 @@ Luca Forstner
 Manan Gupta
 Margarita Sandomirskaia
 Mark Burggraf
+Matthew Hambright
 Matt Johnston
 Matt Rossman
 Monica Khoury
 Mykhailo Mischa Lieibenson
+Nick B
 Nick Littman
 Nyannyacha
 Oli R
diff --git a/apps/docs/public/img/database/replication/etl-add-destination.png b/apps/docs/public/img/database/replication/etl-add-destination.png
new file mode 100644
index 0000000000000..88154e6767763
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-add-destination.png differ
diff --git a/apps/docs/public/img/database/replication/etl-bigquery-details.png b/apps/docs/public/img/database/replication/etl-bigquery-details.png
new file mode 100644
index 0000000000000..587ddca2bf2ed
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-bigquery-details.png differ
diff --git a/apps/docs/public/img/database/replication/etl-destinations-list.png b/apps/docs/public/img/database/replication/etl-destinations-list.png
new file mode 100644
index 0000000000000..b33f6c20f1f98
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-destinations-list.png differ
diff --git a/apps/docs/public/img/database/replication/etl-enable-replication.png b/apps/docs/public/img/database/replication/etl-enable-replication.png
new file mode 100644
index 0000000000000..e6c34f117a554
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-enable-replication.png differ
diff --git a/apps/docs/public/img/database/replication/etl-iceberg-bucket-details.png b/apps/docs/public/img/database/replication/etl-iceberg-bucket-details.png
new file mode 100644
index 0000000000000..367d7080dce70
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-iceberg-bucket-details.png differ
diff --git a/apps/docs/public/img/database/replication/etl-iceberg-details.png b/apps/docs/public/img/database/replication/etl-iceberg-details.png
new file mode 100644
index 0000000000000..ba51c58c3a56b
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-iceberg-details.png differ
diff --git a/apps/docs/public/img/database/replication/etl-iceberg-new-bucket.png b/apps/docs/public/img/database/replication/etl-iceberg-new-bucket.png
new file mode 100644
index 0000000000000..fbd860ef20af0
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-iceberg-new-bucket.png differ
diff --git a/apps/docs/public/img/database/replication/etl-pipeline-actions.png b/apps/docs/public/img/database/replication/etl-pipeline-actions.png
new file mode 100644
index 0000000000000..870e96fab51d5
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-pipeline-actions.png differ
diff --git a/apps/docs/public/img/database/replication/etl-pipeline-error.png b/apps/docs/public/img/database/replication/etl-pipeline-error.png
new file mode 100644
index 0000000000000..a1c672c94a5f6
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-pipeline-error.png differ
diff --git a/apps/docs/public/img/database/replication/etl-pipeline-table-error.png b/apps/docs/public/img/database/replication/etl-pipeline-table-error.png
new file mode 100644
index 0000000000000..667a6a5b1ceb3
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-pipeline-table-error.png differ
diff --git a/apps/docs/public/img/database/replication/etl-replication-logs.png b/apps/docs/public/img/database/replication/etl-replication-logs.png
new file mode 100644
index 0000000000000..449ae39077cbc
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-replication-logs.png differ
diff --git a/apps/docs/public/img/database/replication/etl-view-status.png b/apps/docs/public/img/database/replication/etl-view-status.png
new file mode 100644
index 0000000000000..3c34ebf4cd63e
Binary files /dev/null and b/apps/docs/public/img/database/replication/etl-view-status.png differ
diff --git a/apps/studio/components/interfaces/Settings/General/TransferProjectPanel/TransferProjectButton.tsx b/apps/studio/components/interfaces/Settings/General/TransferProjectPanel/TransferProjectButton.tsx
index 945b2cd9a39f9..4388d275dc6be 100644
--- a/apps/studio/components/interfaces/Settings/General/TransferProjectPanel/TransferProjectButton.tsx
+++ b/apps/studio/components/interfaces/Settings/General/TransferProjectPanel/TransferProjectButton.tsx
@@ -61,7 +61,8 @@ export const TransferProjectButton = () => {
         queryKey: projectKeys.projectTransferPreview(projectRef, selectedOrg),
       })
     }
-  }, [isOpen, projectRef, selectedOrg, queryClient])
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [isOpen])
 
   const { can: canTransferProject } = useAsyncCheckPermissions(
     PermissionAction.UPDATE,
diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/AnalyticsBucketDetails.constants.ts b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/AnalyticsBucketDetails.constants.ts
index 21957b2208bb9..7efe6904c95b4 100644
--- a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/AnalyticsBucketDetails.constants.ts
+++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/AnalyticsBucketDetails.constants.ts
@@ -24,3 +24,9 @@ export const DESCRIPTIONS: Record = {
   's3.endpoint': '',
   catalog_uri: '',
 }
+
+// [Joshen] For context we've decided to decouple ETL from Analytics Buckets for now
+// So this flag just hides all "connect table" ETL flow related UI
+// Depending on future decision if we intend to keep it that way, then we might be able
+// to clean up + deprecate ConnectTablesDialog and other ETL related UI within Analytics Buckets
+export const HIDE_REPLICATION_USER_FLOW = true
diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/BucketHeader.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/BucketHeader.tsx
index 685f8ab9264eb..d2802ff321201 100644
--- a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/BucketHeader.tsx
+++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/BucketHeader.tsx
@@ -1,15 +1,14 @@
 import { noop } from 'lodash'
 
-import { useParams } from 'common'
 import { FormattedWrapperTable } from 'components/interfaces/Integrations/Wrappers/Wrappers.utils'
 import {
   ScaffoldHeader,
   ScaffoldSectionDescription,
   ScaffoldSectionTitle,
 } from 'components/layouts/Scaffold'
-import { useReplicationPipelineStatusQuery } from 'data/etl/pipeline-status-query'
+import { HIDE_REPLICATION_USER_FLOW } from './AnalyticsBucketDetails.constants'
 import { ConnectTablesDialog } from './ConnectTablesDialog'
-import { useAnalyticsBucketAssociatedEntities } from './useAnalyticsBucketAssociatedEntities'
+import { CreateTableInstructionsDialog } from './CreateTableInstructions/CreateTableInstructionsDialog'
 
 interface BucketHeaderProps {
   showActions?: boolean
@@ -26,13 +25,6 @@ export const BucketHeader = ({
   namespaces = [],
   onSuccessConnectTables = noop,
 }: BucketHeaderProps) => {
-  const { ref: projectRef, bucketId } = useParams()
-
-  const { pipeline } = useAnalyticsBucketAssociatedEntities({ projectRef, bucketId })
-  const { data } = useReplicationPipelineStatusQuery({ projectRef, pipelineId: pipeline?.id })
-  const pipelineStatus = data?.status.name
-  const isPipelineRunning = pipelineStatus === 'started'
-
   return (
@@ -44,7 +36,13 @@ export const BucketHeader = ({ {showActions && (
{namespaces.length > 0 && ( - + <> + {HIDE_REPLICATION_USER_FLOW ? ( + + ) : ( + + )} + )}
)} diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructions.constants.ts b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructions.constants.ts new file mode 100644 index 0000000000000..f5371e970689f --- /dev/null +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructions.constants.ts @@ -0,0 +1,88 @@ +export const getPyicebergSnippet = ({ + ref, + warehouse, + catalogUri, + s3Endpoint, + s3Region, + s3AccessKey, + s3SecretKey, + token, +}: { + ref?: string + warehouse?: string + catalogUri?: string + s3Endpoint?: string + s3Region?: string + s3AccessKey?: string + s3SecretKey?: string + token?: string +}) => + ` +from pyiceberg.catalog import load_catalog +import pyarrow as pa +import datetime + +# Supabase project ref +PROJECT_REF = "${ref ?? ''}" + +# Configuration for Iceberg REST Catalog +WAREHOUSE = "${warehouse ?? 'your-analytics-bucket-name'}" +TOKEN = "${token ?? '•••••••••••••'}" + +# Configuration for S3-Compatible Storage +S3_ACCESS_KEY = "${s3AccessKey ?? '•••••••••••••'}" +S3_SECRET_KEY = "${s3SecretKey ?? '•••••••••••••'}" +S3_REGION = "${s3Region}" +S3_ENDPOINT = f"${s3Endpoint ?? 'https://{PROJECT_REF}.supabase.co/storage/v1/s3'}" +CATALOG_URI = f"${catalogUri ?? 'https://{PROJECT_REF}.supabase.co/storage/v1/iceberg'}" + +# Load the Iceberg catalog +catalog = load_catalog( + "supabase", + type="rest", + warehouse=WAREHOUSE, + uri=CATALOG_URI, + token=TOKEN, + **{ + "py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO", + "s3.endpoint": S3_ENDPOINT, + "s3.access-key-id": S3_ACCESS_KEY, + "s3.secret-access-key": S3_SECRET_KEY, + "s3.region": S3_REGION, + "s3.force-virtual-addressing": False, + }, +) + +# Create namespace if it doesn't exist +print("Creating catalog 'default'...") +catalog.create_namespace_if_not_exists("default") + +# Define schema for your Iceberg table +schema = pa.schema([ + pa.field("event_id", pa.int64()), + pa.field("event_name", pa.string()), + pa.field("event_timestamp", pa.timestamp("ms")), +]) + +# Create table (if it doesn't exist already) +print("Creating table 'events'...") +table = catalog.create_table_if_not_exists(("default", "events"), schema=schema) + +# Generate and insert sample data +print("Preparing sample data to be inserted...") +current_time = datetime.datetime.now() +data = pa.table({ + "event_id": [1, 2, 3], + "event_name": ["login", "logout", "purchase"], + "event_timestamp": [current_time, current_time, current_time], +}) + +# Append data to the Iceberg table +print("Inserting data into 'events'...") +table.append(data) + +print("Completed!") +# Scan table and print data as pandas DataFrame +df = table.scan().to_pandas() +print(df) +`.trim() diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructionsDialog.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructionsDialog.tsx new file mode 100644 index 0000000000000..19c08f08015cd --- /dev/null +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/CreateTableInstructionsDialog.tsx @@ -0,0 +1,32 @@ +import { Plus } from 'lucide-react' +import { + Button, + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + 
DialogTrigger, +} from 'ui' +import { CreateTableInstructions } from '.' + +export const CreateTableInstructionsDialog = () => { + return ( + + + + + + + Adding tables to your Analytics Bucket + + Tables can be created or added to your bucket via Pyiceberg + + + + + + ) +} diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/index.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/index.tsx new file mode 100644 index 0000000000000..a4ad80992055f --- /dev/null +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/CreateTableInstructions/index.tsx @@ -0,0 +1,266 @@ +import { useMemo, useState } from 'react' + +import { useParams } from 'common' +import CommandRender from 'components/interfaces/Functions/CommandRender' +import { convertKVStringArrayToJson } from 'components/interfaces/Integrations/Wrappers/Wrappers.utils' +import { ButtonTooltip } from 'components/ui/ButtonTooltip' +import CopyButton from 'components/ui/CopyButton' +import { InlineLink } from 'components/ui/InlineLink' +import { useProjectSettingsV2Query } from 'data/config/project-settings-v2-query' +import { + getDecryptedValues, + useVaultSecretDecryptedValueQuery, +} from 'data/vault/vault-secret-decrypted-value-query' +import { useSelectedProjectQuery } from 'hooks/misc/useSelectedProject' +import { DOCS_URL } from 'lib/constants' +import { Eye, EyeOff } from 'lucide-react' +import { + Accordion_Shadcn_, + AccordionContent_Shadcn_, + AccordionItem_Shadcn_, + AccordionTrigger_Shadcn_, + Card, + CardFooter, + CardHeader, + CardTitle, + CodeBlock, +} from 'ui' +import { useAnalyticsBucketWrapperInstance } from '../useAnalyticsBucketWrapperInstance' +import { getPyicebergSnippet } from './CreateTableInstructions.constants' + +export const CreateTableInstructions = ({ + hideHeader = false, + className, +}: { + hideHeader?: boolean + className?: string +}) => { + const { ref, bucketId } = useParams() + const { data: project } = useSelectedProjectQuery() + const [showKeys, setShowKeys] = useState(false) + const [isFetchingSecretsOnCopy, setIsFetchingSecretsOnCopy] = useState(false) + + const { data: projectSettings } = useProjectSettingsV2Query({ projectRef: ref }) + const { data: wrapperInstance } = useAnalyticsBucketWrapperInstance({ bucketId }) + const wrapperValues = convertKVStringArrayToJson(wrapperInstance?.server_options ?? 
[]) + + const s3AccessKeyVaultID = wrapperValues.vault_aws_access_key_id + const s3SecretKeyVaultID = wrapperValues.vault_aws_secret_access_key + const tokenVaultID = wrapperValues.vault_token + + const { data: decryptedS3AccessKey, isLoading: isDecryptingS3AccessKey } = + useVaultSecretDecryptedValueQuery( + { + projectRef: project?.ref, + connectionString: project?.connectionString, + id: s3AccessKeyVaultID, + }, + { enabled: showKeys } + ) + + const { data: decryptedS3SecretKey, isLoading: isDecryptingS3SecretKey } = + useVaultSecretDecryptedValueQuery( + { + projectRef: project?.ref, + connectionString: project?.connectionString, + id: s3SecretKeyVaultID, + }, + { enabled: showKeys } + ) + + const { data: decryptedToken, isLoading: isDecryptingToken } = useVaultSecretDecryptedValueQuery( + { + projectRef: project?.ref, + connectionString: project?.connectionString, + id: tokenVaultID, + }, + { enabled: showKeys } + ) + + const isFetchingSecretValues = + showKeys && (isDecryptingS3AccessKey || isDecryptingS3SecretKey || isDecryptingToken) + + const snippetContent = useMemo( + () => + getPyicebergSnippet({ + ref, + warehouse: wrapperValues.warehouse, + catalogUri: wrapperValues.catalog_uri, + s3Endpoint: wrapperValues['s3.endpoint'], + s3Region: projectSettings?.region, + s3AccessKey: showKeys ? decryptedS3AccessKey : undefined, + s3SecretKey: showKeys ? decryptedS3SecretKey : undefined, + token: showKeys ? decryptedToken : undefined, + }), + [ + decryptedS3AccessKey, + decryptedS3SecretKey, + decryptedToken, + projectSettings?.region, + ref, + showKeys, + wrapperValues, + ] + ) + + return ( + + {!hideHeader && ( + + Create your first table via Pyiceberg + + )} + + + + +
+
+ 1 +
+

+ Set up a Python project with uv

+
+
+ + 'curl -LsSf https://astral.sh/uv/install.sh | sh', + }, + { + comment: '2. Initialize a new Python project with uv', + command: 'uv init ', + jsx: () => 'uv init ', + }, + { + comment: '3. Install required packages', + command: 'uv add pyiceberg pyarrow pandas', + jsx: () => 'uv add pyiceberg pyarrow pandas', + }, + ]} + /> + +
+ + + +
+
+ 2 +
+

+ Replace main.py with the following snippet +

+
+
+ +

+ The following snippet creates a namespace default, then creates a sample + table + events in that namespace.

+
+ +
+ { + if (!!decryptedS3AccessKey && !!decryptedS3SecretKey && !!decryptedToken) { + return getPyicebergSnippet({ + ref, + warehouse: wrapperValues.warehouse, + catalogUri: wrapperValues.catalog_uri, + s3Endpoint: wrapperValues['s3.endpoint'], + s3Region: projectSettings?.region, + s3AccessKey: decryptedS3AccessKey, + s3SecretKey: decryptedS3SecretKey, + token: decryptedToken, + }) + } else { + setIsFetchingSecretsOnCopy(true) + const decryptedSecrets = await getDecryptedValues({ + projectRef: project?.ref, + connectionString: project?.connectionString, + ids: [s3AccessKeyVaultID, s3SecretKeyVaultID, tokenVaultID], + }) + setIsFetchingSecretsOnCopy(false) + return getPyicebergSnippet({ + ref, + warehouse: wrapperValues.warehouse, + catalogUri: wrapperValues.catalog_uri, + s3Endpoint: wrapperValues['s3.endpoint'], + s3Region: projectSettings?.region, + s3AccessKey: decryptedSecrets[s3AccessKeyVaultID], + s3SecretKey: decryptedSecrets[s3SecretKeyVaultID], + token: decryptedSecrets[tokenVaultID], + }) + } + }} + /> + setShowKeys(!showKeys)} + icon={showKeys ? : } + tooltip={{ + content: { + side: 'bottom', + text: showKeys + ? 'Hide keys' + : isFetchingSecretValues + ? 'Retrieving keys' + : 'Reveal keys', + }, + }} + /> +
+
+
+
+ + + +
+
+ 3 +
+

Run the Python script

+
+
+ + 'uv run main.py', + }, + ]} + /> + +
+
+ + +

+ Connecting to your bucket with other Iceberg clients? Read more in our{' '} + + documentation + + .

+
+
+ ) +} diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/InitializeForeignSchemaDialog.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/InitializeForeignSchemaDialog.tsx new file mode 100644 index 0000000000000..03b4e00b22bed --- /dev/null +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/InitializeForeignSchemaDialog.tsx @@ -0,0 +1,129 @@ +import { zodResolver } from '@hookform/resolvers/zod' +import { useState } from 'react' +import { SubmitHandler, useForm } from 'react-hook-form' +import { toast } from 'sonner' +import z from 'zod' + +import { useParams } from 'common' +import { useSchemaCreateMutation } from 'data/database/schema-create-mutation' +import { useSchemasQuery } from 'data/database/schemas-query' +import { useFDWImportForeignSchemaMutation } from 'data/fdw/fdw-import-foreign-schema-mutation' +import { useSelectedProjectQuery } from 'hooks/misc/useSelectedProject' +import { + Button, + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogSection, + DialogSectionSeparator, + DialogTitle, + DialogTrigger, + Form_Shadcn_, + FormField_Shadcn_, + Input_Shadcn_, +} from 'ui' +import { FormItemLayout } from 'ui-patterns/form/FormItemLayout/FormItemLayout' +import { getAnalyticsBucketFDWServerName } from './AnalyticsBucketDetails.utils' + +// Create foreign tables for namespace tables +export const InitializeForeignSchemaDialog = ({ namespace }: { namespace: string }) => { + const { ref: projectRef, bucketId } = useParams() + const { data: project } = useSelectedProjectQuery() + const { data: schemas } = useSchemasQuery({ projectRef }) + + const [isOpen, setIsOpen] = useState(false) + const [isCreating, setIsCreating] = useState(false) + + const serverName = getAnalyticsBucketFDWServerName(bucketId ?? '') + + const FormSchema = z.object({ + schema: z + .string() + .trim() + .min(1, 'Schema name is required') + .refine((val) => !schemas?.find((s) => s.name === val), { + message: 'This schema already exists. Please specify a unique schema name.', + }), + }) + const form = useForm>({ + resolver: zodResolver(FormSchema), + defaultValues: { schema: '' }, + }) + + const { mutateAsync: createSchema } = useSchemaCreateMutation() + const { mutateAsync: importForeignSchema } = useFDWImportForeignSchemaMutation() + + const onSubmit: SubmitHandler> = async (values) => { + if (!projectRef) return console.error('Project ref is required') + + try { + setIsCreating(true) + + await createSchema({ + projectRef, + connectionString: project?.connectionString, + name: values.schema, + }) + + await importForeignSchema({ + projectRef, + connectionString: project?.connectionString, + serverName: serverName, + sourceSchema: namespace, + targetSchema: values.schema, + }) + + toast.success( + `Successfully created "${values.schema}" schema! Data from tables in the "${namespace}" namespace can now be queried from there.` + ) + setIsOpen(false) + } catch (error: any) { + toast.error(`Failed to expose tables: ${error.message}`) + } finally { + setIsCreating(false) + } + } + + return ( + + + + + + +
+ + Query this namespace from Postgres + + + +

+ Iceberg data can be queried from Postgres with the Iceberg Foreign Data Wrapper. + Create a Postgres schema to expose tables from the "{namespace}" namespace as + foreign tables. +

+ ( + + + + )} + /> +
+ + + + + +
+
+
+ ) +} diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/NamespaceWithTables/TableRowComponent.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/NamespaceWithTables/TableRowComponent.tsx index 905641ba86d8b..ffd58a652cf32 100644 --- a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/NamespaceWithTables/TableRowComponent.tsx +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/NamespaceWithTables/TableRowComponent.tsx @@ -1,5 +1,5 @@ import { uniq } from 'lodash' -import { Eye, Loader2, MoreVertical, Pause, Play, Trash } from 'lucide-react' +import { Eye, Loader2, MoreVertical, Pause, Play, Table2, Trash } from 'lucide-react' import Link from 'next/link' import { useMemo, useState } from 'react' import { toast } from 'sonner' @@ -16,12 +16,14 @@ import { useReplicationPipelineStatusQuery } from 'data/etl/pipeline-status-quer import { useUpdatePublicationMutation } from 'data/etl/publication-update-mutation' import { useStartPipelineMutation } from 'data/etl/start-pipeline-mutation' import { useReplicationTablesQuery } from 'data/etl/tables-query' +import { useFDWDropForeignTableMutation } from 'data/fdw/fdw-drop-foreign-table-mutation' import { useFDWUpdateMutation } from 'data/fdw/fdw-update-mutation' import { useIcebergNamespaceTableDeleteMutation } from 'data/storage/iceberg-namespace-table-delete-mutation' import { useSelectedProjectQuery } from 'hooks/misc/useSelectedProject' -import { SqlEditor } from 'icons' +import { SqlEditor, TableEditor } from 'icons' import { Button, + cn, DropdownMenu, DropdownMenuContent, DropdownMenuItem, @@ -34,6 +36,7 @@ import { TooltipTrigger, } from 'ui' import { ConfirmationModal } from 'ui-patterns/Dialogs/ConfirmationModal' +import { HIDE_REPLICATION_USER_FLOW } from '../AnalyticsBucketDetails.constants' import { getAnalyticsBucketFDWServerName, getNamespaceTableNameFromPostgresTableName, @@ -46,15 +49,9 @@ interface TableRowComponentProps { table: { id: number; name: string; isConnected: boolean } schema: string namespace: string - isLoading?: boolean } -export const TableRowComponent = ({ - table, - schema, - namespace, - isLoading, -}: TableRowComponentProps) => { +export const TableRowComponent = ({ table, schema, namespace }: TableRowComponentProps) => { const { ref: projectRef, bucketId } = useParams() const { data: project } = useSelectedProjectQuery() @@ -64,7 +61,7 @@ export const TableRowComponent = ({ const [isUpdatingReplication, setIsUpdatingReplication] = useState(false) const [isRemovingTable, setIsRemovingTable] = useState(false) - const { sourceId, publication, pipeline } = useAnalyticsBucketAssociatedEntities({ + const { sourceId, publication, pipeline, icebergWrapper } = useAnalyticsBucketAssociatedEntities({ projectRef, bucketId, }) @@ -80,9 +77,9 @@ export const TableRowComponent = ({ }) const { mutateAsync: updateFDW } = useFDWUpdateMutation() - const { mutateAsync: deleteNamespaceTable } = useIcebergNamespaceTableDeleteMutation({ - projectRef, - }) + const { mutateAsync: dropForeignTable } = useFDWDropForeignTableMutation() + const { mutateAsync: deleteNamespaceTable, isLoading: isDeletingNamespaceTable } = + useIcebergNamespaceTableDeleteMutation({ projectRef, onError: () => {} }) const { mutateAsync: updatePublication } = useUpdatePublicationMutation() const { mutateAsync: startPipeline } = useStartPipelineMutation() @@ -179,6 +176,7 @@ export const TableRowComponent = ({ } } + 
// [Joshen] For ETL replication context const onConfirmRemoveTable = async () => { if (!bucketId) return console.error('Bucket ID is required') if (!wrapperInstance || !wrapperMeta) return toast.error('Unable to find wrapper') @@ -243,11 +241,57 @@ export const TableRowComponent = ({ } } + const connectedForeignTablesInNamespace = (icebergWrapper?.tables ?? []).filter((x) => + x.options[0].includes(`table=${namespace}.`) + ) + + const connectedForeignTables = (icebergWrapper?.tables ?? []).filter( + (x) => x.options[0] === `table=${namespace}.${table.name}` + ) + + // [Joshen] For purely Analytics Bucket context + const onConfirmRemoveNamespaceTable = async () => { + try { + setIsRemovingTable(true) + const wrapperValues = convertKVStringArrayToJson(wrapperInstance?.server_options ?? []) + await deleteNamespaceTable({ + catalogUri: wrapperValues.catalog_uri, + warehouse: wrapperValues.warehouse, + namespace: namespace, + table: table.name, + }) + + await Promise.all( + connectedForeignTables.map((x) => + dropForeignTable({ + projectRef, + connectionString: project?.connectionString, + schemaName: x.schema, + tableName: x.name, + }) + ) + ) + + toast.success(`Successfully removed table "${table.name}"!`) + } catch (error: any) { + toast.error(`Failed to remove table: ${error.message}`) + } finally { + setIsRemovingTable(false) + } + } + return ( <> - {table.name} - {!!hasReplication && ( + +
+
+ +
+

{table.name}

+
+
+ {!HIDE_REPLICATION_USER_FLOW && !!hasReplication && (
@@ -280,29 +324,17 @@ export const TableRowComponent = ({ )} - {table.isConnected && ( + {!HIDE_REPLICATION_USER_FLOW && table.isConnected ? ( + // [Joshen] These are if there's the context of replication which we're currently not doing + // May need to clean up if we decided to move forward de-coupling replication and Analytics Buckets <> - - - )} + +
+ {HIDE_REPLICATION_USER_FLOW ? ( + <> + {/* Is this just the import foreign schema dialog then? */} + {connectedForeignTablesForNamespace.length === 0 ? ( + + ) : hasUnconnectedForeignTablesForNamespace ? ( + + ) : null} + + + + ) : null} +
@@ -240,7 +346,7 @@ export const NamespaceWithTables = ({ - Name + Table name {!!publication && ( @@ -269,20 +375,34 @@ export const NamespaceWithTables = ({ table={table} namespace={namespace} schema={displaySchema} - isLoading={isImportingForeignSchema || isLoadingNamespaceTables} /> )) )} + setImportForeignSchemaShown(false)} /> + + setShowConfirmDeleteNamespace(false)} + onConfirm={() => onConfirmDeleteNamespace()} + > +

+ This will remove all Iceberg tables under the namespace, as well as any associated foreign + tables. Are you sure? +

+
) } diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/UpdateForeignSchemaDialog.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/UpdateForeignSchemaDialog.tsx new file mode 100644 index 0000000000000..909e46bde548e --- /dev/null +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/UpdateForeignSchemaDialog.tsx @@ -0,0 +1,169 @@ +import { zodResolver } from '@hookform/resolvers/zod' +import { useState } from 'react' +import { SubmitHandler, useForm } from 'react-hook-form' +import { toast } from 'sonner' +import z from 'zod' + +import { useParams } from 'common' +import { InlineLinkClassName } from 'components/ui/InlineLink' +import { useFDWImportForeignSchemaMutation } from 'data/fdw/fdw-import-foreign-schema-mutation' +import { useSelectedProjectQuery } from 'hooks/misc/useSelectedProject' +import { + Button, + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogSection, + DialogSectionSeparator, + DialogTitle, + DialogTrigger, + Form_Shadcn_, + FormField_Shadcn_, + Select_Shadcn_, + SelectContent_Shadcn_, + SelectItem_Shadcn_, + SelectTrigger_Shadcn_, + SelectValue_Shadcn_, + Tooltip, + TooltipContent, + TooltipTrigger, +} from 'ui' +import { FormItemLayout } from 'ui-patterns/form/FormItemLayout/FormItemLayout' +import { getAnalyticsBucketFDWServerName } from './AnalyticsBucketDetails.utils' +import { useAnalyticsBucketAssociatedEntities } from './useAnalyticsBucketAssociatedEntities' + +export const UpdateForeignSchemaDialog = ({ + namespace, + tables, +}: { + namespace: string + tables: string[] +}) => { + const { ref: projectRef, bucketId } = useParams() + const { data: project } = useSelectedProjectQuery() + + const [isOpen, setIsOpen] = useState(false) + + const { icebergWrapper } = useAnalyticsBucketAssociatedEntities({ bucketId }) + const connectedForeignTablesForNamespace = (icebergWrapper?.tables ?? []).filter((x) => + x.options[0].startsWith(`table=${namespace}.`) + ) + const schemasAssociatedWithNamespace = [ + ...new Set(connectedForeignTablesForNamespace.map((x) => x.schema)), + ] + + const serverName = getAnalyticsBucketFDWServerName(bucketId ?? '') + + const FormSchema = z.object({ + schema: z.string().trim().min(1, 'Schema name is required'), + }) + const form = useForm>({ + resolver: zodResolver(FormSchema), + defaultValues: { schema: schemasAssociatedWithNamespace[0] }, + values: { schema: schemasAssociatedWithNamespace[0] }, + }) + + const { mutateAsync: importForeignSchema, isLoading: isUpdating } = + useFDWImportForeignSchemaMutation() + + const onSubmit: SubmitHandler> = async (values) => { + if (!projectRef) return console.error('Project ref is required') + + try { + await importForeignSchema({ + projectRef, + connectionString: project?.connectionString, + serverName: serverName, + sourceSchema: namespace, + targetSchema: values.schema, + }) + + toast.success(`Successfully updated "${values.schema}" schema!`) + setIsOpen(false) + } catch (error: any) { + toast.error(`Failed to update schema: ${error.message}`) + } + } + + return ( + + + + + + +
+ + Update schema to expose foreign tables + + + +

+ {tables.length > 1 ? ( + <> + There are{' '} + + + {tables.length} tables + + + {tables.join(', ')} + + {' '} + that aren't included in the Iceberg Foreign Data Wrapper. Update the wrapper to + create foreign tables for all unexposed tables. This will let you query the + tables from Postgres. + + ) : ( + `The table "${tables[0]}" in the "${namespace}" namespace is not yet included in the Iceberg Foreign Data Wrapper. The schema can be updated to expose this table as a foreign table.` + )} +

+ + {schemasAssociatedWithNamespace.length > 1 ? ( + ( + + field.onChange(val)} + > + + + + + {schemasAssociatedWithNamespace.map((x) => ( + + {x} + + ))} + + + + )} + /> + ) : ( +

+ Confirm updating the foreign schema on the "{namespace}" namespace?

+ )} +
+ + + + + +
+
+
+ ) +} diff --git a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/index.tsx b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/index.tsx index 48d8a950d97d9..877b0d0006d4f 100644 --- a/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/index.tsx +++ b/apps/studio/components/interfaces/Storage/AnalyticsBuckets/AnalyticsBucketDetails/index.tsx @@ -34,12 +34,13 @@ import { Admonition } from 'ui-patterns/admonition' import { GenericTableLoader } from 'ui-patterns/ShimmeringLoader' import { DeleteAnalyticsBucketModal } from '../DeleteAnalyticsBucketModal' import { useSelectedAnalyticsBucket } from '../useSelectedAnalyticsBucket' +import { HIDE_REPLICATION_USER_FLOW } from './AnalyticsBucketDetails.constants' import { BucketHeader } from './BucketHeader' import { ConnectTablesDialog } from './ConnectTablesDialog' +import { CreateTableInstructions } from './CreateTableInstructions' import { NamespaceWithTables } from './NamespaceWithTables' import { SimpleConfigurationDetails } from './SimpleConfigurationDetails' import { useAnalyticsBucketAssociatedEntities } from './useAnalyticsBucketAssociatedEntities' -import { useAnalyticsBucketWrapperInstance } from './useAnalyticsBucketWrapperInstance' import { useIcebergWrapperExtension } from './useIcebergWrapper' export const AnalyticBucketDetails = () => { @@ -67,10 +68,12 @@ export const AnalyticBucketDetails = () => { const { mutateAsync: startPipeline, isPending: isStartingPipeline } = useStartPipelineMutation() - /** The wrapper instance is the wrapper that is installed for this Analytics bucket. */ - const { data: wrapperInstance, isLoading: isLoadingWrapperInstance } = - useAnalyticsBucketWrapperInstance({ bucketId: bucket?.name }) - const { publication, pipeline } = useAnalyticsBucketAssociatedEntities({ + const { + publication, + pipeline, + icebergWrapper: wrapperInstance, + isLoadingWrapperInstance, + } = useAnalyticsBucketAssociatedEntities({ projectRef, bucketId: bucket?.name, }) @@ -111,10 +114,8 @@ export const AnalyticBucketDetails = () => { const { data: namespacesData = [], - error: namespacesError, isLoading: isLoadingNamespaces, isSuccess: isSuccessNamespaces, - isError: isErrorNamespaces, } = useIcebergNamespacesQuery( { projectRef, @@ -179,7 +180,7 @@ export const AnalyticBucketDetails = () => { {state === 'loading' ? ( - + ) : state === 'not-installed' ? ( { {isLoadingNamespaces || isLoadingWrapperInstance ? ( - ) : isErrorNamespaces ? ( - ) : namespaces.length === 0 ? ( <> - {isPollingForData ? ( + {HIDE_REPLICATION_USER_FLOW ? ( + + ) : isPollingForData ? (