Skip to content

Commit d5b6c81

Browse files
Workflow to list and update TSDB GUCs in the docs (#4222)
* Workflow to list and update TSDB GUCs in the docs * something * chore: suggestion for Philip. (#4223) * fixed order and some cosmetics * fixed order and some cosmetics * fixed order and some cosmetics * fixed order and some cosmetics * chore: cleanup on merge. * cosmetics --------- Co-authored-by: Iain Cox <[email protected]> Co-authored-by: billy-the-fish <[email protected]>
1 parent 3ee1061 commit d5b6c81

File tree

11 files changed

+373
-80
lines changed

11 files changed

+373
-80
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: "TimescaleDB: Update GUCs list"
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
tag:
7+
description: 'Tag to refesh list from'
8+
required: true
9+
type: string
10+
11+
permissions:
12+
contents: write
13+
pull-requests: write
14+
15+
jobs:
16+
update-gucs-list:
17+
runs-on: ubuntu-latest
18+
steps:
19+
- uses: actions/checkout@v4
20+
- uses: actions/setup-python@v5
21+
with:
22+
python-version: '3.13'
23+
cache: 'pip' # caching pip dependencies
24+
25+
- name: Update list of GUCs
26+
run: |
27+
pip install -r ./helper-scripts/timescaledb/requirements.txt
28+
python ./helper-scripts/timescaledb/generate_guc_overview.py "${{ github.event.inputs.tag }}" ./_partials/_timescaledb-gucs.md
29+
30+
- name: Create Pull Request
31+
uses: peter-evans/create-pull-request@v7
32+
with:
33+
token: ${{ secrets.ORG_AUTOMATION_TOKEN }}
34+
add-paths: |
35+
_partials/_timescaledb-gucs.md
36+
delete-branch: true
37+
title: "Updated list of GUCs from TimescaleDB ${{ github.event.inputs.tag }}"
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Generate Overview page of available GUCs in TimescaleDB with descriptions
5+
#
6+
# Args:
7+
# tag: tag to pull the guc.c from
8+
#
9+
10+
import argparse
11+
import requests
12+
import re
13+
import logging
14+
15+
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)

# Command-line interface: which release tag to pull guc.c from and where to
# write the rendered output.
parser = argparse.ArgumentParser()
parser.add_argument('tag', type=str, help='tag name to pull guc.c')
parser.add_argument('destination', type=str, help='file name to add output')
args = parser.parse_args()

# Map each DefineCustom*Variable call name to the type label rendered in docs.
TYPES = {
    "DefineCustomBoolVariable": "BOOLEAN",
    "DefineCustomIntVariable": "INTEGER",
    "DefineCustomEnumVariable": "ENUM",
    "DefineCustomStringVariable": "STRING",
}

# List of GUCs to exclude from the docs
EXCLUDE = []
31+
32+
"""
33+
Fetch the guc.c content from GitHub
34+
@param url: str
35+
@return str
36+
"""
37+
def get_content(url: str) -> str:
38+
resp = requests.get(url=url)
39+
if resp.status_code != 200:
40+
logging.error("can not fetch: %s" % url)
41+
exit(10)
42+
return resp.text
43+
44+
"""
45+
Unwrap parsed GUCs into a map with GUC name as key and the value with the
46+
extracted values from the GUC:
47+
/* name= */,
48+
/* short_desc= */,
49+
/* long_desc= */,
50+
/* valueAddr= */,
51+
/* Value= */,
52+
/* context= */,
53+
/* flags= */,
54+
/* check_hook= */,
55+
/* assign_hook= */,
56+
/* show_hook= */
57+
@param gucs: list
58+
@param guc_type: str
59+
@return dict
60+
"""
61+
def unwrap(gucs: list, guc_type: str) -> dict:
62+
map = {}
63+
64+
for guc in gucs:
65+
# sanitize data
66+
it = [re.sub(r"[\n\t]*", "", v).strip() for v in guc.split(",")]
67+
68+
# sanitize elements
69+
name = re.sub(r"[\"\(\)]*", "", it[0])
70+
short_desc = sanitize_description(it[1])
71+
long_desc = short_desc if it[2].lower() == "null" else sanitize_description(it[2])
72+
73+
# Exclude GUCs (if specified)
74+
if name not in EXCLUDE:
75+
map[name] = {
76+
"name": name,
77+
"short_desc": short_desc,
78+
"long_desc": long_desc,
79+
"value": get_value(guc_type, it),
80+
"type": guc_type,
81+
"scopes": [], # assigned later during scope discovery
82+
}
83+
84+
logging.info("registered %d GUCs of type: %s" % (len(map), guc_type))
85+
return map
86+
87+
def sanitize_description(text) -> str:
    """Drop double quotes and collapse all whitespace runs to single spaces."""
    without_quotes = text.replace('"', '')
    words = without_quotes.split()
    return ' '.join(words).strip()
90+
91+
def strip_comment_pattern(text) -> str:
    """Remove C-style `/* name= */` marker comments after unwrapping gettext_noop()."""
    unwrapped = extract_gettext_noop_string(text)
    return re.sub(r'/\*\s*[a-zA-Z0-9_]*=\s*\*/', '', unwrapped)
94+
95+
def extract_gettext_noop_string(text):
    """Return the string literal wrapped in gettext_noop(...), or *text* unchanged."""
    found = re.search(
        r'gettext_noop\s*\(\s*"([^"]*(?:\\.[^"]*)*)"\s*\)',
        text,
        re.DOTALL,
    )
    if found is None:
        return text
    return found.group(1)
99+
100+
def get_value(type: str, parts: list) -> str:
    """
    Get the default value of the GUC based on the type.

    Boolean GUC definitions carry their default at index 4 when index 5 is
    already the PGC_* context argument; every other case keeps the default at
    index 5.

    @param type: rendered GUC type label, e.g. "BOOLEAN"
    @param parts: comma-split arguments of the DefineCustom*Variable call
    @return sanitized default value
    """
    # NOTE: the parameter shadows the builtin `type`; the name is kept to
    # preserve the existing call interface.
    if type == "BOOLEAN" and parts[5].upper().startswith("PGC_"):
        return strip_comment_pattern(parts[4]).strip()
    # All remaining cases (non-boolean, or boolean default at index 5).
    return strip_comment_pattern(parts[5]).strip()
110+
111+
"""
112+
Parse GUCs and prepare them for rendering
113+
@param content: str
114+
@return dict
115+
"""
116+
def prepare(content: str) -> dict:
117+
map = {}
118+
119+
# Find all GUCs based on patterns and prepare them in a dict
120+
for pattern, val in TYPES.items():
121+
map.update(unwrap(re.findall(r"%s\(MAKE_EXTOPTION(.*?)\);" % pattern, content, re.DOTALL), val))
122+
123+
# TODO: find scopes
124+
# https://github.com/timescale/timescaledb/blob/2.19.x/src/guc.c#L797
125+
126+
127+
# Return dict with alphabetically sorted keys
128+
return {i: map[i] for i in sorted(map.keys())}
129+
130+
"""
131+
Render the GUCs to file
132+
"""
133+
def render(gucs: dict, filename: str):
134+
with open(filename, "w") as f:
135+
f.write("| Name | Type | Default | Long Description |\n")
136+
f.write("| -- | -- | -- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n")
137+
for guc in gucs.values():
138+
f.write("| `%s` | `%s` | `%s` | %s |\n" % (
139+
guc["name"], guc["type"], guc["value"], guc["long_desc"]
140+
))
141+
logging.info("rendering completed to %s" % filename)
142+
143+
"""
144+
Main
145+
"""
146+
if __name__ == "__main__":
147+
content = get_content("https://raw.githubusercontent.com/timescale/timescaledb/refs/tags/%s/src/guc.c" % args.tag)
148+
logging.info("fetched guc.c file for version: %s" % args.tag)
149+
gucs = prepare(content)
150+
render(gucs, args.destination)
151+
152+
# print(gucs)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests==2.32.3

_partials/_timescaledb-config.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import ConfigCloudSelf from "versionContent/_partials/_cloud_self_configuration.mdx";
2+
3+
Just as you can tune settings in $PG, $TIMESCALE_DB provides a number of configuration
4+
settings that may be useful to your specific installation and performance needs. These can
5+
also be set within the `postgresql.conf` file or as command-line parameters
6+
when starting $PG.
7+
8+
## Query Planning and Execution
9+
10+
### `timescaledb.enable_chunkwise_aggregation (bool)`
11+
If enabled, aggregations are converted into partial aggregations during query
12+
planning. The first part of the aggregation is executed on a per-chunk basis.
13+
Then, these partial results are combined and finalized. Splitting aggregations
14+
decreases the size of the created hash tables and increases data locality, which
15+
speeds up queries.
16+
17+
### `timescaledb.vectorized_aggregation (bool)`
18+
Enables or disables the vectorized optimizations in the query executor. For
19+
example, the `sum()` aggregation function on compressed chunks can be optimized
20+
in this way.
21+
22+
### `timescaledb.enable_merge_on_cagg_refresh (bool)`
23+
24+
Set to `ON` to dramatically decrease the amount of data written on a continuous aggregate
25+
in the presence of a small number of changes, reduce the i/o cost of refreshing a
26+
[continuous aggregate][continuous-aggregates], and generate fewer Write-Ahead Logs (WAL). Only works for continuous aggregates that don't have compression enabled.
27+
28+
<ConfigCloudSelf />
29+
30+
## Administration
31+
32+
### `timescaledb.restoring (bool)`
33+
34+
Set TimescaleDB in restoring mode. It is disabled by default.
35+
36+
### `timescaledb.license (string)`
37+
38+
Change access to features based on the TimescaleDB license in use. For example,
39+
setting `timescaledb.license` to `apache` limits TimescaleDB to features that
40+
are implemented under the Apache 2 license. The default value is `timescale`,
41+
which allows access to all features.
42+
43+
### `timescaledb.telemetry_level (enum)`
44+
45+
Telemetry settings level. Level used to determine which telemetry to
46+
send. Can be set to `off` or `basic`. Defaults to `basic`.
47+
48+
### `timescaledb.last_tuned (string)`
49+
50+
Records last time `timescaledb-tune` ran.
51+
52+
### `timescaledb.last_tuned_version (string)`
53+
54+
Version of `timescaledb-tune` used to tune when it runs.
55+
56+
[continuous-aggregates]: /use-timescale/:currentVersion:/continuous-aggregates/

_partials/_timescaledb-gucs.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
| Name | Type | Default | Long Description |
2+
| -- | -- | -- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
3+
| `auto_sparse_indexes` | `BOOLEAN` | `true` | The hypertable columns that are used as index keys will have suitable sparse indexes when compressed. Must be set at the moment of chunk compression |
4+
| `bgw_log_level` | `ENUM` | `loglevel_options` | Log level for the scheduler and workers of the background worker subsystem. Requires configuration reload to change. |
5+
| `compress_truncate_behaviour` | `ENUM` | `compress_truncate_behaviour_options` | Defines how truncate behaves at the end of compression. 'truncate_only' forces truncation. 'truncate_disabled' deletes rows instead of truncate. 'truncate_or_delete' allows falling back to deletion. |
6+
| `compression_batch_size_limit` | `INTEGER` | `1` | Setting this option to a number between 1 and 999 will force compression to limit the size of compressed batches to that amount of uncompressed tuples.Setting this to 0 defaults to the max batch size of 1000. |
7+
| `default_hypercore_use_access_method` | `BOOLEAN` | `PGC_USERSET` | gettext_noop(Sets the global default for using Hypercore TAM when compressing chunks.) |
8+
| `enable_bool_compression` | `BOOLEAN` | `true` | Enable bool compression |
9+
| `enable_bulk_decompression` | `BOOLEAN` | `true` | Increases throughput of decompression |
10+
| `enable_cagg_reorder_groupby` | `BOOLEAN` | `true` | Enable group by clause reordering for continuous aggregates |
11+
| `enable_cagg_sort_pushdown` | `BOOLEAN` | `true` | Enable pushdown of ORDER BY clause for continuous aggregates |
12+
| `enable_cagg_watermark_constify` | `BOOLEAN` | `true` | Enable constifying cagg watermark for real-time caggs |
13+
| `enable_cagg_window_functions` | `BOOLEAN` | `false` | Allow window functions in continuous aggregate views |
14+
| `enable_chunk_append` | `BOOLEAN` | `true` | Enable using chunk append node |
15+
| `enable_chunk_skipping` | `BOOLEAN` | `false` | Enable using chunk column stats to filter chunks based on column filters |
16+
| `enable_chunkwise_aggregation` | `BOOLEAN` | `true` | Enable the pushdown of aggregations to the chunk level |
17+
| `enable_columnarscan` | `BOOLEAN` | `true` | A columnar scan replaces sequence scans for columnar-oriented storage and enables storage-specific optimizations like vectorized filters. Disabling columnar scan will make PostgreSQL fall back to regular sequence scans. |
18+
| `enable_compressed_direct_batch_delete` | `BOOLEAN` | `true` | Enable direct batch deletion in compressed chunks |
19+
| `enable_compressed_skipscan` | `BOOLEAN` | `true` | Enable SkipScan for distinct inputs over compressed chunks |
20+
| `enable_compression_indexscan` | `BOOLEAN` | `false` | Enable indexscan during compression |
21+
| `enable_compression_ratio_warnings` | `BOOLEAN` | `true` | Enable warnings for poor compression ratio |
22+
| `enable_compression_wal_markers` | `BOOLEAN` | `true` | Enable the generation of markers in the WAL stream which mark the start and end of compression operations |
23+
| `enable_compressor_batch_limit` | `BOOLEAN` | `false` | Enable compressor batch limit for compressors which can go over the allocation limit (1 GB). This feature will limit those compressors by reducing the size of the batch and thus avoid hitting the limit. |
24+
| `enable_constraint_aware_append` | `BOOLEAN` | `true` | Enable constraint exclusion at execution time |
25+
| `enable_constraint_exclusion` | `BOOLEAN` | `true` | Enable planner constraint exclusion |
26+
| `enable_custom_hashagg` | `BOOLEAN` | `false` | Enable creating custom hash aggregation plans |
27+
| `enable_decompression_sorted_merge` | `BOOLEAN` | `true` | Enable the merge of compressed batches to preserve the compression order by |
28+
| `enable_delete_after_compression` | `BOOLEAN` | `false` | Delete all rows after compression instead of truncate |
29+
| `enable_deprecation_warnings` | `BOOLEAN` | `true` | Enable warnings when using deprecated functionality |
30+
| `enable_dml_decompression` | `BOOLEAN` | `true` | Enable DML decompression when modifying compressed hypertable |
31+
| `enable_dml_decompression_tuple_filtering` | `BOOLEAN` | `true` | Recheck tuples during DML decompression to only decompress batches with matching tuples |
32+
| `enable_event_triggers` | `BOOLEAN` | `false` | Enable event triggers for chunks creation |
33+
| `enable_exclusive_locking_recompression` | `BOOLEAN` | `false` | Enable getting exclusive lock on chunk during segmentwise recompression |
34+
| `enable_foreign_key_propagation` | `BOOLEAN` | `true` | Adjust foreign key lookup queries to target whole hypertable |
35+
| `enable_job_execution_logging` | `BOOLEAN` | `false` | Retain job run status in logging table |
36+
| `enable_merge_on_cagg_refresh` | `BOOLEAN` | `false` | Enable MERGE statement on cagg refresh |
37+
| `enable_now_constify` | `BOOLEAN` | `true` | Enable constifying now() in query constraints |
38+
| `enable_null_compression` | `BOOLEAN` | `true` | Enable null compression |
39+
| `enable_optimizations` | `BOOLEAN` | `true` | Enable TimescaleDB query optimizations |
40+
| `enable_ordered_append` | `BOOLEAN` | `true` | Enable ordered append optimization for queries that are ordered by the time dimension |
41+
| `enable_parallel_chunk_append` | `BOOLEAN` | `true` | Enable using parallel aware chunk append node |
42+
| `enable_qual_propagation` | `BOOLEAN` | `true` | Enable propagation of qualifiers in JOINs |
43+
| `enable_runtime_exclusion` | `BOOLEAN` | `true` | Enable runtime chunk exclusion in ChunkAppend node |
44+
| `enable_segmentwise_recompression` | `BOOLEAN` | `true` | Enable segmentwise recompression |
45+
| `enable_skipscan` | `BOOLEAN` | `true` | Enable SkipScan for DISTINCT queries |
46+
| `enable_skipscan_for_distinct_aggregates` | `BOOLEAN` | `true` | Enable SkipScan for DISTINCT aggregates |
47+
| `enable_sparse_index_bloom` | `BOOLEAN` | `true` | This sparse index speeds up the equality queries on compressed columns |
48+
| `enable_tiered_reads` | `BOOLEAN` | `true` | Enable reading of tiered data by including a foreign table representing the data in the object storage into the query plan |
49+
| `enable_transparent_decompression` | `ENUM` | `transparent_decompression_options` | Enable transparent decompression when querying hypertable |
50+
| `enable_tss_callbacks` | `BOOLEAN` | `true` | Enable ts_stat_statements callbacks |
51+
| `enable_vectorized_aggregation` | `BOOLEAN` | `true` | Enable vectorized aggregation for compressed data |
52+
| `hypercore_copy_to_behavior` | `ENUM` | `hypercore_copy_to_options` | Set to 'all_data' to return both compressed and uncompressed data via the Hypercore table when using COPY TO. Set to 'no_compressed_data' to skip compressed data. |
53+
| `hypercore_indexam_whitelist` | `STRING` | `hash"` | gettext_noop( List of index access method names supported by hypercore.) |
54+
| `materializations_per_refresh_window` | `INTEGER` | `10` | The maximal number of individual refreshes per cagg refresh. If more refreshes need to be performed |
55+
| `max_cached_chunks_per_hypertable` | `INTEGER` | `0` | Maximum number of chunks stored in the cache |
56+
| `max_open_chunks_per_insert` | `INTEGER` | `0` | Maximum number of open chunk tables per insert |
57+
| `max_tuples_decompressed_per_dml_transaction` | `INTEGER` | `an error will ""be thrown and transaction rolled back. ""Setting this to 0 sets this value to unlimited number of ""tuples decompressed."` | UPDATE |
58+
| `restoring` | `BOOLEAN` | `false` | In restoring mode all timescaledb internal hooks are disabled. This mode is required for restoring logical dumps of databases with timescaledb. |
59+
| `telemetry_level` | `ENUM` | `telemetry_level_options` | Level used to determine which telemetry to send |

api/configuration.md

Lines changed: 0 additions & 15 deletions
This file was deleted.

api/configuration/gucs.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
title: Grand Unified Configuration (GUC) parameters
3+
excerpt: Optimize the behavior of TimescaleDB using Grand Unified Configuration (GUC) parameters
4+
keywords: [GUC, Configuration]
5+
---
6+
7+
import TsdbGucsList from "versionContent/_partials/_timescaledb-gucs.mdx";
8+
9+
# Grand Unified Configuration (GUC) parameters
10+
11+
You use the following Grand Unified Configuration (GUC) parameters to optimize the behavior of your $SERVICE_LONG.
12+
13+
<TsdbGucsList />

api/configuration/index.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Service configuration
3+
excerpt: Use the default PostgreSQL server configuration settings for your Tiger Cloud service, or customize them as needed
4+
keywords: [configure]
5+
products: [self_hosted, cloud]
6+
---
7+
8+
# $SERVICE_LONG configuration
9+
10+
$SERVICE_LONG use the default $PG server configuration settings. You can optimize your $SERVICE_SHORT configuration
11+
using the following $TIMESCALE_DB and Grand Unified Configuration (GUC) parameters.
12+
13+
* [$TIGER_POSTGRES configuration and tuning][tigerpostgres-config]
14+
* [Grand Unified Configuration (GUC) parameters][gucs]
15+
16+
[tigerpostgres-config]: /api/:currentVersion:/configuration/tiger-postgres/
17+
[gucs]: /api/:currentVersion:/configuration/gucs/

api/configuration/tiger-postgres.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
---
2+
title: Tiger Postgres configuration and tuning
3+
excerpt: Configure the Tiger Postgres settings related to policies, query planning and execution, distributed
4+
hypertables, and
5+
administration
6+
products: [cloud]
7+
keywords: [configuration, settings]
8+
tags: [tune]
9+
---
10+
11+
import TimescaleDBConfig from "versionContent/_partials/_timescaledb-config.mdx";
12+
13+
# $TIGER_POSTGRES configuration and tuning
14+
15+
Just as you can tune settings in $PG, $TIGER_POSTGRES provides a number of configuration
16+
settings that may be useful to your specific installation and performance needs.
17+
18+
<TimescaleDBConfig />
19+

0 commit comments

Comments
 (0)