Skip to content

Commit d5b6c81

Browse files
Workflow to list and update TSDB GUCs in the docs (#4222)
* Workflow to list and update TSDB GUCs in the docs * something * chore: suggestion for Philip. (#4223) * fixed order and some cosmetics * fixed order and some cosmetics * fixed order and some cosmetics * fixed order and some cosmetics * chore: cleanup on merge. * cosmetics --------- Co-authored-by: Iain Cox <[email protected]> Co-authored-by: billy-the-fish <[email protected]>
1 parent 3ee1061 commit d5b6c81

File tree

11 files changed

+373
-80
lines changed

11 files changed

+373
-80
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: "TimescaleDB: Update GUCs list"
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
tag:
7+
description: 'Tag to refesh list from'
8+
required: true
9+
type: string
10+
11+
permissions:
12+
contents: write
13+
pull-requests: write
14+
15+
jobs:
16+
update-gucs-list:
17+
runs-on: ubuntu-latest
18+
steps:
19+
- uses: actions/checkout@v4
20+
- uses: actions/setup-python@v5
21+
with:
22+
python-version: '3.13'
23+
cache: 'pip' # caching pip dependencies
24+
25+
- name: Update list of GUCs
26+
run: |
27+
pip install -r ./helper-scripts/timescaledb/requirements.txt
28+
python ./helper-scripts/timescaledb/generate_guc_overview.py "${{ github.event.inputs.tag }}" ./_partials/_timescaledb-gucs.md
29+
30+
- name: Create Pull Request
31+
uses: peter-evans/create-pull-request@v7
32+
with:
33+
token: ${{ secrets.ORG_AUTOMATION_TOKEN }}
34+
add-paths: |
35+
_partials/_timescaledb-gucs.md
36+
delete-branch: true
37+
title: "Updated list of GUCs from TimescaleDB ${{ github.event.inputs.tag }}"
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Generate Overview page of available GUCs in TimescaleDB with descriptions
5+
#
6+
# Args:
7+
# tag: tag to pull the guc.c from
8+
#
9+
10+
import argparse
11+
import requests
12+
import re
13+
import logging
14+
15+
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)

# Command-line interface: which release tag to pull guc.c from and where to
# write the rendered output.
parser = argparse.ArgumentParser()
parser.add_argument('tag', type=str, help='tag name to pull guc.c')
parser.add_argument('destination', type=str, help='file name to add output')
args = parser.parse_args()

# Map each DefineCustom*Variable call name to the type label rendered in docs.
TYPES = {
    "DefineCustomBoolVariable": "BOOLEAN",
    "DefineCustomIntVariable": "INTEGER",
    "DefineCustomEnumVariable": "ENUM",
    "DefineCustomStringVariable": "STRING",
}

# List of GUCs to exclude from the docs
EXCLUDE = []
31+
32+
"""
33+
Fetch the guc.c content from GitHub
34+
@param url: str
35+
@return str
36+
"""
37+
def get_content(url: str) -> str:
38+
resp = requests.get(url=url)
39+
if resp.status_code != 200:
40+
logging.error("can not fetch: %s" % url)
41+
exit(10)
42+
return resp.text
43+
44+
"""
45+
Unwrap parsed GUCs into a map with GUC name as key and the value with the
46+
extracted values from the GUC:
47+
/* name= */,
48+
/* short_desc= */,
49+
/* long_desc= */,
50+
/* valueAddr= */,
51+
/* Value= */,
52+
/* context= */,
53+
/* flags= */,
54+
/* check_hook= */,
55+
/* assign_hook= */,
56+
/* show_hook= */
57+
@param gucs: list
58+
@param guc_type: str
59+
@return dict
60+
"""
61+
def unwrap(gucs: list, guc_type: str) -> dict:
62+
map = {}
63+
64+
for guc in gucs:
65+
# sanitize data
66+
it = [re.sub(r"[\n\t]*", "", v).strip() for v in guc.split(",")]
67+
68+
# sanitize elements
69+
name = re.sub(r"[\"\(\)]*", "", it[0])
70+
short_desc = sanitize_description(it[1])
71+
long_desc = short_desc if it[2].lower() == "null" else sanitize_description(it[2])
72+
73+
# Exclude GUCs (if specified)
74+
if name not in EXCLUDE:
75+
map[name] = {
76+
"name": name,
77+
"short_desc": short_desc,
78+
"long_desc": long_desc,
79+
"value": get_value(guc_type, it),
80+
"type": guc_type,
81+
"scopes": [], # assigned later during scope discovery
82+
}
83+
84+
logging.info("registered %d GUCs of type: %s" % (len(map), guc_type))
85+
return map
86+
87+
def sanitize_description(text) -> str:
    """Drop double quotes and collapse all whitespace runs to single spaces."""
    without_quotes = text.replace('"', '')
    words = without_quotes.split()
    return ' '.join(words).strip()
90+
91+
def strip_comment_pattern(text) -> str:
    """Remove C-style `/* name= */` marker comments after unwrapping gettext_noop()."""
    unwrapped = extract_gettext_noop_string(text)
    return re.sub(r'/\*\s*[a-zA-Z0-9_]*=\s*\*/', '', unwrapped)
94+
95+
def extract_gettext_noop_string(text):
    """Return the string literal wrapped in gettext_noop(...), or *text* unchanged."""
    found = re.search(
        r'gettext_noop\s*\(\s*"([^"]*(?:\\.[^"]*)*)"\s*\)',
        text,
        re.DOTALL,
    )
    if found is None:
        return text
    return found.group(1)
99+
100+
def get_value(type: str, parts: list) -> str:
    """
    Get the default value of the GUC based on the type.

    Boolean GUC definitions carry their default at index 4 when index 5 is
    already the PGC_* context argument; every other case keeps the default at
    index 5.

    @param type: rendered GUC type label, e.g. "BOOLEAN"
    @param parts: comma-split arguments of the DefineCustom*Variable call
    @return sanitized default value
    """
    # NOTE: the parameter shadows the builtin `type`; the name is kept to
    # preserve the existing call interface.
    if type == "BOOLEAN" and parts[5].upper().startswith("PGC_"):
        return strip_comment_pattern(parts[4]).strip()
    # All remaining cases (non-boolean, or boolean default at index 5).
    return strip_comment_pattern(parts[5]).strip()
110+
111+
"""
112+
Parse GUCs and prepare them for rendering
113+
@param content: str
114+
@return dict
115+
"""
116+
def prepare(content: str) -> dict:
117+
map = {}
118+
119+
# Find all GUCs based on patterns and prepare them in a dict
120+
for pattern, val in TYPES.items():
121+
map.update(unwrap(re.findall(r"%s\(MAKE_EXTOPTION(.*?)\);" % pattern, content, re.DOTALL), val))
122+
123+
# TODO: find scopes
124+
# https://github.com/timescale/timescaledb/blob/2.19.x/src/guc.c#L797
125+
126+
127+
# Return dict with alphabetically sorted keys
128+
return {i: map[i] for i in sorted(map.keys())}
129+
130+
"""
131+
Render the GUCs to file
132+
"""
133+
def render(gucs: dict, filename: str):
134+
with open(filename, "w") as f:
135+
f.write("| Name | Type | Default | Long Description |\n")
136+
f.write("| -- | -- | -- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n")
137+
for guc in gucs.values():
138+
f.write("| `%s` | `%s` | `%s` | %s |\n" % (
139+
guc["name"], guc["type"], guc["value"], guc["long_desc"]
140+
))
141+
logging.info("rendering completed to %s" % filename)
142+
143+
"""
144+
Main
145+
"""
146+
if __name__ == "__main__":
147+
content = get_content("https://raw.githubusercontent.com/timescale/timescaledb/refs/tags/%s/src/guc.c" % args.tag)
148+
logging.info("fetched guc.c file for version: %s" % args.tag)
149+
gucs = prepare(content)
150+
render(gucs, args.destination)
151+
152+
# print(gucs)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests==2.32.3

_partials/_timescaledb-config.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import ConfigCloudSelf from "versionContent/_partials/_cloud_self_configuration.mdx";
2+
3+
Just as you can tune settings in $PG, $TIMESCALE_DB provides a number of configuration
4+
settings that may be useful to your specific installation and performance needs. These can
5+
also be set within the `postgresql.conf` file or as command-line parameters
6+
when starting $PG.
7+
8+
## Query Planning and Execution
9+
10+
### `timescaledb.enable_chunkwise_aggregation (bool)`
11+
If enabled, aggregations are converted into partial aggregations during query
12+
planning. The first part of the aggregation is executed on a per-chunk basis.
13+
Then, these partial results are combined and finalized. Splitting aggregations
14+
decreases the size of the created hash tables and increases data locality, which
15+
speeds up queries.
16+
17+
### `timescaledb.vectorized_aggregation (bool)`
18+
Enables or disables the vectorized optimizations in the query executor. For
19+
example, the `sum()` aggregation function on compressed chunks can be optimized
20+
in this way.
21+
22+
### `timescaledb.enable_merge_on_cagg_refresh (bool)`
23+
24+
Set to `ON` to dramatically decrease the amount of data written on a continuous aggregate
25+
in the presence of a small number of changes, reduce the i/o cost of refreshing a
26+
[continuous aggregate][continuous-aggregates], and generate fewer Write-Ahead Logs (WAL). Only works for continuous aggregates that don't have compression enabled.
27+
28+
<ConfigCloudSelf />
29+
30+
## Administration
31+
32+
### `timescaledb.restoring (bool)`
33+
34+
Set TimescaleDB in restoring mode. It is disabled by default.
35+
36+
### `timescaledb.license (string)`
37+
38+
Change access to features based on the TimescaleDB license in use. For example,
39+
setting `timescaledb.license` to `apache` limits TimescaleDB to features that
40+
are implemented under the Apache 2 license. The default value is `timescale`,
41+
which allows access to all features.
42+
43+
### `timescaledb.telemetry_level (enum)`
44+
45+
Telemetry settings level. Level used to determine which telemetry to
46+
send. Can be set to `off` or `basic`. Defaults to `basic`.
47+
48+
### `timescaledb.last_tuned (string)`
49+
50+
Records last time `timescaledb-tune` ran.
51+
52+
### `timescaledb.last_tuned_version (string)`
53+
54+
Version of `timescaledb-tune` used to tune when it runs.
55+
56+
[continuous-aggregates]: /use-timescale/:currentVersion:/continuous-aggregates/

_partials/_timescaledb-gucs.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
| Name | Type | Default | Long Description |
2+
| -- | -- | -- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
3+
| `auto_sparse_indexes` | `BOOLEAN` | `true` | The hypertable columns that are used as index keys will have suitable sparse indexes when compressed. Must be set at the moment of chunk compression |
4+
| `bgw_log_level` | `ENUM` | `loglevel_options` | Log level for the scheduler and workers of the background worker subsystem. Requires configuration reload to change. |
5+
| `compress_truncate_behaviour` | `ENUM` | `compress_truncate_behaviour_options` | Defines how truncate behaves at the end of compression. 'truncate_only' forces truncation. 'truncate_disabled' deletes rows instead of truncate. 'truncate_or_delete' allows falling back to deletion. |
6+
| `compression_batch_size_limit` | `INTEGER` | `1` | Setting this option to a number between 1 and 999 will force compression to limit the size of compressed batches to that amount of uncompressed tuples.Setting this to 0 defaults to the max batch size of 1000. |
7+
| `default_hypercore_use_access_method` | `BOOLEAN` | `PGC_USERSET` | gettext_noop(Sets the global default for using Hypercore TAM when compressing chunks.) |
8+
| `enable_bool_compression` | `BOOLEAN` | `true` | Enable bool compression |
9+
| `enable_bulk_decompression` | `BOOLEAN` | `true` | Increases throughput of decompression |
10+
| `enable_cagg_reorder_groupby` | `BOOLEAN` | `true` | Enable group by clause reordering for continuous aggregates |
11+
| `enable_cagg_sort_pushdown` | `BOOLEAN` | `true` | Enable pushdown of ORDER BY clause for continuous aggregates |
12+
| `enable_cagg_watermark_constify` | `BOOLEAN` | `true` | Enable constifying cagg watermark for real-time caggs |
13+
| `enable_cagg_window_functions` | `BOOLEAN` | `false` | Allow window functions in continuous aggregate views |
14+
| `enable_chunk_append` | `BOOLEAN` | `true` | Enable using chunk append node |
15+
| `enable_chunk_skipping` | `BOOLEAN` | `false` | Enable using chunk column stats to filter chunks based on column filters |
16+
| `enable_chunkwise_aggregation` | `BOOLEAN` | `true` | Enable the pushdown of aggregations to the chunk level |
17+
| `enable_columnarscan` | `BOOLEAN` | `true` | A columnar scan replaces sequence scans for columnar-oriented storage and enables storage-specific optimizations like vectorized filters. Disabling columnar scan will make PostgreSQL fall back to regular sequence scans. |
18+
| `enable_compressed_direct_batch_delete` | `BOOLEAN` | `true` | Enable direct batch deletion in compressed chunks |
19+
| `enable_compressed_skipscan` | `BOOLEAN` | `true` | Enable SkipScan for distinct inputs over compressed chunks |
20+
| `enable_compression_indexscan` | `BOOLEAN` | `false` | Enable indexscan during compression |
21+
| `enable_compression_ratio_warnings` | `BOOLEAN` | `true` | Enable warnings for poor compression ratio |
22+
| `enable_compression_wal_markers` | `BOOLEAN` | `true` | Enable the generation of markers in the WAL stream which mark the start and end of compression operations |
23+
| `enable_compressor_batch_limit` | `BOOLEAN` | `false` | Enable compressor batch limit for compressors which can go over the allocation limit (1 GB). This feature will limit those compressors by reducing the size of the batch and thus avoid hitting the limit. |
24+
| `enable_constraint_aware_append` | `BOOLEAN` | `true` | Enable constraint exclusion at execution time |
25+
| `enable_constraint_exclusion` | `BOOLEAN` | `true` | Enable planner constraint exclusion |
26+
| `enable_custom_hashagg` | `BOOLEAN` | `false` | Enable creating custom hash aggregation plans |
27+
| `enable_decompression_sorted_merge` | `BOOLEAN` | `true` | Enable the merge of compressed batches to preserve the compression order by |
28+
| `enable_delete_after_compression` | `BOOLEAN` | `false` | Delete all rows after compression instead of truncate |
29+
| `enable_deprecation_warnings` | `BOOLEAN` | `true` | Enable warnings when using deprecated functionality |
30+
| `enable_dml_decompression` | `BOOLEAN` | `true` | Enable DML decompression when modifying compressed hypertable |
31+
| `enable_dml_decompression_tuple_filtering` | `BOOLEAN` | `true` | Recheck tuples during DML decompression to only decompress batches with matching tuples |
32+
| `enable_event_triggers` | `BOOLEAN` | `false` | Enable event triggers for chunks creation |
33+
| `enable_exclusive_locking_recompression` | `BOOLEAN` | `false` | Enable getting exclusive lock on chunk during segmentwise recompression |
34+
| `enable_foreign_key_propagation` | `BOOLEAN` | `true` | Adjust foreign key lookup queries to target whole hypertable |
35+
| `enable_job_execution_logging` | `BOOLEAN` | `false` | Retain job run status in logging table |
36+
| `enable_merge_on_cagg_refresh` | `BOOLEAN` | `false` | Enable MERGE statement on cagg refresh |
37+
| `enable_now_constify` | `BOOLEAN` | `true` | Enable constifying now() in query constraints |
38+
| `enable_null_compression` | `BOOLEAN` | `true` | Enable null compression |
39+
| `enable_optimizations` | `BOOLEAN` | `true` | Enable TimescaleDB query optimizations |
40+
| `enable_ordered_append` | `BOOLEAN` | `true` | Enable ordered append optimization for queries that are ordered by the time dimension |
41+
| `enable_parallel_chunk_append` | `BOOLEAN` | `true` | Enable using parallel aware chunk append node |
42+
| `enable_qual_propagation` | `BOOLEAN` | `true` | Enable propagation of qualifiers in JOINs |
43+
| `enable_runtime_exclusion` | `BOOLEAN` | `true` | Enable runtime chunk exclusion in ChunkAppend node |
44+
| `enable_segmentwise_recompression` | `BOOLEAN` | `true` | Enable segmentwise recompression |
45+
| `enable_skipscan` | `BOOLEAN` | `true` | Enable SkipScan for DISTINCT queries |
46+
| `enable_skipscan_for_distinct_aggregates` | `BOOLEAN` | `true` | Enable SkipScan for DISTINCT aggregates |
47+
| `enable_sparse_index_bloom` | `BOOLEAN` | `true` | This sparse index speeds up the equality queries on compressed columns |
48+
| `enable_tiered_reads` | `BOOLEAN` | `true` | Enable reading of tiered data by including a foreign table representing the data in the object storage into the query plan |
49+
| `enable_transparent_decompression` | `ENUM` | `transparent_decompression_options` | Enable transparent decompression when querying hypertable |
50+
| `enable_tss_callbacks` | `BOOLEAN` | `true` | Enable ts_stat_statements callbacks |
51+
| `enable_vectorized_aggregation` | `BOOLEAN` | `true` | Enable vectorized aggregation for compressed data |
52+
| `hypercore_copy_to_behavior` | `ENUM` | `hypercore_copy_to_options` | Set to 'all_data' to return both compressed and uncompressed data via the Hypercore table when using COPY TO. Set to 'no_compressed_data' to skip compressed data. |
53+
| `hypercore_indexam_whitelist` | `STRING` | `hash"` | gettext_noop( List of index access method names supported by hypercore.) |
54+
| `materializations_per_refresh_window` | `INTEGER` | `10` | The maximal number of individual refreshes per cagg refresh. If more refreshes need to be performed |
55+
| `max_cached_chunks_per_hypertable` | `INTEGER` | `0` | Maximum number of chunks stored in the cache |
56+
| `max_open_chunks_per_insert` | `INTEGER` | `0` | Maximum number of open chunk tables per insert |
57+
| `max_tuples_decompressed_per_dml_transaction` | `INTEGER` | `an error will ""be thrown and transaction rolled back. ""Setting this to 0 sets this value to unlimited number of ""tuples decompressed."` | UPDATE |
58+
| `restoring` | `BOOLEAN` | `false` | In restoring mode all timescaledb internal hooks are disabled. This mode is required for restoring logical dumps of databases with timescaledb. |
59+
| `telemetry_level` | `ENUM` | `telemetry_level_options` | Level used to determine which telemetry to send |

api/configuration.md

Lines changed: 0 additions & 15 deletions
This file was deleted.

api/configuration/gucs.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
title: Grand Unified Configuration (GUC) parameters
3+
excerpt: Optimize the behavior of TimescaleDB using Grand Unified Configuration (GUC) parameters
4+
keywords: [GUC, Configuration]
5+
---
6+
7+
import TsdbGucsList from "versionContent/_partials/_timescaledb-gucs.mdx";
8+
9+
# Grand Unified Configuration (GUC) parameters
10+
11+
You use the following Grand Unified Configuration (GUC) parameters to optimize the behavior of your $SERVICE_LONG.
12+
13+
<TsdbGucsList />

api/configuration/index.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Service configuration
3+
excerpt: Use the default PostgreSQL server configuration settings for your Tiger Cloud service, or customize them as needed
4+
keywords: [configure]
5+
products: [self_hosted, cloud]
6+
---
7+
8+
# $SERVICE_LONG configuration
9+
10+
$SERVICE_LONG use the default $PG server configuration settings. You can optimize your $SERVICE_SHORT configuration
11+
using the following $TIMESCALE_DB and Grand Unified Configuration (GUC) parameters.
12+
13+
* [$TIGER_POSTGRES configuration and tuning][tigerpostgres-config]
14+
* [Grand Unified Configuration (GUC) parameters][gucs]
15+
16+
[tigerpostgres-config]: /api/:currentVersion:/configuration/tiger-postgres/
17+
[gucs]: /api/:currentVersion:/configuration/gucs/

api/configuration/tiger-postgres.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
---
2+
title: Tiger Postgres configuration and tuning
3+
excerpt: Configure the Tiger Postgres settings related to policies, query planning and execution, distributed
4+
hypertables, and
5+
administration
6+
products: [cloud]
7+
keywords: [configuration, settings]
8+
tags: [tune]
9+
---
10+
11+
import TimescaleDBConfig from "versionContent/_partials/_timescaledb-config.mdx";
12+
13+
# $TIGER_POSTGRES configuration and tuning
14+
15+
Just as you can tune settings in $PG, $TIGER_POSTGRES provides a number of configuration
16+
settings that may be useful to your specific installation and performance needs.
17+
18+
<TimescaleDBConfig />
19+

0 commit comments

Comments
 (0)