Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion .github/workflows/build-cloudberry-rocky8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,10 @@ jobs:
"gpcontrib/gp_sparse_vector:installcheck",
"gpcontrib/gp_toolkit:installcheck"]
},
{"test":"gpcontrib-gp-stats-collector",
"make_configs":["gpcontrib/gp_stats_collector:installcheck"],
"extension":"gp_stats_collector"
},
{"test":"ic-fixme",
"make_configs":["src/test/regress:installcheck-fixme"],
"enable_core_check":false
Expand Down Expand Up @@ -540,10 +544,11 @@ jobs:
if: needs.check-skip.outputs.should_skip != 'true'
env:
SRC_DIR: ${{ github.workspace }}
CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector
run: |
set -eo pipefail
chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh
if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then
if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then
echo "::error::Configure script failed"
exit 1
fi
Expand Down Expand Up @@ -1400,6 +1405,7 @@ jobs:
if: success() && needs.check-skip.outputs.should_skip != 'true'
env:
SRC_DIR: ${{ github.workspace }}
BUILD_DESTINATION: /usr/local/cloudberry-db
shell: bash {0}
run: |
set -o pipefail
Expand All @@ -1423,6 +1429,30 @@ jobs:
# 2. Follow the same pattern as optimizer
# 3. Update matrix entries to include the new setting

# Create extension if required
if [[ "${{ matrix.extension != '' }}" == "true" ]]; then
case "${{ matrix.extension }}" in
gp_stats_collector)
if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \
source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \
gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \
gpstop -ra && \
echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \
SHOW shared_preload_libraries; \
TABLE pg_extension;' | \
psql postgres"
then
echo "Error creating gp_stats_collector extension"
exit 1
fi
;;
*)
echo "Unknown extension: ${{ matrix.extension }}"
exit 1
;;
esac
fi

# Set PostgreSQL options if defined
PG_OPTS=""
if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/build-cloudberry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,9 @@ jobs:
},
"enable_core_check":false
},
{"test":"gpcontrib-yagp-hooks-collector",
"make_configs":["gpcontrib/yagp_hooks_collector:installcheck"],
"extension":"yagp_hooks_collector"
{"test":"gpcontrib-gp-stats-collector",
"make_configs":["gpcontrib/gp_stats_collector:installcheck"],
"extension":"gp_stats_collector"
},
{"test":"ic-expandshrink",
"make_configs":["src/test/isolation2:installcheck-expandshrink"]
Expand Down Expand Up @@ -539,7 +539,7 @@ jobs:
if: needs.check-skip.outputs.should_skip != 'true'
env:
SRC_DIR: ${{ github.workspace }}
CONFIGURE_EXTRA_OPTS: --with-yagp-hooks-collector
CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector
run: |
set -eo pipefail
chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh
Expand Down Expand Up @@ -1441,17 +1441,17 @@ jobs:
# Create extension if required
if [[ "${{ matrix.extension != '' }}" == "true" ]]; then
case "${{ matrix.extension }}" in
yagp_hooks_collector)
gp_stats_collector)
if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \
source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \
gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \
gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \
gpstop -ra && \
echo 'CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; \
echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \
SHOW shared_preload_libraries; \
TABLE pg_extension;' | \
psql postgres"
then
echo "Error creating yagp_hooks_collector extension"
echo "Error creating gp_stats_collector extension"
exit 1
fi
;;
Expand Down
32 changes: 31 additions & 1 deletion .github/workflows/build-deb-cloudberry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,10 @@ jobs:
"gpcontrib/gp_sparse_vector:installcheck",
"gpcontrib/gp_toolkit:installcheck"]
},
{"test":"gpcontrib-gp-stats-collector",
"make_configs":["gpcontrib/gp_stats_collector:installcheck"],
"extension":"gp_stats_collector"
},
{"test":"ic-cbdb-parallel",
"make_configs":["src/test/regress:installcheck-cbdb-parallel"]
}
Expand Down Expand Up @@ -448,13 +452,14 @@ jobs:
shell: bash
env:
SRC_DIR: ${{ github.workspace }}
CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector
run: |
set -eo pipefail
export BUILD_DESTINATION=${SRC_DIR}/debian/build
chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh
if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then
if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then
echo "::error::Configure script failed"
exit 1
fi
Expand Down Expand Up @@ -1341,6 +1346,7 @@ jobs:
if: success() && needs.check-skip.outputs.should_skip != 'true'
env:
SRC_DIR: ${{ github.workspace }}
BUILD_DESTINATION: ${{ github.workspace }}/debian/build
shell: bash {0}
run: |
set -o pipefail
Expand All @@ -1365,6 +1371,30 @@ jobs:
# 3. Update matrix entries to include the new setting
# Create extension if required
if [[ "${{ matrix.extension != '' }}" == "true" ]]; then
case "${{ matrix.extension }}" in
gp_stats_collector)
if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \
source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \
gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \
gpstop -ra && \
echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \
SHOW shared_preload_libraries; \
TABLE pg_extension;' | \
psql postgres"
then
echo "Error creating gp_stats_collector extension"
exit 1
fi
;;
*)
echo "Unknown extension: ${{ matrix.extension }}"
exit 1
;;
esac
fi
# Set PostgreSQL options if defined
PG_OPTS=""
if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
# to build Postgres with a different make, we have this make file
# that, as a service, will look for a GNU make and invoke it, or show
# an error message if none could be found.

# If the user were using GNU make now, this file would not get used
# because GNU make uses a make file named "GNUmakefile" in preference
# to "Makefile" if it exists. PostgreSQL is shipped with a
# "GNUmakefile". If the user hasn't run the configure script yet, the
# GNUmakefile won't exist yet, so we catch that case as well.


# AIX make defaults to building *every* target of the first rule. Start with
# a single-target, empty rule to make the other targets non-default.
all:
Expand Down
16 changes: 8 additions & 8 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ with_apr_config
with_libcurl
with_rt
with_zstd
with_yagp_hooks_collector
with_gp_stats_collector
with_libbz2
LZ4_LIBS
LZ4_CFLAGS
Expand Down Expand Up @@ -943,7 +943,7 @@ with_zlib
with_lz4
with_libbz2
with_zstd
with_yagp_hooks_collector
with_gp_stats_collector
with_rt
with_libcurl
with_apr_config
Expand Down Expand Up @@ -11153,14 +11153,14 @@ fi
fi

#
# yagp_hooks_collector
# gp_stats_collector
#



# Check whether --with-yagp-hooks-collector was given.
if test "${with_yagp_hooks_collector+set}" = set; then :
withval=$with_yagp_hooks_collector;
# Check whether --with-gp-stats-collector was given.
if test "${with_gp_stats_collector+set}" = set; then :
withval=$with_gp_stats_collector;
case $withval in
yes)
:
Expand All @@ -11169,12 +11169,12 @@ if test "${with_yagp_hooks_collector+set}" = set; then :
:
;;
*)
as_fn_error $? "no argument expected for --with-yagp-hooks-collector option" "$LINENO" 5
as_fn_error $? "no argument expected for --with-gp-stats-collector option" "$LINENO" 5
;;
esac

else
with_yagp_hooks_collector=no
with_gp_stats_collector=no

fi

Expand Down
19 changes: 15 additions & 4 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1366,11 +1366,22 @@ AC_MSG_RESULT([$with_zstd])
AC_SUBST(with_zstd)

#
# yagp_hooks_collector
# gp_stats_collector
#
PGAC_ARG_BOOL(with, yagp_hooks_collector, no,
[build with YAGP hooks collector extension])
AC_SUBST(with_yagp_hooks_collector)
PGAC_ARG_BOOL(with, gp_stats_collector, no,
[build with stats collector extension])
AC_SUBST(with_gp_stats_collector)

if test "$with_gp_stats_collector" = yes; then
PKG_CHECK_MODULES([PROTOBUF], [protobuf >= 3.0.0],
[],
[AC_MSG_ERROR([protobuf >= 3.0.0 is required for gp_stats_collector])]
)
AC_PATH_PROG([PROTOC], [protoc], [no])
if test "$PROTOC" = no; then
AC_MSG_ERROR([protoc is required for gp_stats_collector but was not found in PATH])
fi
fi

if test "$with_zstd" = yes; then
dnl zstd_errors.h was renamed from error_public.h in v1.4.0
Expand Down
4 changes: 2 additions & 2 deletions gpcontrib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ else
diskquota
endif

ifeq "$(with_yagp_hooks_collector)" "yes"
recurse_targets += yagp_hooks_collector
ifeq "$(with_gp_stats_collector)" "yes"
recurse_targets += gp_stats_collector
endif
ifeq "$(with_zstd)" "yes"
recurse_targets += zstd
Expand Down
5 changes: 5 additions & 0 deletions gpcontrib/gp_stats_collector/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.o
*.so
src/protos/
.vscode
compile_commands.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
MODULE_big = yagp_hooks_collector
EXTENSION = yagp_hooks_collector
MODULE_big = gp_stats_collector
EXTENSION = gp_stats_collector
DATA = $(wildcard *--*.sql)
REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds yagp_locale
REGRESS = gpsc_cursors gpsc_dist gpsc_select gpsc_utf8_trim gpsc_utility gpsc_guc_cache gpsc_uds gpsc_locale

PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service
PROTO_BASES = gpsc_plan gpsc_metrics gpsc_set_service
PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES))

C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c))
Expand All @@ -24,18 +24,16 @@ PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = gpcontrib/yagp_hooks_collector
subdir = gpcontrib/gp_stats_collector
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto
@mkdir -p src/protos
sed -i 's/optional //g' $^
sed -i 's|cloud/mdb/yagpcc/api/proto/common/|protos/|g' $^
protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^
mv src/protos/$*.pb.cc src/protos/$*.pb.cpp

$(CPP_OBJS): src/protos/yagpcc_metrics.pb.h src/protos/yagpcc_plan.pb.h src/protos/yagpcc_set_service.pb.h
src/protos/yagpcc_set_service.pb.o: src/protos/yagpcc_metrics.pb.h
$(CPP_OBJS): src/protos/gpsc_metrics.pb.h src/protos/gpsc_plan.pb.h src/protos/gpsc_set_service.pb.h
src/protos/gpsc_set_service.pb.o: src/protos/gpsc_metrics.pb.h
47 changes: 47 additions & 0 deletions gpcontrib/gp_stats_collector/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

## GP Stats Collector

An extension for collecting query execution metrics and reporting them to an external agent.

### Collected Statistics

#### 1. Query Lifecycle
- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info.
- **GUC:** `gpsc.enable`.

#### 2. `EXPLAIN` data
- **What:** Triggers generation of `EXPLAIN (TEXT, COSTS, VERBOSE)` output and captures it.
- **GUC:** `gpsc.enable`.

#### 3. `EXPLAIN ANALYZE` data
- **What:** Triggers generation of `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` output and captures it.
- **GUCs:** `gpsc.enable`, `gpsc.min_analyze_time`, `gpsc.enable_cdbstats` (ANALYZE), `gpsc.enable_analyze` (BUFFERS, TIMING, VERBOSE).

#### 4. Other Metrics
- **What:** Captures Instrument, System, Network, Interconnect, and Spill metrics.
- **GUC:** `gpsc.enable`.

### General Configuration
- **Nested Queries:** When `gpsc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments. When it is `true`, both top-level and nested queries are reported from the coordinator, and from the segments they are collected as aggregates.
- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `gpsc.uds_path`.
- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `gpsc.ignored_users_list`.
- **Trimming plans:** Query texts and execution plans are trimmed based on `gpsc.max_text_size` and `gpsc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB.
- **Analyze collection:** Analyze data is collected if `gpsc.enable_analyze` is `true`, and it is sent if the execution time exceeds `gpsc.min_analyze_time` (10 seconds by default).
Loading
Loading