From 4be9e4a6662b246d7a28c5200f5686e3892a423d Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 23 Mar 2023 15:15:26 +0300 Subject: [PATCH 01/49] [yagp_hooks_collector] Add extension skeleton with GRPC transport Add yagp_hooks_collector, a shared-preload module that hooks into ExecutorStart and ExecutorFinish to capture query lifecycle events. Includes Makefile with protobuf code generation, GRPC-based delivery, QueryInfo generation (query text, plan text, query_id, plan_id, session metadata), and basic protobuf message filling. --- .gitignore | 7 +- Makefile | 2 - protos/yagpcc_metrics.proto | 130 +++ protos/yagpcc_plan.proto | 570 +++++++++++++ protos/yagpcc_set_service.proto | 45 + sql/yagp-hooks-collector--1.0.sql | 2 + sql/yagp-hooks-collector--unpackaged--1.0.sql | 2 + src/EventSender.cpp | 189 +++++ src/EventSender.h | 19 + src/GrpcConnector.cpp | 55 ++ src/GrpcConnector.h | 15 + src/hook_wrappers.cpp | 67 ++ src/hook_wrappers.h | 12 + src/stat_statements_parser/README.MD | 1 + .../pg_stat_statements_ya_parser.c | 771 ++++++++++++++++++ .../pg_stat_statements_ya_parser.h | 15 + src/yagp_hooks_collector.c | 22 + yagp-hooks-collector.control | 5 + 18 files changed, 1926 insertions(+), 3 deletions(-) create mode 100644 protos/yagpcc_metrics.proto create mode 100644 protos/yagpcc_plan.proto create mode 100644 protos/yagpcc_set_service.proto create mode 100644 sql/yagp-hooks-collector--1.0.sql create mode 100644 sql/yagp-hooks-collector--unpackaged--1.0.sql create mode 100644 src/EventSender.cpp create mode 100644 src/EventSender.h create mode 100644 src/GrpcConnector.cpp create mode 100644 src/GrpcConnector.h create mode 100644 src/hook_wrappers.cpp create mode 100644 src/hook_wrappers.h create mode 100644 src/stat_statements_parser/README.MD create mode 100644 src/stat_statements_parser/pg_stat_statements_ya_parser.c create mode 100644 src/stat_statements_parser/pg_stat_statements_ya_parser.h create mode 100644 src/yagp_hooks_collector.c create mode 
100644 yagp-hooks-collector.control diff --git a/.gitignore b/.gitignore index 5c21989c4ab..29b40ee096c 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,9 @@ lib*.pc /compile_commands.json /tmp_install/ /.cache/ -/install/ \ No newline at end of file +/install/ +*.o +*.so +src/protos/ +.vscode +compile_commands.json diff --git a/Makefile b/Makefile index e9ab3fbf2d4..15c5dabb70e 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,12 @@ # to build Postgres with a different make, we have this make file # that, as a service, will look for a GNU make and invoke it, or show # an error message if none could be found. - # If the user were using GNU make now, this file would not get used # because GNU make uses a make file named "GNUmakefile" in preference # to "Makefile" if it exists. PostgreSQL is shipped with a # "GNUmakefile". If the user hasn't run the configure script yet, the # GNUmakefile won't exist yet, so we catch that case as well. - # AIX make defaults to building *every* target of the first rule. Start with # a single-target, empty rule to make the other targets non-default. 
all: diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto new file mode 100644 index 00000000000..b7e255484c7 --- /dev/null +++ b/protos/yagpcc_metrics.proto @@ -0,0 +1,130 @@ +syntax = "proto3"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCM"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +enum QueryStatus { + QUERY_STATUS_UNSPECIFIED = 0; + QUERY_STATUS_SUBMIT = 1; + QUERY_STATUS_START = 2; + QUERY_STATUS_DONE = 3; + QUERY_STATUS_QUERY_DONE = 4; + QUERY_STATUS_ERROR = 5; + QUERY_STATUS_CANCELLING = 6; + QUERY_STATUS_CANCELED = 7; + QUERY_STATUS_END = 8; +} + +enum PlanNodeStatus { + PLAN_NODE_STATUS_UNSPECIFIED = 0; + PLAN_NODE_STATUS_INITIALIZED = 1; + PLAN_NODE_STATUS_EXECUTING = 2; + PLAN_NODE_STATUS_FINISHED = 3; +} + +message QueryInfo { + PlanGenerator generator = 1; + uint64 query_id = 2; + uint64 plan_id = 3; + string queryText = 4; + string planText = 5; + SessionInfo sessionInfo = 6; +} + +enum PlanGenerator +{ + PLAN_GENERATOR_UNSPECIFIED = 0; + PLAN_GENERATOR_PLANNER = 1; /* plan produced by the planner*/ + PLAN_GENERATOR_OPTIMIZER = 2; /* plan produced by the optimizer*/ +} + +message GPMetrics { + SystemStat systemStat = 1; + MetricInstrumentation instrumentation = 2; + SpillInfo spill = 3; +} + +message QueryInfoHeader { + int32 pid = 1; + GpId gpIdentity = 2; + + int32 tmid = 3; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 4; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ + int32 ccnt = 5; /* The command number within this session as shown by gp_command_count. 
All records associated with the query will have the same ccnt */ + int32 sliceid = 6; /* slice identificator, 0 means general info for the whole query */ +} + +message GpId { + int32 dbid = 1; /* the dbid of this database */ + int32 segindex = 2; /* content indicator: -1 for entry database, + * 0, ..., n-1 for segment database * + * a primary and its mirror have the same segIndex */ + GpRole gp_role = 3; + GpRole gp_session_role = 4; +} + +enum GpRole +{ + GP_ROLE_UNSPECIFIED = 0; + GP_ROLE_UTILITY = 1; /* Operating as a simple database engine */ + GP_ROLE_DISPATCH = 2; /* Operating as the parallel query dispatcher */ + GP_ROLE_EXECUTE = 3; /* Operating as a parallel query executor */ + GP_ROLE_UNDEFINED = 4; /* Should never see this role in use */ +} + +message SessionInfo { + string sql = 1; + string userName = 2; + string databaseName = 3; + string resourceGroup = 4; + string applicationName = 5; +} + +message SystemStat { + /* CPU stat*/ + double runningTimeSeconds = 1; + double userTimeSeconds = 2; + double kernelTimeSeconds = 3; + + /* Memory stat */ + uint64 vsize = 4; + uint64 rss = 5; + uint64 VmSizeKb = 6; + uint64 VmPeakKb = 7; + + /* Storage stat */ + uint64 rchar = 8; + uint64 wchar = 9; + uint64 syscr = 10; + uint64 syscw = 11; + uint64 read_bytes = 12; + uint64 write_bytes = 13; + uint64 cancelled_write_bytes = 14; +} + +message MetricInstrumentation { + uint64 ntuples = 1; /* Total tuples produced */ + uint64 nloops = 2; /* # of run cycles for this node */ + uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ + double firsttuple = 4; /* Time for first tuple of this cycle */ + double startup = 5; /* Total startup time (in seconds) */ + double total = 6; /* Total total time (in seconds) */ + uint64 shared_blks_hit = 7; /* shared blocks stats*/ + uint64 shared_blks_read = 8; + uint64 shared_blks_dirtied = 9; + uint64 shared_blks_written = 10; + uint64 local_blks_hit = 11; /* data read from disks */ + uint64 local_blks_read = 12; + uint64 
local_blks_dirtied = 13; + uint64 local_blks_written = 14; + uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_written = 16; + double blk_read_time = 17; /* measured read/write time */ + double blk_write_time = 18; +} + +message SpillInfo { + int32 fileCount = 1; + int64 totalBytes = 2; +} diff --git a/protos/yagpcc_plan.proto b/protos/yagpcc_plan.proto new file mode 100644 index 00000000000..962fab4bbdd --- /dev/null +++ b/protos/yagpcc_plan.proto @@ -0,0 +1,570 @@ +syntax = "proto3"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCP"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; + +message MetricPlan { + GpdbNodeType type = 1; + + int32 plan_node_id = 2; + int32 parent_plan_node_id = 3; // Valid only for QueryInfoMetricQuerySubmit + + double startup_cost = 4; /* cost expended before fetching any tuples */ + double total_cost = 5; /* total cost (assuming all tuples fetched) */ + double plan_rows = 6; /* number of rows plan is expected to emit */ + int32 plan_width = 7; /* average row width in bytes */ + + int32 arg1 = 8; // for some nodes it's additional opperand type + int32 arg2 = 9; // for some nodes it's additional opperand type + + MetricMotionInfo motion_info = 10; + MetricRelationInfo relation_info = 11; + + string scan_index_name = 12; + ScanDirection scan_direction = 13; + MetricSliceInfo slice_info = 14; + string statement = 15; +} + +message MetricMotionInfo { + MotionType type = 1; + bool isBroadcast = 2; + CdbLocusType locusType = 3; + + int32 sliceId = 4; + int32 parentSliceId = 5; +} + +message MetricRelationInfo { + int32 oid = 1; + string name = 2; + string schema = 3; + string alias = 4; + int32 dynamicScanId = 5; +} + +message MetricSliceInfo { + int32 slice = 1; + int32 segments = 2; + GangType gangType = 3; + int32 gang = 4; +} + +enum ScanDirection +{ + SCAN_DIRECTION_UNSPECIFIED = 0; + SCAN_DIRECTION_BACKWARD = 1; + SCAN_DIRECTION_FORWARD = 2; +} + +/* 
GangType enumeration is used in several structures related to CDB + * slice plan support. + */ +enum GangType +{ + GANG_TYPE_UNSPECIFIED = 0; + GANG_TYPE_UNALLOCATED = 1; /* a root slice executed by the qDisp */ + GANG_TYPE_ENTRYDB_READER = 2; /* a 1-gang with read access to the entry db */ + GANG_TYPE_SINGLETON_READER = 3; /* a 1-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_READER = 4; /* a 1-gang or N-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_WRITER = 5; /* the N-gang that can update the segment dbs */ +} + + +enum CdbLocusType +{ + CDB_LOCUS_TYPE_UNSPECIFIED = 0; + CDB_LOCUS_TYPE_ENTRY = 1; /* a single backend process on the entry db: + * usually the qDisp itself, but could be a + * qExec started by the entry postmaster. + */ + + CDB_LOCUS_TYPE_SINGLE_QE = 2; /* a single backend process on any db: the + * qDisp itself, or a qExec started by a + * segment postmaster or the entry postmaster. + */ + + CDB_LOCUS_TYPE_GENERAL = 3; /* compatible with any locus (data is + * self-contained in the query plan or + * generally available in any qExec or qDisp) */ + + CDB_LOCUS_TYPE_SEGMENT_GENERAL = 4; /* generally available in any qExec, but not + * available in qDisp */ + + CDB_LOCUS_TYPE_REPLICATED = 5; /* replicated over all qExecs of an N-gang */ + CDB_LOCUS_TYPE_HASHED = 6; /* hash partitioned over all qExecs of N-gang */ + CDB_LOCUS_TYPE_HASHED_OJ = 7; /* result of hash partitioned outer join, NULLs can be anywhere */ + CDB_LOCUS_TYPE_STREWN = 8; /* partitioned on no known function */ + CDB_LOCUS_TYPE_END = 9; /* = last valid CdbLocusType + 1 */ +} + +enum MotionType +{ + MOTION_TYPE_UNSPECIFIED = 0; + MOTION_TYPE_HASH = 1; // Use hashing to select a segindex destination + MOTION_TYPE_FIXED = 2; // Send tuples to a fixed set of segindexes + MOTION_TYPE_EXPLICIT = 3; // Send tuples to the segment explicitly specified in their segid column +} + +enum GpdbNodeType { + GPDB_NODE_TYPE_UNSPECIFIED = 0; + INDEX_INFO = 1; + EXPR_CONTEXT = 2; + 
PROJECTION_INFO = 3; + JUNK_FILTER = 4; + RESULT_REL_INFO = 5; + E_STATE = 6; + TUPLE_TABLE_SLOT = 7; + CDB_PROCESS = 8; + SLICE = 9; + SLICE_TABLE = 10; + CURSOR_POS_INFO = 11; + SHARE_NODE_ENTRY = 12; + PARTITION_STATE = 13; + QUERY_DISPATCH_DESC = 14; + OID_ASSIGNMENT = 15; + PLAN = 16; + SCAN = 17; + JOIN = 18; + RESULT = 19; + MODIFY_TABLE = 20; + APPEND = 21; + MERGE_APPEND = 22; + RECURSIVE_UNION = 23; + SEQUENCE = 24; + BITMAP_AND = 25; + BITMAP_OR = 26; + SEQ_SCAN = 27; + DYNAMIC_SEQ_SCAN = 28; + EXTERNAL_SCAN = 29; + INDEX_SCAN = 30; + DYNAMIC_INDEX_SCAN = 31; + INDEX_ONLY_SCAN = 32; + BITMAP_INDEX_SCAN = 33; + DYNAMIC_BITMAP_INDEX_SCAN = 34; + BITMAP_HEAP_SCAN = 35; + DYNAMIC_BITMAP_HEAP_SCAN = 36; + TID_SCAN = 37; + SUBQUERY_SCAN = 38; + FUNCTION_SCAN = 39; + TABLE_FUNCTION_SCAN = 40; + VALUES_SCAN = 41; + CTE_SCAN = 42; + WORK_TABLE_SCAN = 43; + FOREIGN_SCAN = 44; + NEST_LOOP = 45; + MERGE_JOIN = 46; + HASH_JOIN = 47; + MATERIAL = 48; + SORT = 49; + AGG = 50; + WINDOW_AGG = 51; + UNIQUE = 52; + HASH = 53; + SET_OP = 54; + LOCK_ROWS = 55; + LIMIT = 56; + MOTION = 57; + SHARE_INPUT_SCAN = 58; + REPEAT = 59; + DML = 60; + SPLIT_UPDATE = 61; + ROW_TRIGGER = 62; + ASSERT_OP = 63; + PARTITION_SELECTOR = 64; + PLAN_END = 65; + NEST_LOOP_PARAM = 66; + PLAN_ROW_MARK = 67; + PLAN_INVAL_ITEM = 68; + PLAN_STATE = 69; + SCAN_STATE = 70; + JOIN_STATE = 71; + RESULT_STATE = 72; + MODIFY_TABLE_STATE = 73; + APPEND_STATE = 74; + MERGE_APPEND_STATE = 75; + RECURSIVE_UNION_STATE = 76; + SEQUENCE_STATE = 77; + BITMAP_AND_STATE = 78; + BITMAP_OR_STATE = 79; + SEQ_SCAN_STATE = 80; + DYNAMIC_SEQ_SCAN_STATE = 81; + EXTERNAL_SCAN_STATE = 82; + INDEX_SCAN_STATE = 83; + DYNAMIC_INDEX_SCAN_STATE = 84; + INDEX_ONLY_SCAN_STATE = 85; + BITMAP_INDEX_SCAN_STATE = 86; + DYNAMIC_BITMAP_INDEX_SCAN_STATE = 87; + BITMAP_HEAP_SCAN_STATE = 88; + DYNAMIC_BITMAP_HEAP_SCAN_STATE = 89; + TID_SCAN_STATE = 90; + SUBQUERY_SCAN_STATE = 91; + FUNCTION_SCAN_STATE = 92; + TABLE_FUNCTION_STATE = 93; + 
VALUES_SCAN_STATE = 94; + CTE_SCAN_STATE = 95; + WORK_TABLE_SCAN_STATE = 96; + FOREIGN_SCAN_STATE = 97; + NEST_LOOP_STATE = 98; + MERGE_JOIN_STATE = 99; + HASH_JOIN_STATE = 100; + MATERIAL_STATE = 101; + SORT_STATE = 102; + AGG_STATE = 103; + WINDOW_AGG_STATE = 104; + UNIQUE_STATE = 105; + HASH_STATE = 106; + SET_OP_STATE = 107; + LOCK_ROWS_STATE = 108; + LIMIT_STATE = 109; + MOTION_STATE = 110; + SHARE_INPUT_SCAN_STATE = 111; + REPEAT_STATE = 112; + DML_STATE = 113; + SPLIT_UPDATE_STATE = 114; + ROW_TRIGGER_STATE = 115; + ASSERT_OP_STATE = 116; + PARTITION_SELECTOR_STATE = 117; + TUPLE_DESC_NODE = 118; + SERIALIZED_PARAM_EXTERN_DATA = 119; + ALIAS = 120; + RANGE_VAR = 121; + EXPR = 122; + VAR = 123; + CONST = 124; + PARAM = 125; + AGGREF = 126; + WINDOW_FUNC = 127; + ARRAY_REF = 128; + FUNC_EXPR = 129; + NAMED_ARG_EXPR = 130; + OP_EXPR = 131; + DISTINCT_EXPR = 132; + NULL_IF_EXPR = 133; + SCALAR_ARRAY_OP_EXPR = 134; + BOOL_EXPR = 135; + SUB_LINK = 136; + SUB_PLAN = 137; + ALTERNATIVE_SUB_PLAN = 138; + FIELD_SELECT = 139; + FIELD_STORE = 140; + RELABEL_TYPE = 141; + COERCE_VIA_IO = 142; + ARRAY_COERCE_EXPR = 143; + CONVERT_ROWTYPE_EXPR = 144; + COLLATE_EXPR = 145; + CASE_EXPR = 146; + CASE_WHEN = 147; + CASE_TEST_EXPR = 148; + ARRAY_EXPR = 149; + ROW_EXPR = 150; + ROW_COMPARE_EXPR = 151; + COALESCE_EXPR = 152; + MIN_MAX_EXPR = 153; + XML_EXPR = 154; + NULL_TEST = 155; + BOOLEAN_TEST = 156; + COERCE_TO_DOMAIN = 157; + COERCE_TO_DOMAIN_VALUES = 158; + SET_TO_DEFAULT = 159; + CURRENT_OF_EXPR = 160; + TARGET_ENTRY = 161; + RANGE_TBL_REF = 162; + JOIN_EXPR = 163; + FROM_EXPR = 164; + INTO_CLAUSE = 165; + COPY_INTO_CLAUSE = 166; + REFRESH_CLAUSE = 167; + FLOW = 168; + GROUPING = 169; + GROUP_ID = 170; + DISTRIBUTED_BY = 171; + DML_ACTION_EXPR = 172; + PART_SELECTED_EXPR = 173; + PART_DEFAULT_EXPR = 174; + PART_BOUND_EXPR = 175; + PART_BOUND_INCLUSION_EXPR = 176; + PART_BOUND_OPEN_EXPR = 177; + PART_LIST_RULE_EXPR = 178; + PART_LIST_NULL_TEST_EXPR = 179; + TABLE_OID_INFO 
= 180; + EXPR_STATE = 181; + GENERIC_EXPR_STATE = 182; + WHOLE_ROW_VAR_EXPR_STATE = 183; + AGGREF_EXPR_STATE = 184; + WINDOW_FUNC_EXPR_STATE = 185; + ARRAY_REF_EXPR_STATE = 186; + FUNC_EXPR_STATE = 187; + SCALAR_ARRAY_OP_EXPR_STATE = 188; + BOOL_EXPR_STATE = 189; + SUB_PLAN_STATE = 190; + ALTERNATIVE_SUB_PLAN_STATE = 191; + FIELD_SELECT_STATE = 192; + FIELD_STORE_STATE = 193; + COERCE_VIA_IO_STATE = 194; + ARRAY_COERCE_EXPR_STATE = 195; + CONVERT_ROWTYPE_EXPR_STATE = 196; + CASE_EXPR_STATE = 197; + CASE_WHEN_STATE = 198; + ARRAY_EXPR_STATE = 199; + ROW_EXPR_STATE = 200; + ROW_COMPARE_EXPR_STATE = 201; + COALESCE_EXPR_STATE = 202; + MIN_MAX_EXPR_STATE = 203; + XML_EXPR_STATE = 204; + NULL_TEST_STATE = 205; + COERCE_TO_DOMAIN_STATE = 206; + DOMAIN_CONSTRAINT_STATE = 207; + GROUPING_FUNC_EXPR_STATE = 208; + PART_SELECTED_EXPR_STATE = 209; + PART_DEFAULT_EXPR_STATE = 210; + PART_BOUND_EXPR_STATE = 211; + PART_BOUND_INCLUSION_EXPR_STATE = 212; + PART_BOUND_OPEN_EXPR_STATE = 213; + PART_LIST_RULE_EXPR_STATE = 214; + PART_LIST_NULL_TEST_EXPR_STATE = 215; + PLANNER_INFO = 216; + PLANNER_GLOBAL = 217; + REL_OPT_INFO = 218; + INDEX_OPT_INFO = 219; + PARAM_PATH_INFO = 220; + PATH = 221; + APPEND_ONLY_PATH = 222; + AOCS_PATH = 223; + EXTERNAL_PATH = 224; + INDEX_PATH = 225; + BITMAP_HEAP_PATH = 226; + BITMAP_AND_PATH = 227; + BITMAP_OR_PATH = 228; + NEST_PATH = 229; + MERGE_PATH = 230; + HASH_PATH = 231; + TID_PATH = 232; + FOREIGN_PATH = 233; + APPEND_PATH = 234; + MERGE_APPEND_PATH = 235; + RESULT_PATH = 236; + MATERIAL_PATH = 237; + UNIQUE_PATH = 238; + PROJECTION_PATH = 239; + EQUIVALENCE_CLASS = 240; + EQUIVALENCE_MEMBER = 241; + PATH_KEY = 242; + RESTRICT_INFO = 243; + PLACE_HOLDER_VAR = 244; + SPECIAL_JOIN_INFO = 245; + LATERAL_JOIN_INFO = 246; + APPEND_REL_INFO = 247; + PLACE_HOLDER_INFO = 248; + MIN_MAX_AGG_INFO = 249; + PARTITION = 250; + PARTITION_RULE = 251; + PARTITION_NODE = 252; + PG_PART_RULE = 253; + SEGFILE_MAP_NODE = 254; + PLANNER_PARAM_ITEM = 255; + 
CDB_MOTION_PATH = 256; + PARTITION_SELECTOR_PATH = 257; + CDB_REL_COLUMN_INFO = 258; + DISTRIBUTION_KEY = 259; + MEMORY_CONTEXT = 260; + ALLOC_SET_CONTEXT = 261; + MEMORY_ACCOUNT = 262; + VALUE = 263; + INTEGER = 264; + FLOAT = 265; + STRING = 266; + BIT_STRING = 267; + NULL_VALUE = 268; + LIST = 269; + INT_LIST = 270; + OID_LIST = 271; + QUERY = 272; + PLANNED_STMT = 273; + INSERT_STMT = 274; + DELETE_STMT = 275; + UPDATE_STMT = 276; + SELECT_STMT = 277; + ALTER_TABLE_STMT = 278; + ALTER_TABLE_CMD = 279; + ALTER_DOMAIN_STMT = 280; + SET_OPERATION_STMT = 281; + GRANT_STMT = 282; + GRANT_ROLE_STMT = 283; + ALTER_DEFAULT_PRIVILEGES_STMT = 284; + CLOSE_PORTAL_STMT = 285; + CLUSTER_STMT = 286; + COPY_STMT = 287; + CREATE_STMT = 288; + SINGLE_ROW_ERROR_DESC = 289; + EXT_TABLE_TYPE_DESC = 290; + CREATE_EXTERNAL_STMT = 291; + DEFINE_STMT = 292; + DROP_STMT = 293; + TRUNCATE_STMT = 294; + COMMENT_STMT = 295; + FETCH_STMT = 296; + INDEX_STMT = 297; + CREATE_FUNCTION_STMT = 298; + ALTER_FUNCTION_STMT = 299; + DO_STMT = 300; + RENAME_STMT = 301; + RULE_STMT = 302; + NOTIFY_STMT = 303; + LISTEN_STMT = 304; + UNLISTEN_STMT = 305; + TRANSACTION_STMT = 306; + VIEW_STMT = 307; + LOAD_STMT = 308; + CREATE_DOMAIN_STMT = 309; + CREATEDB_STMT = 310; + DROPDB_STMT = 311; + VACUUM_STMT = 312; + EXPLAIN_STMT = 313; + CREATE_TABLE_AS_STMT = 314; + CREATE_SEQ_STMT = 315; + ALTER_SEQ_STMT = 316; + VARIABLE_SET_STMT = 317; + VARIABLE_SHOW_STMT = 318; + DISCARD_STMT = 319; + CREATE_TRIG_STMT = 320; + CREATE_P_LANG_STMT = 321; + CREATE_ROLE_STMT = 322; + ALTER_ROLE_STMT = 323; + DROP_ROLE_STMT = 324; + CREATE_QUEUE_STMT = 325; + ALTER_QUEUE_STMT = 326; + DROP_QUEUE_STMT = 327; + CREATE_RESOURCE_GROUP_STMT = 328; + DROP_RESOURCE_GROUP_STMT = 329; + ALTER_RESOURCE_GROUP_STMT = 330; + LOCK_STMT = 331; + CONSTRAINTS_SET_STMT = 332; + REINDEX_STMT = 333; + CHECK_POINT_STMT = 334; + CREATE_SCHEMA_STMT = 335; + ALTER_DATABASE_STMT = 336; + ALTER_DATABASE_SET_STMT = 337; + ALTER_ROLE_SET_STMT = 338; + 
CREATE_CONVERSION_STMT = 339; + CREATE_CAST_STMT = 340; + CREATE_OP_CLASS_STMT = 341; + CREATE_OP_FAMILY_STMT = 342; + ALTER_OP_FAMILY_STMT = 343; + PREPARE_STMT = 344; + EXECUTE_STMT = 345; + DEALLOCATE_STMT = 346; + DECLARE_CURSOR_STMT = 347; + CREATE_TABLE_SPACE_STMT = 348; + DROP_TABLE_SPACE_STMT = 349; + ALTER_OBJECT_SCHEMA_STMT = 350; + ALTER_OWNER_STMT = 351; + DROP_OWNED_STMT = 352; + REASSIGN_OWNED_STMT = 353; + COMPOSITE_TYPE_STMT = 354; + CREATE_ENUM_STMT = 355; + CREATE_RANGE_STMT = 356; + ALTER_ENUM_STMT = 357; + ALTER_TS_DICTIONARY_STMT = 358; + ALTER_TS_CONFIGURATION_STMT = 359; + CREATE_FDW_STMT = 360; + ALTER_FDW_STMT = 361; + CREATE_FOREIGN_SERVER_STMT = 362; + ALTER_FOREIGN_SERVER_STMT = 363; + CREATE_USER_MAPPING_STMT = 364; + ALTER_USER_MAPPING_STMT = 365; + DROP_USER_MAPPING_STMT = 366; + ALTER_TABLE_SPACE_OPTIONS_STMT = 367; + ALTER_TABLE_MOVE_ALL_STMT = 368; + SEC_LABEL_STMT = 369; + CREATE_FOREIGN_TABLE_STMT = 370; + CREATE_EXTENSION_STMT = 371; + ALTER_EXTENSION_STMT = 372; + ALTER_EXTENSION_CONTENTS_STMT = 373; + CREATE_EVENT_TRIG_STMT = 374; + ALTER_EVENT_TRIG_STMT = 375; + REFRESH_MAT_VIEW_STMT = 376; + REPLICA_IDENTITY_STMT = 377; + ALTER_SYSTEM_STMT = 378; + PARTITION_BY = 379; + PARTITION_ELEM = 380; + PARTITION_RANGE_ITEM = 381; + PARTITION_BOUND_SPEC = 382; + PARTITION_SPEC = 383; + PARTITION_VALUES_SPEC = 384; + ALTER_PARTITION_ID = 385; + ALTER_PARTITION_CMD = 386; + INHERIT_PARTITION_CMD = 387; + CREATE_FILE_SPACE_STMT = 388; + FILE_SPACE_ENTRY = 389; + DROP_FILE_SPACE_STMT = 390; + TABLE_VALUE_EXPR = 391; + DENY_LOGIN_INTERVAL = 392; + DENY_LOGIN_POINT = 393; + ALTER_TYPE_STMT = 394; + SET_DISTRIBUTION_CMD = 395; + EXPAND_STMT_SPEC = 396; + A_EXPR = 397; + COLUMN_REF = 398; + PARAM_REF = 399; + A_CONST = 400; + FUNC_CALL = 401; + A_STAR = 402; + A_INDICES = 403; + A_INDIRECTION = 404; + A_ARRAY_EXPR = 405; + RES_TARGET = 406; + TYPE_CAST = 407; + COLLATE_CLAUSE = 408; + SORT_BY = 409; + WINDOW_DEF = 410; + RANGE_SUBSELECT = 
411; + RANGE_FUNCTION = 412; + TYPE_NAME = 413; + COLUMN_DEF = 414; + INDEX_ELEM = 415; + CONSTRAINT = 416; + DEF_ELEM = 417; + RANGE_TBL_ENTRY = 418; + RANGE_TBL_FUNCTION = 419; + WITH_CHECK_OPTION = 420; + GROUPING_CLAUSE = 421; + GROUPING_FUNC = 422; + SORT_GROUP_CLAUSE = 423; + WINDOW_CLAUSE = 424; + PRIV_GRANTEE = 425; + FUNC_WITH_ARGS = 426; + ACCESS_PRIV = 427; + CREATE_OP_CLASS_ITEM = 428; + TABLE_LIKE_CLAUSE = 429; + FUNCTION_PARAMETER = 430; + LOCKING_CLAUSE = 431; + ROW_MARK_CLAUSE = 432; + XML_SERIALIZE = 433; + WITH_CLAUSE = 434; + COMMON_TABLE_EXPR = 435; + COLUMN_REFERENCE_STORAGE_DIRECTIVE = 436; + IDENTIFY_SYSTEM_CMD = 437; + BASE_BACKUP_CMD = 438; + CREATE_REPLICATION_SLOT_CMD = 439; + DROP_REPLICATION_SLOT_CMD = 440; + START_REPLICATION_CMD = 441; + TIME_LINE_HISTORY_CMD = 442; + TRIGGER_DATA = 443; + EVENT_TRIGGER_DATA = 444; + RETURN_SET_INFO = 445; + WINDOW_OBJECT_DATA = 446; + TID_BITMAP = 447; + INLINE_CODE_BLOCK = 448; + FDW_ROUTINE = 449; + STREAM_BITMAP = 450; + FORMATTER_DATA = 451; + EXT_PROTOCOL_DATA = 452; + EXT_PROTOCOL_VALIDATOR_DATA = 453; + SELECTED_PARTS = 454; + COOKED_CONSTRAINT = 455; + CDB_EXPLAIN_STAT_HDR = 456; + GP_POLICY = 457; + RETRIEVE_STMT = 458; +} diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto new file mode 100644 index 00000000000..0bef72891ee --- /dev/null +++ b/protos/yagpcc_set_service.proto @@ -0,0 +1,45 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +import "protos/yagpcc_metrics.proto"; +import "protos/yagpcc_plan.proto"; + +package yagpcc; +option java_outer_classname = "SegmentYAGPCCAS"; +option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; + +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum 
MetricResponseStatusCode {
+  METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0;
+  METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1;
+  METRIC_RESPONSE_STATUS_CODE_ERROR = 2;
+}
+
+message SetQueryReq {
+  QueryStatus query_status = 1;
+  google.protobuf.Timestamp datetime = 2;
+
+  QueryInfoHeader header = 3;
+  QueryInfo query_info = 4;
+  GPMetrics query_metrics = 5;
+  repeated MetricPlan plan_tree = 6;
+}
+
+message SetPlanNodeReq {
+  PlanNodeStatus node_status = 1;
+  google.protobuf.Timestamp datetime = 2;
+  QueryInfoHeader header = 3;
+  GPMetrics node_metrics = 4;
+  MetricPlan plan_node = 5;
+}
diff --git a/sql/yagp-hooks-collector--1.0.sql b/sql/yagp-hooks-collector--1.0.sql
new file mode 100644
index 00000000000..f9ab15fb400
--- /dev/null
+++ b/sql/yagp-hooks-collector--1.0.sql
@@ -0,0 +1,2 @@
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use '''CREATE EXTENSION "yagp-hooks-collector"''' to load this file. \quit
diff --git a/sql/yagp-hooks-collector--unpackaged--1.0.sql b/sql/yagp-hooks-collector--unpackaged--1.0.sql
new file mode 100644
index 00000000000..0441c97bd84
--- /dev/null
+++ b/sql/yagp-hooks-collector--unpackaged--1.0.sql
@@ -0,0 +1,2 @@
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use '''CREATE EXTENSION "yagp-hooks-collector" FROM unpackaged''' to load this file.
\quit diff --git a/src/EventSender.cpp b/src/EventSender.cpp new file mode 100644 index 00000000000..bb4765adeb1 --- /dev/null +++ b/src/EventSender.cpp @@ -0,0 +1,189 @@ +#include "EventSender.h" +#include "GrpcConnector.h" +#include "protos/yagpcc_set_service.pb.h" +#include + +extern "C" +{ +#include "postgres.h" +#include "utils/metrics_utils.h" +#include "utils/elog.h" +#include "executor/executor.h" +#include "commands/explain.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" + +#include "cdb/cdbvars.h" +#include "cdb/cdbexplain.h" + +#include "tcop/utility.h" +#include "pg_stat_statements_ya_parser.h" +} + +namespace +{ +std::string* get_user_name() +{ + const char *username = GetConfigOption("session_authorization", false, false); + return username ? new std::string(username) : nullptr; +} + +std::string* get_db_name() +{ + char *dbname = get_database_name(MyDatabaseId); + std::string* result = dbname ? new std::string(dbname) : nullptr; + pfree(dbname); + return result; +} + +std::string* get_rg_name() +{ + auto userId = GetUserId(); + if (!OidIsValid(userId)) + return nullptr; + auto groupId = GetResGroupIdForRole(userId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + pfree(rgname); + return new std::string(rgname); +} + +std::string* get_app_name() +{ + return application_name ? 
new std::string(application_name) : nullptr; +} + +int get_cur_slice_id(QueryDesc *desc) +{ + if (!desc->estate) + { + return 0; + } + return LocallyExecutingSliceIndex(desc->estate); +} + +google::protobuf::Timestamp current_ts() +{ + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *queryDesc) +{ + header->set_pid(MyProcPid); + auto gpId = header->mutable_gpidentity(); + gpId->set_dbid(GpIdentity.dbid); + gpId->set_segindex(GpIdentity.segindex); + gpId->set_gp_role(static_cast(Gp_role)); + gpId->set_gp_session_role(static_cast(Gp_session_role)); + header->set_ssid(gp_session_id); + header->set_ccnt(gp_command_count); + header->set_sliceid(get_cur_slice_id(queryDesc)); + int32 tmid = 0; + gpmon_gettmid(&tmid); + header->set_tmid(tmid); +} + +void set_session_info(yagpcc::SessionInfo *si, QueryDesc *queryDesc) +{ + if (queryDesc->sourceText) + *si->mutable_sql() = std::string(queryDesc->sourceText); + si->set_allocated_applicationname(get_app_name()); + si->set_allocated_databasename(get_db_name()); + si->set_allocated_resourcegroup(get_rg_name()); + si->set_allocated_username(get_user_name()); +} + +ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) +{ + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, queryDesc); + ExplainEndOutput(&es); + return es; +} + +void set_plan_text(std::string *plan_text, QueryDesc *queryDesc) +{ + auto es = get_explain_state(queryDesc, true); + *plan_text = std::string(es.str->data, es.str->len); +} + +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *queryDesc) +{ + set_session_info(qi->mutable_sessioninfo(), queryDesc); + if (queryDesc->sourceText) + *qi->mutable_querytext() = 
queryDesc->sourceText; + if (queryDesc->plannedstmt) + { + qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plantext(), queryDesc); + qi->set_plan_id(get_plan_id(queryDesc)); + qi->set_query_id(queryDesc->plannedstmt->queryId); + } +} +} // namespace + +void EventSender::ExecutorStart(QueryDesc *queryDesc, int /* eflags*/) +{ + elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + *req.mutable_datetime() = current_ts(); + set_header(req.mutable_header(), queryDesc); + set_query_info(req.mutable_query_info(), queryDesc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) + { + elog(WARNING, "Query %s start reporting failed with an error %s", + queryDesc->sourceText, result.error_text().c_str()); + } + else + { + elog(DEBUG1, "Query %s start successful", queryDesc->sourceText); + } +} + +void EventSender::ExecutorFinish(QueryDesc *queryDesc) +{ + elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + *req.mutable_datetime() = current_ts(); + set_header(req.mutable_header(), queryDesc); + set_query_info(req.mutable_query_info(), queryDesc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) + { + elog(WARNING, "Query %s finish reporting failed with an error %s", + queryDesc->sourceText, result.error_text().c_str()); + } + else + { + elog(DEBUG1, "Query %s finish successful", queryDesc->sourceText); + } +} + +EventSender *EventSender::instance() +{ + static EventSender sender; + return &sender; +} + +EventSender::EventSender() +{ + connector = std::make_unique(); +} \ No 
newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h new file mode 100644 index 00000000000..70868f6c757 --- /dev/null +++ b/src/EventSender.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +class GrpcConnector; + +struct QueryDesc; + +class EventSender +{ +public: + void ExecutorStart(QueryDesc *queryDesc, int eflags); + void ExecutorFinish(QueryDesc *queryDesc); + static EventSender *instance(); + +private: + EventSender(); + std::unique_ptr connector; +}; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp new file mode 100644 index 00000000000..7329f392010 --- /dev/null +++ b/src/GrpcConnector.cpp @@ -0,0 +1,55 @@ +#include "GrpcConnector.h" +#include "yagpcc_set_service.grpc.pb.h" + +#include +#include +#include + +class GrpcConnector::Impl +{ +public: + Impl() + { + GOOGLE_PROTOBUF_VERIFY_VERSION; + this->stub = yagpcc::SetQueryInfo::NewStub(grpc::CreateChannel( + SOCKET_FILE, grpc::InsecureChannelCredentials())); + } + + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) + { + yagpcc::MetricResponse response; + grpc::ClientContext context; + auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(50); + context.set_deadline(deadline); + + grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); + + if (!status.ok()) + { + response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + } + + return response; + } + +private: + const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; + const std::string TCP_ADDRESS = "127.0.0.1:1432"; + std::unique_ptr stub; +}; + +GrpcConnector::GrpcConnector() +{ + impl = new Impl(); +} + +GrpcConnector::~GrpcConnector() +{ + delete impl; +} + +yagpcc::MetricResponse GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) +{ + return impl->set_metric_query(req); +} \ No newline at end of file diff --git 
a/src/GrpcConnector.h b/src/GrpcConnector.h new file mode 100644 index 00000000000..dc0f21706a3 --- /dev/null +++ b/src/GrpcConnector.h @@ -0,0 +1,15 @@ +#pragma once + +#include "yagpcc_set_service.pb.h" + +class GrpcConnector +{ +public: + GrpcConnector(); + ~GrpcConnector(); + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); + +private: + class Impl; + Impl *impl; +}; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp new file mode 100644 index 00000000000..9f3200c006f --- /dev/null +++ b/src/hook_wrappers.cpp @@ -0,0 +1,67 @@ +#include "hook_wrappers.h" +#include "EventSender.h" + +extern "C" +{ +#include "postgres.h" +#include "utils/metrics_utils.h" +#include "utils/elog.h" +#include "executor/executor.h" + +#include "cdb/cdbvars.h" +#include "cdb/cdbexplain.h" + +#include "tcop/utility.h" +} + +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" + +static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; + +static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); +static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); + +#define REPLACE_HOOK(hookName) \ + previous_##hookName = hookName; \ + hookName = ya_##hookName; + +void hooks_init() +{ + REPLACE_HOOK(ExecutorStart_hook); + REPLACE_HOOK(ExecutorFinish_hook); + stat_statements_parser_init(); +} + +void hooks_deinit() +{ + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorFinish_hook = ExecutorFinish_hook; + stat_statements_parser_deinit(); +} + +#define CREATE_HOOK_WRAPPER(hookName, ...) 
\ + PG_TRY(); \ + { \ + EventSender::instance()->hookName(__VA_ARGS__); \ + } \ + PG_CATCH(); \ + { \ + ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + if (previous_##hookName##_hook) \ + (*previous_##hookName##_hook)(__VA_ARGS__); \ + else \ + standard_##hookName(__VA_ARGS__); + +void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) +{ + CREATE_HOOK_WRAPPER(ExecutorStart, queryDesc, eflags); +} + +void ya_ExecutorFinish_hook(QueryDesc *queryDesc) +{ + CREATE_HOOK_WRAPPER(ExecutorFinish, queryDesc); +} \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h new file mode 100644 index 00000000000..815fcb7cd51 --- /dev/null +++ b/src/hook_wrappers.h @@ -0,0 +1,12 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +extern void hooks_init(); +extern void hooks_deinit(); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/stat_statements_parser/README.MD b/src/stat_statements_parser/README.MD new file mode 100644 index 00000000000..291e31a3099 --- /dev/null +++ b/src/stat_statements_parser/README.MD @@ -0,0 +1 @@ +This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. 
diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c new file mode 100644 index 00000000000..f14742337bd --- /dev/null +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -0,0 +1,771 @@ +#include "postgres.h" + +#include +#include + +#include "access/hash.h" +#include "executor/instrument.h" +#include "executor/execdesc.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "parser/scanner.h" +#include "parser/gram.h" +#include "pgstat.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/spin.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/memutils.h" + +#include "pg_stat_statements_ya_parser.h" + +static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL; + +#define JUMBLE_SIZE 1024 /* query serialization buffer size */ + +/* + * Struct for tracking locations/lengths of constants during normalization + */ +typedef struct pgssLocationLen +{ + int location; /* start offset in query text */ + int length; /* length in bytes, or -1 to ignore */ +} pgssLocationLen; + +/* + * Working state for computing a query jumble and producing a normalized + * query string + */ +typedef struct pgssJumbleState +{ + /* Jumble of current query tree */ + unsigned char *jumble; + + /* Number of bytes used in jumble[] */ + Size jumble_len; + + /* Array of locations of constants that should be removed */ + pgssLocationLen *clocations; + + /* Allocated length of clocations array */ + int clocations_buf_size; + + /* Current number of valid entries in clocations array */ + int clocations_count; + + /* highest Param id we've seen, in order to start normalization correctly */ + int highest_extern_param_id; +} pgssJumbleState; + +static void AppendJumble(pgssJumbleState *jstate, + const unsigned char *item, Size size); +static void 
JumbleQuery(pgssJumbleState *jstate, Query *query); +static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); +static void JumbleExpr(pgssJumbleState *jstate, Node *node); +static void RecordConstLocation(pgssJumbleState *jstate, int location); + +static StringInfo gen_normplan(const char *execution_plan); + +static bool need_replace(int token); + +void pgss_post_parse_analyze(ParseState *pstate, Query *query); + +void stat_statements_parser_init() +{ + prev_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = pgss_post_parse_analyze; +} + +void stat_statements_parser_deinit() +{ + post_parse_analyze_hook = prev_post_parse_analyze_hook; +} + +/* + * AppendJumble: Append a value that is substantive in a given query to + * the current jumble. + */ +static void +AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size) +{ + unsigned char *jumble = jstate->jumble; + Size jumble_len = jstate->jumble_len; + + /* + * Whenever the jumble buffer is full, we hash the current contents and + * reset the buffer to contain just that hash value, thus relying on the + * hash to summarize everything so far. + */ + while (size > 0) + { + Size part_size; + + if (jumble_len >= JUMBLE_SIZE) + { + uint32 start_hash = hash_any(jumble, JUMBLE_SIZE); + + memcpy(jumble, &start_hash, sizeof(start_hash)); + jumble_len = sizeof(start_hash); + } + part_size = Min(size, JUMBLE_SIZE - jumble_len); + memcpy(jumble + jumble_len, item, part_size); + jumble_len += part_size; + item += part_size; + size -= part_size; + } + jstate->jumble_len = jumble_len; +} + +/* + * Wrappers around AppendJumble to encapsulate details of serialization + * of individual local variable elements. 
+ */ +#define APP_JUMB(item) \ + AppendJumble(jstate, (const unsigned char *)&(item), sizeof(item)) +#define APP_JUMB_STRING(str) \ + AppendJumble(jstate, (const unsigned char *)(str), strlen(str) + 1) + +/* + * JumbleQuery: Selectively serialize the query tree, appending significant + * data to the "query jumble" while ignoring nonsignificant data. + * + * Rule of thumb for what to include is that we should ignore anything not + * semantically significant (such as alias names) as well as anything that can + * be deduced from child nodes (else we'd just be double-hashing that piece + * of information). + */ +void JumbleQuery(pgssJumbleState *jstate, Query *query) +{ + Assert(IsA(query, Query)); + Assert(query->utilityStmt == NULL); + + APP_JUMB(query->commandType); + /* resultRelation is usually predictable from commandType */ + JumbleExpr(jstate, (Node *)query->cteList); + JumbleRangeTable(jstate, query->rtable); + JumbleExpr(jstate, (Node *)query->jointree); + JumbleExpr(jstate, (Node *)query->targetList); + JumbleExpr(jstate, (Node *)query->returningList); + JumbleExpr(jstate, (Node *)query->groupClause); + JumbleExpr(jstate, query->havingQual); + JumbleExpr(jstate, (Node *)query->windowClause); + JumbleExpr(jstate, (Node *)query->distinctClause); + JumbleExpr(jstate, (Node *)query->sortClause); + JumbleExpr(jstate, query->limitOffset); + JumbleExpr(jstate, query->limitCount); + /* we ignore rowMarks */ + JumbleExpr(jstate, query->setOperations); +} + +/* + * Jumble a range table + */ +static void +JumbleRangeTable(pgssJumbleState *jstate, List *rtable) +{ + ListCell *lc; + + foreach (lc, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *)lfirst(lc); + + Assert(IsA(rte, RangeTblEntry)); + APP_JUMB(rte->rtekind); + switch (rte->rtekind) + { + case RTE_RELATION: + APP_JUMB(rte->relid); + break; + case RTE_SUBQUERY: + JumbleQuery(jstate, rte->subquery); + break; + case RTE_JOIN: + APP_JUMB(rte->jointype); + break; + case RTE_FUNCTION: + JumbleExpr(jstate, (Node 
*)rte->functions); + break; + case RTE_VALUES: + JumbleExpr(jstate, (Node *)rte->values_lists); + break; + case RTE_CTE: + + /* + * Depending on the CTE name here isn't ideal, but it's the + * only info we have to identify the referenced WITH item. + */ + APP_JUMB_STRING(rte->ctename); + APP_JUMB(rte->ctelevelsup); + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); + break; + } + } +} + +/* + * Jumble an expression tree + * + * In general this function should handle all the same node types that + * expression_tree_walker() does, and therefore it's coded to be as parallel + * to that function as possible. However, since we are only invoked on + * queries immediately post-parse-analysis, we need not handle node types + * that only appear in planning. + * + * Note: the reason we don't simply use expression_tree_walker() is that the + * point of that function is to support tree walkers that don't care about + * most tree node types, but here we care about all types. We should complain + * about any unrecognized node type. + */ +static void +JumbleExpr(pgssJumbleState *jstate, Node *node) +{ + ListCell *temp; + + if (node == NULL) + return; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. 
+ */ + APP_JUMB(node->type); + + switch (nodeTag(node)) + { + case T_Var: + { + Var *var = (Var *)node; + + APP_JUMB(var->varno); + APP_JUMB(var->varattno); + APP_JUMB(var->varlevelsup); + } + break; + case T_Const: + { + Const *c = (Const *)node; + + /* We jumble only the constant's type, not its value */ + APP_JUMB(c->consttype); + /* Also, record its parse location for query normalization */ + RecordConstLocation(jstate, c->location); + } + break; + case T_Param: + { + Param *p = (Param *)node; + + APP_JUMB(p->paramkind); + APP_JUMB(p->paramid); + APP_JUMB(p->paramtype); + } + break; + case T_Aggref: + { + Aggref *expr = (Aggref *)node; + + APP_JUMB(expr->aggfnoid); + JumbleExpr(jstate, (Node *)expr->aggdirectargs); + JumbleExpr(jstate, (Node *)expr->args); + JumbleExpr(jstate, (Node *)expr->aggorder); + JumbleExpr(jstate, (Node *)expr->aggdistinct); + JumbleExpr(jstate, (Node *)expr->aggfilter); + } + break; + case T_WindowFunc: + { + WindowFunc *expr = (WindowFunc *)node; + + APP_JUMB(expr->winfnoid); + APP_JUMB(expr->winref); + JumbleExpr(jstate, (Node *)expr->args); + JumbleExpr(jstate, (Node *)expr->aggfilter); + } + break; + case T_ArrayRef: + { + ArrayRef *aref = (ArrayRef *)node; + + JumbleExpr(jstate, (Node *)aref->refupperindexpr); + JumbleExpr(jstate, (Node *)aref->reflowerindexpr); + JumbleExpr(jstate, (Node *)aref->refexpr); + JumbleExpr(jstate, (Node *)aref->refassgnexpr); + } + break; + case T_FuncExpr: + { + FuncExpr *expr = (FuncExpr *)node; + + APP_JUMB(expr->funcid); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_NamedArgExpr: + { + NamedArgExpr *nae = (NamedArgExpr *)node; + + APP_JUMB(nae->argnumber); + JumbleExpr(jstate, (Node *)nae->arg); + } + break; + case T_OpExpr: + case T_DistinctExpr: /* struct-equivalent to OpExpr */ + case T_NullIfExpr: /* struct-equivalent to OpExpr */ + { + OpExpr *expr = (OpExpr *)node; + + APP_JUMB(expr->opno); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_ScalarArrayOpExpr: + 
{ + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *)node; + + APP_JUMB(expr->opno); + APP_JUMB(expr->useOr); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *)node; + + APP_JUMB(expr->boolop); + JumbleExpr(jstate, (Node *)expr->args); + } + break; + case T_SubLink: + { + SubLink *sublink = (SubLink *)node; + + APP_JUMB(sublink->subLinkType); + JumbleExpr(jstate, (Node *)sublink->testexpr); + JumbleQuery(jstate, (Query *)sublink->subselect); + } + break; + case T_FieldSelect: + { + FieldSelect *fs = (FieldSelect *)node; + + APP_JUMB(fs->fieldnum); + JumbleExpr(jstate, (Node *)fs->arg); + } + break; + case T_FieldStore: + { + FieldStore *fstore = (FieldStore *)node; + + JumbleExpr(jstate, (Node *)fstore->arg); + JumbleExpr(jstate, (Node *)fstore->newvals); + } + break; + case T_RelabelType: + { + RelabelType *rt = (RelabelType *)node; + + APP_JUMB(rt->resulttype); + JumbleExpr(jstate, (Node *)rt->arg); + } + break; + case T_CoerceViaIO: + { + CoerceViaIO *cio = (CoerceViaIO *)node; + + APP_JUMB(cio->resulttype); + JumbleExpr(jstate, (Node *)cio->arg); + } + break; + case T_ArrayCoerceExpr: + { + ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *)node; + + APP_JUMB(acexpr->resulttype); + JumbleExpr(jstate, (Node *)acexpr->arg); + } + break; + case T_ConvertRowtypeExpr: + { + ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *)node; + + APP_JUMB(crexpr->resulttype); + JumbleExpr(jstate, (Node *)crexpr->arg); + } + break; + case T_CollateExpr: + { + CollateExpr *ce = (CollateExpr *)node; + + APP_JUMB(ce->collOid); + JumbleExpr(jstate, (Node *)ce->arg); + } + break; + case T_CaseExpr: + { + CaseExpr *caseexpr = (CaseExpr *)node; + + JumbleExpr(jstate, (Node *)caseexpr->arg); + foreach (temp, caseexpr->args) + { + CaseWhen *when = (CaseWhen *)lfirst(temp); + + Assert(IsA(when, CaseWhen)); + JumbleExpr(jstate, (Node *)when->expr); + JumbleExpr(jstate, (Node *)when->result); + } + JumbleExpr(jstate, (Node 
*)caseexpr->defresult); + } + break; + case T_CaseTestExpr: + { + CaseTestExpr *ct = (CaseTestExpr *)node; + + APP_JUMB(ct->typeId); + } + break; + case T_ArrayExpr: + JumbleExpr(jstate, (Node *)((ArrayExpr *)node)->elements); + break; + case T_RowExpr: + JumbleExpr(jstate, (Node *)((RowExpr *)node)->args); + break; + case T_RowCompareExpr: + { + RowCompareExpr *rcexpr = (RowCompareExpr *)node; + + APP_JUMB(rcexpr->rctype); + JumbleExpr(jstate, (Node *)rcexpr->largs); + JumbleExpr(jstate, (Node *)rcexpr->rargs); + } + break; + case T_CoalesceExpr: + JumbleExpr(jstate, (Node *)((CoalesceExpr *)node)->args); + break; + case T_MinMaxExpr: + { + MinMaxExpr *mmexpr = (MinMaxExpr *)node; + + APP_JUMB(mmexpr->op); + JumbleExpr(jstate, (Node *)mmexpr->args); + } + break; + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *)node; + + APP_JUMB(xexpr->op); + JumbleExpr(jstate, (Node *)xexpr->named_args); + JumbleExpr(jstate, (Node *)xexpr->args); + } + break; + case T_NullTest: + { + NullTest *nt = (NullTest *)node; + + APP_JUMB(nt->nulltesttype); + JumbleExpr(jstate, (Node *)nt->arg); + } + break; + case T_BooleanTest: + { + BooleanTest *bt = (BooleanTest *)node; + + APP_JUMB(bt->booltesttype); + JumbleExpr(jstate, (Node *)bt->arg); + } + break; + case T_CoerceToDomain: + { + CoerceToDomain *cd = (CoerceToDomain *)node; + + APP_JUMB(cd->resulttype); + JumbleExpr(jstate, (Node *)cd->arg); + } + break; + case T_CoerceToDomainValue: + { + CoerceToDomainValue *cdv = (CoerceToDomainValue *)node; + + APP_JUMB(cdv->typeId); + } + break; + case T_SetToDefault: + { + SetToDefault *sd = (SetToDefault *)node; + + APP_JUMB(sd->typeId); + } + break; + case T_CurrentOfExpr: + { + CurrentOfExpr *ce = (CurrentOfExpr *)node; + + APP_JUMB(ce->cvarno); + if (ce->cursor_name) + APP_JUMB_STRING(ce->cursor_name); + APP_JUMB(ce->cursor_param); + } + break; + case T_TargetEntry: + { + TargetEntry *tle = (TargetEntry *)node; + + APP_JUMB(tle->resno); + APP_JUMB(tle->ressortgroupref); + 
JumbleExpr(jstate, (Node *)tle->expr); + } + break; + case T_RangeTblRef: + { + RangeTblRef *rtr = (RangeTblRef *)node; + + APP_JUMB(rtr->rtindex); + } + break; + case T_JoinExpr: + { + JoinExpr *join = (JoinExpr *)node; + + APP_JUMB(join->jointype); + APP_JUMB(join->isNatural); + APP_JUMB(join->rtindex); + JumbleExpr(jstate, join->larg); + JumbleExpr(jstate, join->rarg); + JumbleExpr(jstate, join->quals); + } + break; + case T_FromExpr: + { + FromExpr *from = (FromExpr *)node; + + JumbleExpr(jstate, (Node *)from->fromlist); + JumbleExpr(jstate, from->quals); + } + break; + case T_List: + foreach (temp, (List *)node) + { + JumbleExpr(jstate, (Node *)lfirst(temp)); + } + break; + case T_SortGroupClause: + { + SortGroupClause *sgc = (SortGroupClause *)node; + + APP_JUMB(sgc->tleSortGroupRef); + APP_JUMB(sgc->eqop); + APP_JUMB(sgc->sortop); + APP_JUMB(sgc->nulls_first); + } + break; + case T_WindowClause: + { + WindowClause *wc = (WindowClause *)node; + + APP_JUMB(wc->winref); + APP_JUMB(wc->frameOptions); + JumbleExpr(jstate, (Node *)wc->partitionClause); + JumbleExpr(jstate, (Node *)wc->orderClause); + JumbleExpr(jstate, wc->startOffset); + JumbleExpr(jstate, wc->endOffset); + } + break; + case T_CommonTableExpr: + { + CommonTableExpr *cte = (CommonTableExpr *)node; + + /* we store the string name because RTE_CTE RTEs need it */ + APP_JUMB_STRING(cte->ctename); + JumbleQuery(jstate, (Query *)cte->ctequery); + } + break; + case T_SetOperationStmt: + { + SetOperationStmt *setop = (SetOperationStmt *)node; + + APP_JUMB(setop->op); + APP_JUMB(setop->all); + JumbleExpr(jstate, setop->larg); + JumbleExpr(jstate, setop->rarg); + } + break; + case T_RangeTblFunction: + { + RangeTblFunction *rtfunc = (RangeTblFunction *)node; + + JumbleExpr(jstate, rtfunc->funcexpr); + } + break; + default: + /* Only a warning, since we can stumble along anyway */ + elog(WARNING, "unrecognized node type: %d", + (int)nodeTag(node)); + break; + } +} + +/* + * Record location of constant within 
query string of query tree + * that is currently being walked. + */ +static void +RecordConstLocation(pgssJumbleState *jstate, int location) +{ + /* -1 indicates unknown or undefined location */ + if (location >= 0) + { + /* array is full: double its capacity before appending */ + if (jstate->clocations_count >= jstate->clocations_buf_size) + { + jstate->clocations_buf_size *= 2; + jstate->clocations = (pgssLocationLen *) + repalloc(jstate->clocations, + jstate->clocations_buf_size * + sizeof(pgssLocationLen)); + } + jstate->clocations[jstate->clocations_count].location = location; + /* initialize lengths to -1 to simplify fill_in_constant_lengths */ + jstate->clocations[jstate->clocations_count].length = -1; + jstate->clocations_count++; + } +} + +/* check if token is a constant (FCONST, ICONST, SCONST, BCONST or XCONST) that should be replaced by a substitute variable */ +static bool +need_replace(int token) +{ + return (token == FCONST) || (token == ICONST) || (token == SCONST) || (token == BCONST) || (token == XCONST); +} + +/* + * gen_normplan - parse execution plan using flex and replace all CONST with + * substitute variables.
+ */ +static StringInfo +gen_normplan(const char *execution_plan) +{ + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int tok; + int bind_prefix = 1; /* number of the next $n placeholder */ + YYLTYPE last_yylloc = 0; /* start offset of the previous token */ + int last_tok = 0; /* previous token; 0 until one has been read */ + StringInfo plan_out = makeStringInfo(); + + yyscanner = scanner_init(execution_plan, + &yyextra, +#if PG_VERSION_NUM >= 120000 + &ScanKeywords, + ScanKeywordTokens +#else + ScanKeywords, + NumScanKeywords +#endif + ); + + for (;;) + { + /* get the next lexeme */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* + * yylloc now holds the start of the current token, so the previous + * token occupies [last_yylloc, yylloc) and can be processed. + */ + if (need_replace(last_tok)) + { + /* substitute variable instead of CONST */ + appendStringInfo(plan_out, "$%i", bind_prefix++); + } + else + { + /* + * Do not change - copy the span as-is, straight from the input. + */ + appendBinaryStringInfo(plan_out, execution_plan + last_yylloc, yylloc - last_yylloc); + } + /* check if further parsing not needed */ + if (tok == 0) + break; + last_tok = tok; + last_yylloc = yylloc; + } + + scanner_finish(yyscanner); + + return plan_out; +} + +/* Hash of the normalized source text; 0 when there is no text. */ +uint64_t get_plan_id(QueryDesc *queryDesc) +{ + StringInfo normalized; + uint64_t plan_id; + + if (!queryDesc->sourceText) + return 0; + normalized = gen_normplan(queryDesc->sourceText); + plan_id = hash_any((unsigned char *)normalized->data, normalized->len); + /* the normalized text is only needed for hashing; release it */ + pfree(normalized->data); + pfree(normalized); + return plan_id; +} + +/* + * Post-parse-analysis hook: mark query with a queryId + */ +void pgss_post_parse_analyze(ParseState *pstate, Query *query) +{ + pgssJumbleState jstate; + + if (prev_post_parse_analyze_hook) + prev_post_parse_analyze_hook(pstate, query); + + /* Assert we didn't do this already */ + Assert(query->queryId == 0); + + /* + * Utility statements get queryId zero.
We do this even in cases where + * the statement contains an optimizable statement for which a queryId + * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, + * runtime control will first go through ProcessUtility and then the + * executor, and we don't want the executor hooks to do anything, since we + * are already measuring the statement's costs at the utility level. + */ + if (query->utilityStmt) + { + query->queryId = 0; + return; + } + + /* Set up workspace for query jumbling */ + jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (pgssLocationLen *) + palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations_count = 0; + + /* Compute query ID and mark the Query node with it */ + JumbleQuery(&jstate, query); + query->queryId = hash_any(jstate.jumble, jstate.jumble_len); + + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case. 
+ */ + if (query->queryId == 0) + query->queryId = 1; +} \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h new file mode 100644 index 00000000000..274f96aebaf --- /dev/null +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -0,0 +1,15 @@ +#pragma once + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern void stat_statements_parser_init(void); +extern void stat_statements_parser_deinit(void); + +#ifdef __cplusplus +} +#endif + +uint64_t get_plan_id(QueryDesc *queryDesc); \ No newline at end of file diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c new file mode 100644 index 00000000000..69475ea5079 --- /dev/null +++ b/src/yagp_hooks_collector.c @@ -0,0 +1,22 @@ +#include "postgres.h" +#include "cdb/cdbvars.h" +#include "fmgr.h" + +#include "hook_wrappers.h" + +PG_MODULE_MAGIC; + +void _PG_init(void); +void _PG_fini(void); + +void _PG_init(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + hooks_init(); + } +} + +void _PG_fini(void) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + hooks_deinit(); + } +} diff --git a/yagp-hooks-collector.control b/yagp-hooks-collector.control new file mode 100644 index 00000000000..82c189a88fc --- /dev/null +++ b/yagp-hooks-collector.control @@ -0,0 +1,5 @@ +# yagp-hooks-collector extension +comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' +default_version = '1.0' +module_pathname = '$libdir/yagp-hooks-collector' +superuser = true From 8f3f0c6d00eae72a59ad552f04b22c72d95a49b5 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 Mar 2023 17:07:23 +0300 Subject: [PATCH 02/49] [yagp_hooks_collector] Fix segfault in plan text generator Guard against NULL plan state when generating EXPLAIN output. 
--- src/EventSender.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index bb4765adeb1..b1815a22bf8 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -130,10 +130,13 @@ void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *queryDesc) qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plantext(), queryDesc); - qi->set_plan_id(get_plan_id(queryDesc)); - qi->set_query_id(queryDesc->plannedstmt->queryId); + if (queryDesc->planstate) + { + set_plan_text(qi->mutable_plantext(), queryDesc); + qi->set_plan_id(get_plan_id(queryDesc)); + } } + qi->set_query_id(queryDesc->plannedstmt->queryId); } } // namespace From 1bf1d760e8686dc18f7bb098f32581e7b930b5ce Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 29 Mar 2023 16:10:20 +0300 Subject: [PATCH 03/49] [yagp_hooks_collector] Add executor instrumentation, /proc stats, and normalized texts Collect spill info (file count, bytes written). Generate normalized query and plan texts using a pg_stat_statements-derived parser. Collect buffer I/O counters, tuple counts, timing, and /proc/self CPU/memory/IO statistics. 
--- Makefile | 1 - protos/yagpcc_metrics.proto | 42 +-- protos/yagpcc_set_service.proto | 20 +- src/EventSender.cpp | 164 +++++++----- src/EventSender.h | 4 +- src/ProcStats.cpp | 119 +++++++++ src/ProcStats.h | 7 + src/SpillInfoWrapper.c | 21 ++ src/hook_wrappers.cpp | 22 +- .../pg_stat_statements_ya_parser.c | 248 +++++++++++++++++- .../pg_stat_statements_ya_parser.h | 3 +- 11 files changed, 519 insertions(+), 132 deletions(-) create mode 100644 src/ProcStats.cpp create mode 100644 src/ProcStats.h create mode 100644 src/SpillInfoWrapper.c diff --git a/Makefile b/Makefile index 15c5dabb70e..0a21cf136ff 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,6 @@ # AIX make defaults to building *every* target of the first rule. Start with # a single-target, empty rule to make the other targets non-default. -all: all check install installdirs installcheck installcheck-parallel uninstall clean distclean maintainer-clean dist distcheck world check-world install-world installcheck-world installcheck-resgroup installcheck-resgroup-v2: @if [ ! -f GNUmakefile ] ; then \ diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index b7e255484c7..f00f329a208 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -27,9 +27,12 @@ message QueryInfo { PlanGenerator generator = 1; uint64 query_id = 2; uint64 plan_id = 3; - string queryText = 4; - string planText = 5; - SessionInfo sessionInfo = 6; + string query_text = 4; + string plan_text = 5; + string temlate_query_text = 6; + string temlate_plan_text = 7; + string userName = 8; + string databaseName = 9; } enum PlanGenerator @@ -45,40 +48,17 @@ message GPMetrics { SpillInfo spill = 3; } -message QueryInfoHeader { - int32 pid = 1; - GpId gpIdentity = 2; - - int32 tmid = 3; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ - int32 ssid = 4; /* The session id as shown by gp_session_id. 
All records associated with the query will have the same ssid */ - int32 ccnt = 5; /* The command number within this session as shown by gp_command_count. All records associated with the query will have the same ccnt */ - int32 sliceid = 6; /* slice identificator, 0 means general info for the whole query */ +message QueryKey { + int32 tmid = 1; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 2; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ + int32 ccnt = 3; /* The command number within this session as shown by gp_command_count. All records associated with the query will have the same ccnt */ } -message GpId { +message SegmentKey { int32 dbid = 1; /* the dbid of this database */ int32 segindex = 2; /* content indicator: -1 for entry database, * 0, ..., n-1 for segment database * * a primary and its mirror have the same segIndex */ - GpRole gp_role = 3; - GpRole gp_session_role = 4; -} - -enum GpRole -{ - GP_ROLE_UNSPECIFIED = 0; - GP_ROLE_UTILITY = 1; /* Operating as a simple database engine */ - GP_ROLE_DISPATCH = 2; /* Operating as the parallel query dispatcher */ - GP_ROLE_EXECUTE = 3; /* Operating as a parallel query executor */ - GP_ROLE_UNDEFINED = 4; /* Should never see this role in use */ -} - -message SessionInfo { - string sql = 1; - string userName = 2; - string databaseName = 3; - string resourceGroup = 4; - string applicationName = 5; } message SystemStat { diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 0bef72891ee..97c5691a6f5 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -27,19 +27,19 @@ enum MetricResponseStatusCode { } message SetQueryReq { - QueryStatus query_status = 1; + QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; - - QueryInfoHeader header = 3; - QueryInfo query_info = 4; - GPMetrics query_metrics = 5; - 
repeated MetricPlan plan_tree = 6; + QueryKey query_key = 3; + QueryInfo query_info = 4; + GPMetrics query_metrics = 5; + repeated MetricPlan plan_tree = 6; } message SetPlanNodeReq { - PlanNodeStatus node_status = 1; + PlanNodeStatus node_status = 1; google.protobuf.Timestamp datetime = 2; - QueryInfoHeader header = 3; - GPMetrics node_metrics = 4; - MetricPlan plan_node = 5; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + GPMetrics node_metrics = 5; + MetricPlan plan_node = 6; } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b1815a22bf8..d8145b811a4 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,11 +1,13 @@ #include "EventSender.h" #include "GrpcConnector.h" +#include "ProcStats.h" #include "protos/yagpcc_set_service.pb.h" #include extern "C" { #include "postgres.h" +#include "access/hash.h" #include "utils/metrics_utils.h" #include "utils/elog.h" #include "executor/executor.h" @@ -18,10 +20,13 @@ extern "C" #include "tcop/utility.h" #include "pg_stat_statements_ya_parser.h" + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); } namespace { + std::string* get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); @@ -36,26 +41,6 @@ std::string* get_db_name() return result; } -std::string* get_rg_name() -{ - auto userId = GetUserId(); - if (!OidIsValid(userId)) - return nullptr; - auto groupId = GetResGroupIdForRole(userId); - if (!OidIsValid(groupId)) - return nullptr; - char *rgname = GetResGroupNameForId(groupId); - if (rgname == nullptr) - return nullptr; - pfree(rgname); - return new std::string(rgname); -} - -std::string* get_app_name() -{ - return application_name ? 
new std::string(application_name) : nullptr; -} - int get_cur_slice_id(QueryDesc *desc) { if (!desc->estate) @@ -75,33 +60,22 @@ google::protobuf::Timestamp current_ts() return current_ts; } -void set_header(yagpcc::QueryInfoHeader *header, QueryDesc *queryDesc) +void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { - header->set_pid(MyProcPid); - auto gpId = header->mutable_gpidentity(); - gpId->set_dbid(GpIdentity.dbid); - gpId->set_segindex(GpIdentity.segindex); - gpId->set_gp_role(static_cast(Gp_role)); - gpId->set_gp_session_role(static_cast(Gp_session_role)); - header->set_ssid(gp_session_id); - header->set_ccnt(gp_command_count); - header->set_sliceid(get_cur_slice_id(queryDesc)); + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); int32 tmid = 0; gpmon_gettmid(&tmid); - header->set_tmid(tmid); + key->set_tmid(tmid); } -void set_session_info(yagpcc::SessionInfo *si, QueryDesc *queryDesc) +void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { - if (queryDesc->sourceText) - *si->mutable_sql() = std::string(queryDesc->sourceText); - si->set_allocated_applicationname(get_app_name()); - si->set_allocated_databasename(get_db_name()); - si->set_allocated_resourcegroup(get_rg_name()); - si->set_allocated_username(get_user_name()); + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); } -ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { ExplainState es; ExplainInitState(&es); @@ -109,74 +83,130 @@ ExplainState get_explain_state(QueryDesc *queryDesc, bool costs) es.verbose = true; es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); - ExplainPrintPlan(&es, queryDesc); + ExplainPrintPlan(&es, query_desc); ExplainEndOutput(&es); return es; } -void set_plan_text(std::string *plan_text, QueryDesc *queryDesc) +void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { - auto es = get_explain_state(queryDesc, 
true); + auto es = get_explain_state(query_desc, true); *plan_text = std::string(es.str->data, es.str->len); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *queryDesc) +void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - set_session_info(qi->mutable_sessioninfo(), queryDesc); - if (queryDesc->sourceText) - *qi->mutable_querytext() = queryDesc->sourceText; - if (queryDesc->plannedstmt) - { - qi->set_generator(queryDesc->plannedstmt->planGen == PLANGEN_OPTIMIZER + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - if (queryDesc->planstate) - { - set_plan_text(qi->mutable_plantext(), queryDesc); - qi->set_plan_id(get_plan_id(queryDesc)); - } + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + //TODO: free stringinfo? 
+} + +void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) +{ + *qi->mutable_query_text() = query_desc->sourceText; + char* norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_temlate_query_text() = std::string(norm_query); + pfree(norm_query); +} + +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) +{ + if (query_desc->sourceText) + set_query_text(qi, query_desc); + if (query_desc->plannedstmt) + { + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); } - qi->set_query_id(queryDesc->plannedstmt->queryId); + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) +{ + auto instrument = query_desc->planstate->instrument; + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); } + +void set_gp_metrics(yagpcc::GPMetrics *metrics, 
QueryDesc *query_desc) +{ + int32_t n_spill_files = 0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, gp_command_count, &n_spill_files, &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + if (query_desc->planstate->instrument) + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + fill_self_stats(metrics->mutable_systemstat()); +} + + } // namespace -void EventSender::ExecutorStart(QueryDesc *queryDesc, int /* eflags*/) +void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { - elog(DEBUG1, "Query %s start recording", queryDesc->sourceText); + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + + elog(DEBUG1, "Query %s start recording", query_desc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); *req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), queryDesc); - set_query_info(req.mutable_query_info(), queryDesc); + set_query_key(req.mutable_query_key(), query_desc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s start reporting failed with an error %s", - queryDesc->sourceText, result.error_text().c_str()); + query_desc->sourceText, result.error_text().c_str()); } else { - elog(DEBUG1, "Query %s start successful", queryDesc->sourceText); + elog(DEBUG1, "Query %s start successful", query_desc->sourceText); } } -void EventSender::ExecutorFinish(QueryDesc *queryDesc) +void EventSender::ExecutorFinish(QueryDesc *query_desc) { - elog(DEBUG1, "Query %s finish recording", queryDesc->sourceText); + elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); yagpcc::SetQueryReq req; req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); 
*req.mutable_datetime() = current_ts(); - set_header(req.mutable_header(), queryDesc); - set_query_info(req.mutable_query_info(), queryDesc); + set_query_key(req.mutable_query_key(), query_desc); + set_query_info(req.mutable_query_info(), query_desc); + set_gp_metrics(req.mutable_query_metrics(), query_desc); auto result = connector->set_metric_query(req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { elog(WARNING, "Query %s finish reporting failed with an error %s", - queryDesc->sourceText, result.error_text().c_str()); + query_desc->sourceText, result.error_text().c_str()); } else { - elog(DEBUG1, "Query %s finish successful", queryDesc->sourceText); + elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); } } diff --git a/src/EventSender.h b/src/EventSender.h index 70868f6c757..bd02455ca7e 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -9,8 +9,8 @@ struct QueryDesc; class EventSender { public: - void ExecutorStart(QueryDesc *queryDesc, int eflags); - void ExecutorFinish(QueryDesc *queryDesc); + void ExecutorStart(QueryDesc *query_desc, int eflags); + void ExecutorFinish(QueryDesc *query_desc); static EventSender *instance(); private: diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp new file mode 100644 index 00000000000..34c5d05719e --- /dev/null +++ b/src/ProcStats.cpp @@ -0,0 +1,119 @@ +#include "ProcStats.h" +#include "yagpcc_metrics.pb.h" +#include +#include +#include + +extern "C" +{ +#include "postgres.h" +#include "utils/elog.h" +} + +namespace { +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name); + +void fill_io_stats(yagpcc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); +} + +void 
fill_cpu_stats(yagpcc::SystemStat *stats) +{ + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int STARTTIME_ID = 21; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); + + double uptime; + { + std::ifstream proc_stat("/proc/uptime"); + proc_stat >> uptime; + } + + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + double start_time = 0; + for (int i = 0; i <= RSS_ID; ++i) + { + switch (i) + { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps); + break; + case STARTTIME_ID: + uint64_t starttime; + proc_stat >> starttime; + start_time = static_cast(starttime) / tps; + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; + } + stats->set_runningtimeseconds(uptime - start_time); + } +} + +void fill_status_stats(yagpcc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) + { + if (key == "VmPeak:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + if (measure != "kB") + elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); + } + else if (key == "VmSize:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") + elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); + } + } +} +} // namespace + +void fill_self_stats(yagpcc::SystemStat *stats) +{ + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); +} \ No newline at end of file diff --git a/src/ProcStats.h 
b/src/ProcStats.h new file mode 100644 index 00000000000..30a90a60519 --- /dev/null +++ b/src/ProcStats.h @@ -0,0 +1,7 @@ +#pragma once + +namespace yagpcc { +class SystemStat; +} + +void fill_self_stats(yagpcc::SystemStat *stats); \ No newline at end of file diff --git a/src/SpillInfoWrapper.c b/src/SpillInfoWrapper.c new file mode 100644 index 00000000000..c6ace0a693f --- /dev/null +++ b/src/SpillInfoWrapper.c @@ -0,0 +1,21 @@ +#include "postgres.h" +#include "utils/workfile_mgr.h" + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); + +void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes) +{ + int count = 0; + int i = 0; + workfile_set *workfiles = workfile_mgr_cache_entries_get_copy(&count); + workfile_set *wf_iter = workfiles; + for (i = 0; i < count; ++i, ++wf_iter) + { + if (wf_iter->active && wf_iter->session_id == ssid && wf_iter->command_count == ccid) + { + *file_count += wf_iter->num_files; + *total_bytes += wf_iter->total_bytes; + } + } + pfree(workfiles); +} \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 9f3200c006f..1dabb59ab3f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -19,8 +19,8 @@ extern "C" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; -static void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags); -static void ya_ExecutorFinish_hook(QueryDesc *queryDesc); +static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorFinish_hook(QueryDesc *query_desc); #define REPLACE_HOOK(hookName) \ previous_##hookName = hookName; \ @@ -56,12 +56,22 @@ void hooks_deinit() else \ standard_##hookName(__VA_ARGS__); -void ya_ExecutorStart_hook(QueryDesc *queryDesc, int eflags) +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - CREATE_HOOK_WRAPPER(ExecutorStart, queryDesc, eflags); + 
CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); + PG_TRY(); + { + EventSender::instance()->ExecutorStart(query_desc, eflags); + } + PG_CATCH(); + { + ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + PG_RE_THROW(); + } + PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *queryDesc) +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - CREATE_HOOK_WRAPPER(ExecutorFinish, queryDesc); + CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); } \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index f14742337bd..ae79e7dc40a 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -1,3 +1,6 @@ +// NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4 +// and from our own inhouse implementation of pg_stat_statements for managed PG + #include "postgres.h" #include @@ -67,14 +70,15 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query); static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); static void JumbleExpr(pgssJumbleState *jstate, Node *node); static void RecordConstLocation(pgssJumbleState *jstate, int location); - -static StringInfo gen_normplan(const char *execution_plan); - +static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); +static int comp_location(const void *a, const void *b); +StringInfo gen_normplan(const char *execution_plan); static bool need_replace(int token); - void pgss_post_parse_analyze(ParseState *pstate, Query *query); +static char *generate_normalized_query(pgssJumbleState *jstate, const char *query, + int *query_len_p, int encoding); -void stat_statements_parser_init() + void stat_statements_parser_init() { prev_post_parse_analyze_hook = post_parse_analyze_hook; post_parse_analyze_hook = pgss_post_parse_analyze; @@ -650,7 +654,7 @@ 
need_replace(int token) * gen_normplan - parse execution plan using flex and replace all CONST to * substitute variables. */ -static StringInfo +StringInfo gen_normplan(const char *execution_plan) { core_yyscan_t yyscanner; @@ -715,14 +719,6 @@ gen_normplan(const char *execution_plan) return plan_out; } -uint64_t get_plan_id(QueryDesc *queryDesc) -{ - if (!queryDesc->sourceText) - return 0; - StringInfo normalized = gen_normplan(queryDesc->sourceText); - return hash_any((unsigned char *)normalized->data, normalized->len); -} - /* * Post-parse-analysis hook: mark query with a queryId */ @@ -768,4 +764,228 @@ void pgss_post_parse_analyze(ParseState *pstate, Query *query) */ if (query->queryId == 0) query->queryId = 1; +} + +/* + * comp_location: comparator for qsorting pgssLocationLen structs by location + */ +static int +comp_location(const void *a, const void *b) +{ + int l = ((const pgssLocationLen *) a)->location; + int r = ((const pgssLocationLen *) b)->location; + + if (l < r) + return -1; + else if (l > r) + return +1; + else + return 0; +} + +/* + * Given a valid SQL string and an array of constant-location records, + * fill in the textual lengths of those constants. + * + * The constants may use any allowed constant syntax, such as float literals, + * bit-strings, single-quoted strings and dollar-quoted strings. This is + * accomplished by using the public API for the core scanner. + * + * It is the caller's job to ensure that the string is a valid SQL statement + * with constants at the indicated locations. Since in practice the string + * has already been parsed, and the locations that the caller provides will + * have originated from within the authoritative parser, this should not be + * a problem. + * + * Duplicate constant pointers are possible, and will have their lengths + * marked as '-1', so that they are later ignored. (Actually, we assume the + * lengths were initialized as -1 to start with, and don't change them here.) + * + * N.B. 
There is an assumption that a '-' character at a Const location begins + * a negative numeric constant. This precludes there ever being another + * reason for a constant to start with a '-'. + */ +static void +fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) +{ + pgssLocationLen *locs; + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int last_loc = -1; + int i; + + /* + * Sort the records by location so that we can process them in order while + * scanning the query text. + */ + if (jstate->clocations_count > 1) + qsort(jstate->clocations, jstate->clocations_count, + sizeof(pgssLocationLen), comp_location); + locs = jstate->clocations; + + /* initialize the flex scanner --- should match raw_parser() */ + yyscanner = scanner_init(query, + &yyextra, + ScanKeywords, + NumScanKeywords); + + /* Search for each constant, in sequence */ + for (i = 0; i < jstate->clocations_count; i++) + { + int loc = locs[i].location; + int tok; + + Assert(loc >= 0); + + if (loc <= last_loc) + continue; /* Duplicate constant, ignore */ + + /* Lex tokens until we find the desired constant */ + for (;;) + { + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* We should not hit end-of-string, but if we do, behave sanely */ + if (tok == 0) + break; /* out of inner for-loop */ + + /* + * We should find the token position exactly, but if we somehow + * run past it, work with that. + */ + if (yylloc >= loc) + { + if (query[loc] == '-') + { + /* + * It's a negative value - this is the one and only case + * where we replace more than a single token. + * + * Do not compensate for the core system's special-case + * adjustment of location to that of the leading '-' + * operator in the event of a negative constant. It is + * also useful for our purposes to start from the minus + * symbol. 
In this way, queries like "select * from foo + * where bar = 1" and "select * from foo where bar = -2" + * will have identical normalized query strings. + */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + if (tok == 0) + break; /* out of inner for-loop */ + } + + /* + * We now rely on the assumption that flex has placed a zero + * byte after the text of the current token in scanbuf. + */ + locs[i].length = strlen(yyextra.scanbuf + loc); + break; /* out of inner for-loop */ + } + } + + /* If we hit end-of-string, give up, leaving remaining lengths -1 */ + if (tok == 0) + break; + + last_loc = loc; + } + + scanner_finish(yyscanner); +} + +/* + * Generate a normalized version of the query string that will be used to + * represent all similar queries. + * + * Note that the normalized representation may well vary depending on + * just which "equivalent" query is used to create the hashtable entry. + * We assume this is OK. + * + * *query_len_p contains the input string length, and is updated with + * the result string length (which cannot be longer) on exit. + * + * Returns a palloc'd string. + */ +static char * +generate_normalized_query(pgssJumbleState *jstate, const char *query, + int *query_len_p, int encoding) +{ + char *norm_query; + int query_len = *query_len_p; + int i, + len_to_wrt, /* Length (in bytes) to write */ + quer_loc = 0, /* Source query byte location */ + n_quer_loc = 0, /* Normalized query byte location */ + last_off = 0, /* Offset from start for previous tok */ + last_tok_len = 0; /* Length (in bytes) of that tok */ + + /* + * Get constants' lengths (core system only gives us locations). Note + * this also ensures the items are sorted by location. 
+ */ + fill_in_constant_lengths(jstate, query); + + /* Allocate result buffer */ + norm_query = palloc(query_len + 1); + + for (i = 0; i < jstate->clocations_count; i++) + { + int off, /* Offset from start for cur tok */ + tok_len; /* Length (in bytes) of that tok */ + + off = jstate->clocations[i].location; + tok_len = jstate->clocations[i].length; + + if (tok_len < 0) + continue; /* ignore any duplicates */ + + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a '?' in place of the constant token */ + norm_query[n_quer_loc++] = '?'; + + quer_loc = off + tok_len; + last_off = off; + last_tok_len = tok_len; + } + + /* + * We've copied up until the last ignorable constant. Copy over the + * remaining bytes of the original query string. + */ + len_to_wrt = query_len - quer_loc; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + Assert(n_quer_loc <= query_len); + norm_query[n_quer_loc] = '\0'; + + *query_len_p = n_quer_loc; + return norm_query; +} + +char *gen_normquery(const char *query) +{ + if (!query) { + return NULL; + } + pgssJumbleState jstate; + jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (pgssLocationLen *) + palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations_count = 0; + int query_len = strlen(query); + return generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); } \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h index 274f96aebaf..aa9cd217e31 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ 
b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -12,4 +12,5 @@ extern void stat_statements_parser_deinit(void); } #endif -uint64_t get_plan_id(QueryDesc *queryDesc); \ No newline at end of file +StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query); \ No newline at end of file From 7680c38e1ff3b5551800d9679b54497eea7b7c82 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 6 Apr 2023 13:24:25 +0300 Subject: [PATCH 04/49] [yagp_hooks_collector] Apply llvm code style --- Makefile | 1 - src/EventSender.cpp | 367 ++++++++++++++++++++---------------------- src/EventSender.h | 13 +- src/GrpcConnector.cpp | 68 ++++---- src/GrpcConnector.h | 13 +- src/ProcStats.cpp | 183 ++++++++++----------- src/hook_wrappers.cpp | 83 +++++----- 7 files changed, 338 insertions(+), 390 deletions(-) diff --git a/Makefile b/Makefile index 0a21cf136ff..91be52c4468 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,6 @@ # to "Makefile" if it exists. PostgreSQL is shipped with a # "GNUmakefile". If the user hasn't run the configure script yet, the # GNUmakefile won't exist yet, so we catch that case as well. - # AIX make defaults to building *every* target of the first rule. Start with # a single-target, empty rule to make the other targets non-default. diff --git a/src/EventSender.cpp b/src/EventSender.cpp index d8145b811a4..b7c3cd70b85 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -4,8 +4,7 @@ #include "protos/yagpcc_set_service.pb.h" #include -extern "C" -{ +extern "C" { #include "postgres.h" #include "access/hash.h" #include "utils/metrics_utils.h" @@ -21,202 +20,178 @@ extern "C" #include "tcop/utility.h" #include "pg_stat_statements_ya_parser.h" -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); -} - -namespace -{ - -std::string* get_user_name() -{ - const char *username = GetConfigOption("session_authorization", false, false); - return username ? 
new std::string(username) : nullptr; -} - -std::string* get_db_name() -{ - char *dbname = get_database_name(MyDatabaseId); - std::string* result = dbname ? new std::string(dbname) : nullptr; - pfree(dbname); - return result; -} - -int get_cur_slice_id(QueryDesc *desc) -{ - if (!desc->estate) - { - return 0; - } - return LocallyExecutingSliceIndex(desc->estate); -} - -google::protobuf::Timestamp current_ts() -{ - google::protobuf::Timestamp current_ts; - struct timeval tv; - gettimeofday(&tv, nullptr); - current_ts.set_seconds(tv.tv_sec); - current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); - return current_ts; -} - -void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) -{ - key->set_ccnt(gp_command_count); - key->set_ssid(gp_session_id); - int32 tmid = 0; - gpmon_gettmid(&tmid); - key->set_tmid(tmid); -} - -void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) -{ - key->set_dbid(GpIdentity.dbid); - key->set_segindex(GpIdentity.segindex); -} - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) -{ - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; -} - -void set_plan_text(std::string *plan_text, QueryDesc *query_desc) -{ - auto es = get_explain_state(query_desc, true); - *plan_text = std::string(es.str->data, es.str->len); -} - -void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - //TODO: free stringinfo? -} - -void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - *qi->mutable_query_text() = query_desc->sourceText; - char* norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_temlate_query_text() = std::string(norm_query); - pfree(norm_query); -} - -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) -{ - if (query_desc->sourceText) - set_query_text(qi, query_desc); - if (query_desc->plannedstmt) - { - set_query_plan(qi, query_desc); - qi->set_query_id(query_desc->plannedstmt->queryId); - } - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); -} - -void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) -{ - auto instrument = query_desc->planstate->instrument; - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - 
metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) -{ - int32_t n_spill_files = 0; - int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, &n_spill_bytes); - metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - if (query_desc->planstate->instrument) - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); - fill_self_stats(metrics->mutable_systemstat()); +void get_spill_info(int ssid, int ccid, int32_t *file_count, + int64_t *total_bytes); } +namespace { + +std::string *get_user_name() { + const char *username = GetConfigOption("session_authorization", false, false); + return username ? new std::string(username) : nullptr; +} + +std::string *get_db_name() { + char *dbname = get_database_name(MyDatabaseId); + std::string *result = dbname ? 
new std::string(dbname) : nullptr; + pfree(dbname); + return result; +} + +int get_cur_slice_id(QueryDesc *desc) { + if (!desc->estate) { + return 0; + } + return LocallyExecutingSliceIndex(desc->estate); +} + +google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gpmon_gettmid(&tmid); + key->set_tmid(tmid); +} + +void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); +} + +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + return es; +} + +void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { + auto es = get_explain_state(query_desc, true); + *plan_text = std::string(es.str->data, es.str->len); +} + +void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + // TODO: free stringinfo? 
+} + +void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + *qi->mutable_query_text() = query_desc->sourceText; + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_temlate_query_text() = std::string(norm_query); + pfree(norm_query); +} + +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { + if (query_desc->sourceText) { + set_query_text(qi, query_desc); + } + if (query_desc->plannedstmt) { + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); + } + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, + QueryDesc *query_desc) { + auto instrument = query_desc->planstate->instrument; + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { + int32_t n_spill_files = 
0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, gp_command_count, &n_spill_files, + &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + if (query_desc->planstate->instrument) { + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + } + fill_self_stats(metrics->mutable_systemstat()); +} } // namespace -void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) -{ - query_desc->instrument_options |= INSTRUMENT_BUFFERS; - query_desc->instrument_options |= INSTRUMENT_ROWS; - query_desc->instrument_options |= INSTRUMENT_TIMER; - - elog(DEBUG1, "Query %s start recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) - { - elog(WARNING, "Query %s start reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } - else - { - elog(DEBUG1, "Query %s start successful", query_desc->sourceText); - } -} - -void EventSender::ExecutorFinish(QueryDesc *query_desc) -{ - elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_query_info(req.mutable_query_info(), query_desc); - set_gp_metrics(req.mutable_query_metrics(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) - { - elog(WARNING, "Query %s finish reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } - else - { - elog(DEBUG1, "Query %s finish 
successful", query_desc->sourceText); - } -} - -EventSender *EventSender::instance() -{ - static EventSender sender; - return &sender; -} - -EventSender::EventSender() -{ - connector = std::make_unique(); -} \ No newline at end of file +void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + + elog(DEBUG1, "Query %s start recording", query_desc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s start reporting failed with an error %s", + query_desc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s start successful", query_desc->sourceText); + } +} + +void EventSender::ExecutorFinish(QueryDesc *query_desc) { + elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); + yagpcc::SetQueryReq req; + req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + set_query_info(req.mutable_query_info(), query_desc); + set_gp_metrics(req.mutable_query_metrics(), query_desc); + auto result = connector->set_metric_query(req); + if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { + elog(WARNING, "Query %s finish reporting failed with an error %s", + query_desc->sourceText, result.error_text().c_str()); + } else { + elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); + } +} + +EventSender *EventSender::instance() { + static EventSender sender; + return &sender; +} + +EventSender::EventSender() { connector = std::make_unique(); } \ No 
newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index bd02455ca7e..d69958db9b0 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -6,14 +6,13 @@ class GrpcConnector; struct QueryDesc; -class EventSender -{ +class EventSender { public: - void ExecutorStart(QueryDesc *query_desc, int eflags); - void ExecutorFinish(QueryDesc *query_desc); - static EventSender *instance(); + void ExecutorStart(QueryDesc *query_desc, int eflags); + void ExecutorFinish(QueryDesc *query_desc); + static EventSender *instance(); private: - EventSender(); - std::unique_ptr connector; + EventSender(); + std::unique_ptr connector; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 7329f392010..1a820404428 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -5,51 +5,43 @@ #include #include -class GrpcConnector::Impl -{ +class GrpcConnector::Impl { public: - Impl() - { - GOOGLE_PROTOBUF_VERIFY_VERSION; - this->stub = yagpcc::SetQueryInfo::NewStub(grpc::CreateChannel( - SOCKET_FILE, grpc::InsecureChannelCredentials())); + Impl() { + GOOGLE_PROTOBUF_VERIFY_VERSION; + this->stub = yagpcc::SetQueryInfo::NewStub( + grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials())); + } + + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse response; + grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::milliseconds(50); + context.set_deadline(deadline); + + grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); + + if (!status.ok()) { + response.set_error_text("Connection lost: " + status.error_message() + + "; " + status.error_details()); + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) - { - yagpcc::MetricResponse response; - grpc::ClientContext context; - auto deadline = std::chrono::system_clock::now() + 
std::chrono::milliseconds(50); - context.set_deadline(deadline); - - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - - if (!status.ok()) - { - response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - } - - return response; - } + return response; + } private: - const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; - const std::string TCP_ADDRESS = "127.0.0.1:1432"; - std::unique_ptr stub; + const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; + const std::string TCP_ADDRESS = "127.0.0.1:1432"; + std::unique_ptr stub; }; -GrpcConnector::GrpcConnector() -{ - impl = new Impl(); -} +GrpcConnector::GrpcConnector() { impl = new Impl(); } -GrpcConnector::~GrpcConnector() -{ - delete impl; -} +GrpcConnector::~GrpcConnector() { delete impl; } -yagpcc::MetricResponse GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) -{ - return impl->set_metric_query(req); +yagpcc::MetricResponse +GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { + return impl->set_metric_query(req); } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index dc0f21706a3..810c0bd3e15 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -2,14 +2,13 @@ #include "yagpcc_set_service.pb.h" -class GrpcConnector -{ +class GrpcConnector { public: - GrpcConnector(); - ~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); + GrpcConnector(); + ~GrpcConnector(); + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); private: - class Impl; - Impl *impl; + class Impl; + Impl *impl; }; \ No newline at end of file diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 34c5d05719e..5c64f25ec09 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -4,116 +4,109 @@ #include #include -extern "C" -{ +extern "C" { #include "postgres.h" #include "utils/elog.h" } 
namespace { -#define FILL_IO_STAT(stat_name) \ - uint64_t stat_name; \ - proc_stat >> tmp >> stat_name; \ - stats->set_##stat_name(stat_name); +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name); -void fill_io_stats(yagpcc::SystemStat *stats) -{ - std::ifstream proc_stat("/proc/self/io"); - std::string tmp; - FILL_IO_STAT(rchar); - FILL_IO_STAT(wchar); - FILL_IO_STAT(syscr); - FILL_IO_STAT(syscw); - FILL_IO_STAT(read_bytes); - FILL_IO_STAT(write_bytes); - FILL_IO_STAT(cancelled_write_bytes); +void fill_io_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); } -void fill_cpu_stats(yagpcc::SystemStat *stats) -{ - static const int UTIME_ID = 13; - static const int STIME_ID = 14; - static const int STARTTIME_ID = 21; - static const int VSIZE_ID = 22; - static const int RSS_ID = 23; - static const double tps = sysconf(_SC_CLK_TCK); +void fill_cpu_stats(yagpcc::SystemStat *stats) { + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int STARTTIME_ID = 21; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); - double uptime; - { - std::ifstream proc_stat("/proc/uptime"); - proc_stat >> uptime; - } + double uptime; + { + std::ifstream proc_stat("/proc/uptime"); + proc_stat >> uptime; + } - std::ifstream proc_stat("/proc/self/stat"); - std::string trash; - double start_time = 0; - for (int i = 0; i <= RSS_ID; ++i) - { - switch (i) - { - case UTIME_ID: - double utime; - proc_stat >> utime; - stats->set_usertimeseconds(utime / tps); - break; - case STIME_ID: - double stime; - proc_stat >> stime; - stats->set_kerneltimeseconds(stime / tps); - break; - case STARTTIME_ID: - 
uint64_t starttime; - proc_stat >> starttime; - start_time = static_cast(starttime) / tps; - break; - case VSIZE_ID: - uint64_t vsize; - proc_stat >> vsize; - stats->set_vsize(vsize); - break; - case RSS_ID: - uint64_t rss; - proc_stat >> rss; - // NOTE: this is a double AFAIU, need to double-check - stats->set_rss(rss); - break; - default: - proc_stat >> trash; - } - stats->set_runningtimeseconds(uptime - start_time); + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + double start_time = 0; + for (int i = 0; i <= RSS_ID; ++i) { + switch (i) { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps); + break; + case STARTTIME_ID: + uint64_t starttime; + proc_stat >> starttime; + start_time = static_cast(starttime) / tps; + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; } + stats->set_runningtimeseconds(uptime - start_time); + } } -void fill_status_stats(yagpcc::SystemStat *stats) -{ - std::ifstream proc_stat("/proc/self/status"); - std::string key, measure; - while (proc_stat >> key) - { - if (key == "VmPeak:") - { - uint64_t value; - proc_stat >> value; - stats->set_vmpeakkb(value); - proc_stat >> measure; - if (measure != "kB") - elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); - } - else if (key == "VmSize:") - { - uint64_t value; - proc_stat >> value; - stats->set_vmsizekb(value); - if (measure != "kB") - elog(FATAL, "Expected memory sizes in kB, but got in %s", measure.c_str()); - } +void fill_status_stats(yagpcc::SystemStat *stats) { + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) { + if (key 
== "VmPeak:") { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + if (measure != "kB") { + elog(FATAL, "Expected memory sizes in kB, but got in %s", + measure.c_str()); + } + } else if (key == "VmSize:") { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") { + elog(FATAL, "Expected memory sizes in kB, but got in %s", + measure.c_str()); + } } + } } } // namespace -void fill_self_stats(yagpcc::SystemStat *stats) -{ - fill_io_stats(stats); - fill_cpu_stats(stats); - fill_status_stats(stats); +void fill_self_stats(yagpcc::SystemStat *stats) { + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); } \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 1dabb59ab3f..739cca80f01 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,8 +1,7 @@ #include "hook_wrappers.h" #include "EventSender.h" -extern "C" -{ +extern "C" { #include "postgres.h" #include "utils/metrics_utils.h" #include "utils/elog.h" @@ -22,56 +21,48 @@ static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorFinish_hook(QueryDesc *query_desc); -#define REPLACE_HOOK(hookName) \ - previous_##hookName = hookName; \ - hookName = ya_##hookName; +#define REPLACE_HOOK(hookName) \ + previous_##hookName = hookName; \ + hookName = ya_##hookName; -void hooks_init() -{ - REPLACE_HOOK(ExecutorStart_hook); - REPLACE_HOOK(ExecutorFinish_hook); - stat_statements_parser_init(); +void hooks_init() { + REPLACE_HOOK(ExecutorStart_hook); + REPLACE_HOOK(ExecutorFinish_hook); + stat_statements_parser_init(); } -void hooks_deinit() -{ - ExecutorStart_hook = previous_ExecutorStart_hook; - ExecutorFinish_hook = ExecutorFinish_hook; - stat_statements_parser_deinit(); +void hooks_deinit() { + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorFinish_hook = 
previous_ExecutorFinish_hook; + stat_statements_parser_deinit(); } -#define CREATE_HOOK_WRAPPER(hookName, ...) \ - PG_TRY(); \ - { \ - EventSender::instance()->hookName(__VA_ARGS__); \ - } \ - PG_CATCH(); \ - { \ - ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ - PG_RE_THROW(); \ - } \ - PG_END_TRY(); \ - if (previous_##hookName##_hook) \ - (*previous_##hookName##_hook)(__VA_ARGS__); \ - else \ - standard_##hookName(__VA_ARGS__); +#define CREATE_HOOK_WRAPPER(hookName, ...) \ + PG_TRY(); \ + { EventSender::instance()->hookName(__VA_ARGS__); } \ + PG_CATCH(); \ + { \ + ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ + PG_RE_THROW(); \ + } \ + PG_END_TRY(); \ + if (previous_##hookName##_hook) \ + (*previous_##hookName##_hook)(__VA_ARGS__); \ + else \ + standard_##hookName(__VA_ARGS__); -void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) -{ - CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); - PG_TRY(); - { - EventSender::instance()->ExecutorStart(query_desc, eflags); - } - PG_CATCH(); - { - ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); - PG_RE_THROW(); - } - PG_END_TRY(); +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { + CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); + PG_TRY(); + { EventSender::instance()->ExecutorStart(query_desc, eflags); } + PG_CATCH(); + { + ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + PG_RE_THROW(); + } + PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *query_desc) -{ - CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { + CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); } \ No newline at end of file From 76cb5539004423d2e02c6df65a469f0337dcc778 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 10 Apr 2023 16:01:08 +0300 Subject: [PATCH 05/49] [yagp_hooks_collector] Switch to query_info_collect_hook and fix stability Use 
query_info_collect_hook for finer-grained lifecycle tracking. Fix two segfaults in early init paths. Skip hooks in UTILITY mode. General robustness improvements. --- protos/yagpcc_set_service.proto | 7 +- src/EventSender.cpp | 207 ++++++++++++------ src/EventSender.h | 13 +- src/GrpcConnector.cpp | 4 +- src/GrpcConnector.h | 2 +- src/hook_wrappers.cpp | 65 +++--- .../pg_stat_statements_ya_parser.c | 21 ++ 7 files changed, 206 insertions(+), 113 deletions(-) diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 97c5691a6f5..93c2f5a01d1 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -30,9 +30,10 @@ message SetQueryReq { QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; QueryKey query_key = 3; - QueryInfo query_info = 4; - GPMetrics query_metrics = 5; - repeated MetricPlan plan_tree = 6; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; } message SetPlanNodeReq { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b7c3cd70b85..5ab6bbd60df 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,29 +1,30 @@ -#include "EventSender.h" #include "GrpcConnector.h" #include "ProcStats.h" -#include "protos/yagpcc_set_service.pb.h" #include extern "C" { #include "postgres.h" + #include "access/hash.h" -#include "utils/metrics_utils.h" -#include "utils/elog.h" -#include "executor/executor.h" -#include "commands/explain.h" #include "commands/dbcommands.h" +#include "commands/explain.h" #include "commands/resgroupcmds.h" +#include "executor/executor.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" -#include "cdb/cdbvars.h" #include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" #include "tcop/utility.h" -#include "pg_stat_statements_ya_parser.h" void get_spill_info(int ssid, int ccid, int32_t *file_count, int64_t 
*total_bytes); } +#include "EventSender.h" + namespace { std::string *get_user_name() { @@ -102,90 +103,152 @@ void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { pfree(norm_query); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - if (query_desc->sourceText) { - set_query_text(qi, query_desc); +void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, + bool with_text, bool with_plan) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + if (query_desc->sourceText && with_text) { + set_query_text(qi, query_desc); + } + if (query_desc->plannedstmt && with_plan) { + set_query_plan(qi, query_desc); + qi->set_query_id(query_desc->plannedstmt->queryId); + } + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); } - if (query_desc->plannedstmt) { - set_query_plan(qi, query_desc); - qi->set_query_id(query_desc->plannedstmt->queryId); - } - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - 
metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { - int32_t n_spill_files = 0; - int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, - &n_spill_bytes); - metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - if (query_desc->planstate->instrument) { + if (instrument) { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + bool need_spillinfo) { + if (need_spillinfo) { + int32_t n_spill_files = 0; + int64_t n_spill_bytes = 0; + get_spill_info(gp_session_id, 
gp_command_count, &n_spill_files, + &n_spill_bytes); + metrics->mutable_spill()->set_filecount(n_spill_files); + metrics->mutable_spill()->set_totalbytes(n_spill_bytes); + } + if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); } +yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, + yagpcc::QueryStatus status) { + yagpcc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key(), query_desc); + set_segment_key(req.mutable_segment_key(), query_desc); + return req; +} + } // namespace -void EventSender::ExecutorStart(QueryDesc *query_desc, int /* eflags*/) { +void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + switch (status) { + case METRICS_PLAN_NODE_INITIALIZE: + case METRICS_PLAN_NODE_EXECUTING: + case METRICS_PLAN_NODE_FINISHED: + // TODO + break; + case METRICS_QUERY_SUBMIT: + collect_query_submit(reinterpret_cast(arg)); + break; + case METRICS_QUERY_START: + // no-op: executor_after_start is enough + break; + case METRICS_QUERY_DONE: + collect_query_done(reinterpret_cast(arg), "done"); + break; + case METRICS_QUERY_ERROR: + collect_query_done(reinterpret_cast(arg), "error"); + break; + case METRICS_QUERY_CANCELING: + collect_query_done(reinterpret_cast(arg), "calcelling"); + break; + case METRICS_QUERY_CANCELED: + collect_query_done(reinterpret_cast(arg), "cancelled"); + break; + case METRICS_INNER_QUERY_DONE: + // TODO + break; + default: + elog(FATAL, "Unknown query status: %d", status); + } +} + +void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto req = + create_query_req(query_desc, 
yagpcc::QueryStatus::QUERY_STATUS_START); + set_query_info(req.mutable_query_info(), query_desc, false, true); + send_query_info(&req, "started"); +} + +void EventSender::collect_query_submit(QueryDesc *query_desc) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - elog(DEBUG1, "Query %s start recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - auto result = connector->set_metric_query(req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query %s start reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } else { - elog(DEBUG1, "Query %s start successful", query_desc->sourceText); - } + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + set_query_info(req.mutable_query_info(), query_desc, true, false); + send_query_info(&req, "submit"); } -void EventSender::ExecutorFinish(QueryDesc *query_desc) { - elog(DEBUG1, "Query %s finish recording", query_desc->sourceText); - yagpcc::SetQueryReq req; - req.set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_query_info(req.mutable_query_info(), query_desc); - set_gp_metrics(req.mutable_query_metrics(), query_desc); - auto result = connector->set_metric_query(req); +void EventSender::collect_query_done(QueryDesc *query_desc, + const std::string &status) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); + set_query_info(req.mutable_query_info(), query_desc, false, false); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather + // it here. 
It only makes sense when doing regular stat checks. + set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); + send_query_info(&req, status); +} + +void EventSender::send_query_info(yagpcc::SetQueryReq *req, + const std::string &event) { + auto result = connector->set_metric_query(*req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query %s finish reporting failed with an error %s", - query_desc->sourceText, result.error_text().c_str()); - } else { - elog(DEBUG1, "Query %s finish successful", query_desc->sourceText); + elog(WARNING, "Query {%d-%d-%d} %s reporting failed with an error %s", + req->query_key().tmid(), req->query_key().ssid(), + req->query_key().ccnt(), event.c_str(), result.error_text().c_str()); } } diff --git a/src/EventSender.h b/src/EventSender.h index d69958db9b0..9c574cba9a1 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,18 +1,25 @@ #pragma once #include +#include class GrpcConnector; - struct QueryDesc; +namespace yagpcc { +class SetQueryReq; +} class EventSender { public: - void ExecutorStart(QueryDesc *query_desc, int eflags); - void ExecutorFinish(QueryDesc *query_desc); + void executor_after_start(QueryDesc *query_desc, int eflags); + void query_metrics_collect(QueryMetricsStatus status, void *arg); static EventSender *instance(); private: + void collect_query_submit(QueryDesc *query_desc); + void collect_query_done(QueryDesc *query_desc, const std::string &status); + EventSender(); + void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 1a820404428..bca1acd9ce2 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -16,8 +16,10 @@ class GrpcConnector::Impl { yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; grpc::ClientContext context; + // TODO: find a 
more secure way to send messages than relying on a fixed + // timeout auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(50); + std::chrono::system_clock::now() + std::chrono::milliseconds(200); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 810c0bd3e15..4fca6960a4e 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -1,6 +1,6 @@ #pragma once -#include "yagpcc_set_service.pb.h" +#include "protos/yagpcc_set_service.pb.h" class GrpcConnector { public: diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 739cca80f01..be39c953970 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,6 +1,3 @@ -#include "hook_wrappers.h" -#include "EventSender.h" - extern "C" { #include "postgres.h" #include "utils/metrics_utils.h" @@ -14,55 +11,57 @@ extern "C" { } #include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "hook_wrappers.h" +#include "EventSender.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; -static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; - -static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); -static void ya_ExecutorFinish_hook(QueryDesc *query_desc); +static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; -#define REPLACE_HOOK(hookName) \ - previous_##hookName = hookName; \ - hookName = ya_##hookName; +static void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags); +static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); void hooks_init() { - REPLACE_HOOK(ExecutorStart_hook); - REPLACE_HOOK(ExecutorFinish_hook); + previous_ExecutorStart_hook = ExecutorStart_hook; + ExecutorStart_hook = ya_ExecutorAfterStart_hook; + previous_query_info_collect_hook = query_info_collect_hook; + query_info_collect_hook = ya_query_info_collect_hook; 
stat_statements_parser_init(); } void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; - ExecutorFinish_hook = previous_ExecutorFinish_hook; + query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); } -#define CREATE_HOOK_WRAPPER(hookName, ...) \ - PG_TRY(); \ - { EventSender::instance()->hookName(__VA_ARGS__); } \ - PG_CATCH(); \ - { \ - ereport(WARNING, (errmsg("EventSender failed in %s", #hookName))); \ - PG_RE_THROW(); \ - } \ - PG_END_TRY(); \ - if (previous_##hookName##_hook) \ - (*previous_##hookName##_hook)(__VA_ARGS__); \ - else \ - standard_##hookName(__VA_ARGS__); - -void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - CREATE_HOOK_WRAPPER(ExecutorStart, query_desc, eflags); +void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { + if (previous_ExecutorStart_hook) { + (*previous_ExecutorStart_hook)(query_desc, eflags); + } else { + standard_ExecutorStart(query_desc, eflags); + } PG_TRY(); - { EventSender::instance()->ExecutorStart(query_desc, eflags); } + { EventSender::instance()->executor_after_start(query_desc, eflags); } PG_CATCH(); { - ereport(WARNING, (errmsg("EventSender failed in ExecutorStart afterhook"))); + ereport(WARNING, + (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); PG_RE_THROW(); } PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - CREATE_HOOK_WRAPPER(ExecutorFinish, query_desc); +void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { + PG_TRY(); + { EventSender::instance()->query_metrics_collect(status, arg); } + PG_CATCH(); + { + ereport(WARNING, + (errmsg("EventSender failed in ya_query_info_collect_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); + if (previous_query_info_collect_hook) { + (*previous_query_info_collect_hook)(status, arg); + } } \ No newline at end of file diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c 
index ae79e7dc40a..737e77745df 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -205,6 +205,13 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) APP_JUMB_STRING(rte->ctename); APP_JUMB(rte->ctelevelsup); break; + /* GPDB RTEs */ + case RTE_VOID: + break; + case RTE_TABLEFUNCTION: + JumbleQuery(jstate, rte->subquery); + JumbleExpr(jstate, (Node *)rte->functions); + break; default: elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); break; @@ -609,6 +616,20 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) JumbleExpr(jstate, rtfunc->funcexpr); } break; + /* GPDB nodes */ + case T_GroupingFunc: + { + GroupingFunc *grpnode = (GroupingFunc *)node; + + JumbleExpr(jstate, (Node *)grpnode->args); + } + break; + case T_Grouping: + case T_GroupId: + case T_Integer: + case T_Value: + // TODO: no idea what to do with those + break; default: /* Only a warning, since we can stumble along anyway */ elog(WARNING, "unrecognized node type: %d", From ac9a4eabb2b5057cec9e51409808a905797fe1a5 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 1 May 2023 18:44:53 +0300 Subject: [PATCH 06/49] [yagp_hooks_collector] Add debian packaging and bionic GRPC compatibility --- debian/compat | 1 + debian/control | 11 +++++++++++ debian/postinst | 8 ++++++++ debian/rules | 10 ++++++++++ src/GrpcConnector.cpp | 4 ++-- 5 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 debian/compat create mode 100644 debian/control create mode 100644 debian/postinst create mode 100644 debian/rules diff --git a/debian/compat b/debian/compat new file mode 100644 index 00000000000..ec635144f60 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 00000000000..600dd4d602e --- /dev/null +++ b/debian/control @@ -0,0 +1,11 @@ +Source: greenplum-6-yagpcc-hooks-collector-1 +Section: misc +Priority: optional +Maintainer: Maxim 
Smyatkin +Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc +Standards-Version: 3.9.8 + +Package: greenplum-6-yagpcc-hooks-collector-1 +Architecture: any +Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) +Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/debian/postinst b/debian/postinst new file mode 100644 index 00000000000..27ddfc06a7d --- /dev/null +++ b/debian/postinst @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +GPADMIN=gpadmin +GPHOME=/opt/greenplum-db-6 + +chown -R ${GPADMIN}:${GPADMIN} ${GPHOME} diff --git a/debian/rules b/debian/rules new file mode 100644 index 00000000000..6c2c7491067 --- /dev/null +++ b/debian/rules @@ -0,0 +1,10 @@ +#!/usr/bin/make -f +# You must remove unused comment lines for the released package. +export DH_VERBOSE = 1 + + +export GPHOME := /opt/greenplum-db-6 +export PATH := $(GPHOME)/bin:$(PATH) + +%: + dh $@ diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index bca1acd9ce2..5a24d576de1 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -1,8 +1,8 @@ #include "GrpcConnector.h" #include "yagpcc_set_service.grpc.pb.h" -#include -#include +#include +#include #include class GrpcConnector::Impl { From 96c005bad89dfe2166ab9cbb3fc8c4e600d06da9 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 4 May 2023 14:34:42 +0300 Subject: [PATCH 07/49] [yagp_hooks_collector] Add CDB metrics, query nesting, and configuration GUCs Add missing Greenplum node types to pg_stat_statements parser. Move stats reporting to ExecutorEnd hook. Improve GRPC failure handling. Track CDB-specific metrics and initial query nesting level. Add resource group collection. Add GUCs for controlling collection. Skip nested and utility statements by default. 
--- debian/control | 6 +- protos/yagpcc_metrics.proto | 1 + src/Config.cpp | 38 ++++++ src/Config.h | 12 ++ src/EventSender.cpp | 112 ++++++++++++++---- src/EventSender.h | 6 + src/GrpcConnector.cpp | 66 +++++++++-- src/hook_wrappers.cpp | 93 +++++++++++++-- .../pg_stat_statements_ya_parser.c | 29 ++++- 9 files changed, 318 insertions(+), 45 deletions(-) create mode 100644 src/Config.cpp create mode 100644 src/Config.h diff --git a/debian/control b/debian/control index 600dd4d602e..c740a8590ca 100644 --- a/debian/control +++ b/debian/control @@ -1,11 +1,11 @@ -Source: greenplum-6-yagpcc-hooks-collector-1 +Source: greenplum-6-yagpcc-hooks Section: misc Priority: optional Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc +Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc, libgrpc++1, libgrpc++-dev Standards-Version: 3.9.8 -Package: greenplum-6-yagpcc-hooks-collector-1 +Package: greenplum-6-yagpcc-hooks Architecture: any Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index f00f329a208..26e0a496460 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -33,6 +33,7 @@ message QueryInfo { string temlate_plan_text = 7; string userName = 8; string databaseName = 9; + string rsgname = 10; } enum PlanGenerator diff --git a/src/Config.cpp b/src/Config.cpp new file mode 100644 index 00000000000..d97e5d45984 --- /dev/null +++ b/src/Config.cpp @@ -0,0 +1,38 @@ +#include "Config.h" + +extern "C" { +#include "postgres.h" +#include "utils/builtins.h" +#include "utils/guc.h" +} + +static char *guc_uds_path = nullptr; +static bool guc_enable_analyze = true; +static bool guc_enable_cdbstats = true; +static bool 
guc_enable_collector = true; + +void Config::init() { + DefineCustomStringVariable( + "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, + true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_analyze", "Collect analyze metrics in yagpcc", 0LL, + &guc_enable_analyze, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, + &guc_enable_cdbstats, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); +} + +std::string Config::uds_path() { return guc_uds_path; } +bool Config::enable_analyze() { return guc_enable_analyze; } +bool Config::enable_cdbstats() { return guc_enable_cdbstats; } +bool Config::enable_collector() { return guc_enable_collector; } diff --git a/src/Config.h b/src/Config.h new file mode 100644 index 00000000000..117481f219b --- /dev/null +++ b/src/Config.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +class Config { +public: + static void init(); + static std::string uds_path(); + static bool enable_analyze(); + static bool enable_cdbstats(); + static bool enable_collector(); +}; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 5ab6bbd60df..55858ed5183 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,3 +1,4 @@ +#include "Config.h" #include "GrpcConnector.h" #include "ProcStats.h" #include @@ -13,6 +14,7 @@ extern "C" { #include "utils/elog.h" #include "utils/metrics_utils.h" +#include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" @@ -25,6 +27,10 @@ void get_spill_info(int ssid, int ccid, int32_t *file_count, #include "EventSender.h" +#define 
need_collect() \ + (nesting_level == 0 && gp_command_count != 0 && \ + query_desc->sourceText != nullptr && Config::enable_collector()) + namespace { std::string *get_user_name() { @@ -39,6 +45,21 @@ std::string *get_db_name() { return result; } +std::string *get_rg_name() { + auto userId = GetUserId(); + if (!OidIsValid(userId)) + return nullptr; + auto groupId = GetResGroupIdForRole(userId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + auto result = new std::string(rgname); + pfree(rgname); + return result; +} + int get_cur_slice_id(QueryDesc *desc) { if (!desc->estate) { return 0; @@ -103,9 +124,10 @@ void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { pfree(norm_query); } -void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, +void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, bool with_text, bool with_plan) { if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); if (query_desc->sourceText && with_text) { set_query_text(qi, query_desc); } @@ -115,6 +137,7 @@ void set_query_info(yagpcc::QueryInfo *qi, QueryDesc *query_desc, } qi->set_allocated_username(get_user_name()); qi->set_allocated_databasename(get_db_name()); + qi->set_allocated_rsgname(get_rg_name()); } } @@ -209,37 +232,79 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } } +void EventSender::executor_before_start(QueryDesc *query_desc, + int /* eflags*/) { + if (Gp_role == GP_ROLE_DISPATCH && need_collect() && + Config::enable_analyze()) { + instr_time starttime; + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + if (Config::enable_cdbstats()) { + query_desc->instrument_options |= INSTRUMENT_CDB; + + // TODO: there is a PR resolving some memory leak around auto-explain: + // 
https://github.com/greenplum-db/gpdb/pull/15164 + // Need to check if the memory leak applies here as well and fix it + Assert(query_desc->showstatctx == NULL); + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } + } +} + void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && + need_collect()) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); + set_query_info(&req, query_desc, false, true); + send_query_info(&req, "started"); + } +} + +void EventSender::executor_end(QueryDesc *query_desc) { + if (!need_collect() || + (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } + if (query_desc->totaltime && Config::enable_analyze() && + Config::enable_cdbstats()) { + if (query_desc->estate->dispatcherState && + query_desc->estate->dispatcherState->primaryResults) { + cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, + DISPATCH_WAIT_NONE); + } + InstrEndLoop(query_desc->totaltime); + } auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); - set_query_info(req.mutable_query_info(), query_desc, false, true); - send_query_info(&req, "started"); + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); + set_query_info(&req, query_desc, false, false); + // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to + // gather it here. It only makes sense when doing regular stat checks. 
+ set_gp_metrics(req.mutable_query_metrics(), query_desc, + /*need_spillinfo*/ false); + send_query_info(&req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { - query_desc->instrument_options |= INSTRUMENT_BUFFERS; - query_desc->instrument_options |= INSTRUMENT_ROWS; - query_desc->instrument_options |= INSTRUMENT_TIMER; - - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - set_query_info(req.mutable_query_info(), query_desc, true, false); - send_query_info(&req, "submit"); + if (need_collect()) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + set_query_info(&req, query_desc, true, false); + send_query_info(&req, "submit"); + } } void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - set_query_info(req.mutable_query_info(), query_desc, false, false); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to gather - // it here. It only makes sense when doing regular stat checks. 
- set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); - send_query_info(&req, status); + if (need_collect()) { + auto req = + create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); + set_query_info(&req, query_desc, false, false); + send_query_info(&req, status); + } } void EventSender::send_query_info(yagpcc::SetQueryReq *req, @@ -257,4 +322,7 @@ EventSender *EventSender::instance() { return &sender; } -EventSender::EventSender() { connector = std::make_unique(); } \ No newline at end of file +EventSender::EventSender() { + Config::init(); + connector = std::make_unique(); +} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 9c574cba9a1..9e2ef992f81 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -11,8 +11,12 @@ class SetQueryReq; class EventSender { public: + void executor_before_start(QueryDesc *query_desc, int eflags); void executor_after_start(QueryDesc *query_desc, int eflags); + void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); + void incr_depth() { nesting_level++; } + void decr_depth() { nesting_level--; } static EventSender *instance(); private: @@ -22,4 +26,6 @@ class EventSender { EventSender(); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; + + int nesting_level = 0; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 5a24d576de1..276c9ceb8a8 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -1,42 +1,86 @@ #include "GrpcConnector.h" +#include "Config.h" #include "yagpcc_set_service.grpc.pb.h" -#include +#include +#include #include +#include +#include #include +#include + +extern "C" { +#include "postgres.h" +#include "cdb/cdbvars.h" +} class GrpcConnector::Impl { public: - Impl() { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { GOOGLE_PROTOBUF_VERIFY_VERSION; - this->stub = 
yagpcc::SetQueryInfo::NewStub( - grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials())); + channel = + grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); + stub = yagpcc::SetQueryInfo::NewStub(channel); + connected = true; + done = false; + reconnect_thread = std::thread(&Impl::reconnect, this); + } + + ~Impl() { + done = true; + cv.notify_one(); + reconnect_thread.join(); } yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { yagpcc::MetricResponse response; + if (!connected) { + response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + response.set_error_text( + "Not tracing this query connection to agent has been lost"); + return response; + } grpc::ClientContext context; - // TODO: find a more secure way to send messages than relying on a fixed - // timeout + int timeout = Gp_role == GP_ROLE_DISPATCH ? 500 : 250; auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(200); + std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + connected = false; + cv.notify_one(); } return response; } private: - const std::string SOCKET_FILE = "unix:///tmp/yagpcc_agent.sock"; - const std::string TCP_ADDRESS = "127.0.0.1:1432"; + const std::string SOCKET_FILE; std::unique_ptr stub; + std::shared_ptr channel; + std::atomic_bool connected; + std::thread reconnect_thread; + std::condition_variable cv; + std::mutex mtx; + bool done; + + void reconnect() { + while (!done) { + { + std::unique_lock lock(mtx); + cv.wait(lock); + } + while (!connected && !done) { + auto deadline = + std::chrono::system_clock::now() + std::chrono::milliseconds(100); + connected = 
channel->WaitForConnected(deadline); + } + } + } }; GrpcConnector::GrpcConnector() { impl = new Impl(); } diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index be39c953970..edad5798e44 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,28 +1,42 @@ extern "C" { #include "postgres.h" -#include "utils/metrics_utils.h" -#include "utils/elog.h" #include "executor/executor.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" -#include "cdb/cdbvars.h" #include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" #include "tcop/utility.h" } -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" -#include "hook_wrappers.h" +#include "Config.h" #include "EventSender.h" +#include "hook_wrappers.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; +static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; -static void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + long count); +static void ya_ExecutorFinish_hook(QueryDesc *query_desc); +static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); void hooks_init() { previous_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = ya_ExecutorAfterStart_hook; + ExecutorStart_hook = ya_ExecutorStart_hook; + previous_ExecutorRun_hook = ExecutorRun_hook; + ExecutorRun_hook = ya_ExecutorRun_hook; + previous_ExecutorFinish_hook = ExecutorFinish_hook; + ExecutorFinish_hook = ya_ExecutorFinish_hook; + previous_ExecutorEnd_hook = 
ExecutorEnd_hook; + ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; query_info_collect_hook = ya_query_info_collect_hook; stat_statements_parser_init(); @@ -30,11 +44,21 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); } -void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { +void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { + PG_TRY(); + { EventSender::instance()->executor_before_start(query_desc, eflags); } + PG_CATCH(); + { + ereport(WARNING, + (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); if (previous_ExecutorStart_hook) { (*previous_ExecutorStart_hook)(query_desc, eflags); } else { @@ -51,6 +75,59 @@ void ya_ExecutorAfterStart_hook(QueryDesc *query_desc, int eflags) { PG_END_TRY(); } +void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + long count) { + EventSender::instance()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorRun_hook) + previous_ExecutorRun_hook(query_desc, direction, count); + else + standard_ExecutorRun(query_desc, direction, count); + EventSender::instance()->decr_depth(); + } + PG_CATCH(); + { + EventSender::instance()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + +void ya_ExecutorFinish_hook(QueryDesc *query_desc) { + EventSender::instance()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorFinish_hook) + previous_ExecutorFinish_hook(query_desc); + else + standard_ExecutorFinish(query_desc); + EventSender::instance()->decr_depth(); + } + PG_CATCH(); + { + EventSender::instance()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + +void ya_ExecutorEnd_hook(QueryDesc *query_desc) { + PG_TRY(); + { EventSender::instance()->executor_end(query_desc); } + PG_CATCH(); + { + 
ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); + PG_RE_THROW(); + } + PG_END_TRY(); + if (previous_ExecutorEnd_hook) { + (*previous_ExecutorEnd_hook)(query_desc); + } else { + standard_ExecutorEnd(query_desc); + } +} + void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { PG_TRY(); { EventSender::instance()->query_metrics_collect(status, arg); } diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 737e77745df..a37ac0ef0bf 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -617,6 +617,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) } break; /* GPDB nodes */ + case T_GroupingClause: + { + GroupingClause *grpnode = (GroupingClause *)node; + + JumbleExpr(jstate, (Node *)grpnode->groupsets); + } + break; case T_GroupingFunc: { GroupingFunc *grpnode = (GroupingFunc *)node; @@ -628,7 +635,27 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) case T_GroupId: case T_Integer: case T_Value: - // TODO: no idea what to do with those + // TODO:seems like nothing to do with it + break; + /* GPDB-only additions, nothing to do */ + case T_PartitionBy: + case T_PartitionElem: + case T_PartitionRangeItem: + case T_PartitionBoundSpec: + case T_PartitionSpec: + case T_PartitionValuesSpec: + case T_AlterPartitionId: + case T_AlterPartitionCmd: + case T_InheritPartitionCmd: + case T_CreateFileSpaceStmt: + case T_FileSpaceEntry: + case T_DropFileSpaceStmt: + case T_TableValueExpr: + case T_DenyLoginInterval: + case T_DenyLoginPoint: + case T_AlterTypeStmt: + case T_SetDistributionCmd: + case T_ExpandStmtSpec: break; default: /* Only a warning, since we can stumble along anyway */ From 5dd24e1ad2583dc44aaea237326cf8c79780963e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 7 Jun 2023 14:58:57 +0300 Subject: [PATCH 08/49] [yagp_hooks_collector] Diff system stats 
per-query and improve error safety Capture /proc stats at query start and compute diff at end rather than reporting lifetime totals. Suppress error rethrows from the collector to avoid breaking other extensions. Add missing hooks deinitialization. Modernize ereport style. --- src/EventSender.cpp | 24 +++++++++---- src/ProcStats.cpp | 36 +++++++------------ src/hook_wrappers.cpp | 10 ++---- .../pg_stat_statements_ya_parser.c | 6 ++-- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 55858ed5183..b1f85cf9f1e 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -2,6 +2,7 @@ #include "GrpcConnector.h" #include "ProcStats.h" #include +#include extern "C" { #include "postgres.h" @@ -168,6 +169,8 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, } } +decltype(std::chrono::high_resolution_clock::now()) query_start_time; + void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, bool need_spillinfo) { if (need_spillinfo) { @@ -182,6 +185,10 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); + std::chrono::duration elapsed_seconds = + std::chrono::high_resolution_clock::now() - query_start_time; + metrics->mutable_systemstat()->set_runningtimeseconds( + elapsed_seconds.count()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -228,14 +235,17 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; default: - elog(FATAL, "Unknown query status: %d", status); + ereport(FATAL, (errmsg("Unknown query status: %d", status))); } } void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { - if (Gp_role == GP_ROLE_DISPATCH && need_collect() && - Config::enable_analyze()) { + if (!need_collect()) { + return; + } + query_start_time = 
std::chrono::high_resolution_clock::now(); + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -311,9 +321,11 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, const std::string &event) { auto result = connector->set_metric_query(*req); if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - elog(WARNING, "Query {%d-%d-%d} %s reporting failed with an error %s", - req->query_key().tmid(), req->query_key().ssid(), - req->query_key().ccnt(), event.c_str(), result.error_text().c_str()); + ereport(WARNING, + (errmsg("Query {%d-%d-%d} %s reporting failed with an error %s", + req->query_key().tmid(), req->query_key().ssid(), + req->query_key().ccnt(), event.c_str(), + result.error_text().c_str()))); } } diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 5c64f25ec09..668173a0f7e 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -13,7 +13,7 @@ namespace { #define FILL_IO_STAT(stat_name) \ uint64_t stat_name; \ proc_stat >> tmp >> stat_name; \ - stats->set_##stat_name(stat_name); + stats->set_##stat_name(stat_name - stats->stat_name()); void fill_io_stats(yagpcc::SystemStat *stats) { std::ifstream proc_stat("/proc/self/io"); @@ -30,36 +30,23 @@ void fill_io_stats(yagpcc::SystemStat *stats) { void fill_cpu_stats(yagpcc::SystemStat *stats) { static const int UTIME_ID = 13; static const int STIME_ID = 14; - static const int STARTTIME_ID = 21; static const int VSIZE_ID = 22; static const int RSS_ID = 23; static const double tps = sysconf(_SC_CLK_TCK); - double uptime; - { - std::ifstream proc_stat("/proc/uptime"); - proc_stat >> uptime; - } - std::ifstream proc_stat("/proc/self/stat"); std::string trash; - double start_time = 0; for (int i = 0; i <= RSS_ID; ++i) { switch (i) { case UTIME_ID: double utime; proc_stat >> utime; - stats->set_usertimeseconds(utime / tps); + 
stats->set_usertimeseconds(utime / tps - stats->usertimeseconds()); break; case STIME_ID: double stime; proc_stat >> stime; - stats->set_kerneltimeseconds(stime / tps); - break; - case STARTTIME_ID: - uint64_t starttime; - proc_stat >> starttime; - start_time = static_cast(starttime) / tps; + stats->set_kerneltimeseconds(stime / tps - stats->kerneltimeseconds()); break; case VSIZE_ID: uint64_t vsize; @@ -75,7 +62,6 @@ void fill_cpu_stats(yagpcc::SystemStat *stats) { default: proc_stat >> trash; } - stats->set_runningtimeseconds(uptime - start_time); } } @@ -89,16 +75,16 @@ void fill_status_stats(yagpcc::SystemStat *stats) { stats->set_vmpeakkb(value); proc_stat >> measure; if (measure != "kB") { - elog(FATAL, "Expected memory sizes in kB, but got in %s", - measure.c_str()); + ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", + measure.c_str()))); } } else if (key == "VmSize:") { uint64_t value; proc_stat >> value; stats->set_vmsizekb(value); if (measure != "kB") { - elog(FATAL, "Expected memory sizes in kB, but got in %s", - measure.c_str()); + ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", + measure.c_str()))); } } } @@ -106,7 +92,9 @@ void fill_status_stats(yagpcc::SystemStat *stats) { } // namespace void fill_self_stats(yagpcc::SystemStat *stats) { - fill_io_stats(stats); - fill_cpu_stats(stats); - fill_status_stats(stats); + static yagpcc::SystemStat prev_stats; + fill_io_stats(&prev_stats); + fill_cpu_stats(&prev_stats); + fill_status_stats(&prev_stats); + *stats = prev_stats; } \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index edad5798e44..a904dc9bafd 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -44,6 +44,8 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorRun_hook = previous_ExecutorRun_hook; + ExecutorFinish_hook = previous_ExecutorFinish_hook; ExecutorEnd_hook = previous_ExecutorEnd_hook; 
query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); @@ -56,7 +58,6 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); - PG_RE_THROW(); } PG_END_TRY(); if (previous_ExecutorStart_hook) { @@ -70,7 +71,6 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); - PG_RE_THROW(); } PG_END_TRY(); } @@ -116,10 +116,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { PG_TRY(); { EventSender::instance()->executor_end(query_desc); } PG_CATCH(); - { - ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); - PG_RE_THROW(); - } + { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); } PG_END_TRY(); if (previous_ExecutorEnd_hook) { (*previous_ExecutorEnd_hook)(query_desc); @@ -135,7 +132,6 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { { ereport(WARNING, (errmsg("EventSender failed in ya_query_info_collect_hook"))); - PG_RE_THROW(); } PG_END_TRY(); if (previous_query_info_collect_hook) { diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index a37ac0ef0bf..1c58d936093 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -213,7 +213,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) JumbleExpr(jstate, (Node *)rte->functions); break; default: - elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind); + ereport(ERROR, (errmsg("unrecognized RTE kind: %d", (int)rte->rtekind))); break; } } @@ -659,8 +659,8 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) break; default: /* Only a warning, since we can stumble along anyway */ - elog(WARNING, "unrecognized node type: %d", - (int)nodeTag(node)); + ereport(WARNING, 
(errmsg("unrecognized node type: %d", + (int)nodeTag(node)))); break; } } From 244b1e3c8a584f568ef39102814d4b369448a011 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 13 Jun 2023 16:51:40 +0300 Subject: [PATCH 09/49] [yagp_hooks_collector] Fix EventSender and GrpcConnector in forked processes Delay initialization of static singletons and GRPC connections to actual query handling time rather than _PG_init, since both are incompatible with fork(). --- debian/control | 4 ++-- src/EventSender.cpp | 10 ++-------- src/EventSender.h | 6 ++---- src/GrpcConnector.cpp | 33 ++++++++++++++++++++++----------- src/hook_wrappers.cpp | 33 +++++++++++++++++++++++---------- 5 files changed, 51 insertions(+), 35 deletions(-) diff --git a/debian/control b/debian/control index c740a8590ca..07176e94be5 100644 --- a/debian/control +++ b/debian/control @@ -2,10 +2,10 @@ Source: greenplum-6-yagpcc-hooks Section: misc Priority: optional Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), protobuf-compiler, protobuf-compiler-grpc, libgrpc++1, libgrpc++-dev +Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) Standards-Version: 3.9.8 Package: greenplum-6-yagpcc-hooks Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3) +Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/src/EventSender.cpp b/src/EventSender.cpp index b1f85cf9f1e..ec966e8686c 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -329,12 +329,6 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, } } -EventSender *EventSender::instance() { - static EventSender sender; - return &sender; -} +EventSender::EventSender() { connector = std::make_unique(); } -EventSender::EventSender() 
{ - Config::init(); - connector = std::make_unique(); -} \ No newline at end of file +EventSender::~EventSender() { connector.release(); } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 9e2ef992f81..92e6937a690 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -17,15 +17,13 @@ class EventSender { void query_metrics_collect(QueryMetricsStatus status, void *arg); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } - static EventSender *instance(); + EventSender(); + ~EventSender(); private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - - EventSender(); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); std::unique_ptr connector; - int nesting_level = 0; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 276c9ceb8a8..966bfb4a780 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -10,14 +10,17 @@ #include #include -extern "C" { +extern "C" +{ #include "postgres.h" #include "cdb/cdbvars.h" } -class GrpcConnector::Impl { +class GrpcConnector::Impl +{ public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) + { GOOGLE_PROTOBUF_VERIFY_VERSION; channel = grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); @@ -27,15 +30,18 @@ class GrpcConnector::Impl { reconnect_thread = std::thread(&Impl::reconnect, this); } - ~Impl() { + ~Impl() + { done = true; cv.notify_one(); reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) { + yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) + { yagpcc::MetricResponse response; - if (!connected) { + if (!connected) + { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); response.set_error_text( "Not tracing this query connection to agent has been lost"); @@ -47,7 
+53,8 @@ class GrpcConnector::Impl { std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { + if (!status.ok()) + { response.set_error_text("Connection lost: " + status.error_message() + "; " + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); @@ -68,13 +75,16 @@ class GrpcConnector::Impl { std::mutex mtx; bool done; - void reconnect() { - while (!done) { + void reconnect() + { + while (!done) + { { std::unique_lock lock(mtx); cv.wait(lock); } - while (!connected && !done) { + while (!connected && !done) + { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); @@ -88,6 +98,7 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) { +GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) +{ return impl->set_metric_query(req); } \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index a904dc9bafd..66ba6547ce2 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -28,7 +28,17 @@ static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +static EventSender *sender = nullptr; + +static inline EventSender *get_sender() { + if (!sender) { + sender = new EventSender(); + } + return sender; +} + void hooks_init() { + Config::init(); previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; previous_ExecutorRun_hook = ExecutorRun_hook; @@ -49,11 +59,14 @@ void hooks_deinit() { ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = 
previous_query_info_collect_hook; stat_statements_parser_deinit(); + if (sender) { + delete sender; + } } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { PG_TRY(); - { EventSender::instance()->executor_before_start(query_desc, eflags); } + { get_sender()->executor_before_start(query_desc, eflags); } PG_CATCH(); { ereport(WARNING, @@ -66,7 +79,7 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { standard_ExecutorStart(query_desc, eflags); } PG_TRY(); - { EventSender::instance()->executor_after_start(query_desc, eflags); } + { get_sender()->executor_after_start(query_desc, eflags); } PG_CATCH(); { ereport(WARNING, @@ -77,36 +90,36 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, long count) { - EventSender::instance()->incr_depth(); + get_sender()->incr_depth(); PG_TRY(); { if (previous_ExecutorRun_hook) previous_ExecutorRun_hook(query_desc, direction, count); else standard_ExecutorRun(query_desc, direction, count); - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); } PG_CATCH(); { - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); PG_RE_THROW(); } PG_END_TRY(); } void ya_ExecutorFinish_hook(QueryDesc *query_desc) { - EventSender::instance()->incr_depth(); + get_sender()->incr_depth(); PG_TRY(); { if (previous_ExecutorFinish_hook) previous_ExecutorFinish_hook(query_desc); else standard_ExecutorFinish(query_desc); - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); } PG_CATCH(); { - EventSender::instance()->decr_depth(); + get_sender()->decr_depth(); PG_RE_THROW(); } PG_END_TRY(); @@ -114,7 +127,7 @@ void ya_ExecutorFinish_hook(QueryDesc *query_desc) { void ya_ExecutorEnd_hook(QueryDesc *query_desc) { PG_TRY(); - { EventSender::instance()->executor_end(query_desc); } + { get_sender()->executor_end(query_desc); } PG_CATCH(); { ereport(WARNING, (errmsg("EventSender failed in 
ya_ExecutorEnd_hook"))); } PG_END_TRY(); @@ -127,7 +140,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { PG_TRY(); - { EventSender::instance()->query_metrics_collect(status, arg); } + { get_sender()->query_metrics_collect(status, arg); } PG_CATCH(); { ereport(WARNING, From 5011ddf2f0ed4bad1322983e89be77e69788e4be Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 16 Aug 2023 13:23:00 +0300 Subject: [PATCH 10/49] [yagp_hooks_collector] Fix memory leak in EXPLAIN ANALYZE code path --- protos/yagpcc_metrics.proto | 4 ++-- src/EventSender.cpp | 25 ++++++++----------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 26e0a496460..bc128a22f17 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -29,8 +29,8 @@ message QueryInfo { uint64 plan_id = 3; string query_text = 4; string plan_text = 5; - string temlate_query_text = 6; - string temlate_plan_text = 7; + string template_query_text = 6; + string template_plan_text = 7; string userName = 8; string databaseName = 9; string rsgname = 10; diff --git a/src/EventSender.cpp b/src/EventSender.cpp index ec966e8686c..6d2ff4afd47 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -61,13 +61,6 @@ std::string *get_rg_name() { return result; } -int get_cur_slice_id(QueryDesc *desc) { - if (!desc->estate) { - return 0; - } - return LocallyExecutingSliceIndex(desc->estate); -} - google::protobuf::Timestamp current_ts() { google::protobuf::Timestamp current_ts; struct timeval tv; @@ -113,7 +106,7 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); set_plan_text(qi->mutable_plan_text(), query_desc); StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_temlate_plan_text() = std::string(norm_plan->data); + *qi->mutable_template_plan_text() = 
std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); // TODO: free stringinfo? } @@ -121,7 +114,7 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { *qi->mutable_query_text() = query_desc->sourceText; char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_temlate_query_text() = std::string(norm_query); + *qi->mutable_template_query_text() = std::string(norm_query); pfree(norm_query); } @@ -246,20 +239,18 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } query_start_time = std::chrono::high_resolution_clock::now(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { - instr_time starttime; query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - // TODO: there is a PR resolving some memory leak around auto-explain: - // https://github.com/greenplum-db/gpdb/pull/15164 - // Need to check if the memory leak applies here as well and fix it - Assert(query_desc->showstatctx == NULL); - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + if (!query_desc->showstatctx) { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } } } } From f54c58398d381b28b5717af6bebb2683054d5cfd Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 6 Sep 2023 16:10:04 +0300 Subject: [PATCH 11/49] [yagp_hooks_collector] Add motion network and workfile spill stats --- protos/yagpcc_metrics.proto | 8 ++++++++ src/EventSender.cpp | 41 ++++++++++++++++++++++++++++--------- src/EventSender.h | 2 +- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git 
a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index bc128a22f17..2d20d3c46d9 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -84,6 +84,12 @@ message SystemStat { uint64 cancelled_write_bytes = 14; } +message NetworkStat { + uint32 total_bytes = 1; + uint32 tuple_bytes = 2; + uint32 chunks = 3; +} + message MetricInstrumentation { uint64 ntuples = 1; /* Total tuples produced */ uint64 nloops = 2; /* # of run cycles for this node */ @@ -103,6 +109,8 @@ message MetricInstrumentation { uint64 temp_blks_written = 16; double blk_read_time = 17; /* measured read/write time */ double blk_write_time = 18; + NetworkStat sent = 19; + NetworkStat received = 20; } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 6d2ff4afd47..2810e581313 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -4,6 +4,8 @@ #include #include +#define typeid __typeid +#define operator __operator extern "C" { #include "postgres.h" @@ -14,10 +16,12 @@ extern "C" { #include "executor/executor.h" #include "utils/elog.h" #include "utils/metrics_utils.h" +#include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/cdbinterconnect.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" #include "tcop/utility.h" @@ -25,6 +29,8 @@ extern "C" { void get_spill_info(int ssid, int ccid, int32_t *file_count, int64_t *total_bytes); } +#undef typeid +#undef operator #include "EventSender.h" @@ -160,6 +166,18 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, metrics->set_blk_write_time( INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); } + if (query_desc->estate && query_desc->estate->motionlayer_context) { + MotionLayerState *mlstate = + (MotionLayerState *)query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); + 
metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); + } } decltype(std::chrono::high_resolution_clock::now()) query_start_time; @@ -182,6 +200,8 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, std::chrono::high_resolution_clock::now() - query_start_time; metrics->mutable_systemstat()->set_runningtimeseconds( elapsed_seconds.count()); + metrics->mutable_spill()->set_filecount(WorkfileTotalFilesCreated()); + metrics->mutable_spill()->set_totalbytes(WorkfileTotalBytesWritten()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -238,6 +258,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } query_start_time = std::chrono::high_resolution_clock::now(); + WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -245,12 +266,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - if (!query_desc->showstatctx) { - instr_time starttime; - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); - } + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -281,7 +300,6 @@ void EventSender::executor_end(QueryDesc *query_desc) { } auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - set_query_info(&req, query_desc, false, false); // NOTE: there are no 
cummulative spillinfo stats AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. set_gp_metrics(req.mutable_query_metrics(), query_desc, @@ -303,7 +321,6 @@ void EventSender::collect_query_done(QueryDesc *query_desc, if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - set_query_info(&req, query_desc, false, false); send_query_info(&req, status); } } @@ -320,6 +337,10 @@ void EventSender::send_query_info(yagpcc::SetQueryReq *req, } } -EventSender::EventSender() { connector = std::make_unique(); } +EventSender::EventSender() { + if (Config::enable_collector()) { + connector = new GrpcConnector(); + } +} -EventSender::~EventSender() { connector.release(); } \ No newline at end of file +EventSender::~EventSender() { delete connector; } \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 92e6937a690..f53648bed36 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -24,6 +24,6 @@ class EventSender { void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); - std::unique_ptr connector; + GrpcConnector *connector; int nesting_level = 0; }; \ No newline at end of file From 2a40208abeea89e7180c4321e96d22a08e176930 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 6 Sep 2023 16:11:06 +0300 Subject: [PATCH 12/49] [yagp_hooks_collector] Clean up threading, signal handling, and logging Mute PG-destined signals in GRPC reconnection thread. Move debian config to CI. Redirect debug output to log file. Harden memory handling. Remove thread-unsafe logging and dead code. 
--- debian/compat | 1 - debian/control | 11 ------ debian/postinst | 8 ---- debian/rules | 10 ----- src/EventSender.cpp | 85 +++++++++++++++++++----------------------- src/EventSender.h | 1 - src/GrpcConnector.cpp | 85 ++++++++++++++++++++++++++++-------------- src/GrpcConnector.h | 3 +- src/SpillInfoWrapper.c | 21 ----------- 9 files changed, 97 insertions(+), 128 deletions(-) delete mode 100644 debian/compat delete mode 100644 debian/control delete mode 100644 debian/postinst delete mode 100644 debian/rules delete mode 100644 src/SpillInfoWrapper.c diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144f60..00000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index 07176e94be5..00000000000 --- a/debian/control +++ /dev/null @@ -1,11 +0,0 @@ -Source: greenplum-6-yagpcc-hooks -Section: misc -Priority: optional -Maintainer: Maxim Smyatkin -Build-Depends: make, gcc, g++, debhelper (>=9), greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) -Standards-Version: 3.9.8 - -Package: greenplum-6-yagpcc-hooks -Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends}, greenplum-db-6 (>=6.19.3), ya-grpc (=1.46-57-50820-02384e3918-yandex) -Description: Greenplum extension to send query execution metrics to yandex command center agent diff --git a/debian/postinst b/debian/postinst deleted file mode 100644 index 27ddfc06a7d..00000000000 --- a/debian/postinst +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -GPADMIN=gpadmin -GPHOME=/opt/greenplum-db-6 - -chown -R ${GPADMIN}:${GPADMIN} ${GPHOME} diff --git a/debian/rules b/debian/rules deleted file mode 100644 index 6c2c7491067..00000000000 --- a/debian/rules +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/make -f -# You must remove unused comment lines for the released package. 
-export DH_VERBOSE = 1 - - -export GPHOME := /opt/greenplum-db-6 -export PATH := $(GPHOME)/bin:$(PATH) - -%: - dh $@ diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 2810e581313..57fe6f13391 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,8 +1,8 @@ #include "Config.h" #include "GrpcConnector.h" #include "ProcStats.h" -#include #include +#include #define typeid __typeid #define operator __operator @@ -20,14 +20,11 @@ extern "C" { #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" -#include "cdb/cdbvars.h" #include "cdb/cdbinterconnect.h" +#include "cdb/cdbvars.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" #include "tcop/utility.h" - -void get_spill_info(int ssid, int ccid, int32_t *file_count, - int64_t *total_bytes); } #undef typeid #undef operator @@ -48,7 +45,6 @@ std::string *get_user_name() { std::string *get_db_name() { char *dbname = get_database_name(MyDatabaseId); std::string *result = dbname ? new std::string(dbname) : nullptr; - pfree(dbname); return result; } @@ -63,7 +59,6 @@ std::string *get_rg_name() { if (rgname == nullptr) return nullptr; auto result = new std::string(rgname); - pfree(rgname); return result; } @@ -114,14 +109,12 @@ void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); *qi->mutable_template_plan_text() = std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - // TODO: free stringinfo? 
} void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { *qi->mutable_query_text() = query_desc->sourceText; char *norm_query = gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = std::string(norm_query); - pfree(norm_query); } void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, @@ -182,16 +175,7 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, decltype(std::chrono::high_resolution_clock::now()) query_start_time; -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, - bool need_spillinfo) { - if (need_spillinfo) { - int32_t n_spill_files = 0; - int64_t n_spill_bytes = 0; - get_spill_info(gp_session_id, gp_command_count, &n_spill_files, - &n_spill_bytes); - metrics->mutable_spill()->set_filecount(n_spill_files); - metrics->mutable_spill()->set_totalbytes(n_spill_bytes); - } +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } @@ -254,6 +238,9 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { void EventSender::executor_before_start(QueryDesc *query_desc, int /* eflags*/) { + if (!connector) { + return; + } if (!need_collect()) { return; } @@ -275,71 +262,75 @@ void EventSender::executor_before_start(QueryDesc *query_desc, } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { + if (!connector) { + return; + } if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); set_query_info(&req, query_desc, false, true); - send_query_info(&req, "started"); + connector->set_metric_query(req, "started"); } } void EventSender::executor_end(QueryDesc *query_desc) { + if (!connector) { + return; + } if (!need_collect() || (Gp_role != GP_ROLE_DISPATCH && 
Gp_role != GP_ROLE_EXECUTE)) { return; } - if (query_desc->totaltime && Config::enable_analyze() && - Config::enable_cdbstats()) { - if (query_desc->estate->dispatcherState && - query_desc->estate->dispatcherState->primaryResults) { - cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, - DISPATCH_WAIT_NONE); - } - InstrEndLoop(query_desc->totaltime); - } + /* TODO: when querying via CURSOR this call freezes. Need to investigate. + To reproduce - uncomment it and run installchecks. It will freeze around join test. + Needs investigation + + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + Config::enable_cdbstats() && query_desc->estate->dispatcherState && + query_desc->estate->dispatcherState->primaryResults) { + cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, + DISPATCH_WAIT_NONE); + }*/ auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to // gather it here. It only makes sense when doing regular stat checks. 
- set_gp_metrics(req.mutable_query_metrics(), query_desc, - /*need_spillinfo*/ false); - send_query_info(&req, "ended"); + set_gp_metrics(req.mutable_query_metrics(), query_desc); + connector->set_metric_query(req, "ended"); } void EventSender::collect_query_submit(QueryDesc *query_desc) { + if (!connector) { + return; + } if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); set_query_info(&req, query_desc, true, false); - send_query_info(&req, "submit"); + connector->set_metric_query(req, "submit"); } } void EventSender::collect_query_done(QueryDesc *query_desc, const std::string &status) { + if (!connector) { + return; + } if (need_collect()) { auto req = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - send_query_info(&req, status); - } -} - -void EventSender::send_query_info(yagpcc::SetQueryReq *req, - const std::string &event) { - auto result = connector->set_metric_query(*req); - if (result.error_code() == yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR) { - ereport(WARNING, - (errmsg("Query {%d-%d-%d} %s reporting failed with an error %s", - req->query_key().tmid(), req->query_key().ssid(), - req->query_key().ccnt(), event.c_str(), - result.error_text().c_str()))); + connector->set_metric_query(req, status); } } EventSender::EventSender() { if (Config::enable_collector()) { - connector = new GrpcConnector(); + try { + connector = new GrpcConnector(); + } catch (const std::exception &e) { + ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); + } } } diff --git a/src/EventSender.h b/src/EventSender.h index f53648bed36..ee0db2f0938 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -23,7 +23,6 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - void send_query_info(yagpcc::SetQueryReq *req, const std::string &event); GrpcConnector *connector; int nesting_level = 
0; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp index 966bfb4a780..73c1944fa04 100644 --- a/src/GrpcConnector.cpp +++ b/src/GrpcConnector.cpp @@ -7,45 +7,72 @@ #include #include #include +#include +#include #include #include -extern "C" -{ +extern "C" { #include "postgres.h" #include "cdb/cdbvars.h" } -class GrpcConnector::Impl -{ +/* + * Set up the thread signal mask, we don't want to run our signal handlers + * in downloading and uploading threads. + */ +static void MaskThreadSignals() { + sigset_t sigs; + + if (pthread_equal(main_tid, pthread_self())) { + ereport(ERROR, (errmsg("thread_mask is called from main thread!"))); + return; + } + + sigemptyset(&sigs); + + /* make our thread to ignore these signals (which should allow that they be + * delivered to the main thread) */ + sigaddset(&sigs, SIGHUP); + sigaddset(&sigs, SIGINT); + sigaddset(&sigs, SIGTERM); + sigaddset(&sigs, SIGALRM); + sigaddset(&sigs, SIGUSR1); + sigaddset(&sigs, SIGUSR2); + + pthread_sigmask(SIG_BLOCK, &sigs, NULL); +} + +class GrpcConnector::Impl { public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) - { + Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { GOOGLE_PROTOBUF_VERIFY_VERSION; channel = grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); stub = yagpcc::SetQueryInfo::NewStub(channel); connected = true; + reconnected = false; done = false; reconnect_thread = std::thread(&Impl::reconnect, this); } - ~Impl() - { + ~Impl() { done = true; cv.notify_one(); reconnect_thread.join(); } - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req) - { + yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event) { yagpcc::MetricResponse response; - if (!connected) - { + if (!connected) { response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); response.set_error_text( - "Not tracing this query connection to agent has been lost"); + "Not tracing this query 
because grpc connection has been lost"); return response; + } else if (reconnected) { + reconnected = false; + ereport(LOG, (errmsg("GRPC connection is restored"))); } grpc::ClientContext context; int timeout = Gp_role == GP_ROLE_DISPATCH ? 500 : 250; @@ -53,12 +80,16 @@ class GrpcConnector::Impl std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); context.set_deadline(deadline); grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) - { - response.set_error_text("Connection lost: " + status.error_message() + - "; " + status.error_details()); + if (!status.ok()) { + response.set_error_text("GRPC error: " + status.error_message() + "; " + + status.error_details()); response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str(), + response.error_text().c_str()))); connected = false; + reconnected = false; cv.notify_one(); } @@ -69,25 +100,23 @@ class GrpcConnector::Impl const std::string SOCKET_FILE; std::unique_ptr stub; std::shared_ptr channel; - std::atomic_bool connected; + std::atomic_bool connected, reconnected, done; std::thread reconnect_thread; std::condition_variable cv; std::mutex mtx; - bool done; - void reconnect() - { - while (!done) - { + void reconnect() { + MaskThreadSignals(); + while (!done) { { std::unique_lock lock(mtx); cv.wait(lock); } - while (!connected && !done) - { + while (!connected && !done) { auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); connected = channel->WaitForConnected(deadline); + reconnected = connected.load(); } } } @@ -98,7 +127,7 @@ GrpcConnector::GrpcConnector() { impl = new Impl(); } GrpcConnector::~GrpcConnector() { delete impl; } yagpcc::MetricResponse -GrpcConnector::set_metric_query(yagpcc::SetQueryReq req) -{ - return impl->set_metric_query(req); 
+GrpcConnector::set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event) { + return impl->set_metric_query(req, event); } \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h index 4fca6960a4e..6571c626dfd 100644 --- a/src/GrpcConnector.h +++ b/src/GrpcConnector.h @@ -6,7 +6,8 @@ class GrpcConnector { public: GrpcConnector(); ~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(yagpcc::SetQueryReq req); + yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, + const std::string &event); private: class Impl; diff --git a/src/SpillInfoWrapper.c b/src/SpillInfoWrapper.c deleted file mode 100644 index c6ace0a693f..00000000000 --- a/src/SpillInfoWrapper.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "postgres.h" -#include "utils/workfile_mgr.h" - -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes); - -void get_spill_info(int ssid, int ccid, int32_t* file_count, int64_t* total_bytes) -{ - int count = 0; - int i = 0; - workfile_set *workfiles = workfile_mgr_cache_entries_get_copy(&count); - workfile_set *wf_iter = workfiles; - for (i = 0; i < count; ++i, ++wf_iter) - { - if (wf_iter->active && wf_iter->session_id == ssid && wf_iter->command_count == ccid) - { - *file_count += wf_iter->num_files; - *total_bytes += wf_iter->total_bytes; - } - } - pfree(workfiles); -} \ No newline at end of file From 036e15ea85971987c7dd448efc14147b97696ff4 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 21 Sep 2023 15:16:35 +0300 Subject: [PATCH 13/49] [yagp_hooks_collector] Add ignored_users_list GUC Add a comma-separated GUC to suppress metrics collection for specified roles. Parse using SplitIdentifierString and cache in an unordered_set. 
--- src/Config.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ src/Config.h | 1 + src/EventSender.cpp | 5 +++-- src/EventSender.h | 2 +- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index d97e5d45984..c5c2c15f7e9 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,4 +1,7 @@ #include "Config.h" +#include +#include +#include extern "C" { #include "postgres.h" @@ -10,6 +13,8 @@ static char *guc_uds_path = nullptr; static bool guc_enable_analyze = true; static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; +static char *guc_ignored_users = nullptr; +static std::unique_ptr> ignored_users = nullptr; void Config::init() { DefineCustomStringVariable( @@ -30,9 +35,47 @@ void Config::init() { "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, &guc_enable_cdbstats, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomStringVariable( + "yagpcc.ignored_users_list", + "Make yagpcc ignore queries issued by given users", 0LL, + &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); } std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } + +bool Config::filter_user(const std::string *username) { + if (!ignored_users) { + ignored_users.reset(new std::unordered_set()); + if (guc_ignored_users == nullptr || guc_ignored_users[0] == '0') { + return false; + } + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + 
(errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return false; + } + foreach (l, elemlist) { + ignored_users->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + return !username || ignored_users->find(*username) != ignored_users->end(); +} diff --git a/src/Config.h b/src/Config.h index 117481f219b..999d0300640 100644 --- a/src/Config.h +++ b/src/Config.h @@ -9,4 +9,5 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); + static bool filter_user(const std::string *username); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 57fe6f13391..9146078fd0e 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -33,7 +33,8 @@ extern "C" { #define need_collect() \ (nesting_level == 0 && gp_command_count != 0 && \ - query_desc->sourceText != nullptr && Config::enable_collector()) + query_desc->sourceText != nullptr && Config::enable_collector() && \ + !Config::filter_user(get_user_name())) namespace { @@ -325,7 +326,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } EventSender::EventSender() { - if (Config::enable_collector()) { + if (Config::enable_collector() && !Config::filter_user(get_user_name())) { try { connector = new GrpcConnector(); } catch (const std::exception &e) { diff --git a/src/EventSender.h b/src/EventSender.h index ee0db2f0938..2af8b7ffa03 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -23,6 +23,6 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - GrpcConnector *connector; + GrpcConnector *connector = nullptr; int nesting_level = 0; }; \ No newline at end of file From 004fffdd6089a2bf3407e3753f71417eaeefbac0 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 2 Oct 2023 12:54:32 +0300 Subject: [PATCH 14/49] 
[yagp_hooks_collector] Replace GRPC transport with protobuf-over-UDS Remove GRPC dependency. Serialize metrics as protobuf messages and deliver them over a Unix domain socket. Replace server-side message queue with incremental per-query message building. Add clang-format configuration. Use deprecated protobuf API for bionic compatibility. --- .clang-format | 2 + protos/yagpcc_set_service.proto | 23 ++---- src/EventSender.cpp | 115 ++++++++++++++++----------- src/EventSender.h | 10 ++- src/GrpcConnector.cpp | 133 -------------------------------- src/GrpcConnector.h | 15 ---- src/UDSConnector.cpp | 83 ++++++++++++++++++++ src/UDSConnector.h | 13 ++++ 8 files changed, 183 insertions(+), 211 deletions(-) create mode 100644 .clang-format delete mode 100644 src/GrpcConnector.cpp delete mode 100644 src/GrpcConnector.h create mode 100644 src/UDSConnector.cpp create mode 100644 src/UDSConnector.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..99130575c9a --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +SortIncludes: false diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index 93c2f5a01d1..e8fc7aaa99d 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -9,23 +9,6 @@ package yagpcc; option java_outer_classname = "SegmentYAGPCCAS"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; -service SetQueryInfo { - rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} - - rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} -} - -message MetricResponse { - MetricResponseStatusCode error_code = 1; - string error_text = 2; -} - -enum MetricResponseStatusCode { - METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; - METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; - METRIC_RESPONSE_STATUS_CODE_ERROR = 2; -} - message SetQueryReq { QueryStatus query_status = 1; google.protobuf.Timestamp datetime = 2; @@ -34,6 +17,9 @@ 
message SetQueryReq { QueryInfo query_info = 5; GPMetrics query_metrics = 6; repeated MetricPlan plan_tree = 7; + google.protobuf.Timestamp submit_time = 8; + google.protobuf.Timestamp start_time = 9; + google.protobuf.Timestamp end_time = 10; } message SetPlanNodeReq { @@ -43,4 +29,7 @@ message SetPlanNodeReq { SegmentKey segment_key = 4; GPMetrics node_metrics = 5; MetricPlan plan_node = 6; + google.protobuf.Timestamp submit_time = 7; + google.protobuf.Timestamp start_time = 8; + google.protobuf.Timestamp end_time = 9; } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 9146078fd0e..834553a6187 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,6 @@ #include "Config.h" -#include "GrpcConnector.h" #include "ProcStats.h" +#include "UDSConnector.h" #include #include @@ -15,7 +15,6 @@ extern "C" { #include "commands/resgroupcmds.h" #include "executor/executor.h" #include "utils/elog.h" -#include "utils/metrics_utils.h" #include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" @@ -102,33 +101,46 @@ void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { *plan_text = std::string(es.str->data, es.str->len); } -void set_query_plan(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_template_plan_text() = std::string(norm_plan->data); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + auto qi = req->mutable_query_info(); + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + set_plan_text(qi->mutable_plan_text(), query_desc); + StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); + *qi->mutable_template_plan_text() = std::string(norm_plan->data); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + // TODO: For now assume queryid equal to planid, which is wrong. The + // reason for doing so this bug + // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses + // pg_stat_statements` queryid during planning phase). Need to fix it + // upstream, cherry-pick and bump gp + // qi->set_query_id(query_desc->plannedstmt->queryId); + qi->set_query_id(qi->plan_id()); + } } -void set_query_text(yagpcc::QueryInfo *qi, QueryDesc *query_desc) { - *qi->mutable_query_text() = query_desc->sourceText; - char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = std::string(norm_query); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = query_desc->sourceText; + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_template_query_text() = std::string(norm_query); + } } -void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc, - bool with_text, bool with_plan) { +void clear_big_fields(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_query_text(); + } +} + +void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); - if (query_desc->sourceText && with_text) { - set_query_text(qi, query_desc); - } - if (query_desc->plannedstmt && with_plan) { - set_query_plan(qi, query_desc); - 
qi->set_query_id(query_desc->plannedstmt->queryId); - } qi->set_allocated_username(get_user_name()); qi->set_allocated_databasename(get_db_name()); qi->set_allocated_rsgname(get_rg_name()); @@ -245,6 +257,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!need_collect()) { return; } + if (query_msg->has_query_key()) { + connector->report_query(*query_msg, "previous query"); + query_msg->Clear(); + } query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -268,10 +284,12 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && need_collect()) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_START); - set_query_info(&req, query_desc, false, true); - connector->set_metric_query(req, "started"); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + *query_msg->mutable_start_time() = current_ts(); + set_query_plan(query_msg, query_desc); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } } } @@ -284,21 +302,21 @@ void EventSender::executor_end(QueryDesc *query_desc) { return; } /* TODO: when querying via CURSOR this call freezes. Need to investigate. - To reproduce - uncomment it and run installchecks. It will freeze around join test. - Needs investigation - + To reproduce - uncomment it and run installchecks. It will freeze around + join test. 
Needs investigation + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && Config::enable_cdbstats() && query_desc->estate->dispatcherState && query_desc->estate->dispatcherState->primaryResults) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, DISPATCH_WAIT_NONE); }*/ - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_END); - // NOTE: there are no cummulative spillinfo stats AFAIU, so no need to - // gather it here. It only makes sense when doing regular stat checks. - set_gp_metrics(req.mutable_query_metrics(), query_desc); - connector->set_metric_query(req, "ended"); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + if (connector->report_query(*query_msg, "ended")) { + query_msg->Clear(); + } } void EventSender::collect_query_submit(QueryDesc *query_desc) { @@ -306,10 +324,14 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { return; } if (need_collect()) { - auto req = + *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - set_query_info(&req, query_desc, true, false); - connector->set_metric_query(req, "submit"); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg, query_desc); + set_query_text(query_msg, query_desc); + if (connector->report_query(*query_msg, "submit")) { + clear_big_fields(query_msg); + } } } @@ -319,20 +341,25 @@ void EventSender::collect_query_done(QueryDesc *query_desc, return; } if (need_collect()) { - auto req = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_DONE); - connector->set_metric_query(req, status); + query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + if (connector->report_query(*query_msg, status)) { + clear_big_fields(query_msg); + } } } EventSender::EventSender() { if (Config::enable_collector() && 
!Config::filter_user(get_user_name())) { + query_msg = new yagpcc::SetQueryReq(); try { - connector = new GrpcConnector(); + connector = new UDSConnector(); } catch (const std::exception &e) { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } } } -EventSender::~EventSender() { delete connector; } \ No newline at end of file +EventSender::~EventSender() { + delete query_msg; + delete connector; +} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 2af8b7ffa03..161bf6ce037 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,9 +1,14 @@ #pragma once #include +#include #include -class GrpcConnector; +extern "C" { +#include "utils/metrics_utils.h" +} + +class UDSConnector; struct QueryDesc; namespace yagpcc { class SetQueryReq; @@ -23,6 +28,7 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, const std::string &status); - GrpcConnector *connector = nullptr; + UDSConnector *connector = nullptr; int nesting_level = 0; + yagpcc::SetQueryReq *query_msg; }; \ No newline at end of file diff --git a/src/GrpcConnector.cpp b/src/GrpcConnector.cpp deleted file mode 100644 index 73c1944fa04..00000000000 --- a/src/GrpcConnector.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include "GrpcConnector.h" -#include "Config.h" -#include "yagpcc_set_service.grpc.pb.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern "C" { -#include "postgres.h" -#include "cdb/cdbvars.h" -} - -/* - * Set up the thread signal mask, we don't want to run our signal handlers - * in downloading and uploading threads. 
- */ -static void MaskThreadSignals() { - sigset_t sigs; - - if (pthread_equal(main_tid, pthread_self())) { - ereport(ERROR, (errmsg("thread_mask is called from main thread!"))); - return; - } - - sigemptyset(&sigs); - - /* make our thread to ignore these signals (which should allow that they be - * delivered to the main thread) */ - sigaddset(&sigs, SIGHUP); - sigaddset(&sigs, SIGINT); - sigaddset(&sigs, SIGTERM); - sigaddset(&sigs, SIGALRM); - sigaddset(&sigs, SIGUSR1); - sigaddset(&sigs, SIGUSR2); - - pthread_sigmask(SIG_BLOCK, &sigs, NULL); -} - -class GrpcConnector::Impl { -public: - Impl() : SOCKET_FILE("unix://" + Config::uds_path()) { - GOOGLE_PROTOBUF_VERIFY_VERSION; - channel = - grpc::CreateChannel(SOCKET_FILE, grpc::InsecureChannelCredentials()); - stub = yagpcc::SetQueryInfo::NewStub(channel); - connected = true; - reconnected = false; - done = false; - reconnect_thread = std::thread(&Impl::reconnect, this); - } - - ~Impl() { - done = true; - cv.notify_one(); - reconnect_thread.join(); - } - - yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event) { - yagpcc::MetricResponse response; - if (!connected) { - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - response.set_error_text( - "Not tracing this query because grpc connection has been lost"); - return response; - } else if (reconnected) { - reconnected = false; - ereport(LOG, (errmsg("GRPC connection is restored"))); - } - grpc::ClientContext context; - int timeout = Gp_role == GP_ROLE_DISPATCH ? 
500 : 250; - auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(timeout); - context.set_deadline(deadline); - grpc::Status status = (stub->SetMetricQuery)(&context, req, &response); - if (!status.ok()) { - response.set_error_text("GRPC error: " + status.error_message() + "; " + - status.error_details()); - response.set_error_code(yagpcc::METRIC_RESPONSE_STATUS_CODE_ERROR); - ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", - req.query_key().tmid(), req.query_key().ssid(), - req.query_key().ccnt(), event.c_str(), - response.error_text().c_str()))); - connected = false; - reconnected = false; - cv.notify_one(); - } - - return response; - } - -private: - const std::string SOCKET_FILE; - std::unique_ptr stub; - std::shared_ptr channel; - std::atomic_bool connected, reconnected, done; - std::thread reconnect_thread; - std::condition_variable cv; - std::mutex mtx; - - void reconnect() { - MaskThreadSignals(); - while (!done) { - { - std::unique_lock lock(mtx); - cv.wait(lock); - } - while (!connected && !done) { - auto deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(100); - connected = channel->WaitForConnected(deadline); - reconnected = connected.load(); - } - } - } -}; - -GrpcConnector::GrpcConnector() { impl = new Impl(); } - -GrpcConnector::~GrpcConnector() { delete impl; } - -yagpcc::MetricResponse -GrpcConnector::set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event) { - return impl->set_metric_query(req, event); -} \ No newline at end of file diff --git a/src/GrpcConnector.h b/src/GrpcConnector.h deleted file mode 100644 index 6571c626dfd..00000000000 --- a/src/GrpcConnector.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "protos/yagpcc_set_service.pb.h" - -class GrpcConnector { -public: - GrpcConnector(); - ~GrpcConnector(); - yagpcc::MetricResponse set_metric_query(const yagpcc::SetQueryReq &req, - const std::string &event); - -private: - class Impl; - 
Impl *impl; -}; \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp new file mode 100644 index 00000000000..339a5d4f374 --- /dev/null +++ b/src/UDSConnector.cpp @@ -0,0 +1,83 @@ +#include "UDSConnector.h" +#include "Config.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "cdb/cdbvars.h" +} + +UDSConnector::UDSConnector() : uds_path("unix://" + Config::uds_path()) { + GOOGLE_PROTOBUF_VERIFY_VERSION; +} + +static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, + const std::string &event) { + ereport(LOG, + (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str(), strerror(errno)))); +} + +bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, + const std::string &event) { + sockaddr_un address; + address.sun_family = AF_UNIX; + strcpy(address.sun_path, uds_path.c_str()); + bool success = true; + auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd != -1) { + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) != -1) { + if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { + auto data_size = req.ByteSize(); + auto total_size = data_size + sizeof(uint32_t); + uint8_t *buf = (uint8_t *)palloc(total_size); + uint32_t *size_payload = (uint32_t *)buf; + *size_payload = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + int64_t sent = 0, sent_total = 0; + do { + sent = send(sockfd, buf + sent_total, total_size - sent_total, + MSG_DONTWAIT); + sent_total += sent; + } while ( + sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. 
+ // Otherwise, MSG_DONTWAIT send might overflow the UDS + (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + if (sent < 0) { + log_tracing_failure(req, event); + success = false; + } + pfree(buf); + } else { + // log the error and go on + log_tracing_failure(req, event); + success = false; + } + } else { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. + ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %s", + strerror(errno)))); + success = false; + } + close(sockfd); + } else { + // log the error and go on + log_tracing_failure(req, event); + success = false; + } + return success; +} \ No newline at end of file diff --git a/src/UDSConnector.h b/src/UDSConnector.h new file mode 100644 index 00000000000..574653023e6 --- /dev/null +++ b/src/UDSConnector.h @@ -0,0 +1,13 @@ +#pragma once + +#include "protos/yagpcc_set_service.pb.h" +#include + +class UDSConnector { +public: + UDSConnector(); + bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); + +private: + const std::string uds_path; +}; \ No newline at end of file From 79337909d2941f43a66d6007dab25265007775de Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 2 Nov 2023 14:38:24 +0300 Subject: [PATCH 15/49] [yagp_hooks_collector] Fix missing query statuses after protobuf migration --- src/EventSender.cpp | 47 ++++++++++++++++++++++++++++----------------- src/EventSender.h | 2 +- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 834553a6187..45d72b93e48 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -230,16 +230,10 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // no-op: executor_after_start is enough break; case METRICS_QUERY_DONE: - collect_query_done(reinterpret_cast(arg), "done"); - break; case METRICS_QUERY_ERROR: - collect_query_done(reinterpret_cast(arg), 
"error"); - break; case METRICS_QUERY_CANCELING: - collect_query_done(reinterpret_cast(arg), "calcelling"); - break; case METRICS_QUERY_CANCELED: - collect_query_done(reinterpret_cast(arg), "cancelled"); + collect_query_done(reinterpret_cast(arg), status); break; case METRICS_INNER_QUERY_DONE: // TODO @@ -320,10 +314,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (!connector) { - return; - } - if (need_collect()) { + if (connector && need_collect()) { *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); @@ -336,13 +327,33 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { } void EventSender::collect_query_done(QueryDesc *query_desc, - const std::string &status) { - if (!connector) { - return; - } - if (need_collect()) { - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); - if (connector->report_query(*query_msg, status)) { + QueryMetricsStatus status) { + if (connector && need_collect()) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + query_msg->set_query_status(query_status); + if (connector->report_query(*query_msg, msg)) { clear_big_fields(query_msg); } } diff --git a/src/EventSender.h b/src/EventSender.h index 161bf6ce037..0e8985873b6 100644 --- a/src/EventSender.h +++ 
b/src/EventSender.h @@ -27,7 +27,7 @@ class EventSender { private: void collect_query_submit(QueryDesc *query_desc); - void collect_query_done(QueryDesc *query_desc, const std::string &status); + void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); UDSConnector *connector = nullptr; int nesting_level = 0; yagpcc::SetQueryReq *query_msg; From 6f416a9cd3de80567cf39b8c7110bd80478c4ce2 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 13 Nov 2023 15:38:31 +0300 Subject: [PATCH 16/49] [yagp_hooks_collector] Add stat_messages() runtime statistics view Add SQL functions stat_messages() and stat_messages_reset() exposing per-segment UDS transport counters: total_messages, send_failures, connection_failures, other_errors, max_message_size. --- sql/yagp-hooks-collector--1.0.sql | 2 - sql/yagp-hooks-collector--unpackaged--1.0.sql | 2 - sql/yagp_hooks_collector--1.0.sql | 55 +++++++++++ src/UDSConnector.cpp | 13 ++- src/UDSConnector.h | 3 - src/YagpStat.cpp | 91 +++++++++++++++++++ src/YagpStat.h | 21 +++++ src/hook_wrappers.cpp | 52 ++++++++++- src/hook_wrappers.h | 2 + src/yagp_hooks_collector.c | 13 ++- ...or.control => yagp_hooks_collector.control | 4 +- 11 files changed, 242 insertions(+), 16 deletions(-) delete mode 100644 sql/yagp-hooks-collector--1.0.sql delete mode 100644 sql/yagp-hooks-collector--unpackaged--1.0.sql create mode 100644 sql/yagp_hooks_collector--1.0.sql create mode 100644 src/YagpStat.cpp create mode 100644 src/YagpStat.h rename yagp-hooks-collector.control => yagp_hooks_collector.control (61%) diff --git a/sql/yagp-hooks-collector--1.0.sql b/sql/yagp-hooks-collector--1.0.sql deleted file mode 100644 index f9ab15fb400..00000000000 --- a/sql/yagp-hooks-collector--1.0.sql +++ /dev/null @@ -1,2 +0,0 @@ --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use '''CREATE EXTENSION "yagp-hooks-collector"''' to load this file. 
\quit diff --git a/sql/yagp-hooks-collector--unpackaged--1.0.sql b/sql/yagp-hooks-collector--unpackaged--1.0.sql deleted file mode 100644 index 0441c97bd84..00000000000 --- a/sql/yagp-hooks-collector--unpackaged--1.0.sql +++ /dev/null @@ -1,2 +0,0 @@ --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use '''CREATE EXTENSION "uuid-cb" FROM unpackaged''' to load this file. \quit diff --git a/sql/yagp_hooks_collector--1.0.sql b/sql/yagp_hooks_collector--1.0.sql new file mode 100644 index 00000000000..88bbe4e0dc7 --- /dev/null +++ b/sql/yagp_hooks_collector--1.0.sql @@ -0,0 +1,55 @@ +/* yagp_hooks_collector--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. \quit + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagp_stat_messages_reset() +RETURNS void +AS +$$ + SELECT __yagp_stat_messages_reset_f_on_master(); + SELECT __yagp_stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION __yagp_stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagp_stat_messages AS + SELECT C.* + FROM __yagp_stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM __yagp_stat_messages_f_on_segments() as C ( + 
segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 339a5d4f374..b9088205250 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -1,5 +1,6 @@ #include "UDSConnector.h" #include "Config.h" +#include "YagpStat.h" #include #include @@ -15,9 +16,7 @@ extern "C" { #include "cdb/cdbvars.h" } -UDSConnector::UDSConnector() : uds_path("unix://" + Config::uds_path()) { - GOOGLE_PROTOBUF_VERIFY_VERSION; -} +UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { @@ -31,7 +30,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event) { sockaddr_un address; address.sun_family = AF_UNIX; - strcpy(address.sun_path, uds_path.c_str()); + strcpy(address.sun_path, Config::uds_path().c_str()); bool success = true; auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd != -1) { @@ -58,12 +57,16 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (sent < 0) { log_tracing_failure(req, event); success = false; + YagpStat::report_bad_send(total_size); + } else { + YagpStat::report_send(total_size); } pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); success = false; + YagpStat::report_bad_connection(); } } else { // That's a very important error that should never happen, so make it @@ -72,12 +75,14 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, (errmsg("Unable to create non-blocking socket connection %s", strerror(errno)))); success = false; + YagpStat::report_error(); } close(sockfd); } else { // log the error and go on log_tracing_failure(req, event); success = false; + YagpStat::report_error(); } return success; } \ No newline at end of file diff --git a/src/UDSConnector.h b/src/UDSConnector.h 
index 574653023e6..42e0aa20968 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -7,7 +7,4 @@ class UDSConnector { public: UDSConnector(); bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); - -private: - const std::string uds_path; }; \ No newline at end of file diff --git a/src/YagpStat.cpp b/src/YagpStat.cpp new file mode 100644 index 00000000000..879cde85212 --- /dev/null +++ b/src/YagpStat.cpp @@ -0,0 +1,91 @@ +#include "YagpStat.h" + +#include + +extern "C" { +#include "postgres.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/spin.h" +} + +namespace { +struct ProtectedData { + slock_t mutex; + YagpStat::Data data; +}; +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +ProtectedData *data = nullptr; + +void yagp_shmem_startup() { + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + bool found; + data = reinterpret_cast( + ShmemInitStruct("yagp_stat_messages", sizeof(ProtectedData), &found)); + if (!found) { + SpinLockInit(&data->mutex); + data->data = YagpStat::Data(); + } + LWLockRelease(AddinShmemInitLock); +} + +class LockGuard { +public: + LockGuard(slock_t *mutex) : mutex_(mutex) { SpinLockAcquire(mutex_); } + ~LockGuard() { SpinLockRelease(mutex_); } + +private: + slock_t *mutex_; +}; +} // namespace + +void YagpStat::init() { + if (!process_shared_preload_libraries_in_progress) + return; + RequestAddinShmemSpace(sizeof(ProtectedData)); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = yagp_shmem_startup; +} + +void YagpStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; } + +void YagpStat::reset() { + LockGuard lg(&data->mutex); + data->data = YagpStat::Data(); +} + +void YagpStat::report_send(int32_t msg_size) { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.max_message_size = std::max(msg_size, 
data->data.max_message_size); +} + +void YagpStat::report_bad_connection() { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_connects++; +} + +void YagpStat::report_bad_send(int32_t msg_size) { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_sends++; + data->data.max_message_size = std::max(msg_size, data->data.max_message_size); +} + +void YagpStat::report_error() { + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_other++; +} + +YagpStat::Data YagpStat::get_stats() { + LockGuard lg(&data->mutex); + return data->data; +} + +bool YagpStat::loaded() { return data != nullptr; } diff --git a/src/YagpStat.h b/src/YagpStat.h new file mode 100644 index 00000000000..110b1fdcbb1 --- /dev/null +++ b/src/YagpStat.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +class YagpStat { +public: + struct Data { + int64_t total, failed_sends, failed_connects, failed_other; + int32_t max_message_size; + }; + + static void init(); + static void deinit(); + static void reset(); + static void report_send(int32_t msg_size); + static void report_bad_connection(); + static void report_bad_send(int32_t msg_size); + static void report_error(); + static Data get_stats(); + static bool loaded(); +}; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 66ba6547ce2..37f80385a6b 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,16 +1,17 @@ extern "C" { #include "postgres.h" +#include "funcapi.h" #include "executor/executor.h" #include "utils/elog.h" +#include "utils/builtins.h" #include "utils/metrics_utils.h" - #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" - #include "tcop/utility.h" } #include "Config.h" +#include "YagpStat.h" #include "EventSender.h" #include "hook_wrappers.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" @@ -39,6 +40,7 @@ static inline EventSender *get_sender() { void hooks_init() { Config::init(); + YagpStat::init(); 
previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; previous_ExecutorRun_hook = ExecutorRun_hook; @@ -62,6 +64,7 @@ void hooks_deinit() { if (sender) { delete sender; } + YagpStat::deinit(); } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { @@ -150,4 +153,49 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { if (previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } +} + +static void check_stats_loaded() { + if (!YagpStat::loaded()) { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("yagp_hooks_collector must be loaded via " + "shared_preload_libraries"))); + } +} + +void yagp_functions_reset() { + check_stats_loaded(); + YagpStat::reset(); +} + +Datum yagp_functions_get(FunctionCallInfo fcinfo) { + const int ATTNUM = 6; + check_stats_loaded(); + auto stats = YagpStat::get_stats(); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM, false); + TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, + 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)3, "send_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)4, "connection_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)5, "other_errors", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber)6, "max_message_size", INT4OID, + -1 /* typmod */, 0 /* attdim */); + tupdesc = BlessTupleDesc(tupdesc); + Datum values[ATTNUM]; + bool nulls[ATTNUM]; + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(GpIdentity.segindex); + values[1] = Int64GetDatum(stats.total); + values[2] = Int64GetDatum(stats.failed_sends); + values[3] = Int64GetDatum(stats.failed_connects); + values[4] = 
Int64GetDatum(stats.failed_other); + values[5] = Int32GetDatum(stats.max_message_size); + HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls); + Datum result = HeapTupleGetDatum(tuple); + PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index 815fcb7cd51..c158f42cf1d 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -6,6 +6,8 @@ extern "C" { extern void hooks_init(); extern void hooks_deinit(); +extern void yagp_functions_reset(); +extern Datum yagp_functions_get(FunctionCallInfo fcinfo); #ifdef __cplusplus } diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 69475ea5079..2a9e7328e6d 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -1,6 +1,6 @@ #include "postgres.h" #include "cdb/cdbvars.h" -#include "fmgr.h" +#include "utils/builtins.h" #include "hook_wrappers.h" @@ -8,6 +8,8 @@ PG_MODULE_MAGIC; void _PG_init(void); void _PG_fini(void); +PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); +PG_FUNCTION_INFO_V1(yagp_stat_messages); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { @@ -20,3 +22,12 @@ void _PG_fini(void) { hooks_deinit(); } } + +Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { + yagp_functions_reset(); + PG_RETURN_VOID(); +} + +Datum yagp_stat_messages(PG_FUNCTION_ARGS) { + return yagp_functions_get(fcinfo); +} \ No newline at end of file diff --git a/yagp-hooks-collector.control b/yagp_hooks_collector.control similarity index 61% rename from yagp-hooks-collector.control rename to yagp_hooks_collector.control index 82c189a88fc..b5539dd6462 100644 --- a/yagp-hooks-collector.control +++ b/yagp_hooks_collector.control @@ -1,5 +1,5 @@ -# yagp-hooks-collector extension +# yagp_hooks_collector extension comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' default_version = '1.0' -module_pathname = '$libdir/yagp-hooks-collector' +module_pathname = 
'$libdir/yagp_hooks_collector' superuser = true From e941608b1d58787f320cb67a5b4121eb66b8a84e Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 15 Nov 2023 13:37:10 +0300 Subject: [PATCH 17/49] [yagp_hooks_collector] Fix message lifecycle ordering and memory leaks Move query message cleanup to the correct lifecycle point. Finalize fields before sending DONE event. Fix protobuf message memory leaks. --- src/EventSender.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 45d72b93e48..e3be58b194e 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -39,12 +39,17 @@ namespace { std::string *get_user_name() { const char *username = GetConfigOption("session_authorization", false, false); + // username is not to be freed return username ? new std::string(username) : nullptr; } std::string *get_db_name() { char *dbname = get_database_name(MyDatabaseId); - std::string *result = dbname ? 
new std::string(dbname) : nullptr; + std::string *result = nullptr; + if (dbname) { + result = new std::string(dbname); + pfree(dbname); + } return result; } @@ -58,8 +63,7 @@ std::string *get_rg_name() { char *rgname = GetResGroupNameForId(groupId); if (rgname == nullptr) return nullptr; - auto result = new std::string(rgname); - return result; + return new std::string(rgname); } google::protobuf::Timestamp current_ts() { @@ -97,8 +101,12 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { } void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); *plan_text = std::string(es.str->data, es.str->len); + pfree(es.str->data); + MemoryContextSwitchTo(oldcxt); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -251,10 +259,6 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!need_collect()) { return; } - if (query_msg->has_query_key()) { - connector->report_query(*query_msg, "previous query"); - query_msg->Clear(); - } query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -263,11 +267,12 @@ void EventSender::executor_before_start(QueryDesc *query_desc, query_desc->instrument_options |= INSTRUMENT_TIMER; if (Config::enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; - - instr_time starttime; - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + if (!query_desc->showstatctx) { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + cdbexplain_showExecStatsBegin(query_desc, starttime); + } } } } @@ -309,12 +314,16 @@ void EventSender::executor_end(QueryDesc *query_desc) { *query_msg->mutable_end_time() = current_ts(); 
set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); if (connector->report_query(*query_msg, "ended")) { - query_msg->Clear(); + clear_big_fields(query_msg); } } void EventSender::collect_query_submit(QueryDesc *query_desc) { if (connector && need_collect()) { + if (query_msg && query_msg->has_query_key()) { + connector->report_query(*query_msg, "previous query"); + query_msg->Clear(); + } *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); @@ -354,7 +363,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } query_msg->set_query_status(query_status); if (connector->report_query(*query_msg, msg)) { - clear_big_fields(query_msg); + query_msg->Clear(); } } } From fb41ea5754ee50509e4a2f6eb73a072dad01a91f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 26 Dec 2023 16:36:26 +0300 Subject: [PATCH 18/49] [yagp_hooks_collector] Improve query_id and resource group resolution Use core query_id from Query instead of a separate hash. Resolve resource group from the current session rather than the role default. 
--- src/EventSender.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index e3be58b194e..21c2e2117a3 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -54,10 +54,7 @@ std::string *get_db_name() { } std::string *get_rg_name() { - auto userId = GetUserId(); - if (!OidIsValid(userId)) - return nullptr; - auto groupId = GetResGroupIdForRole(userId); + auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); if (!OidIsValid(groupId)) return nullptr; char *rgname = GetResGroupNameForId(groupId); @@ -119,13 +116,7 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); *qi->mutable_template_plan_text() = std::string(norm_plan->data); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - // TODO: For now assume queryid equal to planid, which is wrong. The - // reason for doing so this bug - // https://github.com/greenplum-db/gpdb/pull/15385 (ORCA loses - // pg_stat_statements` queryid during planning phase). Need to fix it - // upstream, cherry-pick and bump gp - // qi->set_query_id(query_desc->plannedstmt->queryId); - qi->set_query_id(qi->plan_id()); + qi->set_query_id(query_desc->plannedstmt->queryId); } } From 14ca85b86d1c8dc402d231289474b7e5210c7745 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 17 May 2024 15:55:27 +0300 Subject: [PATCH 19/49] [yagp_hooks_collector] Add nested query tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track query nesting level using a per-query key (tmid, ssid, ccnt, nesting_level, query_desc_addr). Maintain a state machine per active query to correctly sequence submit→start→end→done across nesting boundaries. 
--- protos/yagpcc_metrics.proto | 10 ++- protos/yagpcc_set_service.proto | 32 ++++++-- src/Config.cpp | 7 ++ src/Config.h | 1 + src/EventSender.cpp | 138 ++++++++++++++++++++++++++------ src/EventSender.h | 26 +++++- src/hook_wrappers.cpp | 2 +- 7 files changed, 178 insertions(+), 38 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 2d20d3c46d9..68492732ece 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -36,6 +36,11 @@ message QueryInfo { string rsgname = 10; } +message AdditionalQueryInfo { + int64 nested_level = 1; + string error_message = 2; +} + enum PlanGenerator { PLAN_GENERATOR_UNSPECIFIED = 0; @@ -95,7 +100,7 @@ message MetricInstrumentation { uint64 nloops = 2; /* # of run cycles for this node */ uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ double firsttuple = 4; /* Time for first tuple of this cycle */ - double startup = 5; /* Total startup time (in seconds) */ + double startup = 5; /* Total startup time (in seconds) (optimiser's cost estimation) */ double total = 6; /* Total total time (in seconds) */ uint64 shared_blks_hit = 7; /* shared blocks stats*/ uint64 shared_blks_read = 8; @@ -105,12 +110,13 @@ message MetricInstrumentation { uint64 local_blks_read = 12; uint64 local_blks_dirtied = 13; uint64 local_blks_written = 14; - uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_read = 15; /* temporary tables read stat */ uint64 temp_blks_written = 16; double blk_read_time = 17; /* measured read/write time */ double blk_write_time = 18; NetworkStat sent = 19; NetworkStat received = 20; + double startup_time = 21; /* real query startup time (planning + queue time) */ } message SpillInfo { diff --git a/protos/yagpcc_set_service.proto b/protos/yagpcc_set_service.proto index e8fc7aaa99d..0b9e34df49d 100644 --- a/protos/yagpcc_set_service.proto +++ b/protos/yagpcc_set_service.proto @@ -9,17 +9,35 @@ package yagpcc; option java_outer_classname = 
"SegmentYAGPCCAS"; option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum MetricResponseStatusCode { + METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; + METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; + METRIC_RESPONSE_STATUS_CODE_ERROR = 2; +} + message SetQueryReq { - QueryStatus query_status = 1; - google.protobuf.Timestamp datetime = 2; - QueryKey query_key = 3; - SegmentKey segment_key = 4; - QueryInfo query_info = 5; - GPMetrics query_metrics = 6; - repeated MetricPlan plan_tree = 7; + QueryStatus query_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; google.protobuf.Timestamp submit_time = 8; google.protobuf.Timestamp start_time = 9; google.protobuf.Timestamp end_time = 10; + AdditionalQueryInfo add_info = 11; } message SetPlanNodeReq { diff --git a/src/Config.cpp b/src/Config.cpp index c5c2c15f7e9..1bbad9a6ea3 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -13,6 +13,7 @@ static char *guc_uds_path = nullptr; static bool guc_enable_analyze = true; static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; +static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static std::unique_ptr> ignored_users = nullptr; @@ -36,6 +37,11 @@ void Config::init() { &guc_enable_cdbstats, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + DefineCustomBoolVariable( + "yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, + &guc_report_nested_queries, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + 
DefineCustomStringVariable( "yagpcc.ignored_users_list", "Make yagpcc ignore queries issued by given users", 0LL, @@ -47,6 +53,7 @@ std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } +bool Config::report_nested_queries() { return guc_report_nested_queries; } bool Config::filter_user(const std::string *username) { if (!ignored_users) { diff --git a/src/Config.h b/src/Config.h index 999d0300640..15f425be67c 100644 --- a/src/Config.h +++ b/src/Config.h @@ -10,4 +10,5 @@ class Config { static bool enable_cdbstats(); static bool enable_collector(); static bool filter_user(const std::string *username); + static bool report_nested_queries(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 21c2e2117a3..116805d0646 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -10,6 +10,7 @@ extern "C" { #include "postgres.h" #include "access/hash.h" +#include "access/xact.h" #include "commands/dbcommands.h" #include "commands/explain.h" #include "commands/resgroupcmds.h" @@ -30,11 +31,6 @@ extern "C" { #include "EventSender.h" -#define need_collect() \ - (nesting_level == 0 && gp_command_count != 0 && \ - query_desc->sourceText != nullptr && Config::enable_collector() && \ - !Config::filter_user(get_user_name())) - namespace { std::string *get_user_name() { @@ -146,6 +142,11 @@ void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } } +void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; @@ -210,6 +211,19 @@ yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, return 
req; } +inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; +} + +inline bool need_collect(QueryDesc *query_desc, int nesting_level) { + return (Config::report_nested_queries() || + is_top_level_query(query_desc, nesting_level)) && + gp_command_count != 0 && query_desc->sourceText != nullptr && + Config::enable_collector() && !Config::filter_user(get_user_name()); +} + } // namespace void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { @@ -223,7 +237,8 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - collect_query_submit(reinterpret_cast(arg)); + // don't collect anything here. We will fake this call in ExecutorStart as + // it really makes no difference. Just complicates things break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -232,10 +247,8 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELING: case METRICS_QUERY_CANCELED: - collect_query_done(reinterpret_cast(arg), status); - break; case METRICS_INNER_QUERY_DONE: - // TODO + collect_query_done(reinterpret_cast(arg), status); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -247,9 +260,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, if (!connector) { return; } - if (!need_collect()) { + if (!need_collect(query_desc, nesting_level)) { return; } + collect_query_submit(query_desc); query_start_time = std::chrono::high_resolution_clock::now(); WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { @@ -273,8 +287,10 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { return; } if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && - 
need_collect()) { - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + need_collect(query_desc, nesting_level)) { + auto *query = get_query_message(query_desc); + update_query_state(query_desc, query, QueryState::START); + auto query_msg = query->message; *query_msg->mutable_start_time() = current_ts(); set_query_plan(query_msg, query_desc); if (connector->report_query(*query_msg, "started")) { @@ -287,7 +303,7 @@ void EventSender::executor_end(QueryDesc *query_desc) { if (!connector) { return; } - if (!need_collect() || + if (!need_collect(query_desc, nesting_level) || (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } @@ -301,7 +317,13 @@ void EventSender::executor_end(QueryDesc *query_desc) { cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, DISPATCH_WAIT_NONE); }*/ - query_msg->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + auto *query = get_query_message(query_desc); + if (query->state == UNKNOWN && !Config::report_nested_queries()) { + // COMMIT/ROLLBACK of a nested query. 
Happens in top-level + return; + } + update_query_state(query_desc, query, QueryState::END); + auto query_msg = query->message; *query_msg->mutable_end_time() = current_ts(); set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); if (connector->report_query(*query_msg, "ended")) { @@ -310,15 +332,15 @@ void EventSender::executor_end(QueryDesc *query_desc) { } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (connector && need_collect()) { - if (query_msg && query_msg->has_query_key()) { - connector->report_query(*query_msg, "previous query"); - query_msg->Clear(); - } + if (connector && need_collect(query_desc, nesting_level)) { + auto *query = get_query_message(query_desc); + query->state = QueryState::SUBMIT; + auto query_msg = query->message; *query_msg = create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); set_query_info(query_msg, query_desc); + set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); set_query_text(query_msg, query_desc); if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); @@ -328,11 +350,12 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && need_collect()) { + if (connector && need_collect(query_desc, nesting_level)) { yagpcc::QueryStatus query_status; std::string msg; switch (status) { case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; msg = "done"; break; @@ -352,16 +375,26 @@ void EventSender::collect_query_done(QueryDesc *query_desc, ereport(FATAL, (errmsg("Unexpected query status in query_done hook: %d", status))); } - query_msg->set_query_status(query_status); - if (connector->report_query(*query_msg, msg)) { - query_msg->Clear(); + auto *query = get_query_message(query_desc); + if (query->state != UNKNOWN || 
Config::report_nested_queries()) { + update_query_state(query_desc, query, QueryState::DONE, + query_status == + yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query->message; + query_msg->set_query_status(query_status); + connector->report_query(*query_msg, msg); + } else { + // otherwise it`s a nested query being committed/aborted at top level + // and we should ignore it } + query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}); + pfree(query_desc->gpmon_pkt); } } EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { - query_msg = new yagpcc::SetQueryReq(); try { connector = new UDSConnector(); } catch (const std::exception &e) { @@ -371,6 +404,59 @@ EventSender::EventSender() { } EventSender::~EventSender() { - delete query_msg; delete connector; -} \ No newline at end of file + for (auto iter = query_msgs.begin(); iter != query_msgs.end(); ++iter) { + delete iter->second.message; + } +} + +// That's basically a very simplistic state machine to fix or highlight any bugs +// coming from GP +void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, + QueryState new_state, bool success) { + if (query->state == UNKNOWN) { + collect_query_submit(query_desc); + } + switch (new_state) { + case QueryState::SUBMIT: + Assert(false); + break; + case QueryState::START: + if (query->state == QueryState::SUBMIT) { + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + } else { + Assert(false); + } + break; + case QueryState::END: + Assert(query->state == QueryState::START || IsAbortInProgress()); + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + break; + case QueryState::DONE: + Assert(query->state == QueryState::END || !success); + query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + break; + default: + Assert(false); + } + query->state = new_state; +} + 
+EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { + if (query_desc->gpmon_pkt == nullptr || + query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}) == + query_msgs.end()) { + query_desc->gpmon_pkt = (gpmon_packet_t *)palloc0(sizeof(gpmon_packet_t)); + query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; + query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; + query_msgs.insert({{gp_command_count, nesting_level}, + QueryItem(UNKNOWN, new yagpcc::SetQueryReq())}); + } + return &query_msgs.at({query_desc->gpmon_pkt->u.qexec.key.ccnt, + query_desc->gpmon_pkt->u.qexec.key.tmid}); +} + +EventSender::QueryItem::QueryItem(EventSender::QueryState st, + yagpcc::SetQueryReq *msg) + : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 0e8985873b6..55b8daf9a91 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include extern "C" { @@ -26,9 +26,31 @@ class EventSender { ~EventSender(); private: + enum QueryState { UNKNOWN, SUBMIT, START, END, DONE }; + + struct QueryItem { + QueryState state = QueryState::UNKNOWN; + yagpcc::SetQueryReq *message = nullptr; + + QueryItem(QueryState st, yagpcc::SetQueryReq *msg); + }; + + struct pair_hash { + std::size_t operator()(const std::pair &p) const { + auto h1 = std::hash{}(p.first); + auto h2 = std::hash{}(p.second); + return h1 ^ h2; + } + }; + + void update_query_state(QueryDesc *query_desc, QueryItem *query, + QueryState new_state, bool success = true); + QueryItem *get_query_message(QueryDesc *query_desc); void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); + void cleanup_messages(); + UDSConnector *connector = nullptr; int nesting_level = 0; - yagpcc::SetQueryReq *query_msg; + std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end 
of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 37f80385a6b..caf38a10f6e 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -56,9 +56,9 @@ void hooks_init() { void hooks_deinit() { ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; ExecutorRun_hook = previous_ExecutorRun_hook; ExecutorFinish_hook = previous_ExecutorFinish_hook; - ExecutorEnd_hook = previous_ExecutorEnd_hook; query_info_collect_hook = previous_query_info_collect_hook; stat_statements_parser_deinit(); if (sender) { From a9048d4f352d9e45e05d5b8331818b9337bf6b18 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 May 2024 15:25:35 +0300 Subject: [PATCH 20/49] [yagp_hooks_collector] Add configurable text field trimming Trim query text and plan text to max_text_size and max_plan_size limits. --- src/Config.cpp | 11 ++++++++++- src/Config.h | 1 + src/EventSender.cpp | 12 +++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 1bbad9a6ea3..c07a6948694 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,7 +1,8 @@ #include "Config.h" -#include +#include #include #include +#include extern "C" { #include "postgres.h" @@ -15,6 +16,7 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; +static int guc_max_text_size = 1024; // in KB static std::unique_ptr> ignored_users = nullptr; void Config::init() { @@ -47,6 +49,12 @@ void Config::init() { "Make yagpcc ignore queries issued by given users", 0LL, &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomIntVariable( + "yagpcc.max_text_size", + "Make yagpcc trim plan and query texts longer than configured size", NULL, + &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC 
| GUC_UNIT_KB, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } @@ -54,6 +62,7 @@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } +size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { if (!ignored_users) { diff --git a/src/Config.h b/src/Config.h index 15f425be67c..f806bc0dbf5 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,4 +11,5 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); + static size_t max_text_size(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 116805d0646..4de5564533b 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -93,11 +93,15 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { return es; } +inline std::string char_to_trimmed_str(const char *str, size_t len) { + return std::string(str, std::min(len, Config::max_text_size())); +} + void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { MemoryContext oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); - *plan_text = std::string(es.str->data, es.str->len); + *plan_text = char_to_trimmed_str(es.str->data, es.str->len); pfree(es.str->data); MemoryContextSwitchTo(oldcxt); } @@ -119,9 +123,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = query_desc->sourceText; + *qi->mutable_query_text() = char_to_trimmed_str( + 
query_desc->sourceText, strlen(query_desc->sourceText)); char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = std::string(norm_query); + *qi->mutable_template_query_text() = + char_to_trimmed_str(norm_query, strlen(norm_query)); } } From a86107110012ee32fdf9911410c477f960005354 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Tue, 28 May 2024 16:19:58 +0300 Subject: [PATCH 21/49] [yagp_hooks_collector] Add error message reporting for failed queries Capture elog error message at the done event for ERROR and CANCELED statuses. Properly send accumulated runtime metrics before teardown. Drop the intermediate CANCELLING event. --- src/EventSender.cpp | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 4de5564533b..8d202991986 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -153,6 +153,12 @@ void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { aqi->set_nested_level(nesting_level); } +void set_qi_error_message(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + auto error = elog_message(); + *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); +} + void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, QueryDesc *query_desc) { auto instrument = query_desc->planstate->instrument; @@ -249,9 +255,13 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_START: // no-op: executor_after_start is enough break; + case METRICS_QUERY_CANCELING: + // it appears we're unly interested in the actual CANCELED event. 
+ // for now we will ignore CANCELING state unless otherwise requested from + // end users + break; case METRICS_QUERY_DONE: case METRICS_QUERY_ERROR: - case METRICS_QUERY_CANCELING: case METRICS_QUERY_CANCELED: case METRICS_INNER_QUERY_DONE: collect_query_done(reinterpret_cast(arg), status); @@ -370,6 +380,9 @@ void EventSender::collect_query_done(QueryDesc *query_desc, msg = "error"; break; case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code here + // just in case + Assert(false); query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; msg = "cancelling"; break; @@ -382,12 +395,21 @@ void EventSender::collect_query_done(QueryDesc *query_desc, status))); } auto *query = get_query_message(query_desc); + auto prev_state = query->state; if (query->state != UNKNOWN || Config::report_nested_queries()) { update_query_state(query_desc, query, QueryState::DONE, query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); auto query_msg = query->message; query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + } connector->report_query(*query_msg, msg); } else { // otherwise it`s a nested query being committed/aborted at top level @@ -435,7 +457,9 @@ void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, } break; case QueryState::END: - Assert(query->state == QueryState::START || IsAbortInProgress()); + // Example of below assert triggering: CURSOR closes before ever being + // executed Assert(query->state == QueryState::START || + // IsAbortInProgress()); query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: From 965bb64e2ca68714987f9c66eaebf6510187d00f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 3 Jun 2024 18:22:00 +0300 Subject: [PATCH 22/49] [yagp_hooks_collector] Change report_nested_queries to PGC_USERSET --- src/Config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Config.cpp b/src/Config.cpp index c07a6948694..42fa4b2fb12 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -41,7 +41,7 @@ void Config::init() { DefineCustomBoolVariable( "yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, - &guc_report_nested_queries, true, PGC_SUSET, + &guc_report_nested_queries, true, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomStringVariable( From b61e59a120b5f4b398cf76fb6f0babfbece993b9 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 13 Jun 2024 10:59:46 +0300 Subject: [PATCH 23/49] [yagp_hooks_collector] Diff per-query stats between submit and end Take an initial metrics snapshot at submit so incremental stats are computed as deltas. Required for correct per-query accounting with nested statements. 
--- src/EventSender.cpp | 21 +++++++++++---------- src/ProcStats.cpp | 8 +++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 8d202991986..60f21818d00 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,7 +1,6 @@ #include "Config.h" #include "ProcStats.h" #include "UDSConnector.h" -#include #include #define typeid __typeid @@ -198,19 +197,17 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, } } -decltype(std::chrono::high_resolution_clock::now()) query_start_time; - void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); } fill_self_stats(metrics->mutable_systemstat()); - std::chrono::duration elapsed_seconds = - std::chrono::high_resolution_clock::now() - query_start_time; metrics->mutable_systemstat()->set_runningtimeseconds( - elapsed_seconds.count()); - metrics->mutable_spill()->set_filecount(WorkfileTotalFilesCreated()); - metrics->mutable_spill()->set_totalbytes(WorkfileTotalBytesWritten()); + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); } yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, @@ -280,8 +277,6 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } collect_query_submit(query_desc); - query_start_time = std::chrono::high_resolution_clock::now(); - WorkfileResetBackendStats(); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; @@ -309,9 +304,12 @@ void EventSender::executor_after_start(QueryDesc 
*query_desc, int /* eflags*/) { auto query_msg = query->message; *query_msg->mutable_start_time() = current_ts(); set_query_plan(query_msg, query_desc); + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); if (connector->report_query(*query_msg, "started")) { clear_big_fields(query_msg); } + std::swap(stats, *query_msg->mutable_query_metrics()); } } @@ -361,6 +359,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); } } diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 668173a0f7e..a557a20cbb0 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -92,9 +92,7 @@ void fill_status_stats(yagpcc::SystemStat *stats) { } // namespace void fill_self_stats(yagpcc::SystemStat *stats) { - static yagpcc::SystemStat prev_stats; - fill_io_stats(&prev_stats); - fill_cpu_stats(&prev_stats); - fill_status_stats(&prev_stats); - *stats = prev_stats; + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); } \ No newline at end of file From e8be9c94ada13ce49fb43972a90202385621d71a Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Wed, 7 Aug 2024 14:28:57 +0300 Subject: [PATCH 24/49] [yagp_hooks_collector] Fix try/catch block when calling C++ code from PG hooks --- src/hook_wrappers.cpp | 44 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index caf38a10f6e..93faaa0bf8f 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -38,6 +38,15 @@ static inline EventSender *get_sender() { return sender; } +template +R cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) { + try { + return (obj->*func)(args...); + } catch (const std::exception &e) { + ereport(FATAL, (errmsg("Unexpected exception in yagpcc %s", e.what()))); + } +} + void hooks_init() { Config::init(); YagpStat::init(); @@ -68,27 +77,15 @@ void hooks_deinit() { } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - PG_TRY(); - { get_sender()->executor_before_start(query_desc, eflags); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_ExecutorBeforeStart_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_before_start, query_desc, + eflags); if (previous_ExecutorStart_hook) { (*previous_ExecutorStart_hook)(query_desc, eflags); } else { standard_ExecutorStart(query_desc, eflags); } - PG_TRY(); - { get_sender()->executor_after_start(query_desc, eflags); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_ExecutorAfterStart_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_after_start, query_desc, + eflags); } void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -129,11 +126,7 @@ void ya_ExecutorFinish_hook(QueryDesc *query_desc) { } void ya_ExecutorEnd_hook(QueryDesc *query_desc) { - PG_TRY(); - { get_sender()->executor_end(query_desc); } - PG_CATCH(); - { ereport(WARNING, (errmsg("EventSender failed in ya_ExecutorEnd_hook"))); } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::executor_end, query_desc); if (previous_ExecutorEnd_hook) { (*previous_ExecutorEnd_hook)(query_desc); } else { @@ -142,14 +135,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { } void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { - PG_TRY(); - { get_sender()->query_metrics_collect(status, arg); } - PG_CATCH(); - { - ereport(WARNING, - (errmsg("EventSender failed in ya_query_info_collect_hook"))); - } - PG_END_TRY(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, arg); if 
(previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } From ce906c23e4e89490db25d0b1ecebe96b98cf1eab Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 12 Sep 2024 16:15:26 +0300 Subject: [PATCH 25/49] [yagp_hooks_collector] Improve nested query handling and add slice info Don't normalize trimmed plans. Clean up stale text fields between events. Report nested queries only from dispatcher. Add slice_id. Aggregate inherited_calls and inherited_time on segments. --- protos/yagpcc_metrics.proto | 3 + src/EventSender.cpp | 295 ++++++++++++++++++++++-------------- src/EventSender.h | 3 + 3 files changed, 191 insertions(+), 110 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 68492732ece..fc85386c6b0 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -39,6 +39,7 @@ message QueryInfo { message AdditionalQueryInfo { int64 nested_level = 1; string error_message = 2; + int64 slice_id = 3; } enum PlanGenerator @@ -117,6 +118,8 @@ message MetricInstrumentation { NetworkStat sent = 19; NetworkStat received = 20; double startup_time = 21; /* real query startup time (planning + queue time) */ + uint64 inherited_calls = 22; /* the number of executed sub-queries */ + double inherited_time = 23; /* total time spend on inherited execution */ } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 60f21818d00..7d2d5a1a2c2 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -58,6 +58,53 @@ std::string *get_rg_name() { return new std::string(rgname); } +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. 
+ * For segments both statements are top-level statements with nesting level 0. + * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query. Creating massive load on gpcc agent. + * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NODE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. + */ + +inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; +} + +inline bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return (Gp_session_role == GP_ROLE_DISPATCH && + Config::report_nested_queries()) || + is_top_level_query(query_desc, nesting_level); +} + +bool need_report_nested_query() { + return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; +} + +inline bool filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !Config::enable_collector() || Config::filter_user(get_user_name()); +} + +inline bool need_collect(QueryDesc *query_desc, int nesting_level) { + return !filter_query(query_desc) && + nesting_is_valid(query_desc, nesting_level); +} + google::protobuf::Timestamp current_ts() { google::protobuf::Timestamp current_ts; struct timeval tv; @@ -96,26 +143,24 @@ inline std::string char_to_trimmed_str(const char *str, size_t len) { return std::string(str, std::min(len, Config::max_text_size())); } -void set_plan_text(std::string *plan_text, QueryDesc *query_desc) { - MemoryContext oldcxt = - 
MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - *plan_text = char_to_trimmed_str(es.str->data, es.str->len); - pfree(es.str->data); - MemoryContextSwitchTo(oldcxt); -} - void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - set_plan_text(qi->mutable_plan_text(), query_desc); - StringInfo norm_plan = gen_normplan(qi->plan_text().c_str()); - *qi->mutable_template_plan_text() = std::string(norm_plan->data); + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + auto es = get_explain_state(query_desc, true); + MemoryContextSwitchTo(oldcxt); + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); + StringInfo norm_plan = gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = + char_to_trimmed_str(norm_plan->data, norm_plan->len); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); + pfree(es.str->data); + pfree(norm_plan->data); } } @@ -134,7 +179,9 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->clear_plan_text(); + qi->clear_template_plan_text(); qi->clear_query_text(); + qi->clear_template_query_text(); } } @@ -152,6 +199,11 @@ void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { aqi->set_nested_level(nesting_level); } +void set_qi_slice_id(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + void set_qi_error_message(yagpcc::SetQueryReq *req) { auto aqi = req->mutable_add_info(); auto error = elog_message(); @@ 
-159,7 +211,8 @@ void set_qi_error_message(yagpcc::SetQueryReq *req) { } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, - QueryDesc *query_desc) { + QueryDesc *query_desc, int nested_calls, + double nested_time) { auto instrument = query_desc->planstate->instrument; if (instrument) { metrics->set_ntuples(instrument->ntuples); @@ -195,11 +248,15 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, mlstate->stat_tuple_bytes_recvd); metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); } -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc) { +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) { if (query_desc->planstate && query_desc->planstate->instrument) { - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc); + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, + nested_calls, nested_time); } fill_self_stats(metrics->mutable_systemstat()); metrics->mutable_systemstat()->set_runningtimeseconds( @@ -220,17 +277,8 @@ yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, return req; } -inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; -} - -inline bool need_collect(QueryDesc *query_desc, int nesting_level) { - return (Config::report_nested_queries() || - is_top_level_query(query_desc, nesting_level)) && - gp_command_count != 0 && query_desc->sourceText != nullptr && - Config::enable_collector() && !Config::filter_user(get_user_name()); +double protots_to_double(const google::protobuf::Timestamp &ts) { + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } } // namespace @@ -273,6 +321,10 @@ void 
EventSender::executor_before_start(QueryDesc *query_desc, if (!connector) { return; } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } if (!need_collect(query_desc, nesting_level)) { return; } @@ -297,51 +349,53 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (!connector) { return; } - if ((Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) && - need_collect(query_desc, nesting_level)) { - auto *query = get_query_message(query_desc); - update_query_state(query_desc, query, QueryState::START); - auto query_msg = query->message; - *query_msg->mutable_start_time() = current_ts(); - set_query_plan(query_msg, query_desc); - yagpcc::GPMetrics stats; - std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { - clear_big_fields(query_msg); + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (!filter_query(query_desc)) { + auto *query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_start_time() = current_ts(); + if (!nesting_is_valid(query_desc, nesting_level)) { + return; + } + update_query_state(query_desc, query, QueryState::START); + set_query_plan(query_msg, query_desc); + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); } - std::swap(stats, *query_msg->mutable_query_metrics()); } } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector) { - return; - } - if (!need_collect(query_desc, nesting_level) || + if (!connector || (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { return; } - /* TODO: when querying via CURSOR this call freezes. Need to investigate. - To reproduce - uncomment it and run installchecks. It will freeze around - join test. 
Needs investigation - - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && - Config::enable_cdbstats() && query_desc->estate->dispatcherState && - query_desc->estate->dispatcherState->primaryResults) { - cdbdisp_checkDispatchResult(query_desc->estate->dispatcherState, - DISPATCH_WAIT_NONE); - }*/ - auto *query = get_query_message(query_desc); - if (query->state == UNKNOWN && !Config::report_nested_queries()) { - // COMMIT/ROLLBACK of a nested query. Happens in top-level - return; - } - update_query_state(query_desc, query, QueryState::END); - auto query_msg = query->message; - *query_msg->mutable_end_time() = current_ts(); - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); - if (connector->report_query(*query_msg, "ended")) { - clear_big_fields(query_msg); + if (!filter_query(query_desc)) { + auto *query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_end_time() = current_ts(); + if (nesting_is_valid(query_desc, nesting_level)) { + if (query->state == UNKNOWN && + // Yet another greenplum weirdness: thats actually a nested query + // which is being committed/rollbacked. Treat it accordingly. 
+ !need_report_nested_query()) { + return; + } + update_query_state(query_desc, query, QueryState::END); + if (is_top_level_query(query_desc, nesting_level)) { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } else { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (connector->report_query(*query_msg, "ended")) { + clear_big_fields(query_msg); + } + } } } @@ -355,66 +409,70 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { *query_msg->mutable_submit_time() = current_ts(); set_query_info(query_msg, query_desc); set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); + set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); if (connector->report_query(*query_msg, "submit")) { clear_big_fields(query_msg); } // take initial metrics snapshot so that we can safely take diff afterwards // in END or DONE events. - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); } } void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && need_collect(query_desc, nesting_level)) { - yagpcc::QueryStatus query_status; - std::string msg; - switch (status) { - case METRICS_QUERY_DONE: - case METRICS_INNER_QUERY_DONE: - query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; - msg = "done"; - break; - case METRICS_QUERY_ERROR: - query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; - msg = "error"; - break; - case METRICS_QUERY_CANCELING: - // at the moment we don't track this event, but I`ll leave this code here - // just in case - Assert(false); - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; - msg = "cancelling"; - break; - case METRICS_QUERY_CANCELED: - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; - msg = "cancelled"; - break; - default: - ereport(FATAL, (errmsg("Unexpected query status in 
query_done hook: %d", - status))); - } + if (connector && !filter_query(query_desc)) { auto *query = get_query_message(query_desc); - auto prev_state = query->state; - if (query->state != UNKNOWN || Config::report_nested_queries()) { - update_query_state(query_desc, query, QueryState::DONE, - query_status == - yagpcc::QueryStatus::QUERY_STATUS_DONE); - auto query_msg = query->message; - query_msg->set_query_status(query_status); - if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); + if (query->state != UNKNOWN || need_report_nested_query()) { + if (nesting_is_valid(query_desc, nesting_level)) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, + (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + auto prev_state = query->state; + update_query_state(query_desc, query, QueryState::DONE, + query_status == + yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query->message; + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } + connector->report_query(*query_msg, msg); } - if (prev_state == START) { - // We've missed ExecutorEnd call due to query cancel or error. It's - // fine, but now we need to collect and report execution stats - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc); - } - connector->report_query(*query_msg, msg); - } else { - // otherwise it`s a nested query being committed/aborted at top level - // and we should ignore it + update_nested_counters(query_desc); } query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}); @@ -488,6 +546,23 @@ EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { query_desc->gpmon_pkt->u.qexec.key.tmid}); } +void EventSender::update_nested_counters(QueryDesc *query_desc) { + if (!is_top_level_query(query_desc, nesting_level)) { + auto query_msg = get_query_message(query_desc); + nested_calls++; + double end_time = protots_to_double(query_msg->message->end_time()); + double start_time = protots_to_double(query_msg->message->start_time()); + if (end_time >= start_time) { + nested_timing += end_time - start_time; + } else { + ereport(WARNING, (errmsg("YAGPCC query start_time > end_time (%f > %f)", + start_time, end_time))); + ereport(DEBUG3, + (errmsg("YAGPCC nested query text %s", query_desc->sourceText))); + } + } +} + EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 55b8daf9a91..9470cbf1f98 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -49,8 +49,11 @@ class EventSender { void collect_query_submit(QueryDesc *query_desc); void collect_query_done(QueryDesc *query_desc, 
QueryMetricsStatus status); void cleanup_messages(); + void update_nested_counters(QueryDesc *query_desc); UDSConnector *connector = nullptr; int nesting_level = 0; + int64_t nested_calls = 0; + double nested_timing = 0; std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end of file From 0d1306714f397f6dcaa80ae96baae2970db513fa Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Thu, 7 Nov 2024 13:09:44 +0300 Subject: [PATCH 26/49] [yagp_hooks_collector] Split EventSender into submodules Factor out ProtoUtils, ProcStats, and PgUtils from EventSender. --- src/EventSender.cpp | 275 +------------------------------------------- src/PgUtils.cpp | 94 +++++++++++++++ src/PgUtils.h | 16 +++ src/ProtoUtils.cpp | 185 +++++++++++++++++++++++++++++ src/ProtoUtils.h | 16 +++ 5 files changed, 315 insertions(+), 271 deletions(-) create mode 100644 src/PgUtils.cpp create mode 100644 src/PgUtils.h create mode 100644 src/ProtoUtils.cpp create mode 100644 src/ProtoUtils.h diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 7d2d5a1a2c2..cdb21ef7aa6 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,287 +1,21 @@ #include "Config.h" -#include "ProcStats.h" #include "UDSConnector.h" -#include -#define typeid __typeid -#define operator __operator extern "C" { #include "postgres.h" #include "access/hash.h" -#include "access/xact.h" -#include "commands/dbcommands.h" -#include "commands/explain.h" -#include "commands/resgroupcmds.h" #include "executor/executor.h" #include "utils/elog.h" -#include "utils/workfile_mgr.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" -#include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" - -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" -#include "tcop/utility.h" } -#undef typeid -#undef operator #include "EventSender.h" - -namespace { - -std::string *get_user_name() { - const char *username = GetConfigOption("session_authorization", false, false); - // username is not to be freed - 
return username ? new std::string(username) : nullptr; -} - -std::string *get_db_name() { - char *dbname = get_database_name(MyDatabaseId); - std::string *result = nullptr; - if (dbname) { - result = new std::string(dbname); - pfree(dbname); - } - return result; -} - -std::string *get_rg_name() { - auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); - if (!OidIsValid(groupId)) - return nullptr; - char *rgname = GetResGroupNameForId(groupId); - if (rgname == nullptr) - return nullptr; - return new std::string(rgname); -} - -/** - * Things get tricky with nested queries. - * a) A nested query on master is a real query optimized and executed from - * master. An example would be `select some_insert_function();`, where - * some_insert_function does something like `insert into tbl values (1)`. Master - * will create two statements. Outer select statement and inner insert statement - * with nesting level 1. - * For segments both statements are top-level statements with nesting level 0. - * b) A nested query on segment is something executed as sub-statement on - * segment. An example would be `select a from tbl where is_good_value(b);`. In - * this case master will issue one top-level statement, but segments will change - * contexts for UDF execution and execute is_good_value(b) once for each tuple - * as a nested query. Creating massive load on gpcc agent. - * - * Hence, here is a decision: - * 1) ignore all queries that are nested on segments - * 2) record (if enabled) all queries that are nested on master - * NODE: The truth is, we can't really ignore nested master queries, because - * segment sees those as top-level. 
- */ - -inline bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; -} - -inline bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return (Gp_session_role == GP_ROLE_DISPATCH && - Config::report_nested_queries()) || - is_top_level_query(query_desc, nesting_level); -} - -bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; -} - -inline bool filter_query(QueryDesc *query_desc) { - return gp_command_count == 0 || query_desc->sourceText == nullptr || - !Config::enable_collector() || Config::filter_user(get_user_name()); -} - -inline bool need_collect(QueryDesc *query_desc, int nesting_level) { - return !filter_query(query_desc) && - nesting_is_valid(query_desc, nesting_level); -} - -google::protobuf::Timestamp current_ts() { - google::protobuf::Timestamp current_ts; - struct timeval tv; - gettimeofday(&tv, nullptr); - current_ts.set_seconds(tv.tv_sec); - current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); - return current_ts; -} - -void set_query_key(yagpcc::QueryKey *key, QueryDesc *query_desc) { - key->set_ccnt(gp_command_count); - key->set_ssid(gp_session_id); - int32 tmid = 0; - gpmon_gettmid(&tmid); - key->set_tmid(tmid); -} - -void set_segment_key(yagpcc::SegmentKey *key, QueryDesc *query_desc) { - key->set_dbid(GpIdentity.dbid); - key->set_segindex(GpIdentity.segindex); -} - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; -} - -inline std::string char_to_trimmed_str(const char *str, size_t len) { - return std::string(str, std::min(len, Config::max_text_size())); -} - -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc 
*query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { - auto qi = req->mutable_query_info(); - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); - MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - MemoryContextSwitchTo(oldcxt); - *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); - StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = - char_to_trimmed_str(norm_plan->data, norm_plan->len); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - qi->set_query_id(query_desc->plannedstmt->queryId); - pfree(es.str->data); - pfree(norm_plan->data); - } -} - -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { - auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = char_to_trimmed_str( - query_desc->sourceText, strlen(query_desc->sourceText)); - char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = - char_to_trimmed_str(norm_query, strlen(norm_query)); - } -} - -void clear_big_fields(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->clear_plan_text(); - qi->clear_template_plan_text(); - qi->clear_query_text(); - qi->clear_template_query_text(); - } -} - -void set_query_info(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); - qi->set_allocated_rsgname(get_rg_name()); - } -} - -void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { - auto aqi = 
req->mutable_add_info(); - aqi->set_nested_level(nesting_level); -} - -void set_qi_slice_id(yagpcc::SetQueryReq *req) { - auto aqi = req->mutable_add_info(); - aqi->set_slice_id(currentSliceId); -} - -void set_qi_error_message(yagpcc::SetQueryReq *req) { - auto aqi = req->mutable_add_info(); - auto error = elog_message(); - *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); -} - -void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, - QueryDesc *query_desc, int nested_calls, - double nested_time) { - auto instrument = query_desc->planstate->instrument; - if (instrument) { - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time( - INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); - } - if (query_desc->estate && query_desc->estate->motionlayer_context) { - MotionLayerState *mlstate = - (MotionLayerState *)query_desc->estate->motionlayer_context; - metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); - 
metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); - metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); - metrics->mutable_received()->set_total_bytes( - mlstate->stat_total_bytes_recvd); - metrics->mutable_received()->set_tuple_bytes( - mlstate->stat_tuple_bytes_recvd); - metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); - } - metrics->set_inherited_calls(nested_calls); - metrics->set_inherited_time(nested_time); -} - -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, - int nested_calls, double nested_time) { - if (query_desc->planstate && query_desc->planstate->instrument) { - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, - nested_calls, nested_time); - } - fill_self_stats(metrics->mutable_systemstat()); - metrics->mutable_systemstat()->set_runningtimeseconds( - time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); - metrics->mutable_spill()->set_filecount( - WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); - metrics->mutable_spill()->set_totalbytes( - WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); -} - -yagpcc::SetQueryReq create_query_req(QueryDesc *query_desc, - yagpcc::QueryStatus status) { - yagpcc::SetQueryReq req; - req.set_query_status(status); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key(), query_desc); - set_segment_key(req.mutable_segment_key(), query_desc); - return req; -} - -double protots_to_double(const google::protobuf::Timestamp &ts) { - return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; -} - -} // namespace +#include "PgUtils.h" +#include "ProtoUtils.h" void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { @@ -404,10 +138,9 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { auto *query = get_query_message(query_desc); 
query->state = QueryState::SUBMIT; auto query_msg = query->message; - *query_msg = - create_query_req(query_desc, yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); - set_query_info(query_msg, query_desc); + set_query_info(query_msg); set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp new file mode 100644 index 00000000000..528426e6c64 --- /dev/null +++ b/src/PgUtils.cpp @@ -0,0 +1,94 @@ +#include "PgUtils.h" +#include "Config.h" + +extern "C" { +#include "utils/guc.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" +#include "cdb/cdbvars.h" +} + +std::string *get_user_name() { + const char *username = GetConfigOption("session_authorization", false, false); + // username is not to be freed + return username ? new std::string(username) : nullptr; +} + +std::string *get_db_name() { + char *dbname = get_database_name(MyDatabaseId); + std::string *result = nullptr; + if (dbname) { + result = new std::string(dbname); + pfree(dbname); + } + return result; +} + +std::string *get_rg_name() { + auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return nullptr; + char *rgname = GetResGroupNameForId(groupId); + if (rgname == nullptr) + return nullptr; + return new std::string(rgname); +} + +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. + * For segments both statements are top-level statements with nesting level 0. 
+ * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query. Creating massive load on gpcc agent. + * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NODE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. + */ + +bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { + return (query_desc->gpmon_pkt && + query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || + nesting_level == 0; +} + +bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return (Gp_session_role == GP_ROLE_DISPATCH && + Config::report_nested_queries()) || + is_top_level_query(query_desc, nesting_level); +} + +bool need_report_nested_query() { + return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; +} + +bool filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !Config::enable_collector() || Config::filter_user(get_user_name()); +} + +bool need_collect(QueryDesc *query_desc, int nesting_level) { + return !filter_query(query_desc) && + nesting_is_valid(query_desc, nesting_level); +} + +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + return es; +} diff --git a/src/PgUtils.h b/src/PgUtils.h new file mode 100644 index 00000000000..85b1eb833cd --- /dev/null +++ b/src/PgUtils.h @@ -0,0 +1,16 @@ +extern "C" { +#include "postgres.h" +#include 
"commands/explain.h" +} + +#include + +std::string *get_user_name(); +std::string *get_db_name(); +std::string *get_rg_name(); +bool is_top_level_query(QueryDesc *query_desc, int nesting_level); +bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); +bool need_report_nested_query(); +bool filter_query(QueryDesc *query_desc); +bool need_collect(QueryDesc *query_desc, int nesting_level); +ExplainState get_explain_state(QueryDesc *query_desc, bool costs); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp new file mode 100644 index 00000000000..e1be25b8b1e --- /dev/null +++ b/src/ProtoUtils.cpp @@ -0,0 +1,185 @@ +#include "ProtoUtils.h" +#include "PgUtils.h" +#include "ProcStats.h" +#include "Config.h" + +#define typeid __typeid +#define operator __operator +extern "C" { +#include "postgres.h" +#include "access/hash.h" +#include "cdb/cdbinterconnect.h" +#include "cdb/cdbvars.h" +#include "gpmon/gpmon.h" +#include "utils/workfile_mgr.h" + +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +} +#undef typeid +#undef operator + +#include +#include + +google::protobuf::Timestamp current_ts() { + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; +} + +void set_query_key(yagpcc::QueryKey *key) { + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gpmon_gettmid(&tmid); + key->set_tmid(tmid); +} + +void set_segment_key(yagpcc::SegmentKey *key) { + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); +} + +inline std::string char_to_trimmed_str(const char *str, size_t len) { + return std::string(str, std::min(len, Config::max_text_size())); +} + +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + auto qi = req->mutable_query_info(); + 
qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + auto es = get_explain_state(query_desc, true); + MemoryContextSwitchTo(oldcxt); + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len); + StringInfo norm_plan = gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = + char_to_trimmed_str(norm_plan->data, norm_plan->len); + qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + qi->set_query_id(query_desc->plannedstmt->queryId); + pfree(es.str->data); + pfree(norm_plan->data); + } +} + +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { + if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = char_to_trimmed_str( + query_desc->sourceText, strlen(query_desc->sourceText)); + char *norm_query = gen_normquery(query_desc->sourceText); + *qi->mutable_template_query_text() = + char_to_trimmed_str(norm_query, strlen(norm_query)); + } +} + +void clear_big_fields(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_template_plan_text(); + qi->clear_query_text(); + qi->clear_template_query_text(); + } +} + +void set_query_info(yagpcc::SetQueryReq *req) { + if (Gp_session_role == GP_ROLE_DISPATCH) { + auto qi = req->mutable_query_info(); + qi->set_allocated_username(get_user_name()); + qi->set_allocated_databasename(get_db_name()); + qi->set_allocated_rsgname(get_rg_name()); + } +} + +void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + +void set_qi_slice_id(yagpcc::SetQueryReq *req) { + auto aqi = 
req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + +void set_qi_error_message(yagpcc::SetQueryReq *req) { + auto aqi = req->mutable_add_info(); + auto error = elog_message(); + *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); +} + +void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, + QueryDesc *query_desc, int nested_calls, + double nested_time) { + auto instrument = query_desc->planstate->instrument; + if (instrument) { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } + if (query_desc->estate && query_desc->estate->motionlayer_context) { + MotionLayerState *mlstate = + (MotionLayerState *)query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + 
metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); + } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); +} + +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) { + if (query_desc->planstate && query_desc->planstate->instrument) { + set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, + nested_calls, nested_time); + } + fill_self_stats(metrics->mutable_systemstat()); + metrics->mutable_systemstat()->set_runningtimeseconds( + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); +} + +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { + yagpcc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key()); + set_segment_key(req.mutable_segment_key()); + return req; +} + +double protots_to_double(const google::protobuf::Timestamp &ts) { + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h new file mode 100644 index 00000000000..38aa75611b2 --- /dev/null +++ b/src/ProtoUtils.h @@ -0,0 +1,16 @@ +#include "protos/yagpcc_set_service.pb.h" + +struct QueryDesc; + +google::protobuf::Timestamp current_ts(); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void clear_big_fields(yagpcc::SetQueryReq *req); +void set_query_info(yagpcc::SetQueryReq *req); +void 
set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); +void set_qi_slice_id(yagpcc::SetQueryReq *req); +void set_qi_error_message(yagpcc::SetQueryReq *req); +void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time); +yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); +double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file From 9fa6ab137a2abd4273046b76dd024eaaeab7826f Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Mon, 7 Apr 2025 14:15:39 +0300 Subject: [PATCH 27/49] [yagp_hooks_collector] Ignore EXPLAIN VERBOSE errors for unsupported node types --- src/PgUtils.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 528426e6c64..5982ff77c1c 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -88,7 +88,24 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { es.verbose = true; es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); + PG_TRY(); + { ExplainPrintPlan(&es, query_desc); } + PG_CATCH(); + { + // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE + // implementation. We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + resetStringInfo(es.str); + appendStringInfo( + es.str, + "Unable to restore query plan due to PostgreSQL internal error. 
" + "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct explain text for query: %s", + query_desc->sourceText))); + } + PG_END_TRY(); ExplainEndOutput(&es); return es; } From 7c33bd1b3844e9f5a6ae8bd123a0eae738f53fc9 Mon Sep 17 00:00:00 2001 From: Maxim Smyatkin Date: Fri, 18 Apr 2025 14:58:52 +0300 Subject: [PATCH 28/49] [yagp_hooks_collector] Add per-slice interconnect statistics Hook into ic_teardown to collect UDP-IFC packet-level counters. Compile-time gated behind IC_TEARDOWN_HOOK. --- protos/yagpcc_metrics.proto | 56 +++++++++++++++++++++++++++++++++++++ src/EventSender.cpp | 53 ++++++++++++++++++++++++++++++++++- src/EventSender.h | 10 +++++++ src/ProtoUtils.cpp | 35 +++++++++++++++++++++++ src/ProtoUtils.h | 3 ++ src/hook_wrappers.cpp | 24 ++++++++++++++++ 6 files changed, 180 insertions(+), 1 deletion(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index fc85386c6b0..086f3e63379 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -42,6 +42,11 @@ message AdditionalQueryInfo { int64 slice_id = 3; } +message AdditionalQueryStat { + string error_message = 1; + repeated int64 slices = 2; +} + enum PlanGenerator { PLAN_GENERATOR_UNSPECIFIED = 0; @@ -96,6 +101,56 @@ message NetworkStat { uint32 chunks = 3; } +message InterconnectStat { + // Receive queue size sum when main thread is trying to get a packet + uint64 total_recv_queue_size = 1; + // Counting times when computing total_recv_queue_size + uint64 recv_queue_size_counting_time = 2; + + // The capacity sum when packets are tried to be sent + uint64 total_capacity = 3; + // Counting times used to compute total_capacity + uint64 capacity_counting_time = 4; + + // Total buffers available when sending packets + uint64 total_buffers = 5; + // Counting times when compute total_buffers + uint64 buffer_counting_time = 6; + + // The number of active connections + uint64 active_connections_num = 7; + + // The number of packet 
retransmits + int64 retransmits = 8; + + // The number of cached future packets + int64 startup_cached_pkt_num = 9; + + // The number of mismatched packets received + int64 mismatch_num = 10; + + // The number of crc errors + int64 crc_errors = 11; + + // The number of packets sent by sender + int64 snd_pkt_num = 12; + + // The number of packets received by receiver + int64 recv_pkt_num = 13; + + // Disordered packet number + int64 disordered_pkt_num = 14; + + // Duplicate packet number + int64 duplicated_pkt_num = 15; + + // The number of Acks received + int64 recv_ack_num = 16; + + // The number of status query messages sent + int64 status_query_msg_num = 17; +} + message MetricInstrumentation { uint64 ntuples = 1; /* Total tuples produced */ uint64 nloops = 2; /* # of run cycles for this node */ @@ -120,6 +175,7 @@ message MetricInstrumentation { double startup_time = 21; /* real query startup time (planning + queue time) */ uint64 inherited_calls = 22; /* the number of executed sub-queries */ double inherited_time = 23; /* total time spend on inherited execution */ + InterconnectStat interconnect = 24; } message SpillInfo { diff --git a/src/EventSender.cpp b/src/EventSender.cpp index cdb21ef7aa6..2ba34d1e4cc 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,7 @@ #include "Config.h" #include "UDSConnector.h" +#define typeid __typeid extern "C" { #include "postgres.h" @@ -11,7 +12,9 @@ extern "C" { #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" } +#undef typeid #include "EventSender.h" #include "PgUtils.h" @@ -35,7 +38,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // no-op: executor_after_start is enough break; case METRICS_QUERY_CANCELING: - // it appears we're unly interested in the actual CANCELED event. + // it appears we're only interested in the actual CANCELED event. 
// for now we will ignore CANCELING state unless otherwise requested from // end users break; @@ -150,6 +153,12 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { // take initial metrics snapshot so that we can safely take diff afterwards // in END or DONE events. set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); +#ifdef IC_TEARDOWN_HOOK + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif } } @@ -203,6 +212,12 @@ void EventSender::collect_query_done(QueryDesc *query_desc, set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, nested_timing); } +#ifdef IC_TEARDOWN_HOOK + ic_metrics_collect(); + set_ic_stats( + query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif connector->report_query(*query_msg, msg); } update_nested_counters(query_desc); @@ -213,6 +228,39 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } } +void EventSender::ic_metrics_collect() { +#ifdef IC_TEARDOWN_HOOK + if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { + return; + } + if (!connector || gp_command_count == 0 || !Config::enable_collector() || + Config::filter_user(get_user_name())) { + return; + } + // we also would like to know nesting level here and filter queries BUT we + // don't have this kind of information from this callback. 
Will have to + // collect stats anyways and throw it away later, if necessary + auto metrics = UDPIFCGetICStats(); + ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; + ic_statistics.recvQueueSizeCountingTime += metrics.recvQueueSizeCountingTime; + ic_statistics.totalCapacity += metrics.totalCapacity; + ic_statistics.capacityCountingTime += metrics.capacityCountingTime; + ic_statistics.totalBuffers += metrics.totalBuffers; + ic_statistics.bufferCountingTime += metrics.bufferCountingTime; + ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; + ic_statistics.retransmits += metrics.retransmits; + ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; + ic_statistics.mismatchNum += metrics.mismatchNum; + ic_statistics.crcErrors += metrics.crcErrors; + ic_statistics.sndPktNum += metrics.sndPktNum; + ic_statistics.recvPktNum += metrics.recvPktNum; + ic_statistics.disorderedPktNum += metrics.disorderedPktNum; + ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; + ic_statistics.recvAckNum += metrics.recvAckNum; + ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; +#endif +} + EventSender::EventSender() { if (Config::enable_collector() && !Config::filter_user(get_user_name())) { try { @@ -221,6 +269,9 @@ EventSender::EventSender() { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } } +#ifdef IC_TEARDOWN_HOOK + memset(&ic_statistics, 0, sizeof(ICStatistics)); +#endif } EventSender::~EventSender() { diff --git a/src/EventSender.h b/src/EventSender.h index 9470cbf1f98..99f7b24753d 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -4,9 +4,15 @@ #include #include +#define typeid __typeid extern "C" { #include "utils/metrics_utils.h" +#include "cdb/ml_ipc.h" +#ifdef IC_TEARDOWN_HOOK +#include "cdb/ic_udpifc.h" +#endif } +#undef typeid class UDSConnector; struct QueryDesc; @@ -20,6 +26,7 @@ class EventSender { void executor_after_start(QueryDesc *query_desc, int eflags); void 
executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); + void ic_metrics_collect(); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } EventSender(); @@ -55,5 +62,8 @@ class EventSender { int nesting_level = 0; int64_t nested_calls = 0; double nested_timing = 0; +#ifdef IC_TEARDOWN_HOOK + ICStatistics ic_statistics; +#endif std::unordered_map, QueryItem, pair_hash> query_msgs; }; \ No newline at end of file diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index e1be25b8b1e..c37cefb72d6 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -10,6 +10,10 @@ extern "C" { #include "access/hash.h" #include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +#ifdef IC_TEARDOWN_HOOK +#include "cdb/ic_udpifc.h" +#endif #include "gpmon/gpmon.h" #include "utils/workfile_mgr.h" @@ -171,6 +175,37 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); } +#define UPDATE_IC_STATS(proto_name, stat_name) \ + metrics->mutable_interconnect()->set_##proto_name( \ + ic_statistics->stat_name - \ + metrics->mutable_interconnect()->proto_name()); \ + Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ + metrics->mutable_interconnect()->proto_name() <= \ + ic_statistics->stat_name) + +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics) { +#ifdef IC_TEARDOWN_HOOK + UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); + UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); + UPDATE_IC_STATS(total_capacity, totalCapacity); + UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); + UPDATE_IC_STATS(total_buffers, totalBuffers); + UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); + UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); + UPDATE_IC_STATS(retransmits, retransmits); + 
UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); + UPDATE_IC_STATS(mismatch_num, mismatchNum); + UPDATE_IC_STATS(crc_errors, crcErrors); + UPDATE_IC_STATS(snd_pkt_num, sndPktNum); + UPDATE_IC_STATS(recv_pkt_num, recvPktNum); + UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); + UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); + UPDATE_IC_STATS(recv_ack_num, recvAckNum); + UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); +#endif +} + yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { yagpcc::SetQueryReq req; req.set_query_status(status); diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 38aa75611b2..4e4ed5e76a3 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -1,6 +1,7 @@ #include "protos/yagpcc_set_service.pb.h" struct QueryDesc; +struct ICStatistics; google::protobuf::Timestamp current_ts(); void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); @@ -12,5 +13,7 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req); void set_qi_error_message(yagpcc::SetQueryReq *req); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); +void set_ic_stats(yagpcc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 93faaa0bf8f..f1d403b82f1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,3 +1,4 @@ +#define typeid __typeid extern "C" { #include "postgres.h" #include "funcapi.h" @@ -7,8 +8,10 @@ extern "C" { #include "utils/metrics_utils.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" #include "tcop/utility.h" } +#undef typeid #include "Config.h" #include "YagpStat.h" @@ -21,6 +24,9 @@ static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; static 
ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef IC_TEARDOWN_HOOK +static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; +#endif static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -28,6 +34,8 @@ static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +static void ya_ic_teardown_hook(ChunkTransportState *transportStates, + bool hasErrors); static EventSender *sender = nullptr; @@ -60,6 +68,10 @@ void hooks_init() { ExecutorEnd_hook = ya_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; query_info_collect_hook = ya_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK + previous_ic_teardown_hook = ic_teardown_hook; + ic_teardown_hook = ya_ic_teardown_hook; +#endif stat_statements_parser_init(); } @@ -69,6 +81,9 @@ void hooks_deinit() { ExecutorRun_hook = previous_ExecutorRun_hook; ExecutorFinish_hook = previous_ExecutorFinish_hook; query_info_collect_hook = previous_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK + ic_teardown_hook = previous_ic_teardown_hook; +#endif stat_statements_parser_deinit(); if (sender) { delete sender; @@ -141,6 +156,15 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { } } +void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { + cpp_call(get_sender(), &EventSender::ic_metrics_collect); +#ifdef IC_TEARDOWN_HOOK + if (previous_ic_teardown_hook) { + (*previous_ic_teardown_hook)(transportStates, hasErrors); + } +#endif +} + static void check_stats_loaded() { if 
(!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), From 15d4d6b151245aa1ebac2345f941931039c043c2 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 9 Jun 2025 16:59:13 +0300 Subject: [PATCH 29/49] [yagp_hooks_collector] Fix user filtering propagation timing --- src/Config.cpp | 51 ++++++++++++++------------------------------- src/Config.h | 1 + src/EventSender.cpp | 40 ++++++++++++++++++++++++++++++++++- src/EventSender.h | 1 + 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 42fa4b2fb12..19aa37d1b9d 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -6,7 +6,6 @@ extern "C" { #include "postgres.h" -#include "utils/builtins.h" #include "utils/guc.h" } @@ -17,7 +16,12 @@ static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB -static std::unique_ptr> ignored_users = nullptr; +std::unique_ptr> ignored_users_set = nullptr; +bool ignored_users_guc_dirty = false; + +static void assign_ignored_users_hook(const char *, void *) { + ignored_users_guc_dirty = true; +} void Config::init() { DefineCustomStringVariable( @@ -44,11 +48,12 @@ void Config::init() { &guc_report_nested_queries, true, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - DefineCustomStringVariable( - "yagpcc.ignored_users_list", - "Make yagpcc ignore queries issued by given users", 0LL, - &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + DefineCustomStringVariable("yagpcc.ignored_users_list", + "Make yagpcc ignore queries issued by given users", + 0LL, &guc_ignored_users, + "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, + assign_ignored_users_hook, 0LL); DefineCustomIntVariable( "yagpcc.max_text_size", @@ -62,36 +67,12 @@ bool Config::enable_analyze() { return 
guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } +const char *Config::ignored_users() { return guc_ignored_users; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { - if (!ignored_users) { - ignored_users.reset(new std::unordered_set()); - if (guc_ignored_users == nullptr || guc_ignored_users[0] == '0') { - return false; - } - /* Need a modifiable copy of string */ - char *rawstring = pstrdup(guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return false; - } - foreach (l, elemlist) { - ignored_users->insert((char *)lfirst(l)); - } - pfree(rawstring); - list_free(elemlist); + if (!username || !ignored_users_set) { + return true; } - return !username || ignored_users->find(*username) != ignored_users->end(); + return ignored_users_set->find(*username) != ignored_users_set->end(); } diff --git a/src/Config.h b/src/Config.h index f806bc0dbf5..9dd33c68321 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,5 +11,6 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); + static const char *ignored_users(); static size_t max_text_size(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 2ba34d1e4cc..fed9b69911f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,6 +8,7 @@ extern "C" { #include "access/hash.h" #include "executor/executor.h" #include "utils/elog.h" +#include 
"utils/builtins.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" @@ -20,6 +21,9 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" +extern std::unique_ptr> ignored_users_set; +extern bool ignored_users_guc_dirty; + void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -62,6 +66,10 @@ void EventSender::executor_before_start(QueryDesc *query_desc, nested_timing = 0; nested_calls = 0; } + if (ignored_users_guc_dirty) { + update_ignored_users(Config::ignored_users()); + ignored_users_guc_dirty = false; + } if (!need_collect(query_desc, nesting_level)) { return; } @@ -262,7 +270,7 @@ void EventSender::ic_metrics_collect() { } EventSender::EventSender() { - if (Config::enable_collector() && !Config::filter_user(get_user_name())) { + if (Config::enable_collector()) { try { connector = new UDSConnector(); } catch (const std::exception &e) { @@ -347,6 +355,36 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } +void EventSender::update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = + std::make_unique>(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + ignored_users_set = std::move(new_ignored_users_set); +} + EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : 
state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 99f7b24753d..6919defbbb3 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -57,6 +57,7 @@ class EventSender { void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); + void update_ignored_users(const char *new_guc_ignored_users); UDSConnector *connector = nullptr; int nesting_level = 0; From 8ba10d7f001bde2283a22d8f43f3f3b6b1d31021 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 16 Jun 2025 13:07:59 +0300 Subject: [PATCH 30/49] [yagp_hooks_collector] Miscellaneous fixes and refactoring Fix UB in strcpy. General code refactoring. --- src/Config.cpp | 44 +++++++++++++++++++++++++++++++++++++++++--- src/Config.h | 2 +- src/EventSender.cpp | 39 +-------------------------------------- src/EventSender.h | 1 - src/UDSConnector.cpp | 8 +++++++- 5 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/Config.cpp b/src/Config.cpp index 19aa37d1b9d..5e0749f171d 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -6,6 +6,7 @@ extern "C" { #include "postgres.h" +#include "utils/builtins.h" #include "utils/guc.h" } @@ -16,8 +17,39 @@ static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1024; // in KB -std::unique_ptr> ignored_users_set = nullptr; -bool ignored_users_guc_dirty = false; +static std::unique_ptr> ignored_users_set = + nullptr; +static bool ignored_users_guc_dirty = false; + +static void update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = + std::make_unique>(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of 
identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + /* syntax error in list */ + pfree(rawstring); + list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + pfree(rawstring); + list_free(elemlist); + } + ignored_users_set = std::move(new_ignored_users_set); +} static void assign_ignored_users_hook(const char *, void *) { ignored_users_guc_dirty = true; @@ -67,7 +99,6 @@ bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } -const char *Config::ignored_users() { return guc_ignored_users; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } bool Config::filter_user(const std::string *username) { @@ -76,3 +107,10 @@ bool Config::filter_user(const std::string *username) { } return ignored_users_set->find(*username) != ignored_users_set->end(); } + +void Config::sync() { + if (ignored_users_guc_dirty) { + update_ignored_users(guc_ignored_users); + ignored_users_guc_dirty = false; + } +} \ No newline at end of file diff --git a/src/Config.h b/src/Config.h index 9dd33c68321..3caa0c78339 100644 --- a/src/Config.h +++ b/src/Config.h @@ -11,6 +11,6 @@ class Config { static bool enable_collector(); static bool filter_user(const std::string *username); static bool report_nested_queries(); - static const char *ignored_users(); static size_t max_text_size(); + static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fed9b69911f..fc0f7e1aa07 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,7 +8,6 @@ extern "C" { #include "access/hash.h" #include "executor/executor.h" #include 
"utils/elog.h" -#include "utils/builtins.h" #include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" @@ -21,9 +20,6 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" -extern std::unique_ptr> ignored_users_set; -extern bool ignored_users_guc_dirty; - void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -66,10 +62,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, nested_timing = 0; nested_calls = 0; } - if (ignored_users_guc_dirty) { - update_ignored_users(Config::ignored_users()); - ignored_users_guc_dirty = false; - } + Config::sync(); if (!need_collect(query_desc, nesting_level)) { return; } @@ -355,36 +348,6 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } -void EventSender::update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users_set = - std::make_unique>(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users_set->insert((char *)lfirst(l)); - } - pfree(rawstring); - list_free(elemlist); - } - ignored_users_set = std::move(new_ignored_users_set); -} - EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) : state(st), message(msg) {} \ No newline at end of file diff --git a/src/EventSender.h b/src/EventSender.h index 6919defbbb3..99f7b24753d 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -57,7 +57,6 @@ class EventSender { void 
collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); - void update_ignored_users(const char *new_guc_ignored_users); UDSConnector *connector = nullptr; int nesting_level = 0; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index b9088205250..8a5f754f3b4 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -30,7 +30,13 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event) { sockaddr_un address; address.sun_family = AF_UNIX; - strcpy(address.sun_path, Config::uds_path().c_str()); + std::string uds_path = Config::uds_path(); + if (uds_path.size() >= sizeof(address.sun_path)) { + ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); + YagpStat::report_error(); + return false; + } + strcpy(address.sun_path, uds_path.c_str()); bool success = true; auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd != -1) { From 2ffb1461359722493f04eef8544683bac65c793a Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:41:03 +0300 Subject: [PATCH 31/49] [yagp_hooks_collector] Add conditional EXPLAIN ANALYZE collection When enable_analyze is true and execution time exceeds min_analyze_time, generate EXPLAIN (ANALYZE, BUFFERS, TIMING, VERBOSE) output and include it in the done event. 
--- protos/yagpcc_metrics.proto | 1 + src/Config.cpp | 22 +++++++++++++-- src/Config.h | 2 ++ src/EventSender.cpp | 51 ++++++++++++++++++++++++++++++++--- src/EventSender.h | 1 + src/PgUtils.cpp | 37 ++++++++++++++++++++++++++ src/PgUtils.h | 1 + src/ProtoUtils.cpp | 53 ++++++++++++++++++++++++++++++------- src/ProtoUtils.h | 4 ++- src/hook_wrappers.cpp | 24 +++++++++++++++++ 10 files changed, 180 insertions(+), 16 deletions(-) diff --git a/protos/yagpcc_metrics.proto b/protos/yagpcc_metrics.proto index 086f3e63379..91ac0c4941a 100644 --- a/protos/yagpcc_metrics.proto +++ b/protos/yagpcc_metrics.proto @@ -34,6 +34,7 @@ message QueryInfo { string userName = 8; string databaseName = 9; string rsgname = 10; + string analyze_text = 11; } message AdditionalQueryInfo { diff --git a/src/Config.cpp b/src/Config.cpp index 5e0749f171d..ac274a1e218 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,7 +16,10 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1024; // in KB +static int guc_max_text_size = 1024; // in KB +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = -1; // uninitialized state + static std::unique_ptr> ignored_users_set = nullptr; static bool ignored_users_guc_dirty = false; @@ -89,9 +92,22 @@ void Config::init() { DefineCustomIntVariable( "yagpcc.max_text_size", - "Make yagpcc trim plan and query texts longer than configured size", NULL, + "Make yagpcc trim query texts longer than configured size", NULL, &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "yagpcc.max_plan_size", + "Make yagpcc trim plan longer than configured size", NULL, + &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, 
NULL); + + DefineCustomIntVariable( + "yagpcc.min_analyze_time", + "Sets the minimum execution time above which plans will be logged.", + "Zero prints all plans. -1 turns this feature off.", + &guc_min_analyze_time, -1, -1, INT_MAX, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } @@ -100,6 +116,8 @@ bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } bool Config::report_nested_queries() { return guc_report_nested_queries; } size_t Config::max_text_size() { return guc_max_text_size * 1024; } +size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } +int Config::min_analyze_time() { return guc_min_analyze_time; }; bool Config::filter_user(const std::string *username) { if (!username || !ignored_users_set) { diff --git a/src/Config.h b/src/Config.h index 3caa0c78339..dd081c41dd6 100644 --- a/src/Config.h +++ b/src/Config.h @@ -12,5 +12,7 @@ class Config { static bool filter_user(const std::string *username); static bool report_nested_queries(); static size_t max_text_size(); + static size_t max_plan_size(); + static int min_analyze_time(); static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fc0f7e1aa07..19787fe0db0 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -20,6 +20,10 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" +#define need_collect_analyze() \ + (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ + Config::enable_analyze()) + void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; @@ -53,8 +57,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } } -void EventSender::executor_before_start(QueryDesc *query_desc, - int /* eflags*/) { +void 
EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { if (!connector) { return; } @@ -67,7 +70,8 @@ void EventSender::executor_before_start(QueryDesc *query_desc, return; } collect_query_submit(query_desc); - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze()) { + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; @@ -97,6 +101,17 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } update_query_state(query_desc, query, QueryState::START); set_query_plan(query_msg, query_desc); + if (need_collect_analyze()) { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. + if (query_desc->totaltime == NULL) { + MemoryContext oldcxt; + oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + query_desc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); + MemoryContextSwitchTo(oldcxt); + } + } yagpcc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); if (connector->report_query(*query_msg, "started")) { @@ -262,6 +277,34 @@ void EventSender::ic_metrics_collect() { #endif } +void EventSender::analyze_stats_collect(QueryDesc *query_desc) { + if (!connector || Gp_role != GP_ROLE_DISPATCH) { + return; + } + if (!need_collect(query_desc, nesting_level)) { + return; + } + auto query = get_query_message(query_desc); + auto query_msg = query->message; + *query_msg->mutable_end_time() = current_ts(); + // Yet another greenplum weirdness: thats actually a nested query + // which is being committed/rollbacked. Treat it accordingly. 
+ if (query->state == UNKNOWN && !need_report_nested_query()) { + return; + } + if (!query_desc->totaltime || !need_collect_analyze()) { + return; + } + // Make sure stats accumulation is done. + // (Note: it's okay if several levels of hook all do this.) + InstrEndLoop(query_desc->totaltime); + + double ms = query_desc->totaltime->total * 1000.0; + if (ms >= Config::min_analyze_time()) { + set_analyze_plan_text_json(query_desc, query_msg); + } +} + EventSender::EventSender() { if (Config::enable_collector()) { try { @@ -350,4 +393,4 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { EventSender::QueryItem::QueryItem(EventSender::QueryState st, yagpcc::SetQueryReq *msg) - : state(st), message(msg) {} \ No newline at end of file + : state(st), message(msg) {} diff --git a/src/EventSender.h b/src/EventSender.h index 99f7b24753d..4d09b429fc8 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -27,6 +27,7 @@ class EventSender { void executor_end(QueryDesc *query_desc); void query_metrics_collect(QueryMetricsStatus status, void *arg); void ic_metrics_collect(); + void analyze_stats_collect(QueryDesc *query_desc); void incr_depth() { nesting_level++; } void decr_depth() { nesting_level--; } EventSender(); diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 5982ff77c1c..ed3e69c6d44 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -109,3 +109,40 @@ ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { ExplainEndOutput(&es); return es; } + +ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) { + ExplainState es; + ExplainInitState(&es); + es.analyze = analyze; + es.verbose = true; + es.buffers = es.analyze; + es.timing = es.analyze; + es.summary = es.analyze; + es.format = EXPLAIN_FORMAT_JSON; + ExplainBeginOutput(&es); + if (analyze) { + PG_TRY(); + { + ExplainPrintPlan(&es, query_desc); + ExplainPrintExecStatsEnd(&es, query_desc); + } + PG_CATCH(); + { + // PG and GP both have known and yet unknown bugs in 
EXPLAIN VERBOSE + // implementation. We don't want any queries to fail due to those bugs, so + // we report the bug here for future investigatin and continue collecting + // metrics w/o reporting any plans + resetStringInfo(es.str); + appendStringInfo( + es.str, + "Unable to restore analyze plan due to PostgreSQL internal error. " + "See logs for more information"); + ereport(INFO, + (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", + query_desc->sourceText))); + } + PG_END_TRY(); + } + ExplainEndOutput(&es); + return es; +} diff --git a/src/PgUtils.h b/src/PgUtils.h index 85b1eb833cd..81282a473a8 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -14,3 +14,4 @@ bool need_report_nested_query(); bool filter_query(QueryDesc *query_desc); bool need_collect(QueryDesc *query_desc, int nesting_level); ExplainState get_explain_state(QueryDesc *query_desc, bool costs); +ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index c37cefb72d6..6e9fa6bd5c5 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -8,6 +8,7 @@ extern "C" { #include "postgres.h" #include "access/hash.h" +#include "access/xact.h" #include "cdb/cdbinterconnect.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" @@ -47,8 +48,9 @@ void set_segment_key(yagpcc::SegmentKey *key) { key->set_segindex(GpIdentity.segindex); } -inline std::string char_to_trimmed_str(const char *str, size_t len) { - return std::string(str, std::min(len, Config::max_text_size())); +inline std::string char_to_trimmed_str(const char *str, size_t len, + size_t lim) { + return std::string(str, std::min(len, lim)); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -61,10 +63,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { MemoryContextSwitchTo(query_desc->estate->es_query_cxt); auto es = get_explain_state(query_desc, true); MemoryContextSwitchTo(oldcxt); - *qi->mutable_plan_text() = 
char_to_trimmed_str(es.str->data, es.str->len); + *qi->mutable_plan_text() = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = - char_to_trimmed_str(norm_plan->data, norm_plan->len); + *qi->mutable_template_plan_text() = char_to_trimmed_str( + norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); pfree(es.str->data); @@ -76,10 +79,11 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = char_to_trimmed_str( - query_desc->sourceText, strlen(query_desc->sourceText)); + query_desc->sourceText, strlen(query_desc->sourceText), + Config::max_text_size()); char *norm_query = gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = - char_to_trimmed_str(norm_query, strlen(norm_query)); + *qi->mutable_template_query_text() = char_to_trimmed_str( + norm_query, strlen(norm_query), Config::max_text_size()); } } @@ -90,6 +94,7 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { qi->clear_template_plan_text(); qi->clear_query_text(); qi->clear_template_query_text(); + qi->clear_analyze_text(); } } @@ -115,7 +120,8 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { void set_qi_error_message(yagpcc::SetQueryReq *req) { auto aqi = req->mutable_add_info(); auto error = elog_message(); - *aqi->mutable_error_message() = char_to_trimmed_str(error, strlen(error)); + *aqi->mutable_error_message() = + char_to_trimmed_str(error, strlen(error), Config::max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -217,4 +223,33 @@ yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { double protots_to_double(const 
google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} + +void set_analyze_plan_text_json(QueryDesc *query_desc, + yagpcc::SetQueryReq *req) { + // Make sure it is a valid txn and it is not an utility + // statement for ExplainPrintPlan() later. + if (!IsTransactionState() || !query_desc->plannedstmt) { + return; + } + MemoryContext oldcxt = + MemoryContextSwitchTo(query_desc->estate->es_query_cxt); + + ExplainState es = get_analyze_state_json( + query_desc, query_desc->instrument_options && Config::enable_analyze()); + // Remove last line break. + if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { + es.str->data[--es.str->len] = '\0'; + } + // Convert JSON array to JSON object. + if (es.str->len > 0) { + es.str->data[0] = '{'; + es.str->data[es.str->len - 1] = '}'; + } + auto trimmed_analyze = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + + pfree(es.str->data); + MemoryContextSwitchTo(oldcxt); } \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 4e4ed5e76a3..6fb880c2eb8 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -16,4 +16,6 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); -double protots_to_double(const google::protobuf::Timestamp &ts); \ No newline at end of file +double protots_to_double(const google::protobuf::Timestamp &ts); +void set_analyze_plan_text_json(QueryDesc *query_desc, + yagpcc::SetQueryReq *message); \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index f1d403b82f1..79d3ec45881 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -3,6 +3,7 @@ extern "C" { #include "postgres.h" #include "funcapi.h" #include 
"executor/executor.h" +#include "executor/execUtils.h" #include "utils/elog.h" #include "utils/builtins.h" #include "utils/metrics_utils.h" @@ -24,6 +25,10 @@ static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef ANALYZE_STATS_COLLECT_HOOK +static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = + nullptr; +#endif #ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; #endif @@ -36,6 +41,9 @@ static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); static void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors); +#ifdef ANALYZE_STATS_COLLECT_HOOK +static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); +#endif static EventSender *sender = nullptr; @@ -71,6 +79,10 @@ void hooks_init() { #ifdef IC_TEARDOWN_HOOK previous_ic_teardown_hook = ic_teardown_hook; ic_teardown_hook = ya_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + previous_analyze_stats_collect_hook = analyze_stats_collect_hook; + analyze_stats_collect_hook = ya_analyze_stats_collect_hook; #endif stat_statements_parser_init(); } @@ -83,6 +95,9 @@ void hooks_deinit() { query_info_collect_hook = previous_query_info_collect_hook; #ifdef IC_TEARDOWN_HOOK ic_teardown_hook = previous_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + analyze_stats_collect_hook = previous_analyze_stats_collect_hook; #endif stat_statements_parser_deinit(); if (sender) { @@ -165,6 +180,15 @@ void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { #endif } +#ifdef ANALYZE_STATS_COLLECT_HOOK +void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { + cpp_call(get_sender(), 
&EventSender::analyze_stats_collect, query_desc); + if (previous_analyze_stats_collect_hook) { + (*previous_analyze_stats_collect_hook)(query_desc); + } +} +#endif + static void check_stats_loaded() { if (!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), From 87dc57037ba714e8be1abc90261e08313ac21e68 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Fri, 27 Jun 2025 13:31:34 +0300 Subject: [PATCH 32/49] [yagp_hooks_collector] Fix memory leaks, add safe C++ wrappers, improve Makefile Fix memory leaks in C++ and PG contexts. Add safe C++ wrappers around PG functions. Improve error message logging. Enable parallel make. Fix variable expansion. --- Makefile | 2 + src/Config.cpp | 22 +-- src/Config.h | 2 +- src/EventSender.cpp | 20 +-- src/EventSender.h | 3 - src/PgUtils.cpp | 106 +++---------- src/PgUtils.h | 6 +- src/ProcStats.cpp | 8 +- src/ProtoUtils.cpp | 73 ++++----- src/ProtoUtils.h | 2 + src/UDSConnector.cpp | 6 +- src/UDSConnector.h | 1 - src/hook_wrappers.cpp | 6 +- src/memory/gpdbwrappers.cpp | 148 ++++++++++++++++++ src/memory/gpdbwrappers.h | 131 ++++++++++++++++ .../pg_stat_statements_ya_parser.h | 6 +- 16 files changed, 380 insertions(+), 162 deletions(-) create mode 100644 src/memory/gpdbwrappers.cpp create mode 100644 src/memory/gpdbwrappers.h diff --git a/Makefile b/Makefile index 91be52c4468..15c5dabb70e 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,10 @@ # to "Makefile" if it exists. PostgreSQL is shipped with a # "GNUmakefile". If the user hasn't run the configure script yet, the # GNUmakefile won't exist yet, so we catch that case as well. + # AIX make defaults to building *every* target of the first rule. Start with # a single-target, empty rule to make the other targets non-default. 
+all: all check install installdirs installcheck installcheck-parallel uninstall clean distclean maintainer-clean dist distcheck world check-world install-world installcheck-world installcheck-resgroup installcheck-resgroup-v2: @if [ ! -f GNUmakefile ] ; then \ diff --git a/src/Config.cpp b/src/Config.cpp index ac274a1e218..a1289a48891 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,4 +1,5 @@ #include "Config.h" +#include "memory/gpdbwrappers.h" #include #include #include @@ -6,7 +7,6 @@ extern "C" { #include "postgres.h" -#include "utils/builtins.h" #include "utils/guc.h" } @@ -29,15 +29,15 @@ static void update_ignored_users(const char *new_guc_ignored_users) { std::make_unique>(); if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { /* Need a modifiable copy of string */ - char *rawstring = pstrdup(new_guc_ignored_users); + char *rawstring = gpdb::pstrdup(new_guc_ignored_users); List *elemlist; ListCell *l; /* Parse string into list of identifiers */ - if (!SplitIdentifierString(rawstring, ',', &elemlist)) { + if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { /* syntax error in list */ - pfree(rawstring); - list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); ereport( LOG, (errcode(ERRCODE_SYNTAX_ERROR), @@ -48,8 +48,8 @@ static void update_ignored_users(const char *new_guc_ignored_users) { foreach (l, elemlist) { new_ignored_users_set->insert((char *)lfirst(l)); } - pfree(rawstring); - list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); } ignored_users_set = std::move(new_ignored_users_set); } @@ -119,11 +119,11 @@ size_t Config::max_text_size() { return guc_max_text_size * 1024; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } int Config::min_analyze_time() { return guc_min_analyze_time; }; -bool Config::filter_user(const std::string *username) { - if (!username || !ignored_users_set) { +bool Config::filter_user(std::string username) { + if 
(!ignored_users_set) { return true; } - return ignored_users_set->find(*username) != ignored_users_set->end(); + return ignored_users_set->find(username) != ignored_users_set->end(); } void Config::sync() { @@ -131,4 +131,4 @@ void Config::sync() { update_ignored_users(guc_ignored_users); ignored_users_guc_dirty = false; } -} \ No newline at end of file +} diff --git a/src/Config.h b/src/Config.h index dd081c41dd6..eff83f0960a 100644 --- a/src/Config.h +++ b/src/Config.h @@ -9,7 +9,7 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); - static bool filter_user(const std::string *username); + static bool filter_user(std::string username); static bool report_nested_queries(); static size_t max_text_size(); static size_t max_plan_size(); diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 19787fe0db0..8711c4cbd4f 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,15 +1,14 @@ #include "Config.h" #include "UDSConnector.h" +#include "memory/gpdbwrappers.h" #define typeid __typeid extern "C" { #include "postgres.h" -#include "access/hash.h" #include "executor/executor.h" #include "utils/elog.h" -#include "cdb/cdbdisp.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" @@ -81,7 +80,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { instr_time starttime; INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = - cdbexplain_showExecStatsBegin(query_desc, starttime); + gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -106,10 +105,10 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { // Make sure the space is allocated in the per-query // context so it will go away at executor_end. 
if (query_desc->totaltime == NULL) { - MemoryContext oldcxt; - oldcxt = MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - query_desc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); - MemoryContextSwitchTo(oldcxt); + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL); + gpdb::mem_ctx_switch_to(oldcxt); } } yagpcc::GPMetrics stats; @@ -240,7 +239,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}); - pfree(query_desc->gpmon_pkt); + gpdb::pfree(query_desc->gpmon_pkt); } } @@ -297,7 +296,7 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { } // Make sure stats accumulation is done. // (Note: it's okay if several levels of hook all do this.) - InstrEndLoop(query_desc->totaltime); + gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { @@ -364,7 +363,8 @@ EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, query_desc->gpmon_pkt->u.qexec.key.tmid}) == query_msgs.end()) { - query_desc->gpmon_pkt = (gpmon_packet_t *)palloc0(sizeof(gpmon_packet_t)); + query_desc->gpmon_pkt = + (gpmon_packet_t *)gpdb::palloc0(sizeof(gpmon_packet_t)); query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; query_msgs.insert({{gp_command_count, nesting_level}, diff --git a/src/EventSender.h b/src/EventSender.h index 4d09b429fc8..f3dd1d2a528 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,13 +1,10 @@ #pragma once -#include #include -#include #define typeid __typeid extern "C" { #include "utils/metrics_utils.h" -#include "cdb/ml_ipc.h" #ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" #endif diff --git a/src/PgUtils.cpp 
b/src/PgUtils.cpp index ed3e69c6d44..f36cd030a39 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -1,37 +1,41 @@ #include "PgUtils.h" #include "Config.h" +#include "memory/gpdbwrappers.h" extern "C" { -#include "utils/guc.h" -#include "commands/dbcommands.h" #include "commands/resgroupcmds.h" #include "cdb/cdbvars.h" } -std::string *get_user_name() { - const char *username = GetConfigOption("session_authorization", false, false); - // username is not to be freed - return username ? new std::string(username) : nullptr; +std::string get_user_name() { + // username is allocated on stack, we don't need to pfree it. + const char *username = + ya_gpdb::get_config_option("session_authorization", false, false); + return username ? std::string(username) : ""; } -std::string *get_db_name() { - char *dbname = get_database_name(MyDatabaseId); - std::string *result = nullptr; +std::string get_db_name() { + char *dbname = ya_gpdb::get_database_name(MyDatabaseId); if (dbname) { - result = new std::string(dbname); - pfree(dbname); + std::string result(dbname); + ya_gpdb::pfree(dbname); + return result; } - return result; + return ""; } -std::string *get_rg_name() { - auto groupId = ResGroupGetGroupIdBySessionId(MySessionState->sessionId); +std::string get_rg_name() { + auto groupId = ya_gpdb::get_rg_id_by_session_id(MySessionState->sessionId); if (!OidIsValid(groupId)) - return nullptr; - char *rgname = GetResGroupNameForId(groupId); + return ""; + + char *rgname = ya_gpdb::get_rg_name_for_id(groupId); if (rgname == nullptr) - return nullptr; - return new std::string(rgname); + return ""; + + std::string result(rgname); + ya_gpdb::pfree(rgname); + return result; } /** @@ -80,69 +84,3 @@ bool need_collect(QueryDesc *query_desc, int nesting_level) { return !filter_query(query_desc) && nesting_is_valid(query_desc, nesting_level); } - -ExplainState get_explain_state(QueryDesc *query_desc, bool costs) { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = 
true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - PG_TRY(); - { ExplainPrintPlan(&es, query_desc); } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - resetStringInfo(es.str); - appendStringInfo( - es.str, - "Unable to restore query plan due to PostgreSQL internal error. " - "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct explain text for query: %s", - query_desc->sourceText))); - } - PG_END_TRY(); - ExplainEndOutput(&es); - return es; -} - -ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze) { - ExplainState es; - ExplainInitState(&es); - es.analyze = analyze; - es.verbose = true; - es.buffers = es.analyze; - es.timing = es.analyze; - es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_JSON; - ExplainBeginOutput(&es); - if (analyze) { - PG_TRY(); - { - ExplainPrintPlan(&es, query_desc); - ExplainPrintExecStatsEnd(&es, query_desc); - } - PG_CATCH(); - { - // PG and GP both have known and yet unknown bugs in EXPLAIN VERBOSE - // implementation. We don't want any queries to fail due to those bugs, so - // we report the bug here for future investigatin and continue collecting - // metrics w/o reporting any plans - resetStringInfo(es.str); - appendStringInfo( - es.str, - "Unable to restore analyze plan due to PostgreSQL internal error. 
" - "See logs for more information"); - ereport(INFO, - (errmsg("YAGPCC failed to reconstruct analyze text for query: %s", - query_desc->sourceText))); - } - PG_END_TRY(); - } - ExplainEndOutput(&es); - return es; -} diff --git a/src/PgUtils.h b/src/PgUtils.h index 81282a473a8..ceb07c2e8e5 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -5,9 +5,9 @@ extern "C" { #include -std::string *get_user_name(); -std::string *get_db_name(); -std::string *get_rg_name(); +std::string get_user_name(); +std::string get_db_name(); +std::string get_rg_name(); bool is_top_level_query(QueryDesc *query_desc, int nesting_level); bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); bool need_report_nested_query(); diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index a557a20cbb0..5c09fa0bce4 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -75,16 +75,16 @@ void fill_status_stats(yagpcc::SystemStat *stats) { stats->set_vmpeakkb(value); proc_stat >> measure; if (measure != "kB") { - ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", - measure.c_str()))); + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); } } else if (key == "VmSize:") { uint64_t value; proc_stat >> value; stats->set_vmsizekb(value); if (measure != "kB") { - ereport(FATAL, (errmsg("Expected memory sizes in kB, but got in %s", - measure.c_str()))); + throw std::runtime_error("Expected memory sizes in kB, but got in " + + measure); } } } diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 6e9fa6bd5c5..6dc39278bcd 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -2,6 +2,7 @@ #include "PgUtils.h" #include "ProcStats.h" #include "Config.h" +#include "memory/gpdbwrappers.h" #define typeid __typeid #define operator __operator @@ -15,10 +16,7 @@ extern "C" { #ifdef IC_TEARDOWN_HOOK #include "cdb/ic_udpifc.h" #endif -#include "gpmon/gpmon.h" #include "utils/workfile_mgr.h" - -#include 
"stat_statements_parser/pg_stat_statements_ya_parser.h" } #undef typeid #undef operator @@ -60,18 +58,21 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - auto es = get_explain_state(query_desc, true); - MemoryContextSwitchTo(oldcxt); - *qi->mutable_plan_text() = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - StringInfo norm_plan = gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = char_to_trimmed_str( - norm_plan->data, norm_plan->len, Config::max_plan_size()); - qi->set_plan_id(hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - qi->set_query_id(query_desc->plannedstmt->queryId); - pfree(es.str->data); - pfree(norm_plan->data); + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_explain_state(query_desc, true); + if (es.str) { + *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, + Config::max_plan_size()); + StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + *qi->mutable_template_plan_text() = char_to_trimmed_str( + norm_plan->data, norm_plan->len, Config::max_plan_size()); + qi->set_plan_id( + hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + qi->set_query_id(query_desc->plannedstmt->queryId); + gpdb::pfree(es.str->data); + gpdb::pfree(norm_plan->data); + } + gpdb::mem_ctx_switch_to(oldcxt); } } @@ -81,7 +82,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { *qi->mutable_query_text() = char_to_trimmed_str( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); - char *norm_query = gen_normquery(query_desc->sourceText); + char *norm_query = gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = char_to_trimmed_str( norm_query, 
strlen(norm_query), Config::max_text_size()); } @@ -101,9 +102,9 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { void set_query_info(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); - qi->set_allocated_username(get_user_name()); - qi->set_allocated_databasename(get_db_name()); - qi->set_allocated_rsgname(get_rg_name()); + qi->set_username(get_user_name()); + qi->set_databasename(get_db_name()); + qi->set_rsgname(get_rg_name()); } } @@ -233,23 +234,23 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, return; } MemoryContext oldcxt = - MemoryContextSwitchTo(query_desc->estate->es_query_cxt); - - ExplainState es = get_analyze_state_json( + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_analyze_state_json( query_desc, query_desc->instrument_options && Config::enable_analyze()); - // Remove last line break. - if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { - es.str->data[--es.str->len] = '\0'; - } - // Convert JSON array to JSON object. - if (es.str->len > 0) { - es.str->data[0] = '{'; - es.str->data[es.str->len - 1] = '}'; + gpdb::mem_ctx_switch_to(oldcxt); + if (es.str) { + // Remove last line break. + if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { + es.str->data[--es.str->len] = '\0'; + } + // Convert JSON array to JSON object. 
+ if (es.str->len > 0) { + es.str->data[0] = '{'; + es.str->data[es.str->len - 1] = '}'; + } + auto trimmed_analyze = + char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + gpdb::pfree(es.str->data); } - auto trimmed_analyze = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - req->mutable_query_info()->set_analyze_text(trimmed_analyze); - - pfree(es.str->data); - MemoryContextSwitchTo(oldcxt); } \ No newline at end of file diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 6fb880c2eb8..8287b3de7ea 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -1,3 +1,5 @@ +#pragma once + #include "protos/yagpcc_set_service.pb.h" struct QueryDesc; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 8a5f754f3b4..b5b70836db4 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -1,6 +1,7 @@ #include "UDSConnector.h" #include "Config.h" #include "YagpStat.h" +#include "memory/gpdbwrappers.h" #include #include @@ -13,7 +14,6 @@ extern "C" { #include "postgres.h" -#include "cdb/cdbvars.h" } UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } @@ -44,7 +44,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { auto data_size = req.ByteSize(); auto total_size = data_size + sizeof(uint32_t); - uint8_t *buf = (uint8_t *)palloc(total_size); + uint8_t *buf = (uint8_t *)gpdb::palloc(total_size); uint32_t *size_payload = (uint32_t *)buf; *size_payload = data_size; req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); @@ -67,7 +67,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, } else { YagpStat::report_send(total_size); } - pfree(buf); + gpdb::pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); diff --git a/src/UDSConnector.h b/src/UDSConnector.h index 42e0aa20968..67504fc8529 100644 --- 
a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -1,7 +1,6 @@ #pragma once #include "protos/yagpcc_set_service.pb.h" -#include class UDSConnector { public: diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 79d3ec45881..25a85f086d1 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -7,10 +7,10 @@ extern "C" { #include "utils/elog.h" #include "utils/builtins.h" #include "utils/metrics_utils.h" -#include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" #include "tcop/utility.h" +#include "stat_statements_parser/pg_stat_statements_ya_parser.h" } #undef typeid @@ -18,7 +18,7 @@ extern "C" { #include "YagpStat.h" #include "EventSender.h" #include "hook_wrappers.h" -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "memory/gpdbwrappers.h" static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; @@ -229,7 +229,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { values[3] = Int64GetDatum(stats.failed_connects); values[4] = Int64GetDatum(stats.failed_other); values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls); + HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp new file mode 100644 index 00000000000..1fba702a9f5 --- /dev/null +++ b/src/memory/gpdbwrappers.cpp @@ -0,0 +1,148 @@ +#include "gpdbwrappers.h" + +extern "C" { +#include "postgres.h" +#include "utils/guc.h" +#include "commands/dbcommands.h" +#include "commands/resgroupcmds.h" +#include "utils/builtins.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/tupdesc.h" +#include "access/htup.h" +#include "utils/elog.h" +#include "cdb/cdbexplain.h" +#include 
"stat_statements_parser/pg_stat_statements_ya_parser.h" +} + +void *gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } + +void *gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } + +char *gpdb::pstrdup(const char *str) { + return detail::wrap_throw(::pstrdup, str); +} + +char *gpdb::get_database_name(Oid dbid) noexcept { + return detail::wrap_noexcept(::get_database_name, dbid); +} + +bool gpdb::split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept { + return detail::wrap_noexcept(SplitIdentifierString, rawstring, separator, + namelist); +} + +ExplainState gpdb::get_explain_state(QueryDesc *query_desc, + bool costs) noexcept { + return detail::wrap_noexcept([&]() { + ExplainState es; + ExplainInitState(&es); + es.costs = costs; + es.verbose = true; + es.format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(&es); + ExplainPrintPlan(&es, query_desc); + ExplainEndOutput(&es); + return es; + }); +} + +ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, + bool analyze) noexcept { + return detail::wrap_noexcept([&]() { + ExplainState es; + ExplainInitState(&es); + es.analyze = analyze; + es.verbose = true; + es.buffers = es.analyze; + es.timing = es.analyze; + es.summary = es.analyze; + es.format = EXPLAIN_FORMAT_JSON; + ExplainBeginOutput(&es); + if (analyze) { + ExplainPrintPlan(&es, query_desc); + ExplainPrintExecStatsEnd(&es, query_desc); + } + ExplainEndOutput(&es); + return es; + }); +} + +Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options) { + return detail::wrap_throw(InstrAlloc, n, instrument_options); +} + +HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull) { + if (!tupleDescriptor || !values || !isnull) + throw std::runtime_error( + "Invalid input parameters for heap tuple formation"); + + return detail::wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); +} + +void gpdb::pfree(void *pointer) noexcept { + // 
Note that ::pfree asserts that pointer != NULL. + if (!pointer) + return; + + detail::wrap_noexcept(::pfree, pointer); +} + +MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { + return MemoryContextSwitchTo(context); +} + +const char *gpdb::get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept { + if (!name) + return nullptr; + + return detail::wrap_noexcept(GetConfigOption, name, missing_ok, + restrict_superuser); +} + +void gpdb::list_free(List *list) noexcept { + if (!list) + return; + + detail::wrap_noexcept(::list_free, list); +} + +CdbExplain_ShowStatCtx * +gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime) { + if (!query_desc) + throw std::runtime_error("Invalid query descriptor"); + + return detail::wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, + starttime); +} + +void gpdb::instr_end_loop(Instrumentation *instr) { + if (!instr) + throw std::runtime_error("Invalid instrumentation pointer"); + + detail::wrap_throw(::InstrEndLoop, instr); +} + +char *gpdb::gen_normquery(const char *query) { + return detail::wrap_throw(::gen_normquery, query); +} + +StringInfo gpdb::gen_normplan(const char *exec_plan) { + if (!exec_plan) + throw std::runtime_error("Invalid execution plan string"); + + return detail::wrap_throw(::gen_normplan, exec_plan); +} + +char *gpdb::get_rg_name_for_id(Oid group_id) { + return detail::wrap_throw(GetResGroupNameForId, group_id); +} + +Oid gpdb::get_rg_id_by_session_id(int session_id) { + return detail::wrap_throw(ResGroupGetGroupIdBySessionId, session_id); +} \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h new file mode 100644 index 00000000000..437a5dd5d29 --- /dev/null +++ b/src/memory/gpdbwrappers.h @@ -0,0 +1,131 @@ +#pragma once + +extern "C" { +#include "postgres.h" +#include "nodes/pg_list.h" +#include "commands/explain.h" +#include "executor/instrument.h" +#include "access/htup.h" 
+#include "utils/elog.h" +#include "utils/memutils.h" +} + +#include +#include +#include +#include +#include + +namespace gpdb { +namespace detail { + +template +auto wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> decltype(func(std::forward(args)...)) { + + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. + struct VoidResult {}; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) { + result_holder.emplace(func(std::forward(args)...)); + } else { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) { + std::string err; + if (edata && edata->message) { + err = std::string(edata->message); + } else { + err = "Unknown error occurred"; + } + + if (edata) { + FreeErrorData(edata); + } + + if constexpr (Throws) { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) { + return RetType{}; + } else { + return; + } + } + + if constexpr (!std::is_void_v) { + return *std::move(result_holder); + } else { + return; + } +} + +template +auto wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) { + return detail::wrap(std::forward(func), + std::forward(args)...); +} + +template +auto wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) { + return detail::wrap(std::forward(func), + std::forward(args)...); +} +} // namespace detail + +// Functions that call palloc(). +// Make sure correct memory context is set. 
+void *palloc(Size size); +void *palloc0(Size size); +char *pstrdup(const char *str); +char *get_database_name(Oid dbid) noexcept; +bool split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept; +ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; +ExplainState get_analyze_state_json(QueryDesc *query_desc, + bool analyze) noexcept; +Instrumentation *instr_alloc(size_t n, int instrument_options); +HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull); +CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime); +void instr_end_loop(Instrumentation *instr); +char *gen_normquery(const char *query); +StringInfo gen_normplan(const char *executionPlan); +char *get_rg_name_for_id(Oid group_id); + +// Palloc-free functions. +void pfree(void *pointer) noexcept; +MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept; +const char *get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept; +void list_free(List *list) noexcept; +Oid get_rg_id_by_session_id(int session_id); + +} // namespace gpdb diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h index aa9cd217e31..b08e8533992 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -8,9 +8,9 @@ extern "C" extern void stat_statements_parser_init(void); extern void stat_statements_parser_deinit(void); +StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query); + #ifdef __cplusplus } #endif - -StringInfo gen_normplan(const char *executionPlan); -char *gen_normquery(const char *query); \ No newline at end of file From 972192dbdbdf7f56f1647b805fe9460fa0d25e6b Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 14 Jul 2025 16:14:49 +0300 Subject: [PATCH 33/49] [yagp_hooks_collector] 
Add utility statement tracking and metrics documentation Hook into ProcessUtility to emit submit and done events for DDL. Add metrics documentation (metric.md). Change namespace to avoid GPOS conflicts. Report incomplete queries at extension shutdown. Clean up stray files. --- metric.md | 125 +++++++++++ src/Config.cpp | 12 +- src/EventSender.cpp | 407 ++++++++++++++++++++---------------- src/EventSender.h | 94 +++++++-- src/PgUtils.cpp | 10 +- src/ProtoUtils.cpp | 22 +- src/UDSConnector.cpp | 4 +- src/hook_wrappers.cpp | 2 +- src/memory/gpdbwrappers.cpp | 163 +++++++++++---- src/memory/gpdbwrappers.h | 85 +------- 10 files changed, 572 insertions(+), 352 deletions(-) create mode 100644 metric.md diff --git a/metric.md b/metric.md new file mode 100644 index 00000000000..2d198391a67 --- /dev/null +++ b/metric.md @@ -0,0 +1,125 @@ +## YAGP Hooks Collector Metrics + +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - cleanup. +4) `ExecutorEnd()` - cleanup. + +yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +``` +submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done +``` + +### Key Points +- Some queries may skip the _end_ state; in that case the _end_ statistics are sent during _done_. +- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELED`), statistics are sent at _done_. +- Some statistics are calculated as the difference between the current value of a global metric and its previously captured value. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. + +### Notations +- **S** = Submit event. 
+- **T** = Start event. +- **E** = End event. +- **D** = Done event. +- **DIFF** = current_value - submit_value (the value captured at the submit event). +- **ABS** = Absolute value: the value is reported as taken, used where a diff is not applicable. +- **Local*** = Statistics that start counting from zero for each new query. A nested query is also considered new. + +### Statistics Table + +| Proto Field | Type | When | DIFF/ABS | Local* | Scope | Dispatcher | Execute | Units | Notes | +| :--------------------------- | :----- | :------ | :------- | ------ | :------ | :--------: | :-----: | :------ | :-------------------------------------------------- | +| **SystemStat** | | | | | | | | | | +| `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | +| `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | +| `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat vsize | +| `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | +| `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | +| `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | +| `rchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io rchar | +| `wchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io wchar | +| `syscr` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscr | +| `syscw` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscw | +| `read_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io read_bytes | +| `write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io write_bytes | +| `cancelled_write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io cancelled_write_bytes | +| **MetricInstrumentation** | | | | | | | | | | +| `ntuples` | 
uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated total tuples | +| `nloops` | uint64 | E, D | ABS | + | Node | + | + | count | Number of cycles | +| `tuplecount` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated tuples per cycle | +| `firsttuple` | double | E, D | ABS | + | Node | + | + | seconds | Time for first tuple of this cycle | +| `startup` | double | E, D | ABS | + | Node | + | + | seconds | Start time of current iteration | +| `total` | double | E, D | ABS | + | Node | + | + | seconds | Total time taken | +| `shared_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks found in cache | +| `shared_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks read from disk | +| `shared_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared blocks dirtied | +| `shared_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Dirty shared buffer blocks written to disk | +| `local_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local buffer hits | +| `local_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Disk blocks read | +| `local_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks dirtied | +| `local_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks written to disk | +| `temp_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks read | +| `temp_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks written | +| `blk_read_time` | double | E, D | ABS | + | Node | + | + | seconds | Time reading data blocks | +| `blk_write_time` | double | E, D | ABS | + | Node | + | + | seconds | Time writing data blocks | +| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (YAGPCC-specific) | +| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (YAGPCC-specific) 
| +| **NetworkStat (sent)** | | | | | | | | | | +| `sent.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes sent, including headers | +| `sent.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data sent | +| `sent.chunks` | uint32 | D | ABS | - | Node | + | + | count | Tuple-chunks sent | +| **NetworkStat (received)** | | | | | | | | | | +| `received.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes received, including headers | +| `received.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data received | +| `received.chunks` | uint32 | D | ABS | - | Node | + | + | count | Tuple-chunks received | +| **InterconnectStat** | | | | | | | | | | +| `total_recv_queue_size` | uint64 | D | DIFF | - | Node | + | + | bytes | Receive queue size sum | +| `recv_queue_size_counting_t` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_recv_queue_size | +| `total_capacity` | uint64 | D | DIFF | - | Node | + | + | bytes | Capacity sum for sent packets | +| `capacity_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times used to compute total_capacity | +| `total_buffers` | uint64 | D | DIFF | - | Node | + | + | count | Available buffers | +| `buffer_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_buffers | +| `active_connections_num` | uint64 | D | DIFF | - | Node | + | + | count | Active connections | +| `retransmits` | int64 | D | DIFF | - | Node | + | + | count | Packet retransmits | +| `startup_cached_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Startup cached packets | +| `mismatch_num` | int64 | D | DIFF | - | Node | + | + | count | Mismatched packets received | +| `crc_errors` | int64 | D | DIFF | - | Node | + | + | count | CRC errors | +| `snd_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets sent | +| `recv_pkt_num` | int64 | D | DIFF | - | 
Node | + | + | count | Packets received | +| `disordered_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Out-of-order packets | +| `duplicated_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Duplicate packets | +| `recv_ack_num` | int64 | D | DIFF | - | Node | + | + | count | ACKs received | +| `status_query_msg_num` | int64 | D | DIFF | - | Node | + | + | count | Status query messages sent | +| **SpillInfo** | | | | | | | | | | +| `fileCount` | int32 | E, D | DIFF | - | Node | + | + | count | Spill (temp) files created | +| `totalBytes` | int64 | E, D | DIFF | - | Node | + | + | bytes | Spill bytes written | +| **QueryInfo** | | | | | | | | | | +| `generator` | enum | T, E, D | ABS | - | Cluster | + | - | enum | Planner/Optimizer | +| `query_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Query ID | +| `plan_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Hash of normalized plan | +| `query_text` | string | S | ABS | - | Cluster | + | - | text | Query text | +| `plan_text` | string | T | ABS | - | Cluster | + | - | text | EXPLAIN text | +| `template_query_text` | string | S | ABS | - | Cluster | + | - | text | Normalized query text | +| `template_plan_text` | string | T | ABS | - | Cluster | + | - | text | Normalized plan text | +| `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | +| `databaseName` | string | All | ABS | - | Cluster | + | - | text | Database name | +| `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE JSON | +| **AdditionalQueryInfo** | | | | | | | | | | +| `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | +| `error_message` | string | D | ABS | - | Node | + | + | text | Error message | +| `slice_id` | int64 | All | ABS | - | Node | + | + | id | Slice ID | +| **QueryKey** | | | | | | | | | | +| `tmid` | int32 | All | ABS | - | 
Node | + | + | id | Time ID | +| `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | +| `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | +| **SegmentKey** | | | | | | | | | | +| `dbid` | int32 | All | ABS | - | Node | + | + | id | Database ID | +| `segment_index` | int32 | All | ABS | - | Node | + | + | id | Segment index (-1=coordinator) | + +--- + diff --git a/src/Config.cpp b/src/Config.cpp index a1289a48891..aef09fc7d73 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -29,15 +29,15 @@ static void update_ignored_users(const char *new_guc_ignored_users) { std::make_unique>(); if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { /* Need a modifiable copy of string */ - char *rawstring = gpdb::pstrdup(new_guc_ignored_users); + char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); List *elemlist; ListCell *l; /* Parse string into list of identifiers */ - if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { + if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { /* syntax error in list */ - gpdb::pfree(rawstring); - gpdb::list_free(elemlist); + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); ereport( LOG, (errcode(ERRCODE_SYNTAX_ERROR), @@ -48,8 +48,8 @@ static void update_ignored_users(const char *new_guc_ignored_users) { foreach (l, elemlist) { new_ignored_users_set->insert((char *)lfirst(l)); } - gpdb::pfree(rawstring); - gpdb::list_free(elemlist); + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); } ignored_users_set = std::move(new_ignored_users_set); } diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 8711c4cbd4f..133d409b574 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -8,6 +8,7 @@ extern "C" { #include "executor/executor.h" #include "utils/elog.h" +#include "utils/guc.h" #include "cdb/cdbexplain.h" #include "cdb/cdbvars.h" @@ -27,6 +28,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) 
{ if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { return; } + auto *query_desc = reinterpret_cast(arg); switch (status) { case METRICS_PLAN_NODE_INITIALIZE: case METRICS_PLAN_NODE_EXECUTING: @@ -34,8 +36,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - // don't collect anything here. We will fake this call in ExecutorStart as - // it really makes no difference. Just complicates things + collect_query_submit(query_desc); break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -49,7 +50,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELED: case METRICS_INNER_QUERY_DONE: - collect_query_done(reinterpret_cast(arg), status); + collect_query_done(query_desc, status); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -60,15 +61,15 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { if (!connector) { return; } - if (is_top_level_query(query_desc, nesting_level)) { - nested_timing = 0; - nested_calls = 0; + if (filter_query(query_desc)) { + return; + } + if (!qdesc_submitted(query_desc)) { + collect_query_submit(query_desc); } - Config::sync(); if (!need_collect(query_desc, nesting_level)) { return; } - collect_query_submit(query_desc); if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; @@ -80,167 +81,194 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { instr_time starttime; INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = - gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + ya_gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); } } } } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!connector) { + if 
(!connector || !need_collect(query_desc, nesting_level)) { return; } - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { - if (!filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - auto query_msg = query->message; - *query_msg->mutable_start_time() = current_ts(); - if (!nesting_is_valid(query_desc, nesting_level)) { - return; - } - update_query_state(query_desc, query, QueryState::START); - set_query_plan(query_msg, query_desc); - if (need_collect_analyze()) { - // Set up to track total elapsed time during query run. - // Make sure the space is allocated in the per-query - // context so it will go away at executor_end. - if (query_desc->totaltime == NULL) { - MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL); - gpdb::mem_ctx_switch_to(oldcxt); - } - } - yagpcc::GPMetrics stats; - std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { - clear_big_fields(query_msg); - } - std::swap(stats, *query_msg->mutable_query_metrics()); + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto &query = get_query(query_desc); + auto query_msg = query.message.get(); + *query_msg->mutable_start_time() = current_ts(); + update_query_state(query, QueryState::START); + set_query_plan(query_msg, query_desc); + if (need_collect_analyze()) { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. 
+ if (query_desc->totaltime == NULL) { + MemoryContext oldcxt = + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); + ya_gpdb::mem_ctx_switch_to(oldcxt); } } + yagpcc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (connector->report_query(*query_msg, "started")) { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector || - (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE)) { + if (!connector || !need_collect(query_desc, nesting_level)) { return; } - if (!filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - auto query_msg = query->message; - *query_msg->mutable_end_time() = current_ts(); - if (nesting_is_valid(query_desc, nesting_level)) { - if (query->state == UNKNOWN && - // Yet another greenplum weirdness: thats actually a nested query - // which is being committed/rollbacked. Treat it accordingly. 
- !need_report_nested_query()) { - return; - } - update_query_state(query_desc, query, QueryState::END); - if (is_top_level_query(query_desc, nesting_level)) { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, - nested_calls, nested_timing); - } else { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); - } - if (connector->report_query(*query_msg, "ended")) { - clear_big_fields(query_msg); - } - } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + return; + } + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg->mutable_end_time() = current_ts(); + update_query_state(query, QueryState::END); + if (is_top_level_query(query_desc, nesting_level)) { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, + nested_timing); + } else { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (connector->report_query(*query_msg, "ended")) { + clear_big_fields(query_msg); } } void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (connector && need_collect(query_desc, nesting_level)) { - auto *query = get_query_message(query_desc); - query->state = QueryState::SUBMIT; - auto query_msg = query->message; - *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); - *query_msg->mutable_submit_time() = current_ts(); - set_query_info(query_msg); - set_qi_nesting_level(query_msg, query_desc->gpmon_pkt->u.qexec.key.tmid); - set_qi_slice_id(query_msg); - set_query_text(query_msg, query_desc); - if (connector->report_query(*query_msg, "submit")) { - clear_big_fields(query_msg); - } - // take initial metrics snapshot so that we can safely take diff afterwards - // in END or DONE events. 
- set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + if (!connector) { + return; + } + Config::sync(); + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. + if (!need_report_nested_query() && nesting_level > 0) { + QueryKey::register_qkey(query_desc, nesting_level); + return; + } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } + if (!need_collect(query_desc, nesting_level)) { + return; + } + submit_query(query_desc); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg); + set_qi_nesting_level(query_msg, nesting_level); + set_qi_slice_id(query_msg); + set_query_text(query_msg, query_desc); + if (connector->report_query(*query_msg, "submit")) { + clear_big_fields(query_msg); + } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. 
+ set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); #ifdef IC_TEARDOWN_HOOK - // same for interconnect statistics - ic_metrics_collect(); - set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); #endif +} + +void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status) { + yagpcc::QueryStatus query_status; + std::string msg; + switch (status) { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(FATAL, + (errmsg("Unexpected query status in query_done hook: %d", status))); } + auto prev_state = query.state; + update_query_state(query, QueryState::DONE, + query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query.message.get(); + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) { + set_qi_error_message(query_msg); + } + if (prev_state == START) { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, + nested_timing); + } +#ifdef IC_TEARDOWN_HOOK + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif + connector->report_query(*query_msg, msg); } void EventSender::collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status) { - if (connector && !filter_query(query_desc)) { - auto *query = get_query_message(query_desc); - if (query->state != UNKNOWN || need_report_nested_query()) { - if (nesting_is_valid(query_desc, nesting_level)) { - yagpcc::QueryStatus query_status; - std::string msg; - switch (status) { - case METRICS_QUERY_DONE: - case METRICS_INNER_QUERY_DONE: - query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; - msg = "done"; - break; - case METRICS_QUERY_ERROR: - query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; - msg = "error"; - break; - case METRICS_QUERY_CANCELING: - // at the moment we don't track this event, but I`ll leave this code - // here just in case - Assert(false); - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; - msg = "cancelling"; - break; - case METRICS_QUERY_CANCELED: - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; - msg = "cancelled"; - break; - default: - ereport(FATAL, - (errmsg("Unexpected query status in query_done hook: %d", - status))); - } - auto prev_state = query->state; - update_query_state(query_desc, query, QueryState::DONE, - query_status == - yagpcc::QueryStatus::QUERY_STATUS_DONE); - auto query_msg = query->message; - query_msg->set_query_status(query_status); - if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); - } - if (prev_state == START) { - // We've missed ExecutorEnd call due to query cancel or error. 
It's - // fine, but now we need to collect and report execution stats - *query_msg->mutable_end_time() = current_ts(); - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, - nested_calls, nested_timing); - } -#ifdef IC_TEARDOWN_HOOK - ic_metrics_collect(); - set_ic_stats( - query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); -#endif - connector->report_query(*query_msg, msg); - } - update_nested_counters(query_desc); + if (!connector || !need_collect(query_desc, nesting_level)) { + return; + } + + // Skip sending done message if query errored before submit. + if (!qdesc_submitted(query_desc)) { + if (status != METRICS_QUERY_ERROR) { + ereport(WARNING, (errmsg("YAGPCC trying to process DONE hook for " + "unsubmitted and unerrored query"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); } - query_msgs.erase({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}); - gpdb::pfree(query_desc->gpmon_pkt); + return; + } + + if (queries.empty()) { + ereport(WARNING, (errmsg("YAGPCC cannot find query to process DONE hook"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + return; } + auto &query = get_query(query_desc); + + bool report = need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); + if (report) + report_query_done(query_desc, query, status); + + if (need_report_nested_query()) + update_nested_counters(query_desc); + + queries.erase(QueryKey::from_qdesc(query_desc)); + pfree(query_desc->yagp_query_key); + query_desc->yagp_query_key = NULL; } void EventSender::ic_metrics_collect() { @@ -283,20 +311,15 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { if (!need_collect(query_desc, nesting_level)) { return; } - auto query = get_query_message(query_desc); - auto query_msg = query->message; + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); 
*query_msg->mutable_end_time() = current_ts(); - // Yet another greenplum weirdness: thats actually a nested query - // which is being committed/rollbacked. Treat it accordingly. - if (query->state == UNKNOWN && !need_report_nested_query()) { - return; - } if (!query_desc->totaltime || !need_collect_analyze()) { return; } // Make sure stats accumulation is done. // (Note: it's okay if several levels of hook all do this.) - gpdb::instr_end_loop(query_desc->totaltime); + ya_gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { @@ -318,26 +341,26 @@ EventSender::EventSender() { } EventSender::~EventSender() { - delete connector; - for (auto iter = query_msgs.begin(); iter != query_msgs.end(); ++iter) { - delete iter->second.message; + for (const auto &[qkey, _] : queries) { + ereport(LOG, + (errmsg("YAGPCC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); } + delete connector; } // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP -void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, - QueryState new_state, bool success) { - if (query->state == UNKNOWN) { - collect_query_submit(query_desc); - } +void EventSender::update_query_state(QueryItem &query, QueryState new_state, + bool success) { switch (new_state) { case QueryState::SUBMIT: Assert(false); break; case QueryState::START: - if (query->state == QueryState::SUBMIT) { - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + if (query.state == QueryState::SUBMIT) { + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); } else { Assert(false); } @@ -346,40 +369,52 @@ void EventSender::update_query_state(QueryDesc *query_desc, QueryItem *query, // Example of below assert triggering: CURSOR closes before ever being // executed Assert(query->state == 
QueryState::START || // IsAbortInProgress()); - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: - Assert(query->state == QueryState::END || !success); - query->message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + Assert(query.state == QueryState::END || !success); + query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); break; default: Assert(false); } - query->state = new_state; + query.state = new_state; } -EventSender::QueryItem *EventSender::get_query_message(QueryDesc *query_desc) { - if (query_desc->gpmon_pkt == nullptr || - query_msgs.find({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}) == - query_msgs.end()) { - query_desc->gpmon_pkt = - (gpmon_packet_t *)gpdb::palloc0(sizeof(gpmon_packet_t)); - query_desc->gpmon_pkt->u.qexec.key.ccnt = gp_command_count; - query_desc->gpmon_pkt->u.qexec.key.tmid = nesting_level; - query_msgs.insert({{gp_command_count, nesting_level}, - QueryItem(UNKNOWN, new yagpcc::SetQueryReq())}); - } - return &query_msgs.at({query_desc->gpmon_pkt->u.qexec.key.ccnt, - query_desc->gpmon_pkt->u.qexec.key.tmid}); +EventSender::QueryItem &EventSender::get_query(QueryDesc *query_desc) { + if (!qdesc_submitted(query_desc)) { + ereport(WARNING, + (errmsg("YAGPCC attempting to get query that was not submitted"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + throw std::runtime_error("Attempting to get query that was not submitted"); + } + return queries.find(QueryKey::from_qdesc(query_desc))->second; +} + +void EventSender::submit_query(QueryDesc *query_desc) { + if (query_desc->yagp_query_key) { + ereport(WARNING, + (errmsg("YAGPCC trying to submit already submitted query"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } + QueryKey::register_qkey(query_desc, 
nesting_level); + auto key = QueryKey::from_qdesc(query_desc); + auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); + if (!inserted) { + ereport(WARNING, (errmsg("YAGPCC duplicate query submit detected"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + } } void EventSender::update_nested_counters(QueryDesc *query_desc) { if (!is_top_level_query(query_desc, nesting_level)) { - auto query_msg = get_query_message(query_desc); + auto &query = get_query(query_desc); nested_calls++; - double end_time = protots_to_double(query_msg->message->end_time()); - double start_time = protots_to_double(query_msg->message->start_time()); + double end_time = protots_to_double(query.message->end_time()); + double start_time = protots_to_double(query.message->start_time()); if (end_time >= start_time) { nested_timing += end_time - start_time; } else { @@ -391,6 +426,12 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { } } -EventSender::QueryItem::QueryItem(EventSender::QueryState st, - yagpcc::SetQueryReq *msg) - : state(st), message(msg) {} +bool EventSender::qdesc_submitted(QueryDesc *query_desc) { + if (query_desc->yagp_query_key == NULL) { + return false; + } + return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); +} + +EventSender::QueryItem::QueryItem(QueryState st) + : message(std::make_unique()), state(st) {} diff --git a/src/EventSender.h b/src/EventSender.h index f3dd1d2a528..4071d580ff9 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #define typeid __typeid extern "C" { @@ -11,12 +13,75 @@ extern "C" { } #undef typeid +#include "memory/gpdbwrappers.h" + class UDSConnector; struct QueryDesc; namespace yagpcc { class SetQueryReq; } +#include + +struct QueryKey { + int tmid; + int ssid; + int ccnt; + int nesting_level; + uintptr_t query_desc_addr; + + bool operator==(const QueryKey &other) const { + return 
std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == + std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, + other.query_desc_addr); + } + + static void register_qkey(QueryDesc *query_desc, size_t nesting_level) { + query_desc->yagp_query_key = + (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); + int32 tmid; + gpmon_gettmid(&tmid); + query_desc->yagp_query_key->tmid = tmid; + query_desc->yagp_query_key->ssid = gp_session_id; + query_desc->yagp_query_key->ccnt = gp_command_count; + query_desc->yagp_query_key->nesting_level = nesting_level; + query_desc->yagp_query_key->query_desc_addr = (uintptr_t)query_desc; + } + + static QueryKey from_qdesc(QueryDesc *query_desc) { + return { + .tmid = query_desc->yagp_query_key->tmid, + .ssid = query_desc->yagp_query_key->ssid, + .ccnt = query_desc->yagp_query_key->ccnt, + .nesting_level = query_desc->yagp_query_key->nesting_level, + .query_desc_addr = query_desc->yagp_query_key->query_desc_addr, + }; + } +}; + +// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html +template inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +namespace std { +template <> struct hash { + size_t operator()(const QueryKey &k) const noexcept { + size_t seed = hash{}(k.tmid); + hash_combine(seed, k.ssid); + hash_combine(seed, k.ccnt); + hash_combine(seed, k.nesting_level); + uintptr_t addr = k.query_desc_addr; + if constexpr (SIZE_MAX < UINTPTR_MAX) { + addr %= SIZE_MAX; + } + hash_combine(seed, addr); + return seed; + } +}; +} // namespace std + class EventSender { public: void executor_before_start(QueryDesc *query_desc, int eflags); @@ -31,30 +96,25 @@ class EventSender { ~EventSender(); private: - enum QueryState { UNKNOWN, SUBMIT, START, END, DONE }; + enum QueryState { SUBMIT, START, END, DONE }; struct QueryItem { - QueryState state = QueryState::UNKNOWN; - yagpcc::SetQueryReq *message = 
nullptr; + std::unique_ptr message; + QueryState state; - QueryItem(QueryState st, yagpcc::SetQueryReq *msg); - }; - - struct pair_hash { - std::size_t operator()(const std::pair &p) const { - auto h1 = std::hash{}(p.first); - auto h2 = std::hash{}(p.second); - return h1 ^ h2; - } + explicit QueryItem(QueryState st); }; - void update_query_state(QueryDesc *query_desc, QueryItem *query, - QueryState new_state, bool success = true); - QueryItem *get_query_message(QueryDesc *query_desc); + void update_query_state(QueryItem &query, QueryState new_state, + bool success = true); + QueryItem &get_query(QueryDesc *query_desc); + void submit_query(QueryDesc *query_desc); void collect_query_submit(QueryDesc *query_desc); + void report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status); void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); - void cleanup_messages(); void update_nested_counters(QueryDesc *query_desc); + bool qdesc_submitted(QueryDesc *query_desc); UDSConnector *connector = nullptr; int nesting_level = 0; @@ -63,5 +123,5 @@ class EventSender { #ifdef IC_TEARDOWN_HOOK ICStatistics ic_statistics; #endif - std::unordered_map, QueryItem, pair_hash> query_msgs; + std::unordered_map queries; }; \ No newline at end of file diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index f36cd030a39..929f0cf2681 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -60,14 +60,14 @@ std::string get_rg_name() { */ bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - return (query_desc->gpmon_pkt && - query_desc->gpmon_pkt->u.qexec.key.tmid == 0) || - nesting_level == 0; + if (query_desc->yagp_query_key == NULL) { + return nesting_level == 0; + } + return query_desc->yagp_query_key->nesting_level == 0; } bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return (Gp_session_role == GP_ROLE_DISPATCH && - Config::report_nested_queries()) || + return need_report_nested_query() || 
is_top_level_query(query_desc, nesting_level); } diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 6dc39278bcd..4655433c806 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -58,21 +58,21 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = gpdb::get_explain_state(query_desc, true); + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); - StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); *qi->mutable_template_plan_text() = char_to_trimmed_str( norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); - gpdb::pfree(es.str->data); - gpdb::pfree(norm_plan->data); + ya_gpdb::pfree(es.str->data); + ya_gpdb::pfree(norm_plan->data); } - gpdb::mem_ctx_switch_to(oldcxt); + ya_gpdb::mem_ctx_switch_to(oldcxt); } } @@ -82,7 +82,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { *qi->mutable_query_text() = char_to_trimmed_str( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); - char *norm_query = gpdb::gen_normquery(query_desc->sourceText); + char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = char_to_trimmed_str( norm_query, strlen(norm_query), Config::max_text_size()); } @@ -234,10 +234,10 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, return; } MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - 
ExplainState es = gpdb::get_analyze_state_json( + ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = ya_gpdb::get_analyze_state_json( query_desc, query_desc->instrument_options && Config::enable_analyze()); - gpdb::mem_ctx_switch_to(oldcxt); + ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { // Remove last line break. if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { @@ -251,6 +251,6 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, auto trimmed_analyze = char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); - gpdb::pfree(es.str->data); + ya_gpdb::pfree(es.str->data); } } \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index b5b70836db4..f8c4586126d 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -44,7 +44,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { auto data_size = req.ByteSize(); auto total_size = data_size + sizeof(uint32_t); - uint8_t *buf = (uint8_t *)gpdb::palloc(total_size); + uint8_t *buf = (uint8_t *)ya_gpdb::palloc(total_size); uint32_t *size_payload = (uint32_t *)buf; *size_payload = data_size; req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); @@ -67,7 +67,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, } else { YagpStat::report_send(total_size); } - gpdb::pfree(buf); + ya_gpdb::pfree(buf); } else { // log the error and go on log_tracing_failure(req, event); diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 25a85f086d1..d76b7c64e10 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -229,7 +229,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { values[3] = Int64GetDatum(stats.failed_connects); values[4] = Int64GetDatum(stats.failed_other); values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = 
gpdb::heap_form_tuple(tupdesc, values, nulls); + HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 1fba702a9f5..9d579a91a30 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -16,27 +16,104 @@ extern "C" { #include "stat_statements_parser/pg_stat_statements_ya_parser.h" } -void *gpdb::palloc(Size size) { return detail::wrap_throw(::palloc, size); } +namespace { -void *gpdb::palloc0(Size size) { return detail::wrap_throw(::palloc0, size); } +template +auto wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> decltype(func(std::forward(args)...)) { -char *gpdb::pstrdup(const char *str) { - return detail::wrap_throw(::pstrdup, str); + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. + struct VoidResult {}; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) { + result_holder.emplace(func(std::forward(args)...)); + } else { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) { + std::string err; + if (edata && edata->message) { + err = std::string(edata->message); + } else { + err = "Unknown error occurred"; + } + + if (edata) { + FreeErrorData(edata); + } + + if constexpr (Throws) { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) { + return RetType{}; + } else { + return; + } + } + + if constexpr (!std::is_void_v) { + return *std::move(result_holder); + } else { + return; + } +} + +template +auto 
wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); } -char *gpdb::get_database_name(Oid dbid) noexcept { - return detail::wrap_noexcept(::get_database_name, dbid); +template +auto wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) { + return wrap(std::forward(func), std::forward(args)...); +} +} // namespace + +void *ya_gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } + +void *ya_gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } + +char *ya_gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } + +char *ya_gpdb::get_database_name(Oid dbid) noexcept { + return wrap_noexcept(::get_database_name, dbid); } -bool gpdb::split_identifier_string(char *rawstring, char separator, - List **namelist) noexcept { - return detail::wrap_noexcept(SplitIdentifierString, rawstring, separator, - namelist); +bool ya_gpdb::split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept { + return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); } -ExplainState gpdb::get_explain_state(QueryDesc *query_desc, - bool costs) noexcept { - return detail::wrap_noexcept([&]() { +ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, + bool costs) noexcept { + return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); es.costs = costs; @@ -49,9 +126,9 @@ ExplainState gpdb::get_explain_state(QueryDesc *query_desc, }); } -ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, - bool analyze) noexcept { - return detail::wrap_noexcept([&]() { +ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, + bool analyze) noexcept { + return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); es.analyze = analyze; @@ -70,79 +147,77 @@ ExplainState gpdb::get_analyze_state_json(QueryDesc *query_desc, }); } -Instrumentation 
*gpdb::instr_alloc(size_t n, int instrument_options) { - return detail::wrap_throw(InstrAlloc, n, instrument_options); +Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options) { + return wrap_throw(InstrAlloc, n, instrument_options); } -HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, - bool *isnull) { +HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, + bool *isnull) { if (!tupleDescriptor || !values || !isnull) throw std::runtime_error( "Invalid input parameters for heap tuple formation"); - return detail::wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); + return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } -void gpdb::pfree(void *pointer) noexcept { +void ya_gpdb::pfree(void *pointer) noexcept { // Note that ::pfree asserts that pointer != NULL. if (!pointer) return; - detail::wrap_noexcept(::pfree, pointer); + wrap_noexcept(::pfree, pointer); } -MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { +MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { return MemoryContextSwitchTo(context); } -const char *gpdb::get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept { +const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept { if (!name) return nullptr; - return detail::wrap_noexcept(GetConfigOption, name, missing_ok, - restrict_superuser); + return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); } -void gpdb::list_free(List *list) noexcept { +void ya_gpdb::list_free(List *list) noexcept { if (!list) return; - detail::wrap_noexcept(::list_free, list); + wrap_noexcept(::list_free, list); } CdbExplain_ShowStatCtx * -gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime) { +ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, + instr_time starttime) { if (!query_desc) throw 
std::runtime_error("Invalid query descriptor"); - return detail::wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, - starttime); + return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); } -void gpdb::instr_end_loop(Instrumentation *instr) { +void ya_gpdb::instr_end_loop(Instrumentation *instr) { if (!instr) throw std::runtime_error("Invalid instrumentation pointer"); - detail::wrap_throw(::InstrEndLoop, instr); + wrap_throw(::InstrEndLoop, instr); } -char *gpdb::gen_normquery(const char *query) { - return detail::wrap_throw(::gen_normquery, query); +char *ya_gpdb::gen_normquery(const char *query) { + return wrap_throw(::gen_normquery, query); } -StringInfo gpdb::gen_normplan(const char *exec_plan) { +StringInfo ya_gpdb::gen_normplan(const char *exec_plan) { if (!exec_plan) throw std::runtime_error("Invalid execution plan string"); - return detail::wrap_throw(::gen_normplan, exec_plan); + return wrap_throw(::gen_normplan, exec_plan); } -char *gpdb::get_rg_name_for_id(Oid group_id) { - return detail::wrap_throw(GetResGroupNameForId, group_id); +char *ya_gpdb::get_rg_name_for_id(Oid group_id) { + return wrap_throw(GetResGroupNameForId, group_id); } -Oid gpdb::get_rg_id_by_session_id(int session_id) { - return detail::wrap_throw(ResGroupGetGroupIdBySessionId, session_id); +Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { + return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } \ No newline at end of file diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 437a5dd5d29..ad7ae96c362 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -16,88 +16,7 @@ extern "C" { #include #include -namespace gpdb { -namespace detail { - -template -auto wrap(Func &&func, Args &&...args) noexcept(!Throws) - -> decltype(func(std::forward(args)...)) { - - using RetType = decltype(func(std::forward(args)...)); - - // Empty struct for void return type. 
- struct VoidResult {}; - using ResultHolder = std::conditional_t, VoidResult, - std::optional>; - - bool success; - ErrorData *edata; - ResultHolder result_holder; - - PG_TRY(); - { - if constexpr (!std::is_void_v) { - result_holder.emplace(func(std::forward(args)...)); - } else { - func(std::forward(args)...); - } - edata = NULL; - success = true; - } - PG_CATCH(); - { - MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); - edata = CopyErrorData(); - MemoryContextSwitchTo(oldctx); - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) { - std::string err; - if (edata && edata->message) { - err = std::string(edata->message); - } else { - err = "Unknown error occurred"; - } - - if (edata) { - FreeErrorData(edata); - } - - if constexpr (Throws) { - throw std::runtime_error(err); - } - - if constexpr (!std::is_void_v) { - return RetType{}; - } else { - return; - } - } - - if constexpr (!std::is_void_v) { - return *std::move(result_holder); - } else { - return; - } -} - -template -auto wrap_throw(Func &&func, Args &&...args) - -> decltype(func(std::forward(args)...)) { - return detail::wrap(std::forward(func), - std::forward(args)...); -} - -template -auto wrap_noexcept(Func &&func, Args &&...args) noexcept - -> decltype(func(std::forward(args)...)) { - return detail::wrap(std::forward(func), - std::forward(args)...); -} -} // namespace detail +namespace ya_gpdb { // Functions that call palloc(). // Make sure correct memory context is set. 
@@ -128,4 +47,4 @@ const char *get_config_option(const char *name, bool missing_ok, void list_free(List *list) noexcept; Oid get_rg_id_by_session_id(int session_id); -} // namespace gpdb +} // namespace ya_gpdb From ed059a25f231588ed9882c9b56064cfafa2f8933 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:26:16 +0300 Subject: [PATCH 34/49] [yagp_hooks_collector] Add regression tests, ANALYZE text output, and UTF-8 trimming Add PG-style regression tests. Enable sending EXPLAIN ANALYZE as text. Add utility statement hook coverage. Implement UTF-8 safe trimming: discard partial multi-byte characters at cut boundaries. Clean up stray gmon.out. --- expected/yagp_cursors.out | 165 +++++++++++ expected/yagp_dist.out | 177 ++++++++++++ expected/yagp_select.out | 138 +++++++++ expected/yagp_utf8_trim.out | 66 +++++ expected/yagp_utility.out | 272 ++++++++++++++++++ metric.md | 49 ++-- sql/yagp_cursors.sql | 83 ++++++ sql/yagp_dist.sql | 86 ++++++ sql/yagp_select.sql | 67 +++++ sql/yagp_utf8_trim.sql | 43 +++ sql/yagp_utility.sql | 133 +++++++++ src/Config.cpp | 36 ++- src/Config.h | 5 + src/EventSender.cpp | 191 +++++++----- src/EventSender.h | 18 +- src/PgUtils.cpp | 5 - src/PgUtils.h | 3 - src/ProtoUtils.cpp | 69 +++-- src/ProtoUtils.h | 5 +- src/UDSConnector.cpp | 3 +- src/UDSConnector.h | 4 +- src/hook_wrappers.cpp | 60 +++- src/hook_wrappers.h | 3 + src/log/LogOps.cpp | 131 +++++++++ src/log/LogOps.h | 19 ++ src/log/LogSchema.cpp | 135 +++++++++ src/log/LogSchema.h | 166 +++++++++++ src/memory/gpdbwrappers.cpp | 13 +- src/memory/gpdbwrappers.h | 8 +- src/yagp_hooks_collector.c | 14 +- yagp_hooks_collector--1.0--1.1.sql | 113 ++++++++ ...--1.0.sql => yagp_hooks_collector--1.0.sql | 2 +- yagp_hooks_collector--1.1.sql | 95 ++++++ yagp_hooks_collector.control | 2 +- 34 files changed, 2224 insertions(+), 155 deletions(-) create mode 100644 expected/yagp_cursors.out create mode 100644 expected/yagp_dist.out create mode 
100644 expected/yagp_select.out create mode 100644 expected/yagp_utf8_trim.out create mode 100644 expected/yagp_utility.out create mode 100644 sql/yagp_cursors.sql create mode 100644 sql/yagp_dist.sql create mode 100644 sql/yagp_select.sql create mode 100644 sql/yagp_utf8_trim.sql create mode 100644 sql/yagp_utility.sql create mode 100644 src/log/LogOps.cpp create mode 100644 src/log/LogOps.h create mode 100644 src/log/LogSchema.cpp create mode 100644 src/log/LogSchema.h create mode 100644 yagp_hooks_collector--1.0--1.1.sql rename sql/yagp_hooks_collector--1.0.sql => yagp_hooks_collector--1.0.sql (99%) create mode 100644 yagp_hooks_collector--1.1.sql diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out new file mode 100644 index 00000000000..9587c00b550 --- /dev/null +++ b/expected/yagp_cursors.out @@ -0,0 +1,165 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT 
+ -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(14 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid 
| query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; + ?column? +---------- + 2 +(1 row) + +FETCH 1 IN cursor_stats_6; + ?column? 
+---------- + 3 +(1 row) + +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(18 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_dist.out b/expected/yagp_dist.out new file mode 100644 index 00000000000..ebaf839601d --- /dev/null +++ b/expected/yagp_dist.out @@ -0,0 +1,177 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' 
THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; +-- Hash distributed table +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; + id +---- + 1 +(1 row) + +RESET optimizer_enable_direct_dispatch; +RESET yagpcc.logging_mode; +-- Should see 8 rows. +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET yagpcc.logging_mode to 'TBL'; +-- Scan all segments. 
+select * from test_hash_dist; + id +---- + 1 +(1 row) + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------+--------------------- + -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); + count +------- + 3 +(1 row) + +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------------------+--------------------- + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | 
QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; + count +------- + 100 +(1 row) + +RESET yagpcc.logging_mode; +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. +SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + query_text | query_status +-----------------------------------------+--------------------- + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE +(12 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_select.out b/expected/yagp_select.out new file mode 100644 index 00000000000..4c4a0218150 --- /dev/null +++ 
b/expected/yagp_select.out @@ -0,0 +1,138 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; +SELECT 1; + ?column? +---------- + 1 +(1 row) + +SELECT COUNT(*) FROM generate_series(1,10); + count +------- + 10 +(1 row) + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +COMMIT; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + column1 +--------- + 1 + 2 +(2 rows) + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-----------------------------+--------------------- + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_END + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | SELECT * FROM t; | +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; + ?column? 
+---------- + 1 +(1 row) + +DEALLOCATE test_stmt; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------+--------------------- + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/expected/yagp_utf8_trim.out b/expected/yagp_utf8_trim.out new file mode 100644 index 00000000000..194ee6b3609 --- /dev/null +++ b/expected/yagp_utf8_trim.out @@ -0,0 +1,66 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; +SET yagpcc.enable TO TRUE; +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; + ?column? +------------ + HelloWorld +(1 row) + +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; + ?column? +------------- + РУССКИЙЯЗЫК +(1 row) + +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. 
+SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; + ?column? +---------- + 😀 +(1 row) + +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. +SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; +DROP EXTENSION yagp_hooks_collector; diff --git a/expected/yagp_utility.out b/expected/yagp_utility.out new file mode 100644 index 00000000000..03c17713575 --- /dev/null +++ b/expected/yagp_utility.out @@ -0,0 +1,272 @@ +CREATE EXTENSION yagp_hooks_collector; +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE test_table (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE + -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE test_table; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +NOTICE: CREATE TABLE will create partition "pt_test_1_prt_1" for table "pt_test" +NOTICE: CREATE TABLE will create partition "pt_test_1_prt_2" for table "pt_test" +DROP TABLE pt_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| 
QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE + -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT + -1 
| DROP VIEW test_view; | QUERY_STATUS_DONE + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(10 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; +BEGIN; +SAVEPOINT sp2; +ABORT; +BEGIN; +ROLLBACK; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(18 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE dml_test (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE + -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; +CREATE TABLE copy_test (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+COPY (SELECT 1) TO STDOUT; +1 +DROP TABLE copy_test; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +ERROR: division by zero +DEALLOCATE test_prep; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR + -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT + -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(8 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; +SET yagpcc.report_nested_queries TO FALSE; +RESET yagpcc.report_nested_queries; 
+RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT +(6 rows) + +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + t +--- + t +(1 row) + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/metric.md b/metric.md index 2d198391a67..5df56877edb 100644 --- a/metric.md +++ b/metric.md @@ -1,32 +1,33 @@ ## YAGP Hooks Collector Metrics -### States -A Postgres process goes through 4 executor functions to execute a query: -1) `ExecutorStart()` - resource allocation for the query. -2) `ExecutorRun()` - query execution. -3) `ExecutorFinish()` - cleanup. -4) `ExecutorEnd()` - cleanup. +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - cleanup. +4) `ExecutorEnd()` - cleanup. 
-yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: ``` submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done ``` -### Key Points -- Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. -- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics is sent at _done_. -- Some statistics is calculated as the difference between the current global metric and the previous. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. -- Nested queries on _Dispatcher_ become top-level on _Execute_. -- Each process (_Dispatcher_/_Execute_) sends its own statistics. +### Key Points +- Some queries may skip the _end_ state, then the _end_ statistics is sent during _done_. +- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics is sent at _done_. +- Some statistics is calculated as the difference between the current global metric and the previous. The initial snapshot is taken at submit, and at _end_/_done_ the diff is calculated. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. -### Notations -- **S** = Submit event. -- **T** = Start event. -- **E** = End event. -- **D** = Done event. -- **DIFF** = current_value - submit_value (submit event). -- **ABS** = Absolute value, or where diff is not applicable, the value taken. -- **Local*** - Statistics that starts counting from zero for each new query. A nested query is also considered new. +### Notations +- **S** = Submit event. +- **T** = Start event. +- **E** = End event. 
+- **D** = Done event. +- **DIFF** = current_value - submit_value (submit event). +- **ABS** = Absolute value, or where diff is not applicable, the value taken. +- **Local*** - Statistics that starts counting from zero for each new query. A nested query is also considered new. +- **Node** - PG process, either a `Query Dispatcher` (on master) or an `Execute` (on segment). ### Statistics Table @@ -36,7 +37,7 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | | `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | | `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | -| `vsize` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat vsize | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | bytes | /proc/pid/stat vsize | | `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | | `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | | `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | @@ -108,13 +109,13 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | | `databaseName` | string | All | ABS | - | Cluster | + | - | text | Database name | | `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | -| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE JSON | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE | | **AdditionalQueryInfo** | | | | | | | | | | | `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | | `error_message` | string | D | ABS | - | Node | + | + | text | Error message | | `slice_id` | int64 | All | ABS | - | 
Node | + | + | id | Slice ID | | **QueryKey** | | | | | | | | | | -| `tmid` | int32 | All | ABS | - | Node | + | + | id | Time ID | +| `tmid` | int32 | All | ABS | - | Node | + | + | id | Transaction start time | | `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | | `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | | **SegmentKey** | | | | | | | | | | diff --git a/sql/yagp_cursors.sql b/sql/yagp_cursors.sql new file mode 100644 index 00000000000..5d5bde58110 --- /dev/null +++ b/sql/yagp_cursors.sql @@ -0,0 +1,83 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +-- DECLARE +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DECLARE WITH HOLD +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- ROLLBACK +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR 
SELECT 1; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- FETCH +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; +FETCH 1 IN cursor_stats_6; +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_dist.sql b/sql/yagp_dist.sql new file mode 100644 index 00000000000..b837ef05335 --- /dev/null +++ b/sql/yagp_dist.sql @@ -0,0 +1,86 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; + +-- Hash distributed table + +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; +RESET optimizer_enable_direct_dispatch; + +RESET yagpcc.logging_mode; +-- Should see 8 rows. 
+SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +SET yagpcc.logging_mode to 'TBL'; + +-- Scan all segments. +select * from test_hash_dist; + +DROP TABLE test_hash_dist; +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); + +SET yagpcc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; +RESET yagpcc.logging_mode; + +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_select.sql b/sql/yagp_select.sql new file mode 100644 index 00000000000..4038c6b7b63 --- /dev/null +++ b/sql/yagp_select.sql @@ -0,0 +1,67 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; +SET yagpcc.enable_utility TO FALSE; + +-- Basic SELECT tests +SET yagpcc.logging_mode to 'TBL'; + +SELECT 1; +SELECT COUNT(*) FROM generate_series(1,10); + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction test +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SELECT 1; +COMMIT; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- CTE test +SET yagpcc.logging_mode to 'TBL'; + +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + +RESET yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared statement test +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; +DEALLOCATE test_stmt; + +RESET 
yagpcc.logging_mode; +SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/sql/yagp_utf8_trim.sql b/sql/yagp_utf8_trim.sql new file mode 100644 index 00000000000..c0fdcce24a5 --- /dev/null +++ b/sql/yagp_utf8_trim.sql @@ -0,0 +1,43 @@ +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM yagpcc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; + +SET yagpcc.enable TO TRUE; + +-- Test 1: 1 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; +RESET yagpcc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + +-- Test 2: 2 byte chars +SET yagpcc.max_text_size to 19; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; +RESET yagpcc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + +-- Test 3: 4 byte chars +SET yagpcc.max_text_size to 21; +SET yagpcc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; +RESET yagpcc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET yagpcc.max_text_size; +RESET yagpcc.logging_mode; +RESET yagpcc.enable; + +DROP EXTENSION yagp_hooks_collector; diff --git a/sql/yagp_utility.sql b/sql/yagp_utility.sql new file mode 100644 index 00000000000..b4cca6f5421 --- /dev/null +++ b/sql/yagp_utility.sql @@ -0,0 +1,133 @@ +CREATE EXTENSION yagp_hooks_collector; + +CREATE OR REPLACE FUNCTION yagp_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.report_nested_queries TO TRUE; + +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE test_table (a int, b text); +CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Partitioning +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Views and Functions +SET yagpcc.logging_mode to 'TBL'; + +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); + +RESET yagpcc.logging_mode; + 
+SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Transaction Operations +SET yagpcc.logging_mode to 'TBL'; + +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; + +BEGIN; +SAVEPOINT sp2; +ABORT; + +BEGIN; +ROLLBACK; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- DML Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE dml_test (a int, b text); +INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- COPY Operations +SET yagpcc.logging_mode to 'TBL'; + +CREATE TABLE copy_test (a int); +COPY (SELECT 1) TO STDOUT; +DROP TABLE copy_test; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- Prepared Statements and error during execute +SET yagpcc.logging_mode to 'TBL'; + +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +DEALLOCATE test_prep; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +-- GUC Settings +SET yagpcc.logging_mode to 'TBL'; + +SET yagpcc.report_nested_queries TO FALSE; +RESET 
yagpcc.report_nested_queries; + +RESET yagpcc.logging_mode; + +SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT yagpcc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION yagp_status_order(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.report_nested_queries; +RESET yagpcc.enable_utility; diff --git a/src/Config.cpp b/src/Config.cpp index aef09fc7d73..dbd7e25b483 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,9 +16,16 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1024; // in KB -static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = -1; // uninitialized state +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms +static int guc_logging_mode = LOG_MODE_UDS; +static bool guc_enable_utility = false; + +static const struct config_enum_entry logging_mode_options[] = { + {"uds", LOG_MODE_UDS, false /* hidden */}, + {"tbl", LOG_MODE_TBL, false}, + {NULL, 0, false}}; static std::unique_ptr> ignored_users_set = nullptr; @@ -92,9 +99,9 @@ void Config::init() { DefineCustomIntVariable( "yagpcc.max_text_size", - "Make yagpcc trim query texts longer than configured size", NULL, - &guc_max_text_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + "Make yagpcc trim query texts longer than configured size in bytes", NULL, + &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); DefineCustomIntVariable( "yagpcc.max_plan_size", @@ -106,18 +113,31 @@ void Config::init() { "yagpcc.min_analyze_time", "Sets the minimum 
execution time above which plans will be logged.", "Zero prints all plans. -1 turns this feature off.", - &guc_min_analyze_time, -1, -1, INT_MAX, PGC_USERSET, + &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomEnumVariable( + "yagpcc.logging_mode", "Logging mode: UDS or PG Table", NULL, + &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, + NULL); + + DefineCustomBoolVariable( + "yagpcc.enable_utility", "Collect utility statement stats", NULL, + &guc_enable_utility, false, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } std::string Config::uds_path() { return guc_uds_path; } bool Config::enable_analyze() { return guc_enable_analyze; } bool Config::enable_cdbstats() { return guc_enable_cdbstats; } bool Config::enable_collector() { return guc_enable_collector; } +bool Config::enable_utility() { return guc_enable_utility; } bool Config::report_nested_queries() { return guc_report_nested_queries; } -size_t Config::max_text_size() { return guc_max_text_size * 1024; } +size_t Config::max_text_size() { return guc_max_text_size; } size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } int Config::min_analyze_time() { return guc_min_analyze_time; }; +int Config::logging_mode() { return guc_logging_mode; } bool Config::filter_user(std::string username) { if (!ignored_users_set) { diff --git a/src/Config.h b/src/Config.h index eff83f0960a..7501c727a44 100644 --- a/src/Config.h +++ b/src/Config.h @@ -2,6 +2,9 @@ #include +#define LOG_MODE_UDS 0 +#define LOG_MODE_TBL 1 + class Config { public: static void init(); @@ -9,10 +12,12 @@ class Config { static bool enable_analyze(); static bool enable_cdbstats(); static bool enable_collector(); + static bool enable_utility(); static bool filter_user(std::string username); static bool report_nested_queries(); 
static size_t max_text_size(); static size_t max_plan_size(); static int min_analyze_time(); + static int logging_mode(); static void sync(); }; \ No newline at end of file diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 133d409b574..fee435a6dcc 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,6 +1,7 @@ #include "Config.h" #include "UDSConnector.h" #include "memory/gpdbwrappers.h" +#include "log/LogOps.h" #define typeid __typeid extern "C" { @@ -24,10 +25,82 @@ extern "C" { (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ Config::enable_analyze()) -void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { +static bool enable_utility = Config::enable_utility(); + +bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, + bool utility) { + if (!proto_verified) { + return false; + } if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { - return; + return false; + } + + switch (state) { + case QueryState::SUBMIT: + // Cache enable_utility at SUBMIT to ensure consistent behavior at DONE. + // Without caching, a query that sets enable_utility to false from true + // would be accepted at SUBMIT (guc is true) but rejected at DONE (guc + // is false), causing a leak. + enable_utility = Config::enable_utility(); + if (utility && enable_utility == false) { + return false; + } + // Sync config in case current query changes it. + Config::sync(); + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. 
+ if (!need_report_nested_query() && nesting_level > 0) { + QueryKey::register_qkey(query_desc, nesting_level); + return false; + } + if (is_top_level_query(query_desc, nesting_level)) { + nested_timing = 0; + nested_calls = 0; + } + break; + case QueryState::START: + if (!qdesc_submitted(query_desc)) { + collect_query_submit(query_desc, false /* utility */); + } + break; + case QueryState::DONE: + if (utility && enable_utility == false) { + return false; + } + default: + break; + } + + if (filter_query(query_desc)) { + return false; + } + if (!nesting_is_valid(query_desc, nesting_level)) { + return false; + } + + return true; +} + +bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, + const std::string &event, bool utility) { + bool clear_big_fields = false; + switch (Config::logging_mode()) { + case LOG_MODE_UDS: + clear_big_fields = UDSConnector::report_query(req, event); + break; + case LOG_MODE_TBL: + ya_gpdb::insert_log(req, utility); + clear_big_fields = false; + break; + default: + Assert(false); } + return clear_big_fields; +} + +void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData *edata) { auto *query_desc = reinterpret_cast(arg); switch (status) { case METRICS_PLAN_NODE_INITIALIZE: @@ -36,7 +109,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { // TODO break; case METRICS_QUERY_SUBMIT: - collect_query_submit(query_desc); + collect_query_submit(query_desc, utility); break; case METRICS_QUERY_START: // no-op: executor_after_start is enough @@ -50,7 +123,7 @@ void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { case METRICS_QUERY_ERROR: case METRICS_QUERY_CANCELED: case METRICS_INNER_QUERY_DONE: - collect_query_done(query_desc, status); + collect_query_done(query_desc, utility, status, edata); break; default: ereport(FATAL, (errmsg("Unknown query status: %d", status))); @@ -58,18 +131,10 @@ void 
EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg) { } void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { - if (!connector) { - return; - } - if (filter_query(query_desc)) { - return; - } - if (!qdesc_submitted(query_desc)) { - collect_query_submit(query_desc); - } - if (!need_collect(query_desc, nesting_level)) { + if (!verify_query(query_desc, QueryState::START, false /* utility*/)) { return; } + if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; @@ -88,16 +153,14 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { } void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!connector || !need_collect(query_desc, nesting_level)) { - return; - } - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + if (!verify_query(query_desc, QueryState::START, false /* utility */)) { return; } + auto &query = get_query(query_desc); auto query_msg = query.message.get(); *query_msg->mutable_start_time() = current_ts(); - update_query_state(query, QueryState::START); + update_query_state(query, QueryState::START, false /* utility */); set_query_plan(query_msg, query_desc); if (need_collect_analyze()) { // Set up to track total elapsed time during query run. 
@@ -112,52 +175,37 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { } yagpcc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); - if (connector->report_query(*query_msg, "started")) { + if (log_query_req(*query_msg, "started", false /* utility */)) { clear_big_fields(query_msg); } std::swap(stats, *query_msg->mutable_query_metrics()); } void EventSender::executor_end(QueryDesc *query_desc) { - if (!connector || !need_collect(query_desc, nesting_level)) { - return; - } - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { return; } + auto &query = get_query(query_desc); auto *query_msg = query.message.get(); *query_msg->mutable_end_time() = current_ts(); - update_query_state(query, QueryState::END); + update_query_state(query, QueryState::END, false /* utility */); if (is_top_level_query(query_desc, nesting_level)) { set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, nested_timing); } else { set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); } - if (connector->report_query(*query_msg, "ended")) { + if (log_query_req(*query_msg, "ended", false /* utility */)) { clear_big_fields(query_msg); } } -void EventSender::collect_query_submit(QueryDesc *query_desc) { - if (!connector) { - return; - } - Config::sync(); - // Register qkey for a nested query we won't report, - // so we can detect nesting_level > 0 and skip reporting at end/done. 
- if (!need_report_nested_query() && nesting_level > 0) { - QueryKey::register_qkey(query_desc, nesting_level); - return; - } - if (is_top_level_query(query_desc, nesting_level)) { - nested_timing = 0; - nested_calls = 0; - } - if (!need_collect(query_desc, nesting_level)) { +void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { + if (!verify_query(query_desc, QueryState::SUBMIT, utility)) { return; } + submit_query(query_desc); auto &query = get_query(query_desc); auto *query_msg = query.message.get(); @@ -167,7 +215,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { set_qi_nesting_level(query_msg, nesting_level); set_qi_slice_id(query_msg); set_query_text(query_msg, query_desc); - if (connector->report_query(*query_msg, "submit")) { + if (log_query_req(*query_msg, "submit", utility)) { clear_big_fields(query_msg); } // take initial metrics snapshot so that we can safely take diff afterwards @@ -182,7 +230,8 @@ void EventSender::collect_query_submit(QueryDesc *query_desc) { } void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status) { + QueryMetricsStatus status, bool utility, + ErrorData *edata) { yagpcc::QueryStatus query_status; std::string msg; switch (status) { @@ -211,12 +260,20 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, (errmsg("Unexpected query status in query_done hook: %d", status))); } auto prev_state = query.state; - update_query_state(query, QueryState::DONE, + update_query_state(query, QueryState::DONE, utility, query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); auto query_msg = query.message.get(); query_msg->set_query_status(query_status); if (status == METRICS_QUERY_ERROR) { - set_qi_error_message(query_msg); + bool error_flushed = elog_message() == NULL; + if (error_flushed && edata->message == NULL) { + ereport(WARNING, (errmsg("YAGPCC missing error message"))); + ereport(DEBUG3, + (errmsg("YAGPCC query sourceText: 
%s", query_desc->sourceText))); + } else { + set_qi_error_message(query_msg, + error_flushed ? edata->message : elog_message()); + } } if (prev_state == START) { // We've missed ExecutorEnd call due to query cancel or error. It's @@ -230,12 +287,13 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), &ic_statistics); #endif - connector->report_query(*query_msg, msg); + (void)log_query_req(*query_msg, msg, utility); } -void EventSender::collect_query_done(QueryDesc *query_desc, - QueryMetricsStatus status) { - if (!connector || !need_collect(query_desc, nesting_level)) { +void EventSender::collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, + ErrorData *edata) { + if (!verify_query(query_desc, QueryState::DONE, utility)) { return; } @@ -258,10 +316,7 @@ void EventSender::collect_query_done(QueryDesc *query_desc, } auto &query = get_query(query_desc); - bool report = need_report_nested_query() || - is_top_level_query(query_desc, nesting_level); - if (report) - report_query_done(query_desc, query, status); + report_query_done(query_desc, query, status, utility, edata); if (need_report_nested_query()) update_nested_counters(query_desc); @@ -276,7 +331,7 @@ void EventSender::ic_metrics_collect() { if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { return; } - if (!connector || gp_command_count == 0 || !Config::enable_collector() || + if (!proto_verified || gp_command_count == 0 || !Config::enable_collector() || Config::filter_user(get_user_name())) { return; } @@ -305,15 +360,12 @@ void EventSender::ic_metrics_collect() { } void EventSender::analyze_stats_collect(QueryDesc *query_desc) { - if (!connector || Gp_role != GP_ROLE_DISPATCH) { + if (!verify_query(query_desc, QueryState::END, false /* utility */)) { return; } - if (!need_collect(query_desc, nesting_level)) { + if (Gp_role != GP_ROLE_DISPATCH) { return; } - auto &query = 
get_query(query_desc); - auto *query_msg = query.message.get(); - *query_msg->mutable_end_time() = current_ts(); if (!query_desc->totaltime || !need_collect_analyze()) { return; } @@ -323,14 +375,17 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { double ms = query_desc->totaltime->total * 1000.0; if (ms >= Config::min_analyze_time()) { - set_analyze_plan_text_json(query_desc, query_msg); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + set_analyze_plan_text(query_desc, query_msg); } } EventSender::EventSender() { if (Config::enable_collector()) { try { - connector = new UDSConnector(); + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; } catch (const std::exception &e) { ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); } @@ -342,18 +397,16 @@ EventSender::EventSender() { EventSender::~EventSender() { for (const auto &[qkey, _] : queries) { - ereport(LOG, - (errmsg("YAGPCC query with missing done event: " - "tmid=%d ssid=%d ccnt=%d nlvl=%d", - qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + ereport(LOG, (errmsg("YAGPCC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); } - delete connector; } // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP void EventSender::update_query_state(QueryItem &query, QueryState new_state, - bool success) { + bool utility, bool success) { switch (new_state) { case QueryState::SUBMIT: Assert(false); @@ -372,7 +425,7 @@ void EventSender::update_query_state(QueryItem &query, QueryState new_state, query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: - Assert(query.state == QueryState::END || !success); + Assert(query.state == QueryState::END || !success || utility); query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); break; default: diff --git 
a/src/EventSender.h b/src/EventSender.h index 4071d580ff9..4afdf1e14a4 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -87,7 +87,8 @@ class EventSender { void executor_before_start(QueryDesc *query_desc, int eflags); void executor_after_start(QueryDesc *query_desc, int eflags); void executor_end(QueryDesc *query_desc); - void query_metrics_collect(QueryMetricsStatus status, void *arg); + void query_metrics_collect(QueryMetricsStatus status, void *arg, bool utility, + ErrorData *edata = NULL); void ic_metrics_collect(); void analyze_stats_collect(QueryDesc *query_desc); void incr_depth() { nesting_level++; } @@ -105,18 +106,23 @@ class EventSender { explicit QueryItem(QueryState st); }; - void update_query_state(QueryItem &query, QueryState new_state, + static bool log_query_req(const yagpcc::SetQueryReq &req, + const std::string &event, bool utility); + bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); + void update_query_state(QueryItem &query, QueryState new_state, bool utility, bool success = true); QueryItem &get_query(QueryDesc *query_desc); void submit_query(QueryDesc *query_desc); - void collect_query_submit(QueryDesc *query_desc); + void collect_query_submit(QueryDesc *query_desc, bool utility); void report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status); - void collect_query_done(QueryDesc *query_desc, QueryMetricsStatus status); + QueryMetricsStatus status, bool utility, + ErrorData *edata = NULL); + void collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata = NULL); void update_nested_counters(QueryDesc *query_desc); bool qdesc_submitted(QueryDesc *query_desc); - UDSConnector *connector = nullptr; + bool proto_verified = false; int nesting_level = 0; int64_t nested_calls = 0; double nested_timing = 0; diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 929f0cf2681..fc58112bfaa 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -79,8 
+79,3 @@ bool filter_query(QueryDesc *query_desc) { return gp_command_count == 0 || query_desc->sourceText == nullptr || !Config::enable_collector() || Config::filter_user(get_user_name()); } - -bool need_collect(QueryDesc *query_desc, int nesting_level) { - return !filter_query(query_desc) && - nesting_is_valid(query_desc, nesting_level); -} diff --git a/src/PgUtils.h b/src/PgUtils.h index ceb07c2e8e5..02f084c597a 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -12,6 +12,3 @@ bool is_top_level_query(QueryDesc *query_desc, int nesting_level); bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); bool need_report_nested_query(); bool filter_query(QueryDesc *query_desc); -bool need_collect(QueryDesc *query_desc, int nesting_level); -ExplainState get_explain_state(QueryDesc *query_desc, bool costs); -ExplainState get_analyze_state_json(QueryDesc *query_desc, bool analyze); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 4655433c806..f28714da6ec 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -24,6 +24,18 @@ extern "C" { #include #include +namespace { +constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); +constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); +constexpr uint8_t UTF8_MAX_SYMBOL_BYTES = 4; + +// Returns true if byte is the starting byte of utf8 +// character, false if byte is the continuation (10xxxxxx). 
+inline bool utf8_start_byte(uint8_t byte) { + return (byte & UTF8_CONTINUATION_BYTE_MASK) != UTF8_CONTINUATION_BYTE; +} +} // namespace + google::protobuf::Timestamp current_ts() { google::protobuf::Timestamp current_ts; struct timeval tv; @@ -46,9 +58,26 @@ void set_segment_key(yagpcc::SegmentKey *key) { key->set_segindex(GpIdentity.segindex); } -inline std::string char_to_trimmed_str(const char *str, size_t len, - size_t lim) { - return std::string(str, std::min(len, lim)); +std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { + if (unlikely(str == nullptr)) { + return std::string(); + } + if (likely(len <= lim || GetDatabaseEncoding() != PG_UTF8)) { + return std::string(str, std::min(len, lim)); + } + + // Handle trimming of utf8 correctly, do not cut multi-byte characters. + size_t cut_pos = lim; + size_t visited_bytes = 1; + while (visited_bytes < UTF8_MAX_SYMBOL_BYTES && cut_pos > 0) { + if (utf8_start_byte(static_cast(str[cut_pos]))) { + break; + } + ++visited_bytes; + --cut_pos; + } + + return std::string(str, cut_pos); } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { @@ -61,10 +90,10 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { - *qi->mutable_plan_text() = char_to_trimmed_str(es.str->data, es.str->len, - Config::max_plan_size()); + *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, + Config::max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = char_to_trimmed_str( + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( norm_plan->data, norm_plan->len, Config::max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); @@ -79,11 +108,11 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { void 
set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = char_to_trimmed_str( + *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), Config::max_text_size()); char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = char_to_trimmed_str( + *qi->mutable_template_query_text() = trim_str_shrink_utf8( norm_query, strlen(norm_query), Config::max_text_size()); } } @@ -103,7 +132,8 @@ void set_query_info(yagpcc::SetQueryReq *req) { if (Gp_session_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->set_username(get_user_name()); - qi->set_databasename(get_db_name()); + if (IsTransactionState()) + qi->set_databasename(get_db_name()); qi->set_rsgname(get_rg_name()); } } @@ -118,11 +148,10 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(yagpcc::SetQueryReq *req) { +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg) { auto aqi = req->mutable_add_info(); - auto error = elog_message(); *aqi->mutable_error_message() = - char_to_trimmed_str(error, strlen(error), Config::max_text_size()); + trim_str_shrink_utf8(err_msg, strlen(err_msg), Config::max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -226,8 +255,7 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text_json(QueryDesc *query_desc, - yagpcc::SetQueryReq *req) { +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. 
if (!IsTransactionState() || !query_desc->plannedstmt) { @@ -235,7 +263,7 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, } MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = ya_gpdb::get_analyze_state_json( + ExplainState es = ya_gpdb::get_analyze_state( query_desc, query_desc->instrument_options && Config::enable_analyze()); ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { @@ -243,14 +271,9 @@ void set_analyze_plan_text_json(QueryDesc *query_desc, if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { es.str->data[--es.str->len] = '\0'; } - // Convert JSON array to JSON object. - if (es.str->len > 0) { - es.str->data[0] = '{'; - es.str->data[es.str->len - 1] = '}'; - } - auto trimmed_analyze = - char_to_trimmed_str(es.str->data, es.str->len, Config::max_plan_size()); + auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, + Config::max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); ya_gpdb::pfree(es.str->data); } -} \ No newline at end of file +} diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 8287b3de7ea..725a634f765 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -12,12 +12,11 @@ void clear_big_fields(yagpcc::SetQueryReq *req); void set_query_info(yagpcc::SetQueryReq *req); void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); void set_qi_slice_id(yagpcc::SetQueryReq *req); -void set_qi_error_message(yagpcc::SetQueryReq *req); +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text_json(QueryDesc *query_desc, - yagpcc::SetQueryReq 
*message); \ No newline at end of file +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message); \ No newline at end of file diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index f8c4586126d..b6af303218d 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -2,6 +2,7 @@ #include "Config.h" #include "YagpStat.h" #include "memory/gpdbwrappers.h" +#include "log/LogOps.h" #include #include @@ -16,8 +17,6 @@ extern "C" { #include "postgres.h" } -UDSConnector::UDSConnector() { GOOGLE_PROTOBUF_VERIFY_VERSION; } - static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { ereport(LOG, diff --git a/src/UDSConnector.h b/src/UDSConnector.h index 67504fc8529..f0dfcb77a3f 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -4,6 +4,6 @@ class UDSConnector { public: - UDSConnector(); - bool report_query(const yagpcc::SetQueryReq &req, const std::string &event); + bool static report_query(const yagpcc::SetQueryReq &req, + const std::string &event); }; \ No newline at end of file diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index d76b7c64e10..07ac511d546 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -32,6 +32,7 @@ static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = #ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; #endif +static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, @@ -44,6 +45,10 @@ static void ya_ic_teardown_hook(ChunkTransportState *transportStates, #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif +static void ya_process_utility_hook(Node *parsetree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, DestReceiver *dest, + char 
*completionTag); static EventSender *sender = nullptr; @@ -85,6 +90,8 @@ void hooks_init() { analyze_stats_collect_hook = ya_analyze_stats_collect_hook; #endif stat_statements_parser_init(); + previous_ProcessUtility_hook = ProcessUtility_hook; + ProcessUtility_hook = ya_process_utility_hook; } void hooks_deinit() { @@ -104,6 +111,7 @@ void hooks_deinit() { delete sender; } YagpStat::deinit(); + ProcessUtility_hook = previous_ProcessUtility_hook; } void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { @@ -165,7 +173,8 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { } void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { - cpp_call(get_sender(), &EventSender::query_metrics_collect, status, arg); + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, + arg /* queryDesc */, false /* utility */, (ErrorData *)NULL); if (previous_query_info_collect_hook) { (*previous_query_info_collect_hook)(status, arg); } @@ -189,6 +198,55 @@ void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { } #endif +static void ya_process_utility_hook(Node *parsetree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, DestReceiver *dest, + char *completionTag) { + /* Project utility data on QueryDesc to use existing logic */ + QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc)); + query_desc->sourceText = queryString; + + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_SUBMIT, (void *)query_desc, true /* utility */, + (ErrorData *)NULL); + + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ProcessUtility_hook) { + (*previous_ProcessUtility_hook)(parsetree, queryString, context, params, + dest, completionTag); + } else { + standard_ProcessUtility(parsetree, queryString, context, params, dest, + completionTag); + } + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_DONE, + (void *)query_desc, true /* utility 
*/, (ErrorData *)NULL); + + pfree(query_desc); + } + PG_CATCH(); + { + ErrorData *edata; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + FlushErrorState(); + MemoryContextSwitchTo(oldctx); + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_ERROR, + (void *)query_desc, true /* utility */, edata); + + pfree(query_desc); + ReThrowError(edata); + } + PG_END_TRY(); +} + static void check_stats_loaded() { if (!YagpStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index c158f42cf1d..cfabf39485e 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -9,6 +9,9 @@ extern void hooks_deinit(); extern void yagp_functions_reset(); extern Datum yagp_functions_get(FunctionCallInfo fcinfo); +extern void init_log(); +extern void truncate_log(); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp new file mode 100644 index 00000000000..0868dd9fc1c --- /dev/null +++ b/src/log/LogOps.cpp @@ -0,0 +1,131 @@ +#include "protos/yagpcc_set_service.pb.h" + +#include "LogOps.h" +#include "LogSchema.h" + +extern "C" { +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_type.h" +#include "cdb/cdbvars.h" +#include "commands/tablecmds.h" +#include "funcapi.h" +#include "fmgr.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" +} + +void init_log() { + Oid namespaceId; + Oid relationId; + ObjectAddress tableAddr; + ObjectAddress schemaAddr; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + + /* Create 
table */ + relationId = heap_create_with_catalog( + log_relname.data() /* relname */, namespaceId /* namespace */, + 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, + InvalidOid /* reloftypeid */, GetUserId() /* owner */, + DescribeTuple() /* rel tuple */, NIL, InvalidOid /* relam */, + RELKIND_RELATION, RELPERSISTENCE_PERMANENT, RELSTORAGE_HEAP, false, false, + true, 0, ONCOMMIT_NOOP, NULL /* GP Policy */, (Datum)0, + false /* use_user_acl */, true, true, false /* valid_opts */, + false /* is_part_child */, false /* is part parent */, NULL); + + /* Make the table visible */ + CommandCounterIncrement(); + + /* Record dependency of the table on the schema */ + if (OidIsValid(relationId) && OidIsValid(namespaceId)) { + ObjectAddressSet(tableAddr, RelationRelationId, relationId); + ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId); + + /* Table can be dropped only via DROP EXTENSION */ + recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION); + } else { + ereport(NOTICE, (errmsg("YAGPCC failed to create log table or schema"))); + } + + /* Make changes visible */ + CommandCounterIncrement(); +} + +void insert_log(const yagpcc::SetQueryReq &req, bool utility) { + Oid namespaceId; + Oid relationId; + Relation rel; + HeapTuple tuple; + + /* Return if xact is not valid (needed for catalog lookups). 
*/ + if (!IsTransactionState()) { + return; + } + + /* Return if extension was not loaded */ + namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */); + if (!OidIsValid(namespaceId)) { + return; + } + + /* Return if the table was not created yet */ + relationId = get_relname_relid(log_relname.data(), namespaceId); + if (!OidIsValid(relationId)) { + return; + } + + bool nulls[natts_yagp_log]; + Datum values[natts_yagp_log]; + + memset(nulls, true, sizeof(nulls)); + memset(values, 0, sizeof(values)); + + extract_query_req(req, "", values, nulls); + nulls[attnum_yagp_log_utility] = false; + values[attnum_yagp_log_utility] = BoolGetDatum(utility); + + rel = heap_open(relationId, RowExclusiveLock); + + /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls + * back or aborts */ + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + frozen_heap_insert(rel, tuple); + + heap_freetuple(tuple); + /* Keep lock on rel until end of xact */ + heap_close(rel, NoLock); + + /* Make changes visible */ + CommandCounterIncrement(); +} + +void truncate_log() { + Oid namespaceId; + Oid relationId; + Relation relation; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + relationId = get_relname_relid(log_relname.data(), namespaceId); + + relation = heap_open(relationId, AccessExclusiveLock); + + /* Truncate the main table */ + heap_truncate_one_rel(relation); + + /* Keep lock on rel until end of xact */ + heap_close(relation, NoLock); + + /* Make changes visible */ + CommandCounterIncrement(); +} \ No newline at end of file diff --git a/src/log/LogOps.h b/src/log/LogOps.h new file mode 100644 index 00000000000..bad03d09a8f --- /dev/null +++ b/src/log/LogOps.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +extern "C" { +#include "postgres.h" +#include "fmgr.h" +} + +extern "C" { +/* CREATE TABLE yagpcc.__log (...); */ +void init_log(); + +/* TRUNCATE yagpcc.__log */ +void truncate_log(); +} + +/* INSERT 
INTO yagpcc.__log VALUES (...) */ +void insert_log(const yagpcc::SetQueryReq &req, bool utility); diff --git a/src/log/LogSchema.cpp b/src/log/LogSchema.cpp new file mode 100644 index 00000000000..335a3103cfd --- /dev/null +++ b/src/log/LogSchema.cpp @@ -0,0 +1,135 @@ +#include "google/protobuf/reflection.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/timestamp.pb.h" + +#include "LogSchema.h" + +const std::unordered_map &proto_name_to_col_idx() { + static const auto name_col_idx = [] { + std::unordered_map map; + map.reserve(log_tbl_desc.size()); + + for (size_t idx = 0; idx < natts_yagp_log; ++idx) { + map.emplace(log_tbl_desc[idx].proto_field_name, idx); + } + + return map; + }(); + return name_col_idx; +} + +TupleDesc DescribeTuple() { + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log, false); + + for (size_t anum = 1; anum <= natts_yagp_log; ++anum) { + TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), + log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, + 0 /* attdim */); + } + + return tupdesc; +} + +Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts) { + TimestampTz pgtimestamp = + (TimestampTz)ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000); + pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY; + return TimestampTzGetDatum(pgtimestamp); +} + +Datum field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg) { + using namespace google::protobuf; + + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + return Int32GetDatum(reflection->GetInt32(msg, field)); + case FieldDescriptor::CPPTYPE_INT64: + return Int64GetDatum(reflection->GetInt64(msg, field)); + case FieldDescriptor::CPPTYPE_UINT32: + return Int64GetDatum(reflection->GetUInt32(msg, field)); + case FieldDescriptor::CPPTYPE_UINT64: + return Int64GetDatum( + 
static_cast(reflection->GetUInt64(msg, field))); + case FieldDescriptor::CPPTYPE_DOUBLE: + return Float8GetDatum(reflection->GetDouble(msg, field)); + case FieldDescriptor::CPPTYPE_FLOAT: + return Float4GetDatum(reflection->GetFloat(msg, field)); + case FieldDescriptor::CPPTYPE_BOOL: + return BoolGetDatum(reflection->GetBool(msg, field)); + case FieldDescriptor::CPPTYPE_ENUM: + return CStringGetTextDatum(reflection->GetEnum(msg, field)->name().data()); + case FieldDescriptor::CPPTYPE_STRING: + return CStringGetTextDatum(reflection->GetString(msg, field).c_str()); + default: + return (Datum)0; + } +} + +void process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls) { + + auto proto_idx_map = proto_name_to_col_idx(); + auto it = proto_idx_map.find(field_name); + + if (it == proto_idx_map.end()) { + ereport(NOTICE, + (errmsg("YAGPCC protobuf field %s is not registered in log table", + field_name.c_str()))); + return; + } + + int idx = it->second; + + if (!reflection->HasField(msg, field)) { + nulls[idx] = true; + return; + } + + if (field->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() == "google.protobuf.Timestamp") { + const auto &ts = static_cast( + reflection->GetMessage(msg, field)); + values[idx] = protots_to_timestamptz(ts); + } else { + values[idx] = field_to_datum(field, reflection, msg); + } + nulls[idx] = false; + + return; +} + +void extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls) { + using namespace google::protobuf; + + const Descriptor *descriptor = msg.GetDescriptor(); + const Reflection *reflection = msg.GetReflection(); + + for (int i = 0; i < descriptor->field_count(); ++i) { + const FieldDescriptor *field = descriptor->field(i); + + // For now, we do not log any repeated 
fields plus they need special + // treatment. + if (field->is_repeated()) { + continue; + } + + std::string curr_pref = prefix.empty() ? "" : prefix + "."; + std::string field_name = curr_pref + field->name().data(); + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() != "google.protobuf.Timestamp") { + + if (reflection->HasField(msg, field)) { + const Message &nested = reflection->GetMessage(msg, field); + extract_query_req(nested, field_name, values, nulls); + } + } else { + process_field(field, reflection, msg, field_name, values, nulls); + } + } +} diff --git a/src/log/LogSchema.h b/src/log/LogSchema.h new file mode 100644 index 00000000000..f713c1e9b0e --- /dev/null +++ b/src/log/LogSchema.h @@ -0,0 +1,166 @@ +#pragma once + +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "catalog/pg_type.h" +#include "utils/timestamp.h" +#include "utils/builtins.h" +} + +namespace google { +namespace protobuf { +class FieldDescriptor; +class Message; +class Reflection; +class Timestamp; +} // namespace protobuf +} // namespace google + +inline constexpr std::string_view schema_name = "yagpcc"; +inline constexpr std::string_view log_relname = "__log"; + +struct LogDesc { + std::string_view pg_att_name; + std::string_view proto_field_name; + Oid type_oid; +}; + +/* + * Definition of the log table structure. + * + * System stats collected as %lu (unsigned) may + * overflow INT8OID (signed), but this is acceptable. 
+ */ +/* clang-format off */ +inline constexpr std::array log_tbl_desc = { + /* 8-byte aligned types first - Query Info */ + LogDesc{"query_id", "query_info.query_id", INT8OID}, + LogDesc{"plan_id", "query_info.plan_id", INT8OID}, + LogDesc{"nested_level", "add_info.nested_level", INT8OID}, + LogDesc{"slice_id", "add_info.slice_id", INT8OID}, + /* 8-byte aligned types - System Stats */ + LogDesc{"systemstat_vsize", "query_metrics.systemStat.vsize", INT8OID}, + LogDesc{"systemstat_rss", "query_metrics.systemStat.rss", INT8OID}, + LogDesc{"systemstat_vmsizekb", "query_metrics.systemStat.VmSizeKb", INT8OID}, + LogDesc{"systemstat_vmpeakkb", "query_metrics.systemStat.VmPeakKb", INT8OID}, + LogDesc{"systemstat_rchar", "query_metrics.systemStat.rchar", INT8OID}, + LogDesc{"systemstat_wchar", "query_metrics.systemStat.wchar", INT8OID}, + LogDesc{"systemstat_syscr", "query_metrics.systemStat.syscr", INT8OID}, + LogDesc{"systemstat_syscw", "query_metrics.systemStat.syscw", INT8OID}, + LogDesc{"systemstat_read_bytes", "query_metrics.systemStat.read_bytes", INT8OID}, + LogDesc{"systemstat_write_bytes", "query_metrics.systemStat.write_bytes", INT8OID}, + LogDesc{"systemstat_cancelled_write_bytes", "query_metrics.systemStat.cancelled_write_bytes", INT8OID}, + /* 8-byte aligned types - Metric Instrumentation */ + LogDesc{"instrumentation_ntuples", "query_metrics.instrumentation.ntuples", INT8OID}, + LogDesc{"instrumentation_nloops", "query_metrics.instrumentation.nloops", INT8OID}, + LogDesc{"instrumentation_tuplecount", "query_metrics.instrumentation.tuplecount", INT8OID}, + LogDesc{"instrumentation_shared_blks_hit", "query_metrics.instrumentation.shared_blks_hit", INT8OID}, + LogDesc{"instrumentation_shared_blks_read", "query_metrics.instrumentation.shared_blks_read", INT8OID}, + LogDesc{"instrumentation_shared_blks_dirtied", "query_metrics.instrumentation.shared_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_shared_blks_written", 
"query_metrics.instrumentation.shared_blks_written", INT8OID}, + LogDesc{"instrumentation_local_blks_hit", "query_metrics.instrumentation.local_blks_hit", INT8OID}, + LogDesc{"instrumentation_local_blks_read", "query_metrics.instrumentation.local_blks_read", INT8OID}, + LogDesc{"instrumentation_local_blks_dirtied", "query_metrics.instrumentation.local_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_local_blks_written", "query_metrics.instrumentation.local_blks_written", INT8OID}, + LogDesc{"instrumentation_temp_blks_read", "query_metrics.instrumentation.temp_blks_read", INT8OID}, + LogDesc{"instrumentation_temp_blks_written", "query_metrics.instrumentation.temp_blks_written", INT8OID}, + LogDesc{"instrumentation_inherited_calls", "query_metrics.instrumentation.inherited_calls", INT8OID}, + /* 8-byte aligned types - Network Stats */ + LogDesc{"instrumentation_sent_total_bytes", "query_metrics.instrumentation.sent.total_bytes", INT8OID}, + LogDesc{"instrumentation_sent_tuple_bytes", "query_metrics.instrumentation.sent.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_sent_chunks", "query_metrics.instrumentation.sent.chunks", INT8OID}, + LogDesc{"instrumentation_received_total_bytes", "query_metrics.instrumentation.received.total_bytes", INT8OID}, + LogDesc{"instrumentation_received_tuple_bytes", "query_metrics.instrumentation.received.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_received_chunks", "query_metrics.instrumentation.received.chunks", INT8OID}, + /* 8-byte aligned types - Interconnect Stats and spilled bytes */ + LogDesc{"interconnect_total_recv_queue_size", "query_metrics.instrumentation.interconnect.total_recv_queue_size", INT8OID}, + LogDesc{"interconnect_recv_queue_size_counting_time", "query_metrics.instrumentation.interconnect.recv_queue_size_counting_time", INT8OID}, + LogDesc{"interconnect_total_capacity", "query_metrics.instrumentation.interconnect.total_capacity", INT8OID}, + LogDesc{"interconnect_capacity_counting_time", 
"query_metrics.instrumentation.interconnect.capacity_counting_time", INT8OID}, + LogDesc{"interconnect_total_buffers", "query_metrics.instrumentation.interconnect.total_buffers", INT8OID}, + LogDesc{"interconnect_buffer_counting_time", "query_metrics.instrumentation.interconnect.buffer_counting_time", INT8OID}, + LogDesc{"interconnect_active_connections_num", "query_metrics.instrumentation.interconnect.active_connections_num", INT8OID}, + LogDesc{"interconnect_retransmits", "query_metrics.instrumentation.interconnect.retransmits", INT8OID}, + LogDesc{"interconnect_startup_cached_pkt_num", "query_metrics.instrumentation.interconnect.startup_cached_pkt_num", INT8OID}, + LogDesc{"interconnect_mismatch_num", "query_metrics.instrumentation.interconnect.mismatch_num", INT8OID}, + LogDesc{"interconnect_crc_errors", "query_metrics.instrumentation.interconnect.crc_errors", INT8OID}, + LogDesc{"interconnect_snd_pkt_num", "query_metrics.instrumentation.interconnect.snd_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_pkt_num", "query_metrics.instrumentation.interconnect.recv_pkt_num", INT8OID}, + LogDesc{"interconnect_disordered_pkt_num", "query_metrics.instrumentation.interconnect.disordered_pkt_num", INT8OID}, + LogDesc{"interconnect_duplicated_pkt_num", "query_metrics.instrumentation.interconnect.duplicated_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_ack_num", "query_metrics.instrumentation.interconnect.recv_ack_num", INT8OID}, + LogDesc{"interconnect_status_query_msg_num", "query_metrics.instrumentation.interconnect.status_query_msg_num", INT8OID}, + LogDesc{"spill_totalbytes", "query_metrics.spill.totalBytes", INT8OID}, + /* 8-byte aligned types - Float and Timestamp */ + LogDesc{"systemstat_runningtimeseconds", "query_metrics.systemStat.runningTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_usertimeseconds", "query_metrics.systemStat.userTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_kerneltimeseconds", "query_metrics.systemStat.kernelTimeSeconds", FLOAT8OID}, + 
LogDesc{"instrumentation_firsttuple", "query_metrics.instrumentation.firsttuple", FLOAT8OID}, + LogDesc{"instrumentation_startup", "query_metrics.instrumentation.startup", FLOAT8OID}, + LogDesc{"instrumentation_total", "query_metrics.instrumentation.total", FLOAT8OID}, + LogDesc{"instrumentation_blk_read_time", "query_metrics.instrumentation.blk_read_time", FLOAT8OID}, + LogDesc{"instrumentation_blk_write_time", "query_metrics.instrumentation.blk_write_time", FLOAT8OID}, + LogDesc{"instrumentation_startup_time", "query_metrics.instrumentation.startup_time", FLOAT8OID}, + LogDesc{"instrumentation_inherited_time", "query_metrics.instrumentation.inherited_time", FLOAT8OID}, + LogDesc{"datetime", "datetime", TIMESTAMPTZOID}, + LogDesc{"submit_time", "submit_time", TIMESTAMPTZOID}, + LogDesc{"start_time", "start_time", TIMESTAMPTZOID}, + LogDesc{"end_time", "end_time", TIMESTAMPTZOID}, + /* 4-byte aligned types - Query Key */ + LogDesc{"tmid", "query_key.tmid", INT4OID}, + LogDesc{"ssid", "query_key.ssid", INT4OID}, + LogDesc{"ccnt", "query_key.ccnt", INT4OID}, + /* 4-byte aligned types - Segment Key */ + LogDesc{"dbid", "segment_key.dbid", INT4OID}, + LogDesc{"segid", "segment_key.segindex", INT4OID}, + LogDesc{"spill_filecount", "query_metrics.spill.fileCount", INT4OID}, + /* Variable-length types - Query Info */ + LogDesc{"generator", "query_info.generator", TEXTOID}, + LogDesc{"query_text", "query_info.query_text", TEXTOID}, + LogDesc{"plan_text", "query_info.plan_text", TEXTOID}, + LogDesc{"template_query_text", "query_info.template_query_text", TEXTOID}, + LogDesc{"template_plan_text", "query_info.template_plan_text", TEXTOID}, + LogDesc{"user_name", "query_info.userName", TEXTOID}, + LogDesc{"database_name", "query_info.databaseName", TEXTOID}, + LogDesc{"rsgname", "query_info.rsgname", TEXTOID}, + LogDesc{"analyze_text", "query_info.analyze_text", TEXTOID}, + LogDesc{"error_message", "add_info.error_message", TEXTOID}, + LogDesc{"query_status", "query_status", 
TEXTOID}, + /* Extra field */ + LogDesc{"utility", "", BOOLOID}, +}; +/* clang-format on */ + +inline constexpr size_t natts_yagp_log = log_tbl_desc.size(); +inline constexpr size_t attnum_yagp_log_utility = natts_yagp_log - 1; + +const std::unordered_map &proto_name_to_col_idx(); + +TupleDesc DescribeTuple(); + +Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts); + +Datum field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg); + +/* Process a single proto field and store in values/nulls arrays */ +void process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls); + +/* + * Extracts values from msg into values/nulls arrays. Caller must + * pre-init nulls[] to true (this function does not set nulls + * to true for nested messages if parent message is missing).
+ */ +void extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls); diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 9d579a91a30..0824a3a6808 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -1,4 +1,5 @@ #include "gpdbwrappers.h" +#include "log/LogOps.h" extern "C" { #include "postgres.h" @@ -126,8 +127,8 @@ ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, }); } -ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, - bool analyze) noexcept { +ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, + bool analyze) noexcept { return wrap_noexcept([&]() { ExplainState es; ExplainInitState(&es); @@ -136,7 +137,7 @@ ExplainState ya_gpdb::get_analyze_state_json(QueryDesc *query_desc, es.buffers = es.analyze; es.timing = es.analyze; es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_JSON; + es.format = EXPLAIN_FORMAT_TEXT; ExplainBeginOutput(&es); if (analyze) { ExplainPrintPlan(&es, query_desc); @@ -220,4 +221,8 @@ char *ya_gpdb::get_rg_name_for_id(Oid group_id) { Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); -} \ No newline at end of file +} + +void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req, bool utility) { + return wrap_throw(::insert_log, req, utility); +} diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index ad7ae96c362..8f5f146cc67 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -16,6 +16,10 @@ extern "C" { #include #include +namespace yagpcc { +class SetQueryReq; +} // namespace yagpcc + namespace ya_gpdb { // Functions that call palloc(). 
@@ -27,8 +31,7 @@ char *get_database_name(Oid dbid) noexcept; bool split_identifier_string(char *rawstring, char separator, List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; -ExplainState get_analyze_state_json(QueryDesc *query_desc, - bool analyze) noexcept; +ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; Instrumentation *instr_alloc(size_t n, int instrument_options); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); @@ -38,6 +41,7 @@ void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query); StringInfo gen_normplan(const char *executionPlan); char *get_rg_name_for_id(Oid group_id); +void insert_log(const yagpcc::SetQueryReq &req, bool utility); // Palloc-free functions. void pfree(void *pointer) noexcept; diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 2a9e7328e6d..9db73638b24 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -10,6 +10,8 @@ void _PG_init(void); void _PG_fini(void); PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); PG_FUNCTION_INFO_V1(yagp_stat_messages); +PG_FUNCTION_INFO_V1(yagp_init_log); +PG_FUNCTION_INFO_V1(yagp_truncate_log); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { @@ -30,4 +32,14 @@ Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { Datum yagp_stat_messages(PG_FUNCTION_ARGS) { return yagp_functions_get(fcinfo); -} \ No newline at end of file +} + +Datum yagp_init_log(PG_FUNCTION_ARGS) { + init_log(); + PG_RETURN_VOID(); +} + +Datum yagp_truncate_log(PG_FUNCTION_ARGS) { + truncate_log(); + PG_RETURN_VOID(); +} diff --git a/yagp_hooks_collector--1.0--1.1.sql b/yagp_hooks_collector--1.0--1.1.sql new file mode 100644 index 00000000000..959d4f235d1 --- /dev/null +++ b/yagp_hooks_collector--1.0--1.1.sql @@ -0,0 +1,113 @@ +/* yagp_hooks_collector--1.0--1.1.sql */ + +-- complain if script is sourced in psql, 
rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION yagp_hooks_collector UPDATE TO '1.1'" to load this file. \quit + +CREATE SCHEMA yagpcc; + +-- Unlink existing objects from extension. +ALTER EXTENSION yagp_hooks_collector DROP VIEW yagp_stat_messages; +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION yagp_stat_messages_reset(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_master(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Now drop the objects. +DROP VIEW yagp_stat_messages; +DROP FUNCTION yagp_stat_messages_reset(); +DROP FUNCTION __yagp_stat_messages_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_f_on_master(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); +DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); + +-- Recreate functions and view in new schema. 
+CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +-- Create new objects. +CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. 
+SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments + ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/sql/yagp_hooks_collector--1.0.sql b/yagp_hooks_collector--1.0.sql similarity index 99% rename from sql/yagp_hooks_collector--1.0.sql rename to yagp_hooks_collector--1.0.sql index 88bbe4e0dc7..7ab4e1b2fb7 100644 --- a/sql/yagp_hooks_collector--1.0.sql +++ b/yagp_hooks_collector--1.0.sql @@ -15,7 +15,7 @@ LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagp_stat_messages_reset() RETURNS void -AS +AS $$ SELECT __yagp_stat_messages_reset_f_on_master(); SELECT __yagp_stat_messages_reset_f_on_segments(); diff --git a/yagp_hooks_collector--1.1.sql b/yagp_hooks_collector--1.1.sql new file mode 100644 index 00000000000..657720a88f2 --- /dev/null +++ b/yagp_hooks_collector--1.1.sql @@ -0,0 +1,95 @@ +/* yagp_hooks_collector--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit + +CREATE SCHEMA yagpcc; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.stat_messages_reset() +RETURNS void +AS +$$ + SELECT yagpcc.__stat_messages_reset_f_on_master(); + SELECT yagpcc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'yagp_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW yagpcc.stat_messages AS + SELECT C.* + FROM yagpcc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM yagpcc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +CREATE FUNCTION yagpcc.__init_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__init_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside yagpcc schema. 
+SELECT yagpcc.__init_log_on_master(); +SELECT yagpcc.__init_log_on_segments(); + +CREATE VIEW yagpcc.log AS + SELECT * FROM yagpcc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('yagpcc.__log') -- segments +ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION yagpcc.__truncate_log_on_master() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__truncate_log_on_segments() +RETURNS void +AS 'MODULE_PATHNAME', 'yagp_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION yagpcc.truncate_log() +RETURNS void AS $$ +BEGIN + PERFORM yagpcc.__truncate_log_on_master(); + PERFORM yagpcc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/yagp_hooks_collector.control b/yagp_hooks_collector.control index b5539dd6462..cb5906a1302 100644 --- a/yagp_hooks_collector.control +++ b/yagp_hooks_collector.control @@ -1,5 +1,5 @@ # yagp_hooks_collector extension comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/yagp_hooks_collector' superuser = true From 47d0d9cc85b3e3cc8a5458ab6786d8b507821a4d Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 10:17:05 +0300 Subject: [PATCH 35/49] [yagp_hooks_collector] Port backend infrastructure and adapt for Cloudberry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port GpscQueryKey to QueryDesc and workfile spill counters to workfile_mgr.c from gpdb. Update for Cloudberry API changes: ExplainInitState→NewExplainState, gpmon_gettmid→gp_gettmid, Gp_session_role→Gp_role, signature changes in standard_ExecutorRun, standard_ProcessUtility, InstrAlloc, CreateTemplateTupleDesc. Change test functions to SRF. Remove redundant jumbling copies. 
--- expected/yagp_cursors.out | 12 +- expected/yagp_dist.out | 12 +- expected/yagp_select.out | 12 +- expected/yagp_utility.out | 52 +- src/EventSender.cpp | 2 +- src/EventSender.h | 4 +- src/PgUtils.cpp | 2 +- src/ProtoUtils.cpp | 12 +- src/UDSConnector.cpp | 10 +- src/backend/tcop/pquery.c | 3 + .../utils/workfile_manager/workfile_mgr.c | 24 + src/hook_wrappers.cpp | 34 +- src/include/executor/execdesc.h | 11 + src/include/utils/workfile_mgr.h | 4 + src/log/LogOps.cpp | 12 +- src/log/LogSchema.cpp | 2 +- src/memory/gpdbwrappers.cpp | 48 +- src/memory/gpdbwrappers.h | 2 +- .../pg_stat_statements_ya_parser.c | 760 +----------------- src/yagp_hooks_collector.c | 34 +- yagp_hooks_collector--1.0--1.1.sql | 16 +- yagp_hooks_collector--1.0.sql | 6 +- yagp_hooks_collector--1.1.sql | 16 +- 23 files changed, 217 insertions(+), 873 deletions(-) diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out index 9587c00b550..d251ddd3e1c 100644 --- a/expected/yagp_cursors.out +++ b/expected/yagp_cursors.out @@ -40,8 +40,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- DECLARE WITH HOLD SET yagpcc.logging_mode to 'TBL'; @@ -74,8 +73,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- ROLLBACK SET yagpcc.logging_mode to 'TBL'; @@ -105,8 +103,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- FETCH SET yagpcc.logging_mode to 'TBL'; @@ -155,8 +152,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/expected/yagp_dist.out 
b/expected/yagp_dist.out index ebaf839601d..5fd5ea5fb3e 100644 --- a/expected/yagp_dist.out +++ b/expected/yagp_dist.out @@ -46,8 +46,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) SET yagpcc.logging_mode to 'TBL'; -- Scan all segments. @@ -83,8 +82,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Replicated table CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ @@ -128,8 +126,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Partially distributed table (2 numsegments) SET allow_system_table_mods = ON; @@ -167,8 +164,7 @@ SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_statu SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/expected/yagp_select.out b/expected/yagp_select.out index 4c4a0218150..b6e18dc862f 100644 --- a/expected/yagp_select.out +++ b/expected/yagp_select.out @@ -46,8 +46,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Transaction test SET yagpcc.logging_mode to 'TBL'; @@ -72,8 +71,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- CTE test SET yagpcc.logging_mode to 'TBL'; @@ -102,8 +100,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Prepared statement test SET yagpcc.logging_mode to 'TBL'; @@ -128,8 +125,7 @@ SELECT 
segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/expected/yagp_utility.out b/expected/yagp_utility.out index 03c17713575..057f7d7a556 100644 --- a/expected/yagp_utility.out +++ b/expected/yagp_utility.out @@ -17,7 +17,7 @@ SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.logging_mode to 'TBL'; CREATE TABLE test_table (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE INDEX test_idx ON test_table(a); ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; @@ -41,8 +41,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Partitioning SET yagpcc.logging_mode to 'TBL'; @@ -50,34 +49,16 @@ CREATE TABLE pt_test (a int, b int) DISTRIBUTED BY (a) PARTITION BY RANGE (a) (START (0) END (100) EVERY (50)); -NOTICE: CREATE TABLE will create partition "pt_test_1_prt_1" for table "pt_test" -NOTICE: CREATE TABLE will create partition "pt_test_1_prt_2" for table "pt_test" DROP TABLE pt_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| @@ -85,13 +66,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 
AND util -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT -(10 rows) +(6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Views and Functions SET yagpcc.logging_mode to 'TBL'; @@ -118,8 +98,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Transaction Operations SET yagpcc.logging_mode to 'TBL'; @@ -159,13 +138,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- DML Operations SET yagpcc.logging_mode to 'TBL'; CREATE TABLE dml_test (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO dml_test VALUES (1, 'test'); UPDATE dml_test SET b = 'updated' WHERE a = 1; @@ -186,13 +164,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- COPY Operations SET yagpcc.logging_mode to 'TBL'; CREATE TABLE copy_test (a int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. COPY (SELECT 1) TO STDOUT; 1 @@ -214,8 +191,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- Prepared Statements and error during execute SET yagpcc.logging_mode to 'TBL'; @@ -240,8 +216,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) -- GUC Settings SET yagpcc.logging_mode to 'TBL'; @@ -262,8 +237,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util SELECT yagpcc.truncate_log() IS NOT NULL AS t; t --- - t -(1 row) +(0 rows) DROP FUNCTION yagp_status_order(text); DROP EXTENSION yagp_hooks_collector; diff --git a/src/EventSender.cpp b/src/EventSender.cpp index fee435a6dcc..d638d275548 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -169,7 +169,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { if (query_desc->totaltime == NULL) { MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL); + query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL, false); ya_gpdb::mem_ctx_switch_to(oldcxt); } } diff --git a/src/EventSender.h b/src/EventSender.h index 4afdf1e14a4..6e195eeacdf 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -23,6 +23,8 @@ class SetQueryReq; #include +extern void gp_gettmid(int32 *); + struct QueryKey { int tmid; int ssid; @@ -40,7 +42,7 @@ struct QueryKey { query_desc->yagp_query_key = (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); int32 tmid; - gpmon_gettmid(&tmid); + gp_gettmid(&tmid); query_desc->yagp_query_key->tmid = tmid; query_desc->yagp_query_key->ssid = gp_session_id; query_desc->yagp_query_key->ccnt = gp_command_count; diff --git 
a/src/PgUtils.cpp b/src/PgUtils.cpp index fc58112bfaa..96f46429643 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -72,7 +72,7 @@ bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { } bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_session_role == GP_ROLE_DISPATCH; + return Config::report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; } bool filter_query(QueryDesc *query_desc) { diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index f28714da6ec..aa8632477f5 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -24,6 +24,8 @@ extern "C" { #include #include +extern void gp_gettmid(int32 *); + namespace { constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); @@ -49,7 +51,7 @@ void set_query_key(yagpcc::QueryKey *key) { key->set_ccnt(gp_command_count); key->set_ssid(gp_session_id); int32 tmid = 0; - gpmon_gettmid(&tmid); + gp_gettmid(&tmid); key->set_tmid(tmid); } @@ -81,7 +83,7 @@ std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { } void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER ? 
yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER @@ -106,7 +108,7 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { - if (Gp_session_role == GP_ROLE_DISPATCH && query_desc->sourceText) { + if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), @@ -118,7 +120,7 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } void clear_big_fields(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { + if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->clear_plan_text(); qi->clear_template_plan_text(); @@ -129,7 +131,7 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { } void set_query_info(yagpcc::SetQueryReq *req) { - if (Gp_session_role == GP_ROLE_DISPATCH) { + if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->set_username(get_user_name()); if (IsTransactionState()) diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index b6af303218d..a7eaed539f7 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -19,10 +19,9 @@ extern "C" { static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, const std::string &event) { - ereport(LOG, - (errmsg("Query {%d-%d-%d} %s tracing failed with error %s", - req.query_key().tmid(), req.query_key().ssid(), - req.query_key().ccnt(), event.c_str(), strerror(errno)))); + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str()))); } bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, @@ -77,8 +76,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, // That's a very important error that should never happen, so make it // visible to an end-user and admins. 
ereport(WARNING, - (errmsg("Unable to create non-blocking socket connection %s", - strerror(errno)))); + (errmsg("Unable to create non-blocking socket connection %m"))); success = false; YagpStat::report_error(); } diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 532690f1d51..7c1dbc480bc 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -127,6 +127,9 @@ CreateQueryDesc(PlannedStmt *plannedstmt, if (Gp_role != GP_ROLE_EXECUTE) increment_command_count(); + /* null this field until set by YAGP Hooks collector */ + qd->yagp_query_key = NULL; + return qd; } diff --git a/src/backend/utils/workfile_manager/workfile_mgr.c b/src/backend/utils/workfile_manager/workfile_mgr.c index e5b311cf9ba..21b4463e5f1 100644 --- a/src/backend/utils/workfile_manager/workfile_mgr.c +++ b/src/backend/utils/workfile_manager/workfile_mgr.c @@ -192,6 +192,9 @@ static void unpin_workset(workfile_set *work_set); static bool proc_exit_hook_registered = false; +static uint64 total_bytes_written = 0; +static uint64 total_files_created = 0; + Datum gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS); Datum gp_workfile_mgr_used_diskspace(PG_FUNCTION_ARGS); @@ -371,6 +374,7 @@ RegisterFileWithSet(File file, workfile_set *work_set) localCtl.entries[file].work_set = work_set; work_set->num_files++; work_set->perquery->num_files++; + total_files_created++; /* Enforce the limit on number of files */ if (gp_workfile_limit_files_per_query > 0 && @@ -447,6 +451,7 @@ UpdateWorkFileSize(File file, uint64 newsize) (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("workfile per segment size limit exceeded"))); } + total_bytes_written += diff; } /* @@ -986,3 +991,22 @@ workfile_is_active(workfile_set *workfile) { return workfile ? 
workfile->active : false; } + +uint64 +WorkfileTotalBytesWritten(void) +{ + return total_bytes_written; +} + +uint64 +WorkfileTotalFilesCreated(void) +{ + return total_files_created; +} + +void +WorkfileResetBackendStats(void) +{ + total_bytes_written = 0; + total_files_created = 0; +} diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 07ac511d546..56c1da9f4f6 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -36,7 +36,7 @@ static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - long count); + uint64 count, bool execute_once); static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); @@ -45,10 +45,12 @@ static void ya_ic_teardown_hook(ChunkTransportState *transportStates, #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif -static void ya_process_utility_hook(Node *parsetree, const char *queryString, +static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, - ParamListInfo params, DestReceiver *dest, - char *completionTag); + ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc); static EventSender *sender = nullptr; @@ -127,14 +129,14 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { } void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - long count) { + uint64 count, bool execute_once) { get_sender()->incr_depth(); PG_TRY(); { if (previous_ExecutorRun_hook) - previous_ExecutorRun_hook(query_desc, direction, count); + previous_ExecutorRun_hook(query_desc, direction, count, execute_once); else - standard_ExecutorRun(query_desc, 
direction, count); + standard_ExecutorRun(query_desc, direction, count, execute_once); get_sender()->decr_depth(); } PG_CATCH(); @@ -198,10 +200,12 @@ void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { } #endif -static void ya_process_utility_hook(Node *parsetree, const char *queryString, +static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, - ParamListInfo params, DestReceiver *dest, - char *completionTag) { + ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc) { /* Project utility data on QueryDesc to use existing logic */ QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc)); query_desc->sourceText = queryString; @@ -214,11 +218,11 @@ static void ya_process_utility_hook(Node *parsetree, const char *queryString, PG_TRY(); { if (previous_ProcessUtility_hook) { - (*previous_ProcessUtility_hook)(parsetree, queryString, context, params, - dest, completionTag); + (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, context, + params, queryEnv, dest, qc); } else { - standard_ProcessUtility(parsetree, queryString, context, params, dest, - completionTag); + standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, params, + queryEnv, dest, qc); } get_sender()->decr_depth(); @@ -264,7 +268,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { const int ATTNUM = 6; check_stats_loaded(); auto stats = YagpStat::get_stats(); - TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM, false); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM); TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, 0 /* attdim */); TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID, diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e3ecf31b664..e469945a4c5 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -22,6 +22,14 @@ 
struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */ +typedef struct YagpQueryKey +{ + int tmid; /* transaction time */ + int ssid; /* session id */ + int ccnt; /* command count */ + int nesting_level; + uintptr_t query_desc_addr; +} YagpQueryKey; /* * SerializedParams is used to serialize external query parameters @@ -330,6 +338,9 @@ typedef struct QueryDesc /* This is always set NULL by the core system, but plugins can change it */ struct Instrumentation *totaltime; /* total time spent in ExecutorRun */ + + /* YAGP Hooks collector */ + YagpQueryKey *yagp_query_key; } QueryDesc; /* in pquery.c */ diff --git a/src/include/utils/workfile_mgr.h b/src/include/utils/workfile_mgr.h index dfbd17bca57..48c83620610 100644 --- a/src/include/utils/workfile_mgr.h +++ b/src/include/utils/workfile_mgr.h @@ -74,4 +74,8 @@ extern workfile_set *workfile_mgr_cache_entries_get_copy(int* num_actives); extern uint64 WorkfileSegspace_GetSize(void); extern bool workfile_is_active(workfile_set *workfile); +extern uint64 WorkfileTotalBytesWritten(void); +extern uint64 WorkfileTotalFilesCreated(void); +extern void WorkfileResetBackendStats(void); + #endif /* __WORKFILE_MGR_H__ */ diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp index 0868dd9fc1c..cec9e33693a 100644 --- a/src/log/LogOps.cpp +++ b/src/log/LogOps.cpp @@ -37,12 +37,12 @@ void init_log() { relationId = heap_create_with_catalog( log_relname.data() /* relname */, namespaceId /* namespace */, 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, - InvalidOid /* reloftypeid */, GetUserId() /* owner */, - DescribeTuple() /* rel tuple */, NIL, InvalidOid /* relam */, - RELKIND_RELATION, RELPERSISTENCE_PERMANENT, RELSTORAGE_HEAP, false, false, - true, 0, ONCOMMIT_NOOP, NULL /* GP Policy */, (Datum)0, - false /* use_user_acl */, true, true, false /* valid_opts */, - false /* is_part_child */, false /* is part parent */, NULL); + InvalidOid /* reloftypeid */, GetUserId() /* owner */, 
HEAP_TABLE_AM_OID, + DescribeTuple() /* rel tuple */, NIL, RELKIND_RELATION, + RELPERSISTENCE_PERMANENT, false, false, ONCOMMIT_NOOP, + NULL /* GP Policy */, (Datum)0, false /* use_user_acl */, true, true, + InvalidOid /* relrewrite */, NULL /* typaddress */, + false /* valid_opts */); /* Make the table visible */ CommandCounterIncrement(); diff --git a/src/log/LogSchema.cpp b/src/log/LogSchema.cpp index 335a3103cfd..2fadcc46599 100644 --- a/src/log/LogSchema.cpp +++ b/src/log/LogSchema.cpp @@ -19,7 +19,7 @@ const std::unordered_map &proto_name_to_col_idx() { } TupleDesc DescribeTuple() { - TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log, false); + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log); for (size_t anum = 1; anum <= natts_yagp_log; ++anum) { TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 0824a3a6808..763e32e539c 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -7,6 +7,7 @@ extern "C" { #include "commands/dbcommands.h" #include "commands/resgroupcmds.h" #include "utils/builtins.h" +#include "utils/varlena.h" #include "nodes/pg_list.h" #include "commands/explain.h" #include "executor/instrument.h" @@ -115,41 +116,40 @@ bool ya_gpdb::split_identifier_string(char *rawstring, char separator, ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { return wrap_noexcept([&]() { - ExplainState es; - ExplainInitState(&es); - es.costs = costs; - es.verbose = true; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); - ExplainPrintPlan(&es, query_desc); - ExplainEndOutput(&es); - return es; + ExplainState *es = NewExplainState(); + es->costs = costs; + es->verbose = true; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + ExplainPrintPlan(es, query_desc); + ExplainEndOutput(es); + return *es; }); } ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, 
bool analyze) noexcept { return wrap_noexcept([&]() { - ExplainState es; - ExplainInitState(&es); - es.analyze = analyze; - es.verbose = true; - es.buffers = es.analyze; - es.timing = es.analyze; - es.summary = es.analyze; - es.format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(&es); + ExplainState *es = NewExplainState(); + es->analyze = analyze; + es->verbose = true; + es->buffers = es->analyze; + es->timing = es->analyze; + es->summary = es->analyze; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); if (analyze) { - ExplainPrintPlan(&es, query_desc); - ExplainPrintExecStatsEnd(&es, query_desc); + ExplainPrintPlan(es, query_desc); + ExplainPrintExecStatsEnd(es, query_desc); } - ExplainEndOutput(&es); - return es; + ExplainEndOutput(es); + return *es; }); } -Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options) { - return wrap_throw(InstrAlloc, n, instrument_options); +Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options, + bool async_mode) { + return wrap_throw(InstrAlloc, n, instrument_options, async_mode); } HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 8f5f146cc67..920fc1ae6e7 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -32,7 +32,7 @@ bool split_identifier_string(char *rawstring, char separator, List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; -Instrumentation *instr_alloc(size_t n, int instrument_options); +Instrumentation *instr_alloc(size_t n, int instrument_options, bool async_mode); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull); CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c 
b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 1c58d936093..c19805ce506 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -6,689 +6,48 @@ #include #include -#include "access/hash.h" -#include "executor/instrument.h" -#include "executor/execdesc.h" -#include "funcapi.h" +#include "common/hashfn.h" +#include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "parser/analyze.h" -#include "parser/parsetree.h" #include "parser/scanner.h" -#include "parser/gram.h" -#include "pgstat.h" -#include "storage/fd.h" -#include "storage/ipc.h" -#include "storage/spin.h" -#include "tcop/utility.h" #include "utils/builtins.h" #include "utils/memutils.h" +#include "utils/queryjumble.h" #include "pg_stat_statements_ya_parser.h" -static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL; - -#define JUMBLE_SIZE 1024 /* query serialization buffer size */ - -/* - * Struct for tracking locations/lengths of constants during normalization - */ -typedef struct pgssLocationLen -{ - int location; /* start offset in query text */ - int length; /* length in bytes, or -1 to ignore */ -} pgssLocationLen; - -/* - * Working state for computing a query jumble and producing a normalized - * query string - */ -typedef struct pgssJumbleState -{ - /* Jumble of current query tree */ - unsigned char *jumble; - - /* Number of bytes used in jumble[] */ - Size jumble_len; - - /* Array of locations of constants that should be removed */ - pgssLocationLen *clocations; - - /* Allocated length of clocations array */ - int clocations_buf_size; - - /* Current number of valid entries in clocations array */ - int clocations_count; - - /* highest Param id we've seen, in order to start normalization correctly */ - int highest_extern_param_id; -} pgssJumbleState; +#ifndef ICONST +#define ICONST 276 +#endif +#ifndef FCONST +#define FCONST 277 +#endif +#ifndef SCONST +#define 
SCONST 278 +#endif +#ifndef BCONST +#define BCONST 279 +#endif +#ifndef XCONST +#define XCONST 280 +#endif -static void AppendJumble(pgssJumbleState *jstate, - const unsigned char *item, Size size); -static void JumbleQuery(pgssJumbleState *jstate, Query *query); -static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); -static void JumbleExpr(pgssJumbleState *jstate, Node *node); -static void RecordConstLocation(pgssJumbleState *jstate, int location); -static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query); +static void fill_in_constant_lengths(JumbleState *jstate, const char *query); static int comp_location(const void *a, const void *b); StringInfo gen_normplan(const char *execution_plan); static bool need_replace(int token); -void pgss_post_parse_analyze(ParseState *pstate, Query *query); -static char *generate_normalized_query(pgssJumbleState *jstate, const char *query, +static char *generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding); - void stat_statements_parser_init() -{ - prev_post_parse_analyze_hook = post_parse_analyze_hook; - post_parse_analyze_hook = pgss_post_parse_analyze; -} - -void stat_statements_parser_deinit() +void stat_statements_parser_init(void) { - post_parse_analyze_hook = prev_post_parse_analyze_hook; -} - -/* - * AppendJumble: Append a value that is substantive in a given query to - * the current jumble. - */ -static void -AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size) -{ - unsigned char *jumble = jstate->jumble; - Size jumble_len = jstate->jumble_len; - - /* - * Whenever the jumble buffer is full, we hash the current contents and - * reset the buffer to contain just that hash value, thus relying on the - * hash to summarize everything so far. 
- */ - while (size > 0) - { - Size part_size; - - if (jumble_len >= JUMBLE_SIZE) - { - uint32 start_hash = hash_any(jumble, JUMBLE_SIZE); - - memcpy(jumble, &start_hash, sizeof(start_hash)); - jumble_len = sizeof(start_hash); - } - part_size = Min(size, JUMBLE_SIZE - jumble_len); - memcpy(jumble + jumble_len, item, part_size); - jumble_len += part_size; - item += part_size; - size -= part_size; - } - jstate->jumble_len = jumble_len; + EnableQueryId(); } -/* - * Wrappers around AppendJumble to encapsulate details of serialization - * of individual local variable elements. - */ -#define APP_JUMB(item) \ - AppendJumble(jstate, (const unsigned char *)&(item), sizeof(item)) -#define APP_JUMB_STRING(str) \ - AppendJumble(jstate, (const unsigned char *)(str), strlen(str) + 1) - -/* - * JumbleQuery: Selectively serialize the query tree, appending significant - * data to the "query jumble" while ignoring nonsignificant data. - * - * Rule of thumb for what to include is that we should ignore anything not - * semantically significant (such as alias names) as well as anything that can - * be deduced from child nodes (else we'd just be double-hashing that piece - * of information). 
- */ -void JumbleQuery(pgssJumbleState *jstate, Query *query) +void stat_statements_parser_deinit(void) { - Assert(IsA(query, Query)); - Assert(query->utilityStmt == NULL); - - APP_JUMB(query->commandType); - /* resultRelation is usually predictable from commandType */ - JumbleExpr(jstate, (Node *)query->cteList); - JumbleRangeTable(jstate, query->rtable); - JumbleExpr(jstate, (Node *)query->jointree); - JumbleExpr(jstate, (Node *)query->targetList); - JumbleExpr(jstate, (Node *)query->returningList); - JumbleExpr(jstate, (Node *)query->groupClause); - JumbleExpr(jstate, query->havingQual); - JumbleExpr(jstate, (Node *)query->windowClause); - JumbleExpr(jstate, (Node *)query->distinctClause); - JumbleExpr(jstate, (Node *)query->sortClause); - JumbleExpr(jstate, query->limitOffset); - JumbleExpr(jstate, query->limitCount); - /* we ignore rowMarks */ - JumbleExpr(jstate, query->setOperations); -} - -/* - * Jumble a range table - */ -static void -JumbleRangeTable(pgssJumbleState *jstate, List *rtable) -{ - ListCell *lc; - - foreach (lc, rtable) - { - RangeTblEntry *rte = (RangeTblEntry *)lfirst(lc); - - Assert(IsA(rte, RangeTblEntry)); - APP_JUMB(rte->rtekind); - switch (rte->rtekind) - { - case RTE_RELATION: - APP_JUMB(rte->relid); - break; - case RTE_SUBQUERY: - JumbleQuery(jstate, rte->subquery); - break; - case RTE_JOIN: - APP_JUMB(rte->jointype); - break; - case RTE_FUNCTION: - JumbleExpr(jstate, (Node *)rte->functions); - break; - case RTE_VALUES: - JumbleExpr(jstate, (Node *)rte->values_lists); - break; - case RTE_CTE: - - /* - * Depending on the CTE name here isn't ideal, but it's the - * only info we have to identify the referenced WITH item. 
- */ - APP_JUMB_STRING(rte->ctename); - APP_JUMB(rte->ctelevelsup); - break; - /* GPDB RTEs */ - case RTE_VOID: - break; - case RTE_TABLEFUNCTION: - JumbleQuery(jstate, rte->subquery); - JumbleExpr(jstate, (Node *)rte->functions); - break; - default: - ereport(ERROR, (errmsg("unrecognized RTE kind: %d", (int)rte->rtekind))); - break; - } - } -} - -/* - * Jumble an expression tree - * - * In general this function should handle all the same node types that - * expression_tree_walker() does, and therefore it's coded to be as parallel - * to that function as possible. However, since we are only invoked on - * queries immediately post-parse-analysis, we need not handle node types - * that only appear in planning. - * - * Note: the reason we don't simply use expression_tree_walker() is that the - * point of that function is to support tree walkers that don't care about - * most tree node types, but here we care about all types. We should complain - * about any unrecognized node type. - */ -static void -JumbleExpr(pgssJumbleState *jstate, Node *node) -{ - ListCell *temp; - - if (node == NULL) - return; - - /* Guard against stack overflow due to overly complex expressions */ - check_stack_depth(); - - /* - * We always emit the node's NodeTag, then any additional fields that are - * considered significant, and then we recurse to any child nodes. 
- */ - APP_JUMB(node->type); - - switch (nodeTag(node)) - { - case T_Var: - { - Var *var = (Var *)node; - - APP_JUMB(var->varno); - APP_JUMB(var->varattno); - APP_JUMB(var->varlevelsup); - } - break; - case T_Const: - { - Const *c = (Const *)node; - - /* We jumble only the constant's type, not its value */ - APP_JUMB(c->consttype); - /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); - } - break; - case T_Param: - { - Param *p = (Param *)node; - - APP_JUMB(p->paramkind); - APP_JUMB(p->paramid); - APP_JUMB(p->paramtype); - } - break; - case T_Aggref: - { - Aggref *expr = (Aggref *)node; - - APP_JUMB(expr->aggfnoid); - JumbleExpr(jstate, (Node *)expr->aggdirectargs); - JumbleExpr(jstate, (Node *)expr->args); - JumbleExpr(jstate, (Node *)expr->aggorder); - JumbleExpr(jstate, (Node *)expr->aggdistinct); - JumbleExpr(jstate, (Node *)expr->aggfilter); - } - break; - case T_WindowFunc: - { - WindowFunc *expr = (WindowFunc *)node; - - APP_JUMB(expr->winfnoid); - APP_JUMB(expr->winref); - JumbleExpr(jstate, (Node *)expr->args); - JumbleExpr(jstate, (Node *)expr->aggfilter); - } - break; - case T_ArrayRef: - { - ArrayRef *aref = (ArrayRef *)node; - - JumbleExpr(jstate, (Node *)aref->refupperindexpr); - JumbleExpr(jstate, (Node *)aref->reflowerindexpr); - JumbleExpr(jstate, (Node *)aref->refexpr); - JumbleExpr(jstate, (Node *)aref->refassgnexpr); - } - break; - case T_FuncExpr: - { - FuncExpr *expr = (FuncExpr *)node; - - APP_JUMB(expr->funcid); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_NamedArgExpr: - { - NamedArgExpr *nae = (NamedArgExpr *)node; - - APP_JUMB(nae->argnumber); - JumbleExpr(jstate, (Node *)nae->arg); - } - break; - case T_OpExpr: - case T_DistinctExpr: /* struct-equivalent to OpExpr */ - case T_NullIfExpr: /* struct-equivalent to OpExpr */ - { - OpExpr *expr = (OpExpr *)node; - - APP_JUMB(expr->opno); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_ScalarArrayOpExpr: - 
{ - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *)node; - - APP_JUMB(expr->opno); - APP_JUMB(expr->useOr); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_BoolExpr: - { - BoolExpr *expr = (BoolExpr *)node; - - APP_JUMB(expr->boolop); - JumbleExpr(jstate, (Node *)expr->args); - } - break; - case T_SubLink: - { - SubLink *sublink = (SubLink *)node; - - APP_JUMB(sublink->subLinkType); - JumbleExpr(jstate, (Node *)sublink->testexpr); - JumbleQuery(jstate, (Query *)sublink->subselect); - } - break; - case T_FieldSelect: - { - FieldSelect *fs = (FieldSelect *)node; - - APP_JUMB(fs->fieldnum); - JumbleExpr(jstate, (Node *)fs->arg); - } - break; - case T_FieldStore: - { - FieldStore *fstore = (FieldStore *)node; - - JumbleExpr(jstate, (Node *)fstore->arg); - JumbleExpr(jstate, (Node *)fstore->newvals); - } - break; - case T_RelabelType: - { - RelabelType *rt = (RelabelType *)node; - - APP_JUMB(rt->resulttype); - JumbleExpr(jstate, (Node *)rt->arg); - } - break; - case T_CoerceViaIO: - { - CoerceViaIO *cio = (CoerceViaIO *)node; - - APP_JUMB(cio->resulttype); - JumbleExpr(jstate, (Node *)cio->arg); - } - break; - case T_ArrayCoerceExpr: - { - ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *)node; - - APP_JUMB(acexpr->resulttype); - JumbleExpr(jstate, (Node *)acexpr->arg); - } - break; - case T_ConvertRowtypeExpr: - { - ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *)node; - - APP_JUMB(crexpr->resulttype); - JumbleExpr(jstate, (Node *)crexpr->arg); - } - break; - case T_CollateExpr: - { - CollateExpr *ce = (CollateExpr *)node; - - APP_JUMB(ce->collOid); - JumbleExpr(jstate, (Node *)ce->arg); - } - break; - case T_CaseExpr: - { - CaseExpr *caseexpr = (CaseExpr *)node; - - JumbleExpr(jstate, (Node *)caseexpr->arg); - foreach (temp, caseexpr->args) - { - CaseWhen *when = (CaseWhen *)lfirst(temp); - - Assert(IsA(when, CaseWhen)); - JumbleExpr(jstate, (Node *)when->expr); - JumbleExpr(jstate, (Node *)when->result); - } - JumbleExpr(jstate, (Node 
*)caseexpr->defresult); - } - break; - case T_CaseTestExpr: - { - CaseTestExpr *ct = (CaseTestExpr *)node; - - APP_JUMB(ct->typeId); - } - break; - case T_ArrayExpr: - JumbleExpr(jstate, (Node *)((ArrayExpr *)node)->elements); - break; - case T_RowExpr: - JumbleExpr(jstate, (Node *)((RowExpr *)node)->args); - break; - case T_RowCompareExpr: - { - RowCompareExpr *rcexpr = (RowCompareExpr *)node; - - APP_JUMB(rcexpr->rctype); - JumbleExpr(jstate, (Node *)rcexpr->largs); - JumbleExpr(jstate, (Node *)rcexpr->rargs); - } - break; - case T_CoalesceExpr: - JumbleExpr(jstate, (Node *)((CoalesceExpr *)node)->args); - break; - case T_MinMaxExpr: - { - MinMaxExpr *mmexpr = (MinMaxExpr *)node; - - APP_JUMB(mmexpr->op); - JumbleExpr(jstate, (Node *)mmexpr->args); - } - break; - case T_XmlExpr: - { - XmlExpr *xexpr = (XmlExpr *)node; - - APP_JUMB(xexpr->op); - JumbleExpr(jstate, (Node *)xexpr->named_args); - JumbleExpr(jstate, (Node *)xexpr->args); - } - break; - case T_NullTest: - { - NullTest *nt = (NullTest *)node; - - APP_JUMB(nt->nulltesttype); - JumbleExpr(jstate, (Node *)nt->arg); - } - break; - case T_BooleanTest: - { - BooleanTest *bt = (BooleanTest *)node; - - APP_JUMB(bt->booltesttype); - JumbleExpr(jstate, (Node *)bt->arg); - } - break; - case T_CoerceToDomain: - { - CoerceToDomain *cd = (CoerceToDomain *)node; - - APP_JUMB(cd->resulttype); - JumbleExpr(jstate, (Node *)cd->arg); - } - break; - case T_CoerceToDomainValue: - { - CoerceToDomainValue *cdv = (CoerceToDomainValue *)node; - - APP_JUMB(cdv->typeId); - } - break; - case T_SetToDefault: - { - SetToDefault *sd = (SetToDefault *)node; - - APP_JUMB(sd->typeId); - } - break; - case T_CurrentOfExpr: - { - CurrentOfExpr *ce = (CurrentOfExpr *)node; - - APP_JUMB(ce->cvarno); - if (ce->cursor_name) - APP_JUMB_STRING(ce->cursor_name); - APP_JUMB(ce->cursor_param); - } - break; - case T_TargetEntry: - { - TargetEntry *tle = (TargetEntry *)node; - - APP_JUMB(tle->resno); - APP_JUMB(tle->ressortgroupref); - 
JumbleExpr(jstate, (Node *)tle->expr); - } - break; - case T_RangeTblRef: - { - RangeTblRef *rtr = (RangeTblRef *)node; - - APP_JUMB(rtr->rtindex); - } - break; - case T_JoinExpr: - { - JoinExpr *join = (JoinExpr *)node; - - APP_JUMB(join->jointype); - APP_JUMB(join->isNatural); - APP_JUMB(join->rtindex); - JumbleExpr(jstate, join->larg); - JumbleExpr(jstate, join->rarg); - JumbleExpr(jstate, join->quals); - } - break; - case T_FromExpr: - { - FromExpr *from = (FromExpr *)node; - - JumbleExpr(jstate, (Node *)from->fromlist); - JumbleExpr(jstate, from->quals); - } - break; - case T_List: - foreach (temp, (List *)node) - { - JumbleExpr(jstate, (Node *)lfirst(temp)); - } - break; - case T_SortGroupClause: - { - SortGroupClause *sgc = (SortGroupClause *)node; - - APP_JUMB(sgc->tleSortGroupRef); - APP_JUMB(sgc->eqop); - APP_JUMB(sgc->sortop); - APP_JUMB(sgc->nulls_first); - } - break; - case T_WindowClause: - { - WindowClause *wc = (WindowClause *)node; - - APP_JUMB(wc->winref); - APP_JUMB(wc->frameOptions); - JumbleExpr(jstate, (Node *)wc->partitionClause); - JumbleExpr(jstate, (Node *)wc->orderClause); - JumbleExpr(jstate, wc->startOffset); - JumbleExpr(jstate, wc->endOffset); - } - break; - case T_CommonTableExpr: - { - CommonTableExpr *cte = (CommonTableExpr *)node; - - /* we store the string name because RTE_CTE RTEs need it */ - APP_JUMB_STRING(cte->ctename); - JumbleQuery(jstate, (Query *)cte->ctequery); - } - break; - case T_SetOperationStmt: - { - SetOperationStmt *setop = (SetOperationStmt *)node; - - APP_JUMB(setop->op); - APP_JUMB(setop->all); - JumbleExpr(jstate, setop->larg); - JumbleExpr(jstate, setop->rarg); - } - break; - case T_RangeTblFunction: - { - RangeTblFunction *rtfunc = (RangeTblFunction *)node; - - JumbleExpr(jstate, rtfunc->funcexpr); - } - break; - /* GPDB nodes */ - case T_GroupingClause: - { - GroupingClause *grpnode = (GroupingClause *)node; - - JumbleExpr(jstate, (Node *)grpnode->groupsets); - } - break; - case T_GroupingFunc: - { - 
GroupingFunc *grpnode = (GroupingFunc *)node; - - JumbleExpr(jstate, (Node *)grpnode->args); - } - break; - case T_Grouping: - case T_GroupId: - case T_Integer: - case T_Value: - // TODO:seems like nothing to do with it - break; - /* GPDB-only additions, nothing to do */ - case T_PartitionBy: - case T_PartitionElem: - case T_PartitionRangeItem: - case T_PartitionBoundSpec: - case T_PartitionSpec: - case T_PartitionValuesSpec: - case T_AlterPartitionId: - case T_AlterPartitionCmd: - case T_InheritPartitionCmd: - case T_CreateFileSpaceStmt: - case T_FileSpaceEntry: - case T_DropFileSpaceStmt: - case T_TableValueExpr: - case T_DenyLoginInterval: - case T_DenyLoginPoint: - case T_AlterTypeStmt: - case T_SetDistributionCmd: - case T_ExpandStmtSpec: - break; - default: - /* Only a warning, since we can stumble along anyway */ - ereport(WARNING, (errmsg("unrecognized node type: %d", - (int)nodeTag(node)))); - break; - } -} - -/* - * Record location of constant within query string of query tree - * that is currently being walked. 
- */ -static void -RecordConstLocation(pgssJumbleState *jstate, int location) -{ - /* -1 indicates unknown or undefined location */ - if (location >= 0) - { - /* enlarge array if needed */ - if (jstate->clocations_count >= jstate->clocations_buf_size) - { - jstate->clocations_buf_size *= 2; - jstate->clocations = (pgssLocationLen *) - repalloc(jstate->clocations, - jstate->clocations_buf_size * - sizeof(pgssLocationLen)); - } - jstate->clocations[jstate->clocations_count].location = location; - /* initialize lengths to -1 to simplify fill_in_constant_lengths */ - jstate->clocations[jstate->clocations_count].length = -1; - jstate->clocations_count++; - } + /* NO-OP */ } /* check if token should be replaced by substitute varable */ @@ -768,60 +127,13 @@ gen_normplan(const char *execution_plan) } /* - * Post-parse-analysis hook: mark query with a queryId - */ -void pgss_post_parse_analyze(ParseState *pstate, Query *query) -{ - pgssJumbleState jstate; - - if (prev_post_parse_analyze_hook) - prev_post_parse_analyze_hook(pstate, query); - - /* Assert we didn't do this already */ - Assert(query->queryId == 0); - - /* - * Utility statements get queryId zero. We do this even in cases where - * the statement contains an optimizable statement for which a queryId - * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases, - * runtime control will first go through ProcessUtility and then the - * executor, and we don't want the executor hooks to do anything, since we - * are already measuring the statement's costs at the utility level. 
- */ - if (query->utilityStmt) - { - query->queryId = 0; - return; - } - - /* Set up workspace for query jumbling */ - jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); - jstate.jumble_len = 0; - jstate.clocations_buf_size = 32; - jstate.clocations = (pgssLocationLen *) - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); - jstate.clocations_count = 0; - - /* Compute query ID and mark the Query node with it */ - JumbleQuery(&jstate, query); - query->queryId = hash_any(jstate.jumble, jstate.jumble_len); - - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == 0) - query->queryId = 1; -} - -/* - * comp_location: comparator for qsorting pgssLocationLen structs by location + * comp_location: comparator for qsorting LocationLen structs by location */ static int comp_location(const void *a, const void *b) { - int l = ((const pgssLocationLen *) a)->location; - int r = ((const pgssLocationLen *) b)->location; + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; if (l < r) return -1; @@ -854,9 +166,9 @@ comp_location(const void *a, const void *b) * reason for a constant to start with a '-'. 
*/ static void -fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) +fill_in_constant_lengths(JumbleState *jstate, const char *query) { - pgssLocationLen *locs; + LocationLen *locs; core_yyscan_t yyscanner; core_yy_extra_type yyextra; core_YYSTYPE yylval; @@ -870,14 +182,14 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) */ if (jstate->clocations_count > 1) qsort(jstate->clocations, jstate->clocations_count, - sizeof(pgssLocationLen), comp_location); + sizeof(LocationLen), comp_location); locs = jstate->clocations; /* initialize the flex scanner --- should match raw_parser() */ yyscanner = scanner_init(query, &yyextra, - ScanKeywords, - NumScanKeywords); + &ScanKeywords, + ScanKeywordTokens); /* Search for each constant, in sequence */ for (i = 0; i < jstate->clocations_count; i++) @@ -957,7 +269,7 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query) * Returns a palloc'd string. */ static char * -generate_normalized_query(pgssJumbleState *jstate, const char *query, +generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding) { char *norm_query; @@ -1027,12 +339,12 @@ char *gen_normquery(const char *query) if (!query) { return NULL; } - pgssJumbleState jstate; + JumbleState jstate; jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); jstate.jumble_len = 0; jstate.clocations_buf_size = 32; - jstate.clocations = (pgssLocationLen *) - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + jstate.clocations = (LocationLen *) + palloc(jstate.clocations_buf_size * sizeof(LocationLen)); jstate.clocations_count = 0; int query_len = strlen(query); return generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 9db73638b24..27fd0e04b26 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -1,5 +1,6 @@ #include "postgres.h" #include "cdb/cdbvars.h" 
+#include "funcapi.h" #include "utils/builtins.h" #include "hook_wrappers.h" @@ -26,8 +27,15 @@ void _PG_fini(void) { } Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { - yagp_functions_reset(); - PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + yagp_functions_reset(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } Datum yagp_stat_messages(PG_FUNCTION_ARGS) { @@ -35,11 +43,25 @@ Datum yagp_stat_messages(PG_FUNCTION_ARGS) { } Datum yagp_init_log(PG_FUNCTION_ARGS) { - init_log(); - PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + init_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } Datum yagp_truncate_log(PG_FUNCTION_ARGS) { - truncate_log(); - PG_RETURN_VOID(); + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + truncate_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } diff --git a/yagp_hooks_collector--1.0--1.1.sql b/yagp_hooks_collector--1.0--1.1.sql index 959d4f235d1..8684ca73915 100644 --- a/yagp_hooks_collector--1.0--1.1.sql +++ b/yagp_hooks_collector--1.0--1.1.sql @@ -23,17 +23,17 @@ DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); -- Recreate functions and view in new schema. CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT yagpcc.__stat_messages_reset_f_on_master(); @@ -75,12 +75,12 @@ ORDER BY segid; -- Create new objects. 
CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; @@ -95,17 +95,17 @@ CREATE VIEW yagpcc.log AS ORDER BY tmid, ssid, ccnt; CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__truncate_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.truncate_log() -RETURNS void AS $$ +RETURNS SETOF void AS $$ BEGIN PERFORM yagpcc.__truncate_log_on_master(); PERFORM yagpcc.__truncate_log_on_segments(); diff --git a/yagp_hooks_collector--1.0.sql b/yagp_hooks_collector--1.0.sql index 7ab4e1b2fb7..270cab92382 100644 --- a/yagp_hooks_collector--1.0.sql +++ b/yagp_hooks_collector--1.0.sql @@ -4,17 +4,17 @@ \echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagp_stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT __yagp_stat_messages_reset_f_on_master(); diff --git a/yagp_hooks_collector--1.1.sql b/yagp_hooks_collector--1.1.sql index 657720a88f2..e0e94b51493 100644 --- a/yagp_hooks_collector--1.1.sql +++ b/yagp_hooks_collector--1.1.sql @@ -6,17 +6,17 @@ CREATE SCHEMA yagpcc; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' LANGUAGE C EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS void +RETURNS SETOF void AS $$ SELECT yagpcc.__stat_messages_reset_f_on_master(); @@ -57,12 +57,12 @@ CREATE VIEW yagpcc.stat_messages AS ORDER BY segid; CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_init_log' LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; @@ -77,17 +77,17 @@ CREATE VIEW yagpcc.log AS ORDER BY tmid, ssid, ccnt; CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; CREATE FUNCTION yagpcc.__truncate_log_on_segments() -RETURNS void +RETURNS SETOF void AS 'MODULE_PATHNAME', 'yagp_truncate_log' LANGUAGE C STRICT 
VOLATILE EXECUTE ON ALL SEGMENTS; CREATE FUNCTION yagpcc.truncate_log() -RETURNS void AS $$ +RETURNS SETOF void AS $$ BEGIN PERFORM yagpcc.__truncate_log_on_master(); PERFORM yagpcc.__truncate_log_on_segments(); From d0ca75c834b6a63b49455bceafd07b4ff721e8fa Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 11:13:00 +0300 Subject: [PATCH 36/49] [yagp_hooks_collector] Add --with-yagp-hooks-collector configure option and CI Add configure.ac option with protobuf dependency. Add CI test configuration. Change env script from greenplum_path.sh to cloudberry-env.sh. --- .github/workflows/build-cloudberry.yml | 32 ++++++++- configure | 28 ++++++++ configure.ac | 7 ++ .../scripts/configure-cloudberry.sh | 4 +- expected/yagp_cursors.out | 10 +-- expected/yagp_dist.out | 2 + expected/yagp_select.out | 2 + expected/yagp_utf8_trim.out | 2 + expected/yagp_utility.out | 72 ++++++++++--------- gpcontrib/Makefile | 3 + gpcontrib/yagp_hooks_collector/Makefile | 41 +++++++++++ sql/yagp_cursors.sql | 2 + sql/yagp_dist.sql | 2 + sql/yagp_select.sql | 2 + sql/yagp_utf8_trim.sql | 2 + sql/yagp_utility.sql | 2 + src/Makefile.global.in | 1 + 17 files changed, 173 insertions(+), 41 deletions(-) create mode 100644 gpcontrib/yagp_hooks_collector/Makefile diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index adb57fb85ec..8484331998f 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -271,6 +271,10 @@ jobs: }, "enable_core_check":false }, + {"test":"gpcontrib-yagp-hooks-collector", + "make_configs":["gpcontrib/yagp_hooks_collector:installcheck"], + "extension":"yagp_hooks_collector" + }, {"test":"ic-expandshrink", "make_configs":["src/test/isolation2:installcheck-expandshrink"] }, @@ -535,10 +539,11 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + CONFIGURE_EXTRA_OPTS: --with-yagp-hooks-collector run: | set -eo pipefail chmod +x 
"${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -1403,6 +1408,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1432,6 +1438,30 @@ jobs: PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" fi + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + yagp_hooks_collector) + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating yagp_hooks_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" fi diff --git a/configure b/configure index c9ce3d46302..598114a4b7f 100755 --- a/configure +++ b/configure @@ -722,6 +722,7 @@ with_apr_config with_libcurl with_rt with_zstd +with_yagp_hooks_collector with_libbz2 LZ4_LIBS LZ4_CFLAGS @@ -942,6 +943,7 @@ with_zlib with_lz4 with_libbz2 with_zstd +with_yagp_hooks_collector with_rt with_libcurl with_apr_config @@ -11150,6 +11152,32 @@ $as_echo "yes" >&6; } fi fi +# +# yagp_hooks_collector +# + + + +# Check whether --with-yagp-hooks-collector was given. +if test "${with_yagp_hooks_collector+set}" = set; then : + withval=$with_yagp_hooks_collector; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-yagp-hooks-collector option" "$LINENO" 5 + ;; + esac + +else + with_yagp_hooks_collector=no + +fi + # # Realtime library # diff --git a/configure.ac b/configure.ac index 246edc4846e..792878fde4b 100644 --- a/configure.ac +++ b/configure.ac @@ -1365,6 +1365,13 @@ PGAC_ARG_BOOL(with, zstd, yes, [do not build with Zstandard], AC_MSG_RESULT([$with_zstd]) AC_SUBST(with_zstd) +# +# yagp_hooks_collector +# +PGAC_ARG_BOOL(with, yagp_hooks_collector, no, + [build with YAGP hooks collector extension]) +AC_SUBST(with_yagp_hooks_collector) + if test "$with_zstd" = yes; then dnl zstd_errors.h was renamed from error_public.h in v1.4.0 PKG_CHECK_MODULES([ZSTD], [libzstd >= 1.4.0]) diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index 2d7ad04aed8..d30a0b794f0 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -53,6 +53,7 @@ # # Optional Environment Variables: # LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs) +# CONFIGURE_EXTRA_OPTS - Args to pass to configure command # ENABLE_DEBUG - Enable debug build options (true/false, defaults to # false) # @@ -177,7 +178,8 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --with-uuid=e2fs \ ${CONFIGURE_MDBLOCALES_OPTS} \ --with-includes=/usr/local/xerces-c/include \ - --with-libraries=${BUILD_DESTINATION}/lib || exit 4 + --with-libraries=${BUILD_DESTINATION}/lib \ + ${CONFIGURE_EXTRA_OPTS:-""} || exit 4 log_section_end "Configure" # Capture version information diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out index d251ddd3e1c..46e124df5e8 100644 --- a/expected/yagp_cursors.out +++ b/expected/yagp_cursors.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO 
TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -25,7 +26,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT @@ -54,7 +55,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -86,7 +87,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -129,7 +130,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | | 
QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT @@ -159,3 +160,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/expected/yagp_dist.out b/expected/yagp_dist.out index 5fd5ea5fb3e..3b1e3504923 100644 --- a/expected/yagp_dist.out +++ b/expected/yagp_dist.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -171,3 +172,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/expected/yagp_select.out b/expected/yagp_select.out index b6e18dc862f..af08f2d1def 100644 --- a/expected/yagp_select.out +++ b/expected/yagp_select.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -132,3 +133,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/expected/yagp_utf8_trim.out b/expected/yagp_utf8_trim.out index 194ee6b3609..9de126dd882 100644 --- a/expected/yagp_utf8_trim.out +++ b/expected/yagp_utf8_trim.out @@ -7,6 +7,7 @@ RETURNS TEXT AS $$ ORDER BY datetime DESC LIMIT 1 $$ LANGUAGE sql VOLATILE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; -- Test 1: 1 byte chars SET yagpcc.max_text_size to 19; @@ -63,4 +64,5 @@ DROP FUNCTION get_marked_query(TEXT); RESET yagpcc.max_text_size; RESET yagpcc.logging_mode; 
RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; DROP EXTENSION yagp_hooks_collector; diff --git a/expected/yagp_utility.out b/expected/yagp_utility.out index 057f7d7a556..0a77859d8d4 100644 --- a/expected/yagp_utility.out +++ b/expected/yagp_utility.out @@ -12,6 +12,7 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -26,7 +27,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT @@ -83,7 +84,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+------------------------------------------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT @@ -113,26 +114,26 @@ BEGIN; ROLLBACK; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status 
--------+----------------------------+--------------------- - -1 | | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+-----------------------------------+--------------------- + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -153,7 +154,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 
'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT @@ -176,16 +177,16 @@ COPY (SELECT 1) TO STDOUT; DROP TABLE copy_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE - -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+-----------------------------------+--------------------- + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -203,7 +204,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | PREPARE test_prep(int) 
AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT @@ -226,7 +227,7 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+--------------------------------------------+--------------------- - -1 | | QUERY_STATUS_DONE + -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT @@ -244,3 +245,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 8d95a14f876..8b98dc9142c 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -35,6 +35,9 @@ else diskquota endif +ifeq "$(with_yagp_hooks_collector)" "yes" + recurse_targets += yagp_hooks_collector +endif ifeq "$(with_zstd)" "yes" recurse_targets += zstd endif diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile new file mode 100644 index 00000000000..be46eb7149c --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -0,0 +1,41 @@ +MODULE_big = yagp_hooks_collector +EXTENSION = yagp_hooks_collector +DATA = $(wildcard *--*.sql) +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility + +PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service +PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) + +C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) +CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) +OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) + +override CXXFLAGS = -fPIC 
-g3 -Wall -Wpointer-arith -Wendif-labels \ + -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv \ + -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ + -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD + +PG_CXXFLAGS += -Isrc -Iinclude +SHLIB_LINK += -lprotobuf -lpthread -lstdc++ +EXTRA_CLEAN = src/protos + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/yagp_hooks_collector +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto + @mkdir -p src/protos + sed -i 's/optional //g' $^ + sed -i 's|cloud/mdb/yagpcc/api/proto/common/|protos/|g' $^ + protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^ + mv src/protos/$*.pb.cc src/protos/$*.pb.cpp + +$(CPP_OBJS): src/protos/yagpcc_metrics.pb.h src/protos/yagpcc_plan.pb.h src/protos/yagpcc_set_service.pb.h +src/protos/yagpcc_set_service.pb.o: src/protos/yagpcc_metrics.pb.h diff --git a/sql/yagp_cursors.sql b/sql/yagp_cursors.sql index 5d5bde58110..f56351e0d43 100644 --- a/sql/yagp_cursors.sql +++ b/sql/yagp_cursors.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -81,3 +82,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/sql/yagp_dist.sql b/sql/yagp_dist.sql index b837ef05335..d5519d0cd96 100644 --- a/sql/yagp_dist.sql +++ b/sql/yagp_dist.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -84,3 +85,4 @@ DROP 
EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/sql/yagp_select.sql b/sql/yagp_select.sql index 4038c6b7b63..90e972ae4c1 100644 --- a/sql/yagp_select.sql +++ b/sql/yagp_select.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.report_nested_queries TO TRUE; SET yagpcc.enable_utility TO FALSE; @@ -65,3 +66,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/sql/yagp_utf8_trim.sql b/sql/yagp_utf8_trim.sql index c0fdcce24a5..c3053e4af0c 100644 --- a/sql/yagp_utf8_trim.sql +++ b/sql/yagp_utf8_trim.sql @@ -9,6 +9,7 @@ RETURNS TEXT AS $$ LIMIT 1 $$ LANGUAGE sql VOLATILE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; -- Test 1: 1 byte chars @@ -39,5 +40,6 @@ DROP FUNCTION get_marked_query(TEXT); RESET yagpcc.max_text_size; RESET yagpcc.logging_mode; RESET yagpcc.enable; +RESET yagpcc.ignored_users_list; DROP EXTENSION yagp_hooks_collector; diff --git a/sql/yagp_utility.sql b/sql/yagp_utility.sql index b4cca6f5421..cf9c1d253d0 100644 --- a/sql/yagp_utility.sql +++ b/sql/yagp_utility.sql @@ -14,6 +14,7 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; +SET yagpcc.ignored_users_list TO ''; SET yagpcc.enable TO TRUE; SET yagpcc.enable_utility TO TRUE; SET yagpcc.report_nested_queries TO TRUE; @@ -131,3 +132,4 @@ DROP EXTENSION yagp_hooks_collector; RESET yagpcc.enable; RESET yagpcc.report_nested_queries; RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 062ec75b039..edc49b72e05 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -271,6 +271,7 @@ with_zstd = @with_zstd@ ZSTD_CFLAGS = @ZSTD_CFLAGS@ ZSTD_LIBS = @ZSTD_LIBS@ EVENT_LIBS = @EVENT_LIBS@ 
+with_yagp_hooks_collector = @with_yagp_hooks_collector@ ########################################################################## # From 4c6b114eb84940a70a09684b15b7f4598e8b3576 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Mon, 19 Jan 2026 17:05:08 +0300 Subject: [PATCH 37/49] [yagp_hooks_collector] Add consistent GUC filtering and submit/done hook callsites Cache GUC values at SUBMIT so filtering criteria remain consistent across the full query lifecycle. Add query_info_collect_hook calls in ExecCreateTableAs, refresh_matview_datafill, and PortalCleanup. Correct tokens from gram.y. --- expected/yagp_cursors.out | 8 +- expected/yagp_guc_cache.out | 57 ++++++++++++ expected/yagp_utility.out | 72 +++++++-------- gpcontrib/yagp_hooks_collector/Makefile | 2 +- sql/yagp_guc_cache.sql | 43 +++++++++ src/Config.cpp | 90 +++++++++---------- src/Config.h | 49 +++++++--- src/EventSender.cpp | 68 ++++++++------ src/EventSender.h | 10 ++- src/PgUtils.cpp | 14 --- src/PgUtils.h | 3 - src/ProtoUtils.cpp | 28 +++--- src/ProtoUtils.h | 13 ++- src/UDSConnector.cpp | 5 +- src/UDSConnector.h | 6 +- src/backend/commands/createas.c | 8 +- src/backend/commands/matview.c | 5 ++ src/backend/commands/portalcmds.c | 5 ++ src/hook_wrappers.cpp | 2 +- src/log/LogOps.cpp | 6 +- .../pg_stat_statements_ya_parser.c | 14 +-- 21 files changed, 325 insertions(+), 183 deletions(-) create mode 100644 expected/yagp_guc_cache.out create mode 100644 sql/yagp_guc_cache.sql diff --git a/expected/yagp_cursors.out b/expected/yagp_cursors.out index 46e124df5e8..df12e3e1b66 100644 --- a/expected/yagp_cursors.out +++ b/expected/yagp_cursors.out @@ -26,7 +26,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | 
BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT @@ -36,6 +35,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -55,7 +55,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -69,6 +68,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (14 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -87,7 +87,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT @@ -99,6 +98,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | ROLLBACK; | QUERY_STATUS_SUBMIT -1 | ROLLBACK; | QUERY_STATUS_DONE -1 | 
RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (12 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -130,7 +130,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | BEGIN; | QUERY_STATUS_SUBMIT -1 | BEGIN; | QUERY_STATUS_DONE -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT @@ -148,6 +147,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; diff --git a/expected/yagp_guc_cache.out b/expected/yagp_guc_cache.out new file mode 100644 index 00000000000..3085cfa42e1 --- /dev/null +++ b/expected/yagp_guc_cache.out @@ -0,0 +1,57 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. 
+-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_DONE +(2 rows) + +-- SELECT below adds current user to ignore list and DONE must still be logged. +-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); + set_config +------------ + gpadmin +(1 row) + +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_START + QUERY_STATUS_END + QUERY_STATUS_DONE +(4 rows) + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/expected/yagp_utility.out b/expected/yagp_utility.out index 0a77859d8d4..7df1d2816eb 100644 --- a/expected/yagp_utility.out +++ b/expected/yagp_utility.out @@ -27,7 +27,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE test_table (a int, b text); | 
QUERY_STATUS_SUBMIT -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT @@ -37,6 +36,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT -1 | DROP TABLE test_table; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -55,7 +55,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT | DISTRIBUTED BY (a) +| | PARTITION BY RANGE (a) +| @@ -67,6 +66,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -84,7 +84,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+------------------------------------------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT @@ -94,6 
+93,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (10 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -114,26 +114,26 @@ BEGIN; ROLLBACK; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+-----------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+----------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | 
QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (18 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -154,12 +154,12 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -177,16 +177,16 @@ COPY (SELECT 1) TO STDOUT; DROP TABLE copy_test; RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+-----------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE - -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | CREATE TABLE copy_test (a int); | 
QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -204,7 +204,6 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT @@ -212,6 +211,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (8 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; @@ -227,12 +227,12 @@ RESET yagpcc.logging_mode; SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; segid | query_text | query_status -------+--------------------------------------------+--------------------- - -1 | SET yagpcc.logging_mode to 'TBL'; | QUERY_STATUS_DONE -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT -1 | RESET 
yagpcc.report_nested_queries; | QUERY_STATUS_DONE -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE (6 rows) SELECT yagpcc.truncate_log() IS NOT NULL AS t; diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index be46eb7149c..79f5401c8d1 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/sql/yagp_guc_cache.sql b/sql/yagp_guc_cache.sql new file mode 100644 index 00000000000..9e6de69d61e --- /dev/null +++ b/sql/yagp_guc_cache.sql @@ -0,0 +1,43 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. 
+-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +SELECT yagpcc.truncate_log(); +-- end_ignore + +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM yagpcc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.logging_mode TO 'TBL'; + +-- SET below disables utility logging and DONE must still be logged. +SET yagpcc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); + +-- SELECT below adds current user to ignore list and DONE must still be logged. +-- start_ignore +SELECT set_config('yagpcc.ignored_users_list', current_user, false); +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); + +DROP FUNCTION print_last_query(text); +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +RESET yagpcc.logging_mode; diff --git a/src/Config.cpp b/src/Config.cpp index dbd7e25b483..4fb58677018 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -27,45 +27,13 @@ static const struct config_enum_entry logging_mode_options[] = { {"tbl", LOG_MODE_TBL, false}, {NULL, 0, false}}; -static std::unique_ptr> ignored_users_set = - nullptr; static bool ignored_users_guc_dirty = false; -static void update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users_set = - std::make_unique>(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { - /* syntax error in list */ - 
ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users_set->insert((char *)lfirst(l)); - } - ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); - } - ignored_users_set = std::move(new_ignored_users_set); -} - static void assign_ignored_users_hook(const char *, void *) { ignored_users_guc_dirty = true; } -void Config::init() { +void Config::init_gucs() { DefineCustomStringVariable( "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, @@ -128,22 +96,40 @@ void Config::init() { GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } -std::string Config::uds_path() { return guc_uds_path; } -bool Config::enable_analyze() { return guc_enable_analyze; } -bool Config::enable_cdbstats() { return guc_enable_cdbstats; } -bool Config::enable_collector() { return guc_enable_collector; } -bool Config::enable_utility() { return guc_enable_utility; } -bool Config::report_nested_queries() { return guc_report_nested_queries; } -size_t Config::max_text_size() { return guc_max_text_size; } -size_t Config::max_plan_size() { return guc_max_plan_size * 1024; } -int Config::min_analyze_time() { return guc_min_analyze_time; }; -int Config::logging_mode() { return guc_logging_mode; } - -bool Config::filter_user(std::string username) { - if (!ignored_users_set) { +void Config::update_ignored_users(const char *new_guc_ignored_users) { + auto new_ignored_users_set = std::make_unique(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { + /* Need a modifiable copy of string */ + char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { + /* syntax 
error in list */ + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter yagpcc.ignored_users_list"))); + return; + } + foreach (l, elemlist) { + new_ignored_users_set->insert((char *)lfirst(l)); + } + ya_gpdb::pfree(rawstring); + ya_gpdb::list_free(elemlist); + } + ignored_users_ = std::move(new_ignored_users_set); +} + +bool Config::filter_user(const std::string &username) const { + if (!ignored_users_) { return true; } - return ignored_users_set->find(username) != ignored_users_set->end(); + return ignored_users_->find(username) != ignored_users_->end(); } void Config::sync() { @@ -151,4 +137,14 @@ void Config::sync() { update_ignored_users(guc_ignored_users); ignored_users_guc_dirty = false; } + uds_path_ = guc_uds_path; + enable_analyze_ = guc_enable_analyze; + enable_cdbstats_ = guc_enable_cdbstats; + enable_collector_ = guc_enable_collector; + enable_utility_ = guc_enable_utility; + report_nested_queries_ = guc_report_nested_queries; + max_text_size_ = static_cast(guc_max_text_size); + max_plan_size_ = static_cast(guc_max_plan_size); + min_analyze_time_ = guc_min_analyze_time; + logging_mode_ = guc_logging_mode; } diff --git a/src/Config.h b/src/Config.h index 7501c727a44..b4a393b0383 100644 --- a/src/Config.h +++ b/src/Config.h @@ -1,23 +1,44 @@ #pragma once +#include #include +#include #define LOG_MODE_UDS 0 #define LOG_MODE_TBL 1 +using IgnoredUsers = std::unordered_set; + class Config { public: - static void init(); - static std::string uds_path(); - static bool enable_analyze(); - static bool enable_cdbstats(); - static bool enable_collector(); - static bool enable_utility(); - static bool filter_user(std::string username); - static bool report_nested_queries(); - static size_t max_text_size(); - static size_t max_plan_size(); - static int min_analyze_time(); - static int logging_mode(); - static void sync(); -}; \ No newline at end of file + static 
void init_gucs(); + + void sync(); + + const std::string &uds_path() const { return uds_path_; } + bool enable_analyze() const { return enable_analyze_; } + bool enable_cdbstats() const { return enable_cdbstats_; } + bool enable_collector() const { return enable_collector_; } + bool enable_utility() const { return enable_utility_; } + bool report_nested_queries() const { return report_nested_queries_; } + size_t max_text_size() const { return max_text_size_; } + size_t max_plan_size() const { return max_plan_size_ * 1024; } + int min_analyze_time() const { return min_analyze_time_; } + int logging_mode() const { return logging_mode_; } + bool filter_user(const std::string &username) const; + +private: + void update_ignored_users(const char *new_guc_ignored_users); + + std::unique_ptr ignored_users_; + std::string uds_path_; + bool enable_analyze_; + bool enable_cdbstats_; + bool enable_collector_; + bool enable_utility_; + bool report_nested_queries_; + size_t max_text_size_; + size_t max_plan_size_; + int min_analyze_time_; + int logging_mode_; +}; diff --git a/src/EventSender.cpp b/src/EventSender.cpp index d638d275548..853a0c43fb9 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,4 +1,3 @@ -#include "Config.h" #include "UDSConnector.h" #include "memory/gpdbwrappers.h" #include "log/LogOps.h" @@ -22,10 +21,8 @@ extern "C" { #include "ProtoUtils.h" #define need_collect_analyze() \ - (Gp_role == GP_ROLE_DISPATCH && Config::min_analyze_time() >= 0 && \ - Config::enable_analyze()) - -static bool enable_utility = Config::enable_utility(); + (Gp_role == GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ + config.enable_analyze()) bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool utility) { @@ -38,16 +35,16 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, switch (state) { case QueryState::SUBMIT: - // Cache enable_utility at SUBMIT to ensure consistent behavior at DONE. 
- // Without caching, a query that sets enable_utility to false from true - // would be accepted at SUBMIT (guc is true) but rejected at DONE (guc - // is false), causing a leak. - enable_utility = Config::enable_utility(); - if (utility && enable_utility == false) { + // Cache GUCs once at SUBMIT. Synced GUCs are visible to all subsequent + // states. Without caching, a query that unsets/sets filtering GUCs would + // see different filter criteria at DONE, because at SUBMIT the query was + // not executed yet, causing DONE to be skipped/added. + config.sync(); + + if (utility && !config.enable_utility()) { return false; } - // Sync config in case current query changes it. - Config::sync(); + // Register qkey for a nested query we won't report, // so we can detect nesting_level > 0 and skip reporting at end/done. if (!need_report_nested_query() && nesting_level > 0) { @@ -65,7 +62,7 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, } break; case QueryState::DONE: - if (utility && enable_utility == false) { + if (utility && !config.enable_utility()) { return false; } default: @@ -85,9 +82,9 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, const std::string &event, bool utility) { bool clear_big_fields = false; - switch (Config::logging_mode()) { + switch (config.logging_mode()) { case LOG_MODE_UDS: - clear_big_fields = UDSConnector::report_query(req, event); + clear_big_fields = UDSConnector::report_query(req, event, config); break; case LOG_MODE_TBL: ya_gpdb::insert_log(req, utility); @@ -135,12 +132,12 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { return; } - if (Gp_role == GP_ROLE_DISPATCH && Config::enable_analyze() && + if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { query_desc->instrument_options |= INSTRUMENT_BUFFERS; query_desc->instrument_options |= 
INSTRUMENT_ROWS; query_desc->instrument_options |= INSTRUMENT_TIMER; - if (Config::enable_cdbstats()) { + if (config.enable_cdbstats()) { query_desc->instrument_options |= INSTRUMENT_CDB; if (!query_desc->showstatctx) { instr_time starttime; @@ -161,7 +158,7 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { auto query_msg = query.message.get(); *query_msg->mutable_start_time() = current_ts(); update_query_state(query, QueryState::START, false /* utility */); - set_query_plan(query_msg, query_desc); + set_query_plan(query_msg, query_desc, config); if (need_collect_analyze()) { // Set up to track total elapsed time during query run. // Make sure the space is allocated in the per-query @@ -214,7 +211,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { set_query_info(query_msg); set_qi_nesting_level(query_msg, nesting_level); set_qi_slice_id(query_msg); - set_query_text(query_msg, query_desc); + set_query_text(query_msg, query_desc, config); if (log_query_req(*query_msg, "submit", utility)) { clear_big_fields(query_msg); } @@ -271,8 +268,8 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, ereport(DEBUG3, (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); } else { - set_qi_error_message(query_msg, - error_flushed ? edata->message : elog_message()); + set_qi_error_message( + query_msg, error_flushed ? 
edata->message : elog_message(), config); } } if (prev_state == START) { @@ -331,8 +328,8 @@ void EventSender::ic_metrics_collect() { if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { return; } - if (!proto_verified || gp_command_count == 0 || !Config::enable_collector() || - Config::filter_user(get_user_name())) { + if (!proto_verified || gp_command_count == 0 || !config.enable_collector() || + config.filter_user(get_user_name())) { return; } // we also would like to know nesting level here and filter queries BUT we @@ -374,15 +371,18 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { ya_gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; - if (ms >= Config::min_analyze_time()) { + if (ms >= config.min_analyze_time()) { auto &query = get_query(query_desc); auto *query_msg = query.message.get(); - set_analyze_plan_text(query_desc, query_msg); + set_analyze_plan_text(query_desc, query_msg, config); } } EventSender::EventSender() { - if (Config::enable_collector()) { + // Perform initial sync to get default GUC values + config.sync(); + + if (config.enable_collector()) { try { GOOGLE_PROTOBUF_VERIFY_VERSION; proto_verified = true; @@ -486,5 +486,19 @@ bool EventSender::qdesc_submitted(QueryDesc *query_desc) { return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); } +bool EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) { + return need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); +} + +bool EventSender::need_report_nested_query() { + return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; +} + +bool EventSender::filter_query(QueryDesc *query_desc) { + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !config.enable_collector() || config.filter_user(get_user_name()); +} + EventSender::QueryItem::QueryItem(QueryState st) : message(std::make_unique()), state(st) {} diff --git a/src/EventSender.h 
b/src/EventSender.h index 6e195eeacdf..e9acb04422b 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -14,6 +14,7 @@ extern "C" { #undef typeid #include "memory/gpdbwrappers.h" +#include "Config.h" class UDSConnector; struct QueryDesc; @@ -108,8 +109,8 @@ class EventSender { explicit QueryItem(QueryState st); }; - static bool log_query_req(const yagpcc::SetQueryReq &req, - const std::string &event, bool utility); + bool log_query_req(const yagpcc::SetQueryReq &req, const std::string &event, + bool utility); bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); void update_query_state(QueryItem &query, QueryState new_state, bool utility, bool success = true); @@ -123,6 +124,9 @@ class EventSender { QueryMetricsStatus status, ErrorData *edata = NULL); void update_nested_counters(QueryDesc *query_desc); bool qdesc_submitted(QueryDesc *query_desc); + bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); + bool need_report_nested_query(); + bool filter_query(QueryDesc *query_desc); bool proto_verified = false; int nesting_level = 0; @@ -132,4 +136,6 @@ class EventSender { ICStatistics ic_statistics; #endif std::unordered_map queries; + + Config config; }; \ No newline at end of file diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 96f46429643..7e53abdabbf 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -65,17 +65,3 @@ bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { } return query_desc->yagp_query_key->nesting_level == 0; } - -bool nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return need_report_nested_query() || - is_top_level_query(query_desc, nesting_level); -} - -bool need_report_nested_query() { - return Config::report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; -} - -bool filter_query(QueryDesc *query_desc) { - return gp_command_count == 0 || query_desc->sourceText == nullptr || - !Config::enable_collector() || Config::filter_user(get_user_name()); -} diff --git 
a/src/PgUtils.h b/src/PgUtils.h index 02f084c597a..e9715ce10f4 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -9,6 +9,3 @@ std::string get_user_name(); std::string get_db_name(); std::string get_rg_name(); bool is_top_level_query(QueryDesc *query_desc, int nesting_level); -bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); -bool need_report_nested_query(); -bool filter_query(QueryDesc *query_desc); diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index aa8632477f5..8ebbe19e289 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -82,7 +82,8 @@ std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { return std::string(str, cut_pos); } -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER @@ -93,10 +94,10 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { ExplainState es = ya_gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, - Config::max_plan_size()); + config.max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); *qi->mutable_template_plan_text() = trim_str_shrink_utf8( - norm_plan->data, norm_plan->len, Config::max_plan_size()); + norm_plan->data, norm_plan->len, config.max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); qi->set_query_id(query_desc->plannedstmt->queryId); @@ -107,15 +108,16 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { } } -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc) { +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && 
query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), - Config::max_text_size()); + config.max_text_size()); char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); *qi->mutable_template_query_text() = trim_str_shrink_utf8( - norm_query, strlen(norm_query), Config::max_text_size()); + norm_query, strlen(norm_query), config.max_text_size()); } } @@ -150,10 +152,11 @@ void set_qi_slice_id(yagpcc::SetQueryReq *req) { aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg) { +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config &config) { auto aqi = req->mutable_add_info(); *aqi->mutable_error_message() = - trim_str_shrink_utf8(err_msg, strlen(err_msg), Config::max_text_size()); + trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); } void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, @@ -257,7 +260,8 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, + const Config &config) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. if (!IsTransactionState() || !query_desc->plannedstmt) { @@ -266,15 +270,15 @@ void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req) { MemoryContext oldcxt = ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); ExplainState es = ya_gpdb::get_analyze_state( - query_desc, query_desc->instrument_options && Config::enable_analyze()); + query_desc, query_desc->instrument_options && config.enable_analyze()); ya_gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { // Remove last line break. 
if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { es.str->data[--es.str->len] = '\0'; } - auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, - Config::max_plan_size()); + auto trimmed_analyze = + trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); ya_gpdb::pfree(es.str->data); } diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 725a634f765..37b7e4a8a29 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -4,19 +4,24 @@ struct QueryDesc; struct ICStatistics; +class Config; google::protobuf::Timestamp current_ts(); -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc); -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc); +void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); void clear_big_fields(yagpcc::SetQueryReq *req); void set_query_info(yagpcc::SetQueryReq *req); void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); void set_qi_slice_id(yagpcc::SetQueryReq *req); -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg); +void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, + const Config &config); void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); void set_ic_stats(yagpcc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message); \ No newline at end of file +void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message, + const Config &config); diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 
a7eaed539f7..74fd57a3ac0 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -25,10 +25,11 @@ static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, } bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, - const std::string &event) { + const std::string &event, + const Config &config) { sockaddr_un address; address.sun_family = AF_UNIX; - std::string uds_path = Config::uds_path(); + const std::string &uds_path = config.uds_path(); if (uds_path.size() >= sizeof(address.sun_path)) { ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); YagpStat::report_error(); diff --git a/src/UDSConnector.h b/src/UDSConnector.h index f0dfcb77a3f..9483407159d 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -2,8 +2,10 @@ #include "protos/yagpcc_set_service.pb.h" +class Config; + class UDSConnector { public: bool static report_query(const yagpcc::SetQueryReq &req, - const std::string &event); -}; \ No newline at end of file + const std::string &event, const Config &config); +}; diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6822032fe0d..a3d2f155fd8 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -478,10 +478,6 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, dest, params, queryEnv, 0); } - /* GPDB hook for collecting query info */ - if (query_info_collect_hook) - (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); - if (into->skipData) { /* @@ -495,6 +491,10 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, } else { + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + check_and_unassign_from_resgroup(queryDesc->plannedstmt); queryDesc->plannedstmt->query_mem = ResourceManagerGetQueryMemoryLimit(queryDesc->plannedstmt); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 
1555ea9d334..dc8efd4d892 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -63,6 +63,7 @@ #include "tcop/tcopprot.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/metrics_utils.h" #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -842,6 +843,10 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); + /* GPDB hook for collecting query info */ + if (query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_SUBMIT, queryDesc); + RestoreOidAssignments(saved_dispatch_oids); /* call ExecutorStart to prepare the plan for execution */ diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 4817c14f07d..553830e8599 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -35,6 +35,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" +#include "utils/metrics_utils.h" #include "utils/snapmgr.h" #include "cdb/cdbendpoint.h" @@ -373,6 +374,10 @@ PortalCleanup(Portal portal) FreeQueryDesc(queryDesc); CurrentResourceOwner = saveResourceOwner; + } else { + /* GPDB hook for collecting query info */ + if (queryDesc->yagp_query_key && query_info_collect_hook) + (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); } } diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 56c1da9f4f6..8cf74641c29 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -71,7 +71,7 @@ R cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) { } void hooks_init() { - Config::init(); + Config::init_gucs(); YagpStat::init(); previous_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = ya_ExecutorStart_hook; diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp index cec9e33693a..56bdf1dca62 100644 --- a/src/log/LogOps.cpp +++ b/src/log/LogOps.cpp @@ -38,9 +38,9 @@ void init_log() { log_relname.data() /* relname */, namespaceId /* namespace */, 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, InvalidOid /* reloftypeid */, GetUserId() /* owner */, HEAP_TABLE_AM_OID, - DescribeTuple() /* rel tuple */, NIL, RELKIND_RELATION, - RELPERSISTENCE_PERMANENT, false, false, ONCOMMIT_NOOP, - NULL /* GP Policy */, (Datum)0, false /* use_user_acl */, true, true, + DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, RELKIND_RELATION, + RELPERSISTENCE_PERMANENT, false /* shared_relation */, false /* mapped_relation */, ONCOMMIT_NOOP, + NULL /* GP Policy */, (Datum)0 /* reloptions */, false /* use_user_acl */, true /* allow_system_table_mods */, true /* is_internal */, InvalidOid /* relrewrite */, NULL /* typaddress */, false /* valid_opts */); diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index c19805ce506..54c8b2cf59f 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -17,20 +17,20 @@ #include "pg_stat_statements_ya_parser.h" -#ifndef ICONST -#define ICONST 276 -#endif #ifndef FCONST -#define FCONST 277 +#define FCONST 260 #endif #ifndef SCONST -#define SCONST 278 +#define SCONST 261 #endif #ifndef BCONST -#define BCONST 279 +#define BCONST 263 #endif #ifndef XCONST -#define XCONST 280 +#define XCONST 264 +#endif +#ifndef ICONST +#define ICONST 266 #endif static void fill_in_constant_lengths(JumbleState *jstate, const char *query); From ea705d9d1ec48013b34bb147a3e02c94f640c84f Mon Sep 17 00:00:00 2001 
From: NJrslv Date: Tue, 20 Jan 2026 17:03:53 +0300 Subject: [PATCH 38/49] [yagp_hooks_collector] Add UDS round-trip test and fix send() accounting Add regression test for UDS transport. Fix send() return value: do not add -1 to total_bytes_sent on error. General refactoring. --- expected/yagp_uds.out | 42 +++++++++ gpcontrib/yagp_hooks_collector/Makefile | 2 +- sql/yagp_uds.sql | 31 +++++++ src/Config.cpp | 10 +- src/Config.h | 8 +- src/UDSConnector.cpp | 117 +++++++++++++----------- src/hook_wrappers.cpp | 96 ++++++++++++++++++- src/hook_wrappers.h | 4 + src/yagp_hooks_collector.c | 64 ++++++++++++- yagp_hooks_collector--1.1.sql | 15 +++ 10 files changed, 318 insertions(+), 71 deletions(-) create mode 100644 expected/yagp_uds.out create mode 100644 sql/yagp_uds.sql diff --git a/expected/yagp_uds.out b/expected/yagp_uds.out new file mode 100644 index 00000000000..d04929ffb4a --- /dev/null +++ b/expected/yagp_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore +\set UDS_PATH '/tmp/yagpcc_test.sock' +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index 79f5401c8d1..eb6541b7687 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/sql/yagp_uds.sql b/sql/yagp_uds.sql new file mode 100644 index 00000000000..3eef697a4e7 --- /dev/null +++ b/sql/yagp_uds.sql @@ -0,0 +1,31 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +-- end_ignore + +\set UDS_PATH '/tmp/yagpcc_test.sock' + +-- Configure extension to send via UDS +SET yagpcc.uds_path TO :'UDS_PATH'; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable TO TRUE; +SET yagpcc.logging_mode TO 'UDS'; + +-- Start receiver +SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); + +-- Send +SELECT 1; + +-- Receive +SELECT yagpcc.__test_uds_receive() > 0 as received; + +-- Stop receiver +SELECT yagpcc.__test_uds_stop_server(); + +-- Cleanup +DROP EXTENSION yagp_hooks_collector; +RESET yagpcc.uds_path; +RESET yagpcc.ignored_users_list; +RESET yagpcc.enable; +RESET yagpcc.logging_mode; diff --git a/src/Config.cpp b/src/Config.cpp index 4fb58677018..2c2032ebb03 
100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -16,9 +16,9 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = true; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1 << 20; // in bytes (1MB) -static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = 10000; // in ms +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms static int guc_logging_mode = LOG_MODE_UDS; static bool guc_enable_utility = false; @@ -143,8 +143,8 @@ void Config::sync() { enable_collector_ = guc_enable_collector; enable_utility_ = guc_enable_utility; report_nested_queries_ = guc_report_nested_queries; - max_text_size_ = static_cast(guc_max_text_size); - max_plan_size_ = static_cast(guc_max_plan_size); + max_text_size_ = guc_max_text_size; + max_plan_size_ = guc_max_plan_size; min_analyze_time_ = guc_min_analyze_time; logging_mode_ = guc_logging_mode; } diff --git a/src/Config.h b/src/Config.h index b4a393b0383..aa6b5bdc0ba 100644 --- a/src/Config.h +++ b/src/Config.h @@ -21,8 +21,8 @@ class Config { bool enable_collector() const { return enable_collector_; } bool enable_utility() const { return enable_utility_; } bool report_nested_queries() const { return report_nested_queries_; } - size_t max_text_size() const { return max_text_size_; } - size_t max_plan_size() const { return max_plan_size_ * 1024; } + int max_text_size() const { return max_text_size_; } + int max_plan_size() const { return max_plan_size_ * 1024; } int min_analyze_time() const { return min_analyze_time_; } int logging_mode() const { return logging_mode_; } bool filter_user(const std::string &username) const; @@ -37,8 +37,8 @@ class Config { bool enable_collector_; bool enable_utility_; bool report_nested_queries_; - size_t max_text_size_; - size_t max_plan_size_; + int max_text_size_; + int 
max_plan_size_; int min_analyze_time_; int logging_mode_; }; diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index 74fd57a3ac0..ea118fca783 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -27,66 +27,77 @@ static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const std::string &event, const Config &config) { - sockaddr_un address; + sockaddr_un address{}; address.sun_family = AF_UNIX; - const std::string &uds_path = config.uds_path(); + const auto &uds_path = config.uds_path(); + if (uds_path.size() >= sizeof(address.sun_path)) { ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); YagpStat::report_error(); return false; } strcpy(address.sun_path, uds_path.c_str()); - bool success = true; - auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (sockfd != -1) { - if (fcntl(sockfd, F_SETFL, O_NONBLOCK) != -1) { - if (connect(sockfd, (sockaddr *)&address, sizeof(address)) != -1) { - auto data_size = req.ByteSize(); - auto total_size = data_size + sizeof(uint32_t); - uint8_t *buf = (uint8_t *)ya_gpdb::palloc(total_size); - uint32_t *size_payload = (uint32_t *)buf; - *size_payload = data_size; - req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); - int64_t sent = 0, sent_total = 0; - do { - sent = send(sockfd, buf + sent_total, total_size - sent_total, - MSG_DONTWAIT); - sent_total += sent; - } while ( - sent > 0 && size_t(sent_total) != total_size && - // the line below is a small throttling hack: - // if a message does not fit a single packet, we take a nap - // before sending the next one. 
- // Otherwise, MSG_DONTWAIT send might overflow the UDS - (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); - if (sent < 0) { - log_tracing_failure(req, event); - success = false; - YagpStat::report_bad_send(total_size); - } else { - YagpStat::report_send(total_size); - } - ya_gpdb::pfree(buf); - } else { - // log the error and go on - log_tracing_failure(req, event); - success = false; - YagpStat::report_bad_connection(); - } - } else { - // That's a very important error that should never happen, so make it - // visible to an end-user and admins. - ereport(WARNING, - (errmsg("Unable to create non-blocking socket connection %m"))); - success = false; - YagpStat::report_error(); - } - close(sockfd); - } else { - // log the error and go on + + const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd == -1) { log_tracing_failure(req, event); - success = false; YagpStat::report_error(); + return false; } - return success; -} \ No newline at end of file + + // Close socket automatically on error path. + struct SockGuard { + int fd; + ~SockGuard() { close(fd); } + } sock_guard{sockfd}; + + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. + ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %m"))); + YagpStat::report_error(); + return false; + } + + if (connect(sockfd, reinterpret_cast(&address), + sizeof(address)) == -1) { + log_tracing_failure(req, event); + YagpStat::report_bad_connection(); + return false; + } + + const auto data_size = req.ByteSize(); + const auto total_size = data_size + sizeof(uint32_t); + auto *buf = static_cast(ya_gpdb::palloc(total_size)); + // Free buf automatically on error path. 
+ struct BufGuard { + void *p; + ~BufGuard() { ya_gpdb::pfree(p); } + } buf_guard{buf}; + + *reinterpret_cast(buf) = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + + int64_t sent = 0, sent_total = 0; + do { + sent = + send(sockfd, buf + sent_total, total_size - sent_total, MSG_DONTWAIT); + if (sent > 0) + sent_total += sent; + } while (sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. + // Otherwise, MSG_DONTWAIT send might overflow the UDS + (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + + if (sent < 0) { + log_tracing_failure(req, event); + YagpStat::report_bad_send(total_size); + return false; + } + + YagpStat::report_send(total_size); + return true; +} diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 8cf74641c29..602a2470805 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -11,6 +11,12 @@ extern "C" { #include "cdb/ml_ipc.h" #include "tcop/utility.h" #include "stat_statements_parser/pg_stat_statements_ya_parser.h" + +#include +#include +#include +#include +#include } #undef typeid @@ -52,6 +58,13 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc); +#define TEST_MAX_CONNECTIONS 4 +#define TEST_RCV_BUF_SIZE 8192 +#define TEST_POLL_TIMEOUT_MS 200 + +static int test_server_fd = -1; +static char *test_sock_path = NULL; + static EventSender *sender = nullptr; static inline EventSender *get_sender() { @@ -226,8 +239,9 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, } get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_DONE, - (void *)query_desc, true /* utility */, (ErrorData *)NULL); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + 
METRICS_QUERY_DONE, (void *)query_desc, true /* utility */, + (ErrorData *)NULL); pfree(query_desc); } @@ -242,8 +256,9 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, MemoryContextSwitchTo(oldctx); get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, METRICS_QUERY_ERROR, - (void *)query_desc, true /* utility */, edata); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_ERROR, (void *)query_desc, true /* utility */, + edata); pfree(query_desc); ReThrowError(edata); @@ -294,4 +309,77 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); +} + +void test_uds_stop_server() { + if (test_server_fd >= 0) { + close(test_server_fd); + test_server_fd = -1; + } + if (test_sock_path) { + unlink(test_sock_path); + pfree(test_sock_path); + test_sock_path = NULL; + } +} + +void test_uds_start_server(const char *path) { + struct sockaddr_un addr = {.sun_family = AF_UNIX}; + + if (strlen(path) >= sizeof(addr.sun_path)) + ereport(ERROR, (errmsg("path too long"))); + + test_uds_stop_server(); + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + test_sock_path = MemoryContextStrdup(TopMemoryContext, path); + unlink(path); + + if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(test_server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 || + listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) { + test_uds_stop_server(); + ereport(ERROR, (errmsg("socket setup failed: %m"))); + } +} + +int64 test_uds_receive(int timeout_ms) { + char buf[TEST_RCV_BUF_SIZE]; + int rc; + struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN}; + int64 total = 0; + + if (test_server_fd < 0) + ereport(ERROR, (errmsg("server not started"))); + + for (;;) { + CHECK_FOR_INTERRUPTS(); + rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS)); + if (rc > 
0) + break; + if (rc < 0 && errno != EINTR) + ereport(ERROR, (errmsg("poll: %m"))); + timeout_ms -= TEST_POLL_TIMEOUT_MS; + if (timeout_ms <= 0) + return total; + } + + if (pfd.revents & POLLIN) { + int client = accept(test_server_fd, NULL, NULL); + ssize_t n; + + if (client < 0) + ereport(ERROR, (errmsg("accept: %m"))); + + while ((n = recv(client, buf, sizeof(buf), 0)) != 0) { + if (n > 0) + total += n; + else if (errno != EINTR) + break; + } + + close(client); + } + + return total; } \ No newline at end of file diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index cfabf39485e..236c6eb9d79 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -12,6 +12,10 @@ extern Datum yagp_functions_get(FunctionCallInfo fcinfo); extern void init_log(); extern void truncate_log(); +extern void test_uds_start_server(const char *path); +extern int64_t test_uds_receive(int timeout_ms); +extern void test_uds_stop_server(); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index 27fd0e04b26..f7863a38921 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -14,16 +14,18 @@ PG_FUNCTION_INFO_V1(yagp_stat_messages); PG_FUNCTION_INFO_V1(yagp_init_log); PG_FUNCTION_INFO_V1(yagp_truncate_log); +PG_FUNCTION_INFO_V1(yagp_test_uds_start_server); +PG_FUNCTION_INFO_V1(yagp_test_uds_receive); +PG_FUNCTION_INFO_V1(yagp_test_uds_stop_server); + void _PG_init(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) hooks_init(); - } } void _PG_fini(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) { + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) hooks_deinit(); - } } Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { @@ -65,3 +67,57 @@ Datum yagp_truncate_log(PG_FUNCTION_ARGS) { funcctx = SRF_PERCALL_SETUP(); SRF_RETURN_DONE(funcctx); } + +Datum 
yagp_test_uds_start_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); + test_uds_start_server(path); + pfree(path); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + int64 *result; + + if (SRF_IS_FIRSTCALL()) { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + result = (int64 *)palloc(sizeof(int64)); + funcctx->user_fctx = result; + funcctx->max_calls = 1; + MemoryContextSwitchTo(oldcontext); + + int timeout_ms = PG_GETARG_INT32(0); + *result = test_uds_receive(timeout_ms); + } + + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < funcctx->max_calls) { + result = (int64 *)funcctx->user_fctx; + SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); + } + + SRF_RETURN_DONE(funcctx); +} + +Datum yagp_test_uds_stop_server(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + test_uds_stop_server(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} diff --git a/yagp_hooks_collector--1.1.sql b/yagp_hooks_collector--1.1.sql index e0e94b51493..83bfb553638 100644 --- a/yagp_hooks_collector--1.1.sql +++ b/yagp_hooks_collector--1.1.sql @@ -93,3 +93,18 @@ BEGIN PERFORM yagpcc.__truncate_log_on_segments(); END; $$ LANGUAGE plpgsql VOLATILE; + +CREATE FUNCTION yagpcc.__test_uds_start_server(path text) +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'yagp_test_uds_start_server' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_receive(timeout_ms int DEFAULT 2000) +RETURNS SETOF bigint +AS 'MODULE_PATHNAME', 'yagp_test_uds_receive' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION yagpcc.__test_uds_stop_server() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 
'yagp_test_uds_stop_server' +LANGUAGE C EXECUTE ON MASTER; From 3eda4ae6b9a0a286bc1aadef1e2546cc26c0dea9 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 21 Jan 2026 12:53:47 +0000 Subject: [PATCH 39/49] [yagp_hooks_collector] Fix locale-dependent normalization crash Make gen_normquery() and gen_normplan() noexcept. Wide-character conversion can fail for locales that cannot handle the input charset. --- expected/yagp_locale.out | 23 ++++++++++++++++++++ gpcontrib/yagp_hooks_collector/Makefile | 2 +- sql/yagp_locale.sql | 29 +++++++++++++++++++++++++ src/ProtoUtils.cpp | 19 ++++++++++------ src/memory/gpdbwrappers.cpp | 11 ++++------ src/memory/gpdbwrappers.h | 4 ++-- 6 files changed, 71 insertions(+), 17 deletions(-) create mode 100644 expected/yagp_locale.out create mode 100644 sql/yagp_locale.sql diff --git a/expected/yagp_locale.out b/expected/yagp_locale.out new file mode 100644 index 00000000000..6689b6a4ed3 --- /dev/null +++ b/expected/yagp_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale +CREATE EXTENSION yagp_hooks_collector; +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index eb6541b7687..d145ae46dbe 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -1,7 +1,7 @@ MODULE_big = yagp_hooks_collector EXTENSION = yagp_hooks_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds +REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds yagp_locale PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) diff --git a/sql/yagp_locale.sql b/sql/yagp_locale.sql new file mode 100644 index 00000000000..65d867d1680 --- /dev/null +++ b/sql/yagp_locale.sql @@ -0,0 +1,29 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+ +-- start_ignore +DROP DATABASE IF EXISTS yagp_test_locale; +-- end_ignore + +CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c yagp_test_locale + +CREATE EXTENSION yagp_hooks_collector; + +SET yagpcc.ignored_users_list TO ''; +SET yagpcc.enable_utility TO TRUE; +SET yagpcc.enable TO TRUE; + +CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +RESET yagpcc.enable; +RESET yagpcc.enable_utility; +RESET yagpcc.ignored_users_list; +DROP TABLE yagp_hi_안녕세계; +DROP EXTENSION yagp_hooks_collector; diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index 8ebbe19e289..f9119ca4b14 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -96,13 +96,15 @@ void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); - *qi->mutable_template_plan_text() = trim_str_shrink_utf8( - norm_plan->data, norm_plan->len, config.max_plan_size()); - qi->set_plan_id( - hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + if (norm_plan) { + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( + norm_plan->data, norm_plan->len, config.max_plan_size()); + qi->set_plan_id( + hash_any((unsigned char *)norm_plan->data, norm_plan->len)); + ya_gpdb::pfree(norm_plan->data); + } qi->set_query_id(query_desc->plannedstmt->queryId); ya_gpdb::pfree(es.str->data); - ya_gpdb::pfree(norm_plan->data); } ya_gpdb::mem_ctx_switch_to(oldcxt); } @@ -116,8 +118,11 @@ void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, query_desc->sourceText, strlen(query_desc->sourceText), config.max_text_size()); char *norm_query = 
ya_gpdb::gen_normquery(query_desc->sourceText); - *qi->mutable_template_query_text() = trim_str_shrink_utf8( - norm_query, strlen(norm_query), config.max_text_size()); + if (norm_query) { + *qi->mutable_template_query_text() = trim_str_shrink_utf8( + norm_query, strlen(norm_query), config.max_text_size()); + ya_gpdb::pfree(norm_query); + } } } diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 763e32e539c..8cc483a39de 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -204,15 +204,12 @@ void ya_gpdb::instr_end_loop(Instrumentation *instr) { wrap_throw(::InstrEndLoop, instr); } -char *ya_gpdb::gen_normquery(const char *query) { - return wrap_throw(::gen_normquery, query); +char *ya_gpdb::gen_normquery(const char *query) noexcept { + return wrap_noexcept(::gen_normquery, query); } -StringInfo ya_gpdb::gen_normplan(const char *exec_plan) { - if (!exec_plan) - throw std::runtime_error("Invalid execution plan string"); - - return wrap_throw(::gen_normplan, exec_plan); +StringInfo ya_gpdb::gen_normplan(const char *exec_plan) noexcept { + return wrap_noexcept(::gen_normplan, exec_plan); } char *ya_gpdb::get_rg_name_for_id(Oid group_id) { diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index 920fc1ae6e7..e080ef5cdd4 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -38,8 +38,8 @@ HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime); void instr_end_loop(Instrumentation *instr); -char *gen_normquery(const char *query); -StringInfo gen_normplan(const char *executionPlan); +char *gen_normquery(const char *query) noexcept; +StringInfo gen_normplan(const char *executionPlan) noexcept; char *get_rg_name_for_id(Oid group_id); void insert_log(const yagpcc::SetQueryReq &req, bool utility); From 6c98695cf98890ebcdba9e783d6453fb0c6149d9 Mon Sep 17 00:00:00 2001 From: NJrslv 
Date: Wed, 21 Jan 2026 13:52:56 +0000 Subject: [PATCH 40/49] [yagp_hooks_collector] Add Apache license headers and enable -Werror --- gpcontrib/yagp_hooks_collector/Makefile | 2 +- pom.xml | 6 ++++ src/Config.cpp | 27 +++++++++++++++ src/Config.h | 27 +++++++++++++++ src/EventSender.cpp | 27 +++++++++++++++ src/EventSender.h | 27 +++++++++++++++ src/PgUtils.cpp | 27 +++++++++++++++ src/PgUtils.h | 27 +++++++++++++++ src/ProcStats.cpp | 27 +++++++++++++++ src/ProcStats.h | 27 +++++++++++++++ src/ProtoUtils.cpp | 27 +++++++++++++++ src/ProtoUtils.h | 27 +++++++++++++++ src/UDSConnector.cpp | 29 +++++++++++++++- src/UDSConnector.h | 27 +++++++++++++++ src/YagpStat.cpp | 27 +++++++++++++++ src/YagpStat.h | 27 +++++++++++++++ src/hook_wrappers.cpp | 33 +++++++++++++++++-- src/hook_wrappers.h | 27 +++++++++++++++ src/log/LogOps.cpp | 27 +++++++++++++++ src/log/LogOps.h | 27 +++++++++++++++ src/log/LogSchema.cpp | 27 +++++++++++++++ src/log/LogSchema.h | 27 +++++++++++++++ src/memory/gpdbwrappers.cpp | 27 +++++++++++++++ src/memory/gpdbwrappers.h | 27 +++++++++++++++ src/stat_statements_parser/README.md | 1 + .../pg_stat_statements_ya_parser.c | 27 +++++++++++++++ .../pg_stat_statements_ya_parser.h | 27 +++++++++++++++ src/yagp_hooks_collector.c | 27 +++++++++++++++ 28 files changed, 688 insertions(+), 4 deletions(-) create mode 100644 src/stat_statements_parser/README.md diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/yagp_hooks_collector/Makefile index d145ae46dbe..49825c55f35 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/yagp_hooks_collector/Makefile @@ -10,7 +10,7 @@ C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) -override CXXFLAGS = -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ +override CXXFLAGS = -Werror -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ -Wmissing-format-attribute -Wformat-security 
-fno-strict-aliasing -fwrapv \ -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD diff --git a/pom.xml b/pom.xml index cfa44a0d6aa..aada6870658 100644 --- a/pom.xml +++ b/pom.xml @@ -154,6 +154,12 @@ code or new licensing patterns. gpcontrib/gp_exttable_fdw/gp_exttable_fdw.control gpcontrib/diskquota/** + gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control + gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto + gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto + gpcontrib/yagp_hooks_collector/.clang-format + gpcontrib/yagp_hooks_collector/Makefile getversion .git-blame-ignore-revs diff --git a/src/Config.cpp b/src/Config.cpp index 2c2032ebb03..62c16e91d1f 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * Config.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/Config.cpp + * + *------------------------------------------------------------------------- + */ + #include "Config.h" #include "memory/gpdbwrappers.h" #include diff --git a/src/Config.h b/src/Config.h index aa6b5bdc0ba..01ae5ea328e 100644 --- a/src/Config.h +++ b/src/Config.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * Config.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/Config.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/src/EventSender.cpp b/src/EventSender.cpp index 853a0c43fb9..f1cc0cc6ea1 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * EventSender.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.cpp + * + *------------------------------------------------------------------------- + */ + #include "UDSConnector.h" #include "memory/gpdbwrappers.h" #include "log/LogOps.h" diff --git a/src/EventSender.h b/src/EventSender.h index e9acb04422b..ef7dcb0bf8c 100644 --- a/src/EventSender.h +++ b/src/EventSender.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * EventSender.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/EventSender.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/src/PgUtils.cpp b/src/PgUtils.cpp index 7e53abdabbf..ed4bf4d7e64 100644 --- a/src/PgUtils.cpp +++ b/src/PgUtils.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * PgUtils.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/PgUtils.cpp + * + *------------------------------------------------------------------------- + */ + #include "PgUtils.h" #include "Config.h" #include "memory/gpdbwrappers.h" diff --git a/src/PgUtils.h b/src/PgUtils.h index e9715ce10f4..5113fadbff2 100644 --- a/src/PgUtils.h +++ b/src/PgUtils.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * PgUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/PgUtils.h + * + *------------------------------------------------------------------------- + */ + extern "C" { #include "postgres.h" #include "commands/explain.h" diff --git a/src/ProcStats.cpp b/src/ProcStats.cpp index 5c09fa0bce4..72a12e8ca00 100644 --- a/src/ProcStats.cpp +++ b/src/ProcStats.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.cpp + * + *------------------------------------------------------------------------- + */ + #include "ProcStats.h" #include "yagpcc_metrics.pb.h" #include diff --git a/src/ProcStats.h b/src/ProcStats.h index 30a90a60519..7629edd0aea 100644 --- a/src/ProcStats.h +++ b/src/ProcStats.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * ProcStats.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProcStats.h + * + *------------------------------------------------------------------------- + */ + #pragma once namespace yagpcc { diff --git a/src/ProtoUtils.cpp b/src/ProtoUtils.cpp index f9119ca4b14..b449ae20900 100644 --- a/src/ProtoUtils.cpp +++ b/src/ProtoUtils.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * ProtoUtils.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp + * + *------------------------------------------------------------------------- + */ + #include "ProtoUtils.h" #include "PgUtils.h" #include "ProcStats.h" diff --git a/src/ProtoUtils.h b/src/ProtoUtils.h index 37b7e4a8a29..c954545494f 100644 --- a/src/ProtoUtils.h +++ b/src/ProtoUtils.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/ProtoUtils.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include "protos/yagpcc_set_service.pb.h" diff --git a/src/UDSConnector.cpp b/src/UDSConnector.cpp index ea118fca783..d13a82a5ca9 100644 --- a/src/UDSConnector.cpp +++ b/src/UDSConnector.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * UDSConnector.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp + * + *------------------------------------------------------------------------- + */ + #include "UDSConnector.h" #include "Config.h" #include "YagpStat.h" @@ -67,7 +94,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, return false; } - const auto data_size = req.ByteSize(); + const auto data_size = req.ByteSizeLong(); const auto total_size = data_size + sizeof(uint32_t); auto *buf = static_cast(ya_gpdb::palloc(total_size)); // Free buf automatically on error path. 
diff --git a/src/UDSConnector.h b/src/UDSConnector.h index 9483407159d..be5ab1ef413 100644 --- a/src/UDSConnector.h +++ b/src/UDSConnector.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * UDSConnector.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/UDSConnector.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include "protos/yagpcc_set_service.pb.h" diff --git a/src/YagpStat.cpp b/src/YagpStat.cpp index 879cde85212..3a760b6ea97 100644 --- a/src/YagpStat.cpp +++ b/src/YagpStat.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * YagpStat.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/YagpStat.cpp + * + *------------------------------------------------------------------------- + */ + #include "YagpStat.h" #include diff --git a/src/YagpStat.h b/src/YagpStat.h index 110b1fdcbb1..57fc90cd4d1 100644 --- a/src/YagpStat.h +++ b/src/YagpStat.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * YagpStat.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/YagpStat.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/src/hook_wrappers.cpp b/src/hook_wrappers.cpp index 602a2470805..cb4970d60d9 100644 --- a/src/hook_wrappers.cpp +++ b/src/hook_wrappers.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * hook_wrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp + * + *------------------------------------------------------------------------- + */ + #define typeid __typeid extern "C" { #include "postgres.h" @@ -46,8 +73,10 @@ static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, static void ya_ExecutorFinish_hook(QueryDesc *query_desc); static void ya_ExecutorEnd_hook(QueryDesc *query_desc); static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +#ifdef IC_TEARDOWN_HOOK static void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors); +#endif #ifdef ANALYZE_STATS_COLLECT_HOOK static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); #endif @@ -195,14 +224,14 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { } } +#ifdef IC_TEARDOWN_HOOK void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { cpp_call(get_sender(), &EventSender::ic_metrics_collect); -#ifdef IC_TEARDOWN_HOOK if (previous_ic_teardown_hook) { (*previous_ic_teardown_hook)(transportStates, hasErrors); } -#endif } +#endif #ifdef ANALYZE_STATS_COLLECT_HOOK void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { diff --git a/src/hook_wrappers.h b/src/hook_wrappers.h index 236c6eb9d79..443406a5259 100644 --- a/src/hook_wrappers.h +++ b/src/hook_wrappers.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * hook_wrappers.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/hook_wrappers.h + * + *------------------------------------------------------------------------- + */ + #pragma once #ifdef __cplusplus diff --git a/src/log/LogOps.cpp b/src/log/LogOps.cpp index 56bdf1dca62..e8c927ece84 100644 --- a/src/log/LogOps.cpp +++ b/src/log/LogOps.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogOps.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp + * + *------------------------------------------------------------------------- + */ + #include "protos/yagpcc_set_service.pb.h" #include "LogOps.h" diff --git a/src/log/LogOps.h b/src/log/LogOps.h index bad03d09a8f..1fc30c21030 100644 --- a/src/log/LogOps.h +++ b/src/log/LogOps.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * LogOps.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogOps.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/src/log/LogSchema.cpp b/src/log/LogSchema.cpp index 2fadcc46599..a391b1a2209 100644 --- a/src/log/LogSchema.cpp +++ b/src/log/LogSchema.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * LogSchema.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp + * + *------------------------------------------------------------------------- + */ + #include "google/protobuf/reflection.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/timestamp.pb.h" diff --git a/src/log/LogSchema.h b/src/log/LogSchema.h index f713c1e9b0e..f78acec7ce9 100644 --- a/src/log/LogSchema.h +++ b/src/log/LogSchema.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogSchema.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/log/LogSchema.h + * + *------------------------------------------------------------------------- + */ + #pragma once #include diff --git a/src/memory/gpdbwrappers.cpp b/src/memory/gpdbwrappers.cpp index 8cc483a39de..22083e8bdaf 100644 --- a/src/memory/gpdbwrappers.cpp +++ b/src/memory/gpdbwrappers.cpp @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * gpdbwrappers.cpp + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp + * + *------------------------------------------------------------------------- + */ + #include "gpdbwrappers.h" #include "log/LogOps.h" diff --git a/src/memory/gpdbwrappers.h b/src/memory/gpdbwrappers.h index e080ef5cdd4..fe9b3ba0487 100644 --- a/src/memory/gpdbwrappers.h +++ b/src/memory/gpdbwrappers.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * gpdbwrappers.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h + * + *------------------------------------------------------------------------- + */ + #pragma once extern "C" { diff --git a/src/stat_statements_parser/README.md b/src/stat_statements_parser/README.md new file mode 100644 index 00000000000..291e31a3099 --- /dev/null +++ b/src/stat_statements_parser/README.md @@ -0,0 +1 @@ +This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 54c8b2cf59f..7404208055f 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * pg_stat_statements_ya_parser.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c + * + *------------------------------------------------------------------------- + */ + // NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4 // and from our own inhouse implementation of pg_stat_statements for managed PG diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/src/stat_statements_parser/pg_stat_statements_ya_parser.h index b08e8533992..96c6a776dba 100644 --- a/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * pg_stat_statements_ya_parser.h + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h + * + *------------------------------------------------------------------------- + */ + #pragma once #ifdef __cplusplus diff --git a/src/yagp_hooks_collector.c b/src/yagp_hooks_collector.c index f7863a38921..271bceee178 100644 --- a/src/yagp_hooks_collector.c +++ b/src/yagp_hooks_collector.c @@ -1,3 +1,30 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * yagp_hooks_collector.c + * + * IDENTIFICATION + * gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" #include "cdb/cdbvars.h" #include "funcapi.h" From 9e01807fdab03c2369bf204888bcb5f5b4cb3dc8 Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Tue, 10 Feb 2026 10:41:58 +0300 Subject: [PATCH 41/49] [yagp_hooks_collector] Fix null ErrorData dereference on segments Guard against NULL ErrorData in set_qi_error_message(). For some query types ErrorData can be NULL despite an error occurring. --- src/EventSender.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EventSender.cpp b/src/EventSender.cpp index f1cc0cc6ea1..6993814ffbf 100644 --- a/src/EventSender.cpp +++ b/src/EventSender.cpp @@ -290,7 +290,7 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, query_msg->set_query_status(query_status); if (status == METRICS_QUERY_ERROR) { bool error_flushed = elog_message() == NULL; - if (error_flushed && edata->message == NULL) { + if (error_flushed && (edata == NULL || edata->message == NULL)) { ereport(WARNING, (errmsg("YAGPCC missing error message"))); ereport(DEBUG3, (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); From c80d761db045b2def7fa49ebea05bf4581400693 Mon Sep 17 00:00:00 2001 From: Leonid Borchuk Date: Wed, 18 Mar 2026 15:17:52 +0000 Subject: [PATCH 42/49] [yagp_hooks_collector] Move to gpcontrib directory --- .../yagp_hooks_collector/.clang-format | 0 gpcontrib/yagp_hooks_collector/README.md | 28 +++++++++++++++++++ .../expected}/yagp_cursors.out | 0 .../expected}/yagp_dist.out | 0 .../expected}/yagp_guc_cache.out | 0 .../expected}/yagp_locale.out | 0 .../expected}/yagp_select.out | 0 .../expected}/yagp_uds.out | 0 .../expected}/yagp_utf8_trim.out | 0 .../expected}/yagp_utility.out | 0 .../yagp_hooks_collector/metric.md | 0 
.../protos}/yagpcc_metrics.proto | 0 .../protos}/yagpcc_plan.proto | 0 .../protos}/yagpcc_set_service.proto | 0 .../sql}/yagp_cursors.sql | 0 .../yagp_hooks_collector/sql}/yagp_dist.sql | 0 .../sql}/yagp_guc_cache.sql | 0 .../yagp_hooks_collector/sql}/yagp_locale.sql | 0 .../yagp_hooks_collector/sql}/yagp_select.sql | 0 .../yagp_hooks_collector/sql}/yagp_uds.sql | 0 .../sql}/yagp_utf8_trim.sql | 0 .../sql}/yagp_utility.sql | 0 .../yagp_hooks_collector/src}/Config.cpp | 0 .../yagp_hooks_collector/src}/Config.h | 0 .../yagp_hooks_collector/src}/EventSender.cpp | 0 .../yagp_hooks_collector/src}/EventSender.h | 0 .../yagp_hooks_collector/src}/PgUtils.cpp | 0 .../yagp_hooks_collector/src}/PgUtils.h | 0 .../yagp_hooks_collector/src}/ProcStats.cpp | 0 .../yagp_hooks_collector/src}/ProcStats.h | 0 .../yagp_hooks_collector/src}/ProtoUtils.cpp | 0 .../yagp_hooks_collector/src}/ProtoUtils.h | 0 .../src}/UDSConnector.cpp | 0 .../yagp_hooks_collector/src}/UDSConnector.h | 0 .../yagp_hooks_collector/src}/YagpStat.cpp | 0 .../yagp_hooks_collector/src}/YagpStat.h | 0 .../src}/hook_wrappers.cpp | 0 .../yagp_hooks_collector/src}/hook_wrappers.h | 0 .../yagp_hooks_collector/src}/log/LogOps.cpp | 0 .../yagp_hooks_collector/src}/log/LogOps.h | 0 .../src}/log/LogSchema.cpp | 0 .../yagp_hooks_collector/src}/log/LogSchema.h | 0 .../src}/memory/gpdbwrappers.cpp | 0 .../src}/memory/gpdbwrappers.h | 0 .../src}/stat_statements_parser/README.md | 0 .../pg_stat_statements_ya_parser.c | 0 .../pg_stat_statements_ya_parser.h | 0 .../src}/yagp_hooks_collector.c | 0 .../yagp_hooks_collector--1.0--1.1.sql | 0 .../yagp_hooks_collector--1.0.sql | 0 .../yagp_hooks_collector--1.1.sql | 0 .../yagp_hooks_collector.control | 0 src/stat_statements_parser/README.MD | 1 - 53 files changed, 28 insertions(+), 1 deletion(-) rename .clang-format => gpcontrib/yagp_hooks_collector/.clang-format (100%) create mode 100644 gpcontrib/yagp_hooks_collector/README.md rename {expected => 
gpcontrib/yagp_hooks_collector/expected}/yagp_cursors.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_dist.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_guc_cache.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_locale.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_select.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_uds.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_utf8_trim.out (100%) rename {expected => gpcontrib/yagp_hooks_collector/expected}/yagp_utility.out (100%) rename metric.md => gpcontrib/yagp_hooks_collector/metric.md (100%) rename {protos => gpcontrib/yagp_hooks_collector/protos}/yagpcc_metrics.proto (100%) rename {protos => gpcontrib/yagp_hooks_collector/protos}/yagpcc_plan.proto (100%) rename {protos => gpcontrib/yagp_hooks_collector/protos}/yagpcc_set_service.proto (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_cursors.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_dist.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_guc_cache.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_locale.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_select.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_uds.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_utf8_trim.sql (100%) rename {sql => gpcontrib/yagp_hooks_collector/sql}/yagp_utility.sql (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/Config.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/Config.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/EventSender.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/EventSender.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/PgUtils.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/PgUtils.h (100%) 
rename {src => gpcontrib/yagp_hooks_collector/src}/ProcStats.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/ProcStats.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/ProtoUtils.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/ProtoUtils.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/UDSConnector.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/UDSConnector.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/YagpStat.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/YagpStat.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/hook_wrappers.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/hook_wrappers.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/log/LogOps.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/log/LogOps.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/log/LogSchema.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/log/LogSchema.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/memory/gpdbwrappers.cpp (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/memory/gpdbwrappers.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/stat_statements_parser/README.md (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/stat_statements_parser/pg_stat_statements_ya_parser.c (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/stat_statements_parser/pg_stat_statements_ya_parser.h (100%) rename {src => gpcontrib/yagp_hooks_collector/src}/yagp_hooks_collector.c (100%) rename yagp_hooks_collector--1.0--1.1.sql => gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql (100%) rename yagp_hooks_collector--1.0.sql => gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql (100%) rename yagp_hooks_collector--1.1.sql => gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql (100%) rename yagp_hooks_collector.control => 
gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control (100%) delete mode 100644 src/stat_statements_parser/README.MD diff --git a/.clang-format b/gpcontrib/yagp_hooks_collector/.clang-format similarity index 100% rename from .clang-format rename to gpcontrib/yagp_hooks_collector/.clang-format diff --git a/gpcontrib/yagp_hooks_collector/README.md b/gpcontrib/yagp_hooks_collector/README.md new file mode 100644 index 00000000000..9f465a190cb --- /dev/null +++ b/gpcontrib/yagp_hooks_collector/README.md @@ -0,0 +1,28 @@ +## YAGP Hooks Collector + +An extension for collecting greenplum query execution metrics and reporting them to an external agent. + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `yagpcc.enable`. + +#### 2. `EXPLAIN` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. +- **GUC:** `yagpcc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. +- **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. +- **GUC:** `yagpcc.enable`. + +### General Configuration +- **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. +- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. 
+- **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. +- **Analyze collection:** Analyze is sent if execution time exceeds `yagpcc.min_analyze_time`, which is 10 seconds by default. Analyze is collected if `yagpcc.enable_analyze` is true. diff --git a/expected/yagp_cursors.out b/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out similarity index 100% rename from expected/yagp_cursors.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out diff --git a/expected/yagp_dist.out b/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out similarity index 100% rename from expected/yagp_dist.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_dist.out diff --git a/expected/yagp_guc_cache.out b/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out similarity index 100% rename from expected/yagp_guc_cache.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out diff --git a/expected/yagp_locale.out b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out similarity index 100% rename from expected/yagp_locale.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_locale.out diff --git a/expected/yagp_select.out b/gpcontrib/yagp_hooks_collector/expected/yagp_select.out similarity index 100% rename from expected/yagp_select.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_select.out diff --git a/expected/yagp_uds.out b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out similarity index 100% rename from expected/yagp_uds.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_uds.out diff --git a/expected/yagp_utf8_trim.out b/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out similarity index 100% rename from expected/yagp_utf8_trim.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out diff --git a/expected/yagp_utility.out 
b/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out similarity index 100% rename from expected/yagp_utility.out rename to gpcontrib/yagp_hooks_collector/expected/yagp_utility.out diff --git a/metric.md b/gpcontrib/yagp_hooks_collector/metric.md similarity index 100% rename from metric.md rename to gpcontrib/yagp_hooks_collector/metric.md diff --git a/protos/yagpcc_metrics.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto similarity index 100% rename from protos/yagpcc_metrics.proto rename to gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto diff --git a/protos/yagpcc_plan.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto similarity index 100% rename from protos/yagpcc_plan.proto rename to gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto diff --git a/protos/yagpcc_set_service.proto b/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto similarity index 100% rename from protos/yagpcc_set_service.proto rename to gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto diff --git a/sql/yagp_cursors.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql similarity index 100% rename from sql/yagp_cursors.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql diff --git a/sql/yagp_dist.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql similarity index 100% rename from sql/yagp_dist.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql diff --git a/sql/yagp_guc_cache.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql similarity index 100% rename from sql/yagp_guc_cache.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql diff --git a/sql/yagp_locale.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql similarity index 100% rename from sql/yagp_locale.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql diff --git a/sql/yagp_select.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql similarity index 100% rename from 
sql/yagp_select.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_select.sql diff --git a/sql/yagp_uds.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql similarity index 100% rename from sql/yagp_uds.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql diff --git a/sql/yagp_utf8_trim.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql similarity index 100% rename from sql/yagp_utf8_trim.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql diff --git a/sql/yagp_utility.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql similarity index 100% rename from sql/yagp_utility.sql rename to gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql diff --git a/src/Config.cpp b/gpcontrib/yagp_hooks_collector/src/Config.cpp similarity index 100% rename from src/Config.cpp rename to gpcontrib/yagp_hooks_collector/src/Config.cpp diff --git a/src/Config.h b/gpcontrib/yagp_hooks_collector/src/Config.h similarity index 100% rename from src/Config.h rename to gpcontrib/yagp_hooks_collector/src/Config.h diff --git a/src/EventSender.cpp b/gpcontrib/yagp_hooks_collector/src/EventSender.cpp similarity index 100% rename from src/EventSender.cpp rename to gpcontrib/yagp_hooks_collector/src/EventSender.cpp diff --git a/src/EventSender.h b/gpcontrib/yagp_hooks_collector/src/EventSender.h similarity index 100% rename from src/EventSender.h rename to gpcontrib/yagp_hooks_collector/src/EventSender.h diff --git a/src/PgUtils.cpp b/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp similarity index 100% rename from src/PgUtils.cpp rename to gpcontrib/yagp_hooks_collector/src/PgUtils.cpp diff --git a/src/PgUtils.h b/gpcontrib/yagp_hooks_collector/src/PgUtils.h similarity index 100% rename from src/PgUtils.h rename to gpcontrib/yagp_hooks_collector/src/PgUtils.h diff --git a/src/ProcStats.cpp b/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp similarity index 100% rename from src/ProcStats.cpp rename to gpcontrib/yagp_hooks_collector/src/ProcStats.cpp diff 
--git a/src/ProcStats.h b/gpcontrib/yagp_hooks_collector/src/ProcStats.h similarity index 100% rename from src/ProcStats.h rename to gpcontrib/yagp_hooks_collector/src/ProcStats.h diff --git a/src/ProtoUtils.cpp b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp similarity index 100% rename from src/ProtoUtils.cpp rename to gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp diff --git a/src/ProtoUtils.h b/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h similarity index 100% rename from src/ProtoUtils.h rename to gpcontrib/yagp_hooks_collector/src/ProtoUtils.h diff --git a/src/UDSConnector.cpp b/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp similarity index 100% rename from src/UDSConnector.cpp rename to gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp diff --git a/src/UDSConnector.h b/gpcontrib/yagp_hooks_collector/src/UDSConnector.h similarity index 100% rename from src/UDSConnector.h rename to gpcontrib/yagp_hooks_collector/src/UDSConnector.h diff --git a/src/YagpStat.cpp b/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp similarity index 100% rename from src/YagpStat.cpp rename to gpcontrib/yagp_hooks_collector/src/YagpStat.cpp diff --git a/src/YagpStat.h b/gpcontrib/yagp_hooks_collector/src/YagpStat.h similarity index 100% rename from src/YagpStat.h rename to gpcontrib/yagp_hooks_collector/src/YagpStat.h diff --git a/src/hook_wrappers.cpp b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp similarity index 100% rename from src/hook_wrappers.cpp rename to gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp diff --git a/src/hook_wrappers.h b/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h similarity index 100% rename from src/hook_wrappers.h rename to gpcontrib/yagp_hooks_collector/src/hook_wrappers.h diff --git a/src/log/LogOps.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp similarity index 100% rename from src/log/LogOps.cpp rename to gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp diff --git a/src/log/LogOps.h 
b/gpcontrib/yagp_hooks_collector/src/log/LogOps.h similarity index 100% rename from src/log/LogOps.h rename to gpcontrib/yagp_hooks_collector/src/log/LogOps.h diff --git a/src/log/LogSchema.cpp b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp similarity index 100% rename from src/log/LogSchema.cpp rename to gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp diff --git a/src/log/LogSchema.h b/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h similarity index 100% rename from src/log/LogSchema.h rename to gpcontrib/yagp_hooks_collector/src/log/LogSchema.h diff --git a/src/memory/gpdbwrappers.cpp b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp similarity index 100% rename from src/memory/gpdbwrappers.cpp rename to gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp diff --git a/src/memory/gpdbwrappers.h b/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h similarity index 100% rename from src/memory/gpdbwrappers.h rename to gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h diff --git a/src/stat_statements_parser/README.md b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md similarity index 100% rename from src/stat_statements_parser/README.md rename to gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c similarity index 100% rename from src/stat_statements_parser/pg_stat_statements_ya_parser.c rename to gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c diff --git a/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h similarity index 100% rename from src/stat_statements_parser/pg_stat_statements_ya_parser.h rename to 
gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h diff --git a/src/yagp_hooks_collector.c b/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c similarity index 100% rename from src/yagp_hooks_collector.c rename to gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c diff --git a/yagp_hooks_collector--1.0--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql similarity index 100% rename from yagp_hooks_collector--1.0--1.1.sql rename to gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql diff --git a/yagp_hooks_collector--1.0.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql similarity index 100% rename from yagp_hooks_collector--1.0.sql rename to gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql diff --git a/yagp_hooks_collector--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql similarity index 100% rename from yagp_hooks_collector--1.1.sql rename to gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql diff --git a/yagp_hooks_collector.control b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control similarity index 100% rename from yagp_hooks_collector.control rename to gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control diff --git a/src/stat_statements_parser/README.MD b/src/stat_statements_parser/README.MD deleted file mode 100644 index 291e31a3099..00000000000 --- a/src/stat_statements_parser/README.MD +++ /dev/null @@ -1 +0,0 @@ -This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. 
From 24fd1d1cd525af68a938955a13b6ec24ffd6385e Mon Sep 17 00:00:00 2001 From: NJrslv <108277031+NJrslv@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:37:39 +0300 Subject: [PATCH 43/49] [gp_stats_collector] Rename yagp_hooks_collector to gp_stats_collector Rename extension, shared library, SQL objects, GUC prefix, test files, and all internal identifiers. Restore accidentally deleted files. Clean up stray gmon.out. --- .github/workflows/build-cloudberry-rocky8.yml | 32 ++- .github/workflows/build-cloudberry.yml | 16 +- .github/workflows/build-deb-cloudberry.yml | 32 ++- .gitignore | 5 - Makefile | 2 + configure | 16 +- configure.ac | 19 +- gpcontrib/Makefile | 4 +- .../.clang-format | 0 gpcontrib/gp_stats_collector/.gitignore | 5 + .../Makefile | 16 +- gpcontrib/gp_stats_collector/README.md | 47 ++++ .../expected/gpsc_cursors.out} | 72 ++--- .../expected/gpsc_dist.out} | 56 ++-- .../expected/gpsc_guc_cache.out} | 32 +-- .../expected/gpsc_locale.out | 23 ++ .../expected/gpsc_select.out} | 56 ++-- .../gp_stats_collector/expected/gpsc_uds.out | 42 +++ .../expected/gpsc_utf8_trim.out} | 36 +-- .../expected/gpsc_utility.out} | 172 ++++++------ .../gp_stats_collector--1.0--1.1.sql | 113 ++++++++ .../gp_stats_collector--1.0.sql | 55 ++++ .../gp_stats_collector--1.1.sql | 110 ++++++++ .../gp_stats_collector.control | 5 + .../metric.md | 27 +- .../protos/gpsc_metrics.proto} | 4 +- .../protos/gpsc_plan.proto} | 4 +- .../protos/gpsc_set_service.proto} | 8 +- .../results/gpsc_cursors.out | 163 ++++++++++++ .../gp_stats_collector/results/gpsc_dist.out | 175 ++++++++++++ .../results/gpsc_guc_cache.out | 61 +++++ .../results/gpsc_locale.out | 23 ++ .../results/gpsc_select.out | 136 ++++++++++ .../gp_stats_collector/results/gpsc_uds.out | 42 +++ .../results/gpsc_utf8_trim.out | 68 +++++ .../results/gpsc_utility.out | 248 ++++++++++++++++++ .../gp_stats_collector/sql/gpsc_cursors.sql | 85 ++++++ .../sql/gpsc_dist.sql} | 58 ++-- .../sql/gpsc_guc_cache.sql} | 32 +-- 
.../gp_stats_collector/sql/gpsc_locale.sql | 29 ++ .../gp_stats_collector/sql/gpsc_select.sql | 69 +++++ gpcontrib/gp_stats_collector/sql/gpsc_uds.sql | 31 +++ .../sql/gpsc_utf8_trim.sql} | 36 +-- .../gp_stats_collector/sql/gpsc_utility.sql | 135 ++++++++++ .../src/Config.cpp | 46 ++-- .../src/Config.h | 2 +- .../src/EventSender.cpp | 78 +++--- .../src/EventSender.h | 32 +-- .../src/GpscStat.cpp} | 36 +-- .../src/GpscStat.h} | 6 +- .../src/PgUtils.cpp | 20 +- .../src/PgUtils.h | 2 +- .../src/ProcStats.cpp | 12 +- .../src/ProcStats.h | 6 +- .../src/ProtoUtils.cpp | 60 ++--- .../src/ProtoUtils.h | 26 +- .../src/UDSConnector.cpp | 24 +- .../src/UDSConnector.h | 6 +- .../src/gp_stats_collector.c} | 36 +-- .../src/hook_wrappers.cpp | 72 ++--- .../src/hook_wrappers.h | 6 +- .../src/log/LogOps.cpp | 16 +- .../src/log/LogOps.h | 10 +- .../src/log/LogSchema.cpp | 10 +- .../src/log/LogSchema.h | 8 +- .../src/memory/gpdbwrappers.cpp | 42 +-- .../src/memory/gpdbwrappers.h | 12 +- .../src/stat_statements_parser/README.md | 20 ++ .../pg_stat_statements_ya_parser.c | 2 +- .../pg_stat_statements_ya_parser.h | 2 +- gpcontrib/yagp_hooks_collector/README.md | 28 -- .../expected/yagp_locale.out | 23 -- .../expected/yagp_uds.out | 42 --- .../yagp_hooks_collector/sql/yagp_cursors.sql | 85 ------ .../yagp_hooks_collector/sql/yagp_locale.sql | 29 -- .../yagp_hooks_collector/sql/yagp_select.sql | 69 ----- .../yagp_hooks_collector/sql/yagp_uds.sql | 31 --- .../yagp_hooks_collector/sql/yagp_utility.sql | 135 ---------- .../src/stat_statements_parser/README.md | 1 - .../yagp_hooks_collector--1.0--1.1.sql | 113 -------- .../yagp_hooks_collector--1.0.sql | 55 ---- .../yagp_hooks_collector--1.1.sql | 110 -------- .../yagp_hooks_collector.control | 5 - pom.xml | 16 +- src/Makefile.global.in | 2 +- src/backend/commands/portalcmds.c | 2 +- src/backend/tcop/pquery.c | 4 +- src/include/executor/execdesc.h | 8 +- 88 files changed, 2396 insertions(+), 1354 deletions(-) rename 
gpcontrib/{yagp_hooks_collector => gp_stats_collector}/.clang-format (100%) create mode 100644 gpcontrib/gp_stats_collector/.gitignore rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/Makefile (66%) create mode 100644 gpcontrib/gp_stats_collector/README.md rename gpcontrib/{yagp_hooks_collector/expected/yagp_cursors.out => gp_stats_collector/expected/gpsc_cursors.out} (73%) rename gpcontrib/{yagp_hooks_collector/expected/yagp_dist.out => gp_stats_collector/expected/gpsc_dist.out} (81%) rename gpcontrib/{yagp_hooks_collector/expected/yagp_guc_cache.out => gp_stats_collector/expected/gpsc_guc_cache.out} (64%) create mode 100644 gpcontrib/gp_stats_collector/expected/gpsc_locale.out rename gpcontrib/{yagp_hooks_collector/expected/yagp_select.out => gp_stats_collector/expected/gpsc_select.out} (67%) create mode 100644 gpcontrib/gp_stats_collector/expected/gpsc_uds.out rename gpcontrib/{yagp_hooks_collector/expected/yagp_utf8_trim.out => gp_stats_collector/expected/gpsc_utf8_trim.out} (65%) rename gpcontrib/{yagp_hooks_collector/expected/yagp_utility.out => gp_stats_collector/expected/gpsc_utility.out} (57%) create mode 100644 gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql create mode 100644 gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql create mode 100644 gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql create mode 100644 gpcontrib/gp_stats_collector/gp_stats_collector.control rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/metric.md (94%) rename gpcontrib/{yagp_hooks_collector/protos/yagpcc_metrics.proto => gp_stats_collector/protos/gpsc_metrics.proto} (97%) rename gpcontrib/{yagp_hooks_collector/protos/yagpcc_plan.proto => gp_stats_collector/protos/gpsc_plan.proto} (98%) rename gpcontrib/{yagp_hooks_collector/protos/yagpcc_set_service.proto => gp_stats_collector/protos/gpsc_set_service.proto} (86%) create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_cursors.out create mode 100644 
gpcontrib/gp_stats_collector/results/gpsc_dist.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_locale.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_select.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_uds.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out create mode 100644 gpcontrib/gp_stats_collector/results/gpsc_utility.out create mode 100644 gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql rename gpcontrib/{yagp_hooks_collector/sql/yagp_dist.sql => gp_stats_collector/sql/gpsc_dist.sql} (53%) rename gpcontrib/{yagp_hooks_collector/sql/yagp_guc_cache.sql => gp_stats_collector/sql/gpsc_guc_cache.sql} (58%) create mode 100644 gpcontrib/gp_stats_collector/sql/gpsc_locale.sql create mode 100644 gpcontrib/gp_stats_collector/sql/gpsc_select.sql create mode 100644 gpcontrib/gp_stats_collector/sql/gpsc_uds.sql rename gpcontrib/{yagp_hooks_collector/sql/yagp_utf8_trim.sql => gp_stats_collector/sql/gpsc_utf8_trim.sql} (58%) create mode 100644 gpcontrib/gp_stats_collector/sql/gpsc_utility.sql rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/Config.cpp (79%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/Config.h (97%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/EventSender.cpp (86%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/EventSender.h (84%) rename gpcontrib/{yagp_hooks_collector/src/YagpStat.cpp => gp_stats_collector/src/GpscStat.cpp} (78%) rename gpcontrib/{yagp_hooks_collector/src/YagpStat.h => gp_stats_collector/src/GpscStat.h} (94%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/PgUtils.cpp (83%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/PgUtils.h (96%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/ProcStats.cpp (92%) rename gpcontrib/{yagp_hooks_collector => 
gp_stats_collector}/src/ProcStats.h (89%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/ProtoUtils.cpp (85%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/ProtoUtils.h (65%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/UDSConnector.cpp (87%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/UDSConnector.h (88%) rename gpcontrib/{yagp_hooks_collector/src/yagp_hooks_collector.c => gp_stats_collector/src/gp_stats_collector.c} (79%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/hook_wrappers.cpp (84%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/hook_wrappers.h (89%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/log/LogOps.cpp (91%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/log/LogOps.h (83%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/log/LogSchema.cpp (94%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/log/LogSchema.h (98%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/memory/gpdbwrappers.cpp (81%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/memory/gpdbwrappers.h (92%) create mode 100644 gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/stat_statements_parser/pg_stat_statements_ya_parser.c (99%) rename gpcontrib/{yagp_hooks_collector => gp_stats_collector}/src/stat_statements_parser/pg_stat_statements_ya_parser.h (93%) delete mode 100644 gpcontrib/yagp_hooks_collector/README.md delete mode 100644 gpcontrib/yagp_hooks_collector/expected/yagp_locale.out delete mode 100644 gpcontrib/yagp_hooks_collector/expected/yagp_uds.out delete mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql delete mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql delete mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_select.sql delete mode 
100644 gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql delete mode 100644 gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql delete mode 100644 gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md delete mode 100644 gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql delete mode 100644 gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql delete mode 100644 gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql delete mode 100644 gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control diff --git a/.github/workflows/build-cloudberry-rocky8.yml b/.github/workflows/build-cloudberry-rocky8.yml index e0936c725c8..39175753a99 100644 --- a/.github/workflows/build-cloudberry-rocky8.yml +++ b/.github/workflows/build-cloudberry-rocky8.yml @@ -320,6 +320,10 @@ jobs: "gpcontrib/gp_sparse_vector:installcheck", "gpcontrib/gp_toolkit:installcheck"] }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, {"test":"ic-fixme", "make_configs":["src/test/regress:installcheck-fixme"], "enable_core_check":false @@ -540,10 +544,11 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -1400,6 +1405,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1423,6 +1429,30 @@ jobs: # 2. Follow the same pattern as optimizer # 3. Update matrix entries to include the new setting + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + # Set PostgreSQL options if defined PG_OPTS="" if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index 8484331998f..cbd4fd753dc 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -271,9 +271,9 @@ jobs: }, "enable_core_check":false }, - {"test":"gpcontrib-yagp-hooks-collector", - "make_configs":["gpcontrib/yagp_hooks_collector:installcheck"], - "extension":"yagp_hooks_collector" + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" }, 
{"test":"ic-expandshrink", "make_configs":["src/test/isolation2:installcheck-expandshrink"] @@ -539,7 +539,7 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} - CONFIGURE_EXTRA_OPTS: --with-yagp-hooks-collector + CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -1441,17 +1441,17 @@ jobs: # Create extension if required if [[ "${{ matrix.extension != '' }}" == "true" ]]; then case "${{ matrix.extension }}" in - yagp_hooks_collector) + gp_stats_collector) if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ - gpconfig -c shared_preload_libraries -v 'yagp_hooks_collector' && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ gpstop -ra && \ - echo 'CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ SHOW shared_preload_libraries; \ TABLE pg_extension;' | \ psql postgres" then - echo "Error creating yagp_hooks_collector extension" + echo "Error creating gp_stats_collector extension" exit 1 fi ;; diff --git a/.github/workflows/build-deb-cloudberry.yml b/.github/workflows/build-deb-cloudberry.yml index 85d917b8ff0..bf85a107b31 100644 --- a/.github/workflows/build-deb-cloudberry.yml +++ b/.github/workflows/build-deb-cloudberry.yml @@ -252,6 +252,10 @@ jobs: "gpcontrib/gp_sparse_vector:installcheck", "gpcontrib/gp_toolkit:installcheck"] }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, {"test":"ic-cbdb-parallel", "make_configs":["src/test/regress:installcheck-cbdb-parallel"] } @@ -448,13 +452,14 @@ jobs: shell: bash env: SRC_DIR: ${{ github.workspace }} + CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail export 
BUILD_DESTINATION=${SRC_DIR}/debian/build chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi @@ -1341,6 +1346,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: ${{ github.workspace }}/debian/build shell: bash {0} run: | set -o pipefail @@ -1365,6 +1371,30 @@ jobs: # 3. Update matrix entries to include the new setting + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + # Set PostgreSQL options if defined PG_OPTS="" if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then diff --git a/.gitignore b/.gitignore index 29b40ee096c..7f5110d5c8e 100644 --- a/.gitignore +++ b/.gitignore @@ -74,8 +74,3 @@ lib*.pc /tmp_install/ /.cache/ /install/ -*.o -*.so -src/protos/ -.vscode -compile_commands.json diff --git a/Makefile b/Makefile index 15c5dabb70e..e9ab3fbf2d4 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,14 @@ # to build Postgres with a different make, we have this make file # that, as a service, will look for a GNU make and invoke it, or show # an error message if none could be found. + # If the user were using GNU make now, this file would not get used # because GNU make uses a make file named "GNUmakefile" in preference # to "Makefile" if it exists. PostgreSQL is shipped with a # "GNUmakefile". If the user hasn't run the configure script yet, the # GNUmakefile won't exist yet, so we catch that case as well. + # AIX make defaults to building *every* target of the first rule. Start with # a single-target, empty rule to make the other targets non-default. 
all: diff --git a/configure b/configure index 598114a4b7f..13295a946ca 100755 --- a/configure +++ b/configure @@ -722,7 +722,7 @@ with_apr_config with_libcurl with_rt with_zstd -with_yagp_hooks_collector +with_gp_stats_collector with_libbz2 LZ4_LIBS LZ4_CFLAGS @@ -943,7 +943,7 @@ with_zlib with_lz4 with_libbz2 with_zstd -with_yagp_hooks_collector +with_gp_stats_collector with_rt with_libcurl with_apr_config @@ -11153,14 +11153,14 @@ fi fi # -# yagp_hooks_collector +# gp_stats_collector # -# Check whether --with-yagp-hooks-collector was given. -if test "${with_yagp_hooks_collector+set}" = set; then : - withval=$with_yagp_hooks_collector; +# Check whether --with-gp-stats-collector was given. +if test "${with_gp_stats_collector+set}" = set; then : + withval=$with_gp_stats_collector; case $withval in yes) : @@ -11169,12 +11169,12 @@ if test "${with_yagp_hooks_collector+set}" = set; then : : ;; *) - as_fn_error $? "no argument expected for --with-yagp-hooks-collector option" "$LINENO" 5 + as_fn_error $? 
"no argument expected for --with-gp-stats-collector option" "$LINENO" 5 ;; esac else - with_yagp_hooks_collector=no + with_gp_stats_collector=no fi diff --git a/configure.ac b/configure.ac index 792878fde4b..3024e4b9b65 100644 --- a/configure.ac +++ b/configure.ac @@ -1366,11 +1366,22 @@ AC_MSG_RESULT([$with_zstd]) AC_SUBST(with_zstd) # -# yagp_hooks_collector +# gp_stats_collector # -PGAC_ARG_BOOL(with, yagp_hooks_collector, no, - [build with YAGP hooks collector extension]) -AC_SUBST(with_yagp_hooks_collector) +PGAC_ARG_BOOL(with, gp_stats_collector, no, + [build with stats collector extension]) +AC_SUBST(with_gp_stats_collector) + +if test "$with_gp_stats_collector" = yes; then + PKG_CHECK_MODULES([PROTOBUF], [protobuf >= 3.0.0], + [], + [AC_MSG_ERROR([protobuf >= 3.0.0 is required for gp_stats_collector])] + ) + AC_PATH_PROG([PROTOC], [protoc], [no]) + if test "$PROTOC" = no; then + AC_MSG_ERROR([protoc is required for gp_stats_collector but was not found in PATH]) + fi +fi if test "$with_zstd" = yes; then dnl zstd_errors.h was renamed from error_public.h in v1.4.0 diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 8b98dc9142c..956cb470477 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -35,8 +35,8 @@ else diskquota endif -ifeq "$(with_yagp_hooks_collector)" "yes" - recurse_targets += yagp_hooks_collector +ifeq "$(with_gp_stats_collector)" "yes" + recurse_targets += gp_stats_collector endif ifeq "$(with_zstd)" "yes" recurse_targets += zstd diff --git a/gpcontrib/yagp_hooks_collector/.clang-format b/gpcontrib/gp_stats_collector/.clang-format similarity index 100% rename from gpcontrib/yagp_hooks_collector/.clang-format rename to gpcontrib/gp_stats_collector/.clang-format diff --git a/gpcontrib/gp_stats_collector/.gitignore b/gpcontrib/gp_stats_collector/.gitignore new file mode 100644 index 00000000000..e8dfe855dad --- /dev/null +++ b/gpcontrib/gp_stats_collector/.gitignore @@ -0,0 +1,5 @@ +*.o +*.so +src/protos/ +.vscode 
+compile_commands.json diff --git a/gpcontrib/yagp_hooks_collector/Makefile b/gpcontrib/gp_stats_collector/Makefile similarity index 66% rename from gpcontrib/yagp_hooks_collector/Makefile rename to gpcontrib/gp_stats_collector/Makefile index 49825c55f35..c8f7b3c30fe 100644 --- a/gpcontrib/yagp_hooks_collector/Makefile +++ b/gpcontrib/gp_stats_collector/Makefile @@ -1,9 +1,9 @@ -MODULE_big = yagp_hooks_collector -EXTENSION = yagp_hooks_collector +MODULE_big = gp_stats_collector +EXTENSION = gp_stats_collector DATA = $(wildcard *--*.sql) -REGRESS = yagp_cursors yagp_dist yagp_select yagp_utf8_trim yagp_utility yagp_guc_cache yagp_uds yagp_locale +REGRESS = gpsc_cursors gpsc_dist gpsc_select gpsc_utf8_trim gpsc_utility gpsc_guc_cache gpsc_uds gpsc_locale -PROTO_BASES = yagpcc_plan yagpcc_metrics yagpcc_set_service +PROTO_BASES = gpsc_plan gpsc_metrics gpsc_set_service PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) @@ -24,7 +24,7 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) else -subdir = gpcontrib/yagp_hooks_collector +subdir = gpcontrib/gp_stats_collector top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk @@ -32,10 +32,8 @@ endif src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto @mkdir -p src/protos - sed -i 's/optional //g' $^ - sed -i 's|cloud/mdb/yagpcc/api/proto/common/|protos/|g' $^ protoc -I /usr/include -I /usr/local/include -I . 
--cpp_out=src $^ mv src/protos/$*.pb.cc src/protos/$*.pb.cpp -$(CPP_OBJS): src/protos/yagpcc_metrics.pb.h src/protos/yagpcc_plan.pb.h src/protos/yagpcc_set_service.pb.h -src/protos/yagpcc_set_service.pb.o: src/protos/yagpcc_metrics.pb.h +$(CPP_OBJS): src/protos/gpsc_metrics.pb.h src/protos/gpsc_plan.pb.h src/protos/gpsc_set_service.pb.h +src/protos/gpsc_set_service.pb.o: src/protos/gpsc_metrics.pb.h diff --git a/gpcontrib/gp_stats_collector/README.md b/gpcontrib/gp_stats_collector/README.md new file mode 100644 index 00000000000..8c2d5c6868e --- /dev/null +++ b/gpcontrib/gp_stats_collector/README.md @@ -0,0 +1,47 @@ + + +## GP Stats Collector + +An extension for collecting query execution metrics and reporting them to an external agent. + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `gpsc.enable`. + +#### 2. `EXPLAIN` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. +- **GUC:** `gpsc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. +- **GUCs:** `gpsc.enable`, `gpsc.min_analyze_time`, `gpsc.enable_cdbstats` (ANALYZE), `gpsc.enable_analyze` (BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, System, Network, Interconnect, Spill metrics. +- **GUC:** `gpsc.enable`. + +### General Configuration +- **Nested Queries:** When `gpsc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments; when it is `true`, both top-level and nested queries are reported from the coordinator, and from segments they are collected as aggregates. +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `gpsc.uds_path`. 
+- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `gpsc.ignored_users_list`. +- **Trimming plans:** Query texts and execution plans are trimmed based on `gpsc.max_text_size` and `gpsc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. +- **Analyze collection:** Analyze is sent if execution time exceeds `gpsc.min_analyze_time`, which is 10 seconds by default. Analyze is collected if `gpsc.enable_analyze` is true. diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out b/gpcontrib/gp_stats_collector/expected/gpsc_cursors.out similarity index 73% rename from gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out rename to gpcontrib/gp_stats_collector/expected/gpsc_cursors.out index df12e3e1b66..282d9ac49e1 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_cursors.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_cursors.out @@ -1,5 +1,5 @@ -CREATE EXTENSION yagp_hooks_collector; -CREATE FUNCTION yagp_status_order(status text) +CREATE EXTENSION gp_stats_collector; +CREATE FUNCTION gpsc_status_order(status text) RETURNS integer AS $$ BEGIN @@ -12,18 +12,18 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; -- DECLARE -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; DECLARE cursor_stats_0 CURSOR FOR SELECT 0; CLOSE cursor_stats_0; COMMIT; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY 
segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- -1 | BEGIN; | QUERY_STATUS_SUBMIT @@ -34,25 +34,25 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (10 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- DECLARE WITH HOLD -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; CLOSE cursor_stats_1; DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; CLOSE cursor_stats_2; COMMIT; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------------+--------------------- -1 | BEGIN; | QUERY_STATUS_SUBMIT @@ -67,24 +67,24 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (14 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; 
+SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- ROLLBACK -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; DECLARE cursor_stats_3 CURSOR FOR SELECT 1; CLOSE cursor_stats_3; DECLARE cursor_stats_4 CURSOR FOR SELECT 1; ROLLBACK; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- -1 | BEGIN; | QUERY_STATUS_SUBMIT @@ -97,17 +97,17 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE -1 | ROLLBACK; | QUERY_STATUS_SUBMIT -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (12 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- FETCH -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; @@ -126,8 +126,8 @@ FETCH 1 IN cursor_stats_6; CLOSE cursor_stats_5; CLOSE cursor_stats_6; COMMIT; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | 
query_text | query_status -------+-------------------------------------------------------+--------------------- -1 | BEGIN; | QUERY_STATUS_SUBMIT @@ -146,18 +146,18 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE -1 | COMMIT; | QUERY_STATUS_SUBMIT -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (18 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out b/gpcontrib/gp_stats_collector/expected/gpsc_dist.out similarity index 81% rename from gpcontrib/yagp_hooks_collector/expected/yagp_dist.out rename to gpcontrib/gp_stats_collector/expected/gpsc_dist.out index 3b1e3504923..92e8678767b 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_dist.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_dist.out @@ -1,5 +1,5 @@ -CREATE EXTENSION yagp_hooks_collector; -CREATE OR REPLACE FUNCTION yagp_status_order(status text) +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) RETURNS integer AS $$ BEGIN @@ -12,14 +12,14 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; -SET yagpcc.enable_utility TO FALSE; +SET gpsc.ignored_users_list 
TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; -- Hash distributed table CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); INSERT INTO test_hash_dist SELECT 1; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SET optimizer_enable_direct_dispatch TO TRUE; -- Direct dispatch is used here, only one segment is scanned. select * from test_hash_dist where id = 1; @@ -29,9 +29,9 @@ select * from test_hash_dist where id = 1; (1 row) RESET optimizer_enable_direct_dispatch; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Should see 8 rows. -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+--------------------------------------------+--------------------- -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT @@ -44,12 +44,12 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag 1 | | QUERY_STATUS_DONE (8 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; -- Scan all segments. 
select * from test_hash_dist; id @@ -58,8 +58,8 @@ select * from test_hash_dist; (1 row) DROP TABLE test_hash_dist; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------+--------------------- -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT @@ -80,7 +80,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag | | QUERY_STATUS_DONE (16 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) @@ -93,7 +93,7 @@ END; $$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; INSERT INTO test_replicated SELECT 1; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SELECT COUNT(*) FROM test_replicated, force_segments(); count ------- @@ -102,8 +102,8 @@ SELECT COUNT(*) FROM test_replicated, force_segments(); DROP TABLE test_replicated; DROP FUNCTION force_segments(); -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------------------+--------------------- -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT @@ -124,7 +124,7 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag | | QUERY_STATUS_DONE (16 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) @@ -134,18 +134,18 @@ SET 
allow_system_table_mods = ON; CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SELECT COUNT(*) FROM test_partial_dist; count ------- 100 (1 row) -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; DROP TABLE test_partial_dist; RESET allow_system_table_mods; -- Should see 12 rows. -SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; query_text | query_status -----------------------------------------+--------------------- SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT @@ -162,14 +162,14 @@ SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_statu | QUERY_STATUS_DONE (12 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out b/gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out similarity index 64% rename from gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out rename to gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out index 3085cfa42e1..11a420839db 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_guc_cache.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out @@ 
-8,23 +8,23 @@ -- have its DONE event rejected, creating orphaned SUBMIT entries. -- This is due to query being actually executed between SUBMIT and DONE. -- start_ignore -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; -SELECT yagpcc.truncate_log(); +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +SELECT gpsc.truncate_log(); -- end_ignore CREATE OR REPLACE FUNCTION print_last_query(query text) RETURNS TABLE(query_status text) AS $$ SELECT query_status - FROM yagpcc.log + FROM gpsc.log WHERE segid = -1 AND query_text = query ORDER BY ccnt DESC $$ LANGUAGE sql; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.logging_mode TO 'TBL'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.logging_mode TO 'TBL'; -- SET below disables utility logging and DONE must still be logged. -SET yagpcc.enable_utility TO FALSE; -SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); +SET gpsc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); query_status --------------------- QUERY_STATUS_SUBMIT @@ -33,14 +33,14 @@ SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); -- SELECT below adds current user to ignore list and DONE must still be logged. 
-- start_ignore -SELECT set_config('yagpcc.ignored_users_list', current_user, false); +SELECT set_config('gpsc.ignored_users_list', current_user, false); set_config ------------ gpadmin (1 row) -- end_ignore -SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); +SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); query_status --------------------- QUERY_STATUS_SUBMIT @@ -50,8 +50,8 @@ SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', (4 rows) DROP FUNCTION print_last_query(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; -RESET yagpcc.logging_mode; +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_locale.out b/gpcontrib/gp_stats_collector/expected/gpsc_locale.out new file mode 100644 index 00000000000..a01fe0648b9 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+-- start_ignore +DROP DATABASE IF EXISTS gpsc_test_locale; +-- end_ignore +CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c gpsc_test_locale +CREATE EXTENSION gp_stats_collector; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable_utility TO TRUE; +SET gpsc.enable TO TRUE; +CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +DROP TABLE gpsc_hi_안녕세계; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out b/gpcontrib/gp_stats_collector/expected/gpsc_select.out similarity index 67% rename from gpcontrib/yagp_hooks_collector/expected/yagp_select.out rename to gpcontrib/gp_stats_collector/expected/gpsc_select.out index af08f2d1def..3008c8f6d55 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_select.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_select.out @@ -1,5 +1,5 @@ -CREATE EXTENSION yagp_hooks_collector; -CREATE OR REPLACE FUNCTION yagp_status_order(status text) +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) RETURNS integer AS $$ BEGIN @@ -12,12 +12,12 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; -SET yagpcc.enable_utility TO FALSE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; -- Basic SELECT tests -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SELECT 1; ?column? 
---------- @@ -30,8 +30,8 @@ SELECT COUNT(*) FROM generate_series(1,10); 10 (1 row) -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------------------+--------------------- -1 | SELECT 1; | QUERY_STATUS_SUBMIT @@ -44,13 +44,13 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE (8 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- Transaction test -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; SELECT 1; ?column? @@ -59,8 +59,8 @@ SELECT 1; (1 row) COMMIT; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+------------+--------------------- -1 | SELECT 1; | QUERY_STATUS_SUBMIT @@ -69,13 +69,13 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag -1 | SELECT 1; | QUERY_STATUS_DONE (4 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- CTE test -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; WITH t AS (VALUES (1), (2)) SELECT * FROM t; column1 @@ -84,8 +84,8 @@ SELECT * FROM t; 2 (2 rows) -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status 
FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+-----------------------------+--------------------- -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT @@ -98,13 +98,13 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag | SELECT * FROM t; | (4 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- Prepared statement test -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; PREPARE test_stmt AS SELECT 1; EXECUTE test_stmt; ?column? @@ -113,8 +113,8 @@ EXECUTE test_stmt; (1 row) DEALLOCATE test_stmt; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+--------------------------------+--------------------- -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT @@ -123,14 +123,14 @@ SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yag -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE (4 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_uds.out b/gpcontrib/gp_stats_collector/expected/gpsc_uds.out new file mode 100644 index 00000000000..e8bca79e669 --- /dev/null +++ 
b/gpcontrib/gp_stats_collector/expected/gpsc_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +-- end_ignore +\set UDS_PATH '/tmp/gpsc_test.sock' +-- Configure extension to send via UDS +SET gpsc.uds_path TO :'UDS_PATH'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.logging_mode TO 'UDS'; +-- Start receiver +SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send +SELECT 1; + ?column? +---------- + 1 +(1 row) + +-- Receive +SELECT gpsc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT gpsc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION gp_stats_collector; +RESET gpsc.uds_path; +RESET gpsc.ignored_users_list; +RESET gpsc.enable; +RESET gpsc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out b/gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out similarity index 65% rename from gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out rename to gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out index 9de126dd882..db3949f3152 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utf8_trim.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out @@ -1,24 +1,24 @@ -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) RETURNS TEXT AS $$ SELECT query_text - FROM yagpcc.log + FROM gpsc.log WHERE query_text LIKE '%' || marker || '%' ORDER BY datetime DESC LIMIT 1 $$ LANGUAGE sql VOLATILE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; -- Test 1: 1 byte chars -SET yagpcc.max_text_size to 19; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 
19; +SET gpsc.logging_mode to 'TBL'; SELECT /*test1*/ 'HelloWorld'; ?column? ------------ HelloWorld (1 row) -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; correct_length ---------------- @@ -26,15 +26,15 @@ SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; (1 row) -- Test 2: 2 byte chars -SET yagpcc.max_text_size to 19; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; SELECT /*test2*/ 'РУССКИЙЯЗЫК'; ?column? ------------- РУССКИЙЯЗЫК (1 row) -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Character 'Р' has two bytes and cut in the middle => not included. SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; correct_length @@ -43,15 +43,15 @@ SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; (1 row) -- Test 3: 4 byte chars -SET yagpcc.max_text_size to 21; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 21; +SET gpsc.logging_mode to 'TBL'; SELECT /*test3*/ '😀'; ?column? ---------- 😀 (1 row) -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Emoji has 4 bytes and cut before the last byte => not included. 
SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; correct_length @@ -61,8 +61,8 @@ SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; -- Cleanup DROP FUNCTION get_marked_query(TEXT); -RESET yagpcc.max_text_size; -RESET yagpcc.logging_mode; -RESET yagpcc.enable; -RESET yagpcc.ignored_users_list; -DROP EXTENSION yagp_hooks_collector; +RESET gpsc.max_text_size; +RESET gpsc.logging_mode; +RESET gpsc.enable; +RESET gpsc.ignored_users_list; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out b/gpcontrib/gp_stats_collector/expected/gpsc_utility.out similarity index 57% rename from gpcontrib/yagp_hooks_collector/expected/yagp_utility.out rename to gpcontrib/gp_stats_collector/expected/gpsc_utility.out index 7df1d2816eb..e8e28614370 100644 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_utility.out +++ b/gpcontrib/gp_stats_collector/expected/gpsc_utility.out @@ -1,5 +1,5 @@ -CREATE EXTENSION yagp_hooks_collector; -CREATE OR REPLACE FUNCTION yagp_status_order(status text) +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) RETURNS integer AS $$ BEGIN @@ -12,19 +12,19 @@ BEGIN END; END; $$ LANGUAGE plpgsql IMMUTABLE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.logging_mode to 'TBL'; CREATE TABLE test_table (a int, b text); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
CREATE INDEX test_idx ON test_table(a); ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; DROP TABLE test_table; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------------------+--------------------- -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT @@ -35,24 +35,24 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT -1 | DROP TABLE test_table; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (10 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- Partitioning -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; CREATE TABLE pt_test (a int, b int) DISTRIBUTED BY (a) PARTITION BY RANGE (a) (START (0) END (100) EVERY (50)); DROP TABLE pt_test; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------+--------------------- -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT 
@@ -65,23 +65,23 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util | (START (0) END (100) EVERY (50)); | -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE pt_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (6 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- Views and Functions -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; CREATE VIEW test_view AS SELECT 1 AS a; CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; DROP VIEW test_view; DROP FUNCTION test_func(int); -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+------------------------------------------------------------------------------------+--------------------- -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT @@ -92,17 +92,17 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | DROP VIEW test_view; | QUERY_STATUS_DONE -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (10 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- 
Transaction Operations -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; BEGIN; SAVEPOINT sp1; ROLLBACK TO sp1; @@ -112,37 +112,37 @@ SAVEPOINT sp2; ABORT; BEGIN; ROLLBACK; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+----------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | 
QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (18 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- DML Operations -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; CREATE TABLE dml_test (a int, b text); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -150,33 +150,33 @@ INSERT INTO dml_test VALUES (1, 'test'); UPDATE dml_test SET b = 'updated' WHERE a = 1; DELETE FROM dml_test WHERE a = 1; DROP TABLE dml_test; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+----------------------------------------+--------------------- -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (6 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- COPY Operations -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; CREATE TABLE copy_test (a 
int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. COPY (SELECT 1) TO STDOUT; 1 DROP TABLE copy_test; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+---------------------------------+--------------------- -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT @@ -185,23 +185,23 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (8 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- Prepared Statements and error during execute -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; PREPARE test_prep(int) AS SELECT $1/0 AS value; EXECUTE test_prep(0::int); ERROR: division by zero DEALLOCATE test_prep; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, 
ccnt, gpsc_status_order(query_status) ASC; segid | query_text | query_status -------+-------------------------------------------------+--------------------- -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT @@ -210,39 +210,39 @@ SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND util -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (8 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -- GUC Settings -SET yagpcc.logging_mode to 'TBL'; -SET yagpcc.report_nested_queries TO FALSE; -RESET yagpcc.report_nested_queries; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; - segid | query_text | query_status --------+--------------------------------------------+--------------------- - -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT - -1 | SET yagpcc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE - -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.report_nested_queries; | QUERY_STATUS_DONE - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET yagpcc.logging_mode; | QUERY_STATUS_DONE +SET gpsc.logging_mode to 'TBL'; +SET gpsc.report_nested_queries TO FALSE; +RESET gpsc.report_nested_queries; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status 
+-------+------------------------------------------+--------------------- + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE (6 rows) -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT gpsc.truncate_log() IS NOT NULL AS t; t --- (0 rows) -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql new file mode 100644 index 00000000000..4e0157117e9 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql @@ -0,0 +1,113 @@ +/* gp_stats_collector--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION gp_stats_collector UPDATE TO '1.1'" to load this file. \quit + +CREATE SCHEMA gpsc; + +-- Unlink existing objects from extension. 
+ALTER EXTENSION gp_stats_collector DROP VIEW gpsc_stat_messages; +ALTER EXTENSION gp_stats_collector DROP FUNCTION gpsc_stat_messages_reset(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_f_on_segments(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_f_on_master(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_reset_f_on_segments(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_reset_f_on_master(); + +-- Now drop the objects. +DROP VIEW gpsc_stat_messages; +DROP FUNCTION gpsc_stat_messages_reset(); +DROP FUNCTION __gpsc_stat_messages_f_on_segments(); +DROP FUNCTION __gpsc_stat_messages_f_on_master(); +DROP FUNCTION __gpsc_stat_messages_reset_f_on_segments(); +DROP FUNCTION __gpsc_stat_messages_reset_f_on_master(); + +-- Recreate functions and view in new schema. +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT gpsc.__stat_messages_reset_f_on_master(); + SELECT gpsc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc.stat_messages AS + SELECT C.* + FROM gpsc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + 
) + UNION ALL + SELECT C.* + FROM gpsc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +-- Create new objects. +CREATE FUNCTION gpsc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside gpsc schema. +SELECT gpsc.__init_log_on_master(); +SELECT gpsc.__init_log_on_segments(); + +CREATE VIEW gpsc.log AS + SELECT * FROM gpsc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('gpsc.__log') -- segments + ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION gpsc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.truncate_log() +RETURNS SETOF void AS $$ +BEGIN + PERFORM gpsc.__truncate_log_on_master(); + PERFORM gpsc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql new file mode 100644 index 00000000000..ec902b02e02 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql @@ -0,0 +1,55 @@ +/* gp_stats_collector--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gp_stats_collector" to load this file. 
\quit + +CREATE FUNCTION __gpsc_stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION __gpsc_stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc_stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT __gpsc_stat_messages_reset_f_on_master(); + SELECT __gpsc_stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION __gpsc_stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION __gpsc_stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc_stat_messages AS + SELECT C.* + FROM __gpsc_stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM __gpsc_stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql new file mode 100644 index 00000000000..6e24207e913 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql @@ -0,0 +1,110 @@ +/* gp_stats_collector--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gp_stats_collector" to load this file. 
\quit + +CREATE SCHEMA gpsc; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT gpsc.__stat_messages_reset_f_on_master(); + SELECT gpsc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc.stat_messages AS + SELECT C.* + FROM gpsc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM gpsc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +CREATE FUNCTION gpsc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside gpsc schema. 
+SELECT gpsc.__init_log_on_master(); +SELECT gpsc.__init_log_on_segments(); + +CREATE VIEW gpsc.log AS + SELECT * FROM gpsc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('gpsc.__log') -- segments +ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION gpsc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.truncate_log() +RETURNS SETOF void AS $$ +BEGIN + PERFORM gpsc.__truncate_log_on_master(); + PERFORM gpsc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; + +CREATE FUNCTION gpsc.__test_uds_start_server(path text) +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_test_uds_start_server' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__test_uds_receive(timeout_ms int DEFAULT 2000) +RETURNS SETOF bigint +AS 'MODULE_PATHNAME', 'gpsc_test_uds_receive' +LANGUAGE C STRICT EXECUTE ON MASTER; + +CREATE FUNCTION gpsc.__test_uds_stop_server() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_test_uds_stop_server' +LANGUAGE C EXECUTE ON MASTER; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector.control b/gpcontrib/gp_stats_collector/gp_stats_collector.control new file mode 100644 index 00000000000..4aea2bd49b8 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector.control @@ -0,0 +1,5 @@ +# gp_stats_collector extension +comment = 'Intercept query and plan execution hooks and report them to Cloudberry monitor agents' +default_version = '1.1' +module_pathname = '$libdir/gp_stats_collector' +superuser = true diff --git a/gpcontrib/yagp_hooks_collector/metric.md b/gpcontrib/gp_stats_collector/metric.md similarity index 94% rename from gpcontrib/yagp_hooks_collector/metric.md rename to gpcontrib/gp_stats_collector/metric.md index 
5df56877edb..6f168d8cd98 100644 --- a/gpcontrib/yagp_hooks_collector/metric.md +++ b/gpcontrib/gp_stats_collector/metric.md @@ -1,4 +1,23 @@ -## YAGP Hooks Collector Metrics + + +## GP Stats Collector Metrics ### States A Postgres process goes through 4 executor functions to execute a query: @@ -7,7 +26,7 @@ A Postgres process goes through 4 executor functions to execute a query: 3) `ExecutorFinish()` - cleanup. 4) `ExecutorEnd()` - cleanup. -yagp-hooks-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +gp-stats-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: ``` submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done ``` @@ -67,8 +86,8 @@ submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end - | `temp_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks written | | `blk_read_time` | double | E, D | ABS | + | Node | + | + | seconds | Time reading data blocks | | `blk_write_time` | double | E, D | ABS | + | Node | + | + | seconds | Time writing data blocks | -| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (YAGPCC-specific) | -| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (YAGPCC-specific) | +| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (GPSC-specific) | +| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (GPSC-specific) | | **NetworkStat (sent)** | | | | | | | | | | | `sent.total_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes sent, including headers | | `sent.tuple_bytes` | uint32 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data sent | diff --git 
a/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto similarity index 97% rename from gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto rename to gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto index 91ac0c4941a..a9e26471839 100644 --- a/gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto @@ -1,8 +1,6 @@ syntax = "proto3"; -package yagpcc; -option java_outer_classname = "SegmentYAGPCCM"; -option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; +package gpsc; enum QueryStatus { QUERY_STATUS_UNSPECIFIED = 0; diff --git a/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto similarity index 98% rename from gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto rename to gpcontrib/gp_stats_collector/protos/gpsc_plan.proto index 962fab4bbdd..5a7269edd20 100644 --- a/gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto @@ -1,8 +1,6 @@ syntax = "proto3"; -package yagpcc; -option java_outer_classname = "SegmentYAGPCCP"; -option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/common;greenplum"; +package gpsc; message MetricPlan { GpdbNodeType type = 1; diff --git a/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto similarity index 86% rename from gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto rename to gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto index 0b9e34df49d..4cd795424ab 100644 --- a/gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto @@ -2,12 +2,10 @@ syntax = "proto3"; import "google/protobuf/timestamp.proto"; -import "protos/yagpcc_metrics.proto"; -import "protos/yagpcc_plan.proto"; 
+import "protos/gpsc_metrics.proto"; +import "protos/gpsc_plan.proto"; -package yagpcc; -option java_outer_classname = "SegmentYAGPCCAS"; -option go_package = "a.yandex-team.ru/cloud/mdb/yagpcc/api/proto/agent_segment;greenplum"; +package gpsc; service SetQueryInfo { rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} diff --git a/gpcontrib/gp_stats_collector/results/gpsc_cursors.out b/gpcontrib/gp_stats_collector/results/gpsc_cursors.out new file mode 100644 index 00000000000..282d9ac49e1 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_cursors.out @@ -0,0 +1,163 @@ +CREATE EXTENSION gp_stats_collector; +CREATE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +-- DECLARE +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + 
+SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DECLARE WITH HOLD +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(14 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- ROLLBACK +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE 
cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(12 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- FETCH +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; + ?column? +---------- + 2 +(1 row) + +FETCH 1 IN cursor_stats_6; + ?column? +---------- + 3 +(1 row) + +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE 
+ -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(18 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_dist.out b/gpcontrib/gp_stats_collector/results/gpsc_dist.out new file mode 100644 index 00000000000..92e8678767b --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_dist.out @@ -0,0 +1,175 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; +-- Hash distributed table +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; +SET gpsc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; + id +---- + 1 +(1 row) + +RESET optimizer_enable_direct_dispatch; +RESET gpsc.logging_mode; +-- Should see 8 rows. 
+SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +SET gpsc.logging_mode to 'TBL'; +-- Scan all segments. +select * from test_hash_dist; + id +---- + 1 +(1 row) + +DROP TABLE test_hash_dist; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------+--------------------- + -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; +SET gpsc.logging_mode to 'TBL'; 
+SELECT COUNT(*) FROM test_replicated, force_segments(); + count +------- + 3 +(1 row) + +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------------------+--------------------- + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); +SET gpsc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; + count +------- + 100 +(1 row) + +RESET gpsc.logging_mode; +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + query_text | query_status +-----------------------------------------+--------------------- + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE +(12 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out b/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out new file mode 100644 index 00000000000..19c4774575d --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out @@ -0,0 +1,61 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. 
+-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +SELECT gpsc.truncate_log(); + truncate_log +-------------- +(0 rows) + +-- end_ignore +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM gpsc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.logging_mode TO 'TBL'; +-- SET below disables utility logging and DONE must still be logged. +SET gpsc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_DONE +(2 rows) + +-- SELECT below adds current user to ignore list and DONE must still be logged. +-- start_ignore +SELECT set_config('gpsc.ignored_users_list', current_user, false); + set_config +------------ + gpadmin +(1 row) + +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_START + QUERY_STATUS_END + QUERY_STATUS_DONE +(4 rows) + +DROP FUNCTION print_last_query(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_locale.out b/gpcontrib/gp_stats_collector/results/gpsc_locale.out new file mode 100644 index 00000000000..a01fe0648b9 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. 
This test is partially taken from +-- gp_locale. +-- start_ignore +DROP DATABASE IF EXISTS gpsc_test_locale; +-- end_ignore +CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c gpsc_test_locale +CREATE EXTENSION gp_stats_collector; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable_utility TO TRUE; +SET gpsc.enable TO TRUE; +CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +DROP TABLE gpsc_hi_안녕세계; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_select.out b/gpcontrib/gp_stats_collector/results/gpsc_select.out new file mode 100644 index 00000000000..3008c8f6d55 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_select.out @@ -0,0 +1,136 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; +-- Basic SELECT tests +SET gpsc.logging_mode to 'TBL'; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +SELECT COUNT(*) FROM generate_series(1,10); + count +------- + 10 +(1 row) + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction test +SET gpsc.logging_mode to 'TBL'; +BEGIN; +SELECT 1; + ?column? +---------- + 1 +(1 row) + +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- CTE test +SET gpsc.logging_mode to 'TBL'; +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + column1 +--------- + 1 + 2 +(2 rows) + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-----------------------------+--------------------- + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), 
(2))+| QUERY_STATUS_END + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | SELECT * FROM t; | +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared statement test +SET gpsc.logging_mode to 'TBL'; +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; + ?column? +---------- + 1 +(1 row) + +DEALLOCATE test_stmt; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------+--------------------- + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_uds.out b/gpcontrib/gp_stats_collector/results/gpsc_uds.out new file mode 100644 index 00000000000..e8bca79e669 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +-- end_ignore +\set UDS_PATH '/tmp/gpsc_test.sock' +-- Configure extension to send via UDS +SET gpsc.uds_path TO :'UDS_PATH'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.logging_mode TO 'UDS'; +-- Start receiver +SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +-- Receive +SELECT gpsc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT gpsc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION gp_stats_collector; +RESET gpsc.uds_path; +RESET gpsc.ignored_users_list; +RESET gpsc.enable; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out b/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out new file mode 100644 index 00000000000..db3949f3152 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out @@ -0,0 +1,68 @@ +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM gpsc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +-- Test 1: 1 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; + ?column? +------------ + HelloWorld +(1 row) + +RESET gpsc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 2: 2 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; + ?column? +------------- + РУССКИЙЯЗЫК +(1 row) + +RESET gpsc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 3: 4 byte chars +SET gpsc.max_text_size to 21; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; + ?column? +---------- + 😀 +(1 row) + +RESET gpsc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET gpsc.max_text_size; +RESET gpsc.logging_mode; +RESET gpsc.enable; +RESET gpsc.ignored_users_list; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_utility.out b/gpcontrib/gp_stats_collector/results/gpsc_utility.out new file mode 100644 index 00000000000..e8e28614370 --- /dev/null +++ b/gpcontrib/gp_stats_collector/results/gpsc_utility.out @@ -0,0 +1,248 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE test_table (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------------------+--------------------- + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE + -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE test_table; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partitioning +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------+--------------------- + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE pt_test; | 
QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Views and Functions +SET gpsc.logging_mode to 'TBL'; +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------------------------------------------------+--------------------- + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE + -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT + -1 | DROP VIEW test_view; | QUERY_STATUS_DONE + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction Operations +SET gpsc.logging_mode to 'TBL'; +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; +BEGIN; +SAVEPOINT sp2; +ABORT; +BEGIN; +ROLLBACK; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | 
BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(18 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DML Operations +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE dml_test (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------+--------------------- + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE + -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- COPY Operations +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE copy_test (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+COPY (SELECT 1) TO STDOUT; +1 +DROP TABLE copy_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared Statements and error during execute +SET gpsc.logging_mode to 'TBL'; +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +ERROR: division by zero +DEALLOCATE test_prep; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------+--------------------- + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR + -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT + -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- GUC Settings +SET gpsc.logging_mode to 'TBL'; +SET gpsc.report_nested_queries TO FALSE; +RESET 
gpsc.report_nested_queries; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------+--------------------- + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql b/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql new file mode 100644 index 00000000000..8361f7b678d --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql @@ -0,0 +1,85 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; + +-- DECLARE +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT 
gpsc.truncate_log() IS NOT NULL AS t; + +-- DECLARE WITH HOLD +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- ROLLBACK +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- FETCH +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; +FETCH 1 IN cursor_stats_6; +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql b/gpcontrib/gp_stats_collector/sql/gpsc_dist.sql similarity index 53% rename from gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql rename to gpcontrib/gp_stats_collector/sql/gpsc_dist.sql index d5519d0cd96..46b531a70ca 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_dist.sql +++ b/gpcontrib/gp_stats_collector/sql/gpsc_dist.sql @@ -1,6 
+1,6 @@ -CREATE EXTENSION yagp_hooks_collector; +CREATE EXTENSION gp_stats_collector; -CREATE OR REPLACE FUNCTION yagp_status_order(status text) +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) RETURNS integer AS $$ BEGIN @@ -14,36 +14,36 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; -SET yagpcc.enable_utility TO FALSE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; -- Hash distributed table CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); INSERT INTO test_hash_dist SELECT 1; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SET optimizer_enable_direct_dispatch TO TRUE; -- Direct dispatch is used here, only one segment is scanned. select * from test_hash_dist where id = 1; RESET optimizer_enable_direct_dispatch; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Should see 8 rows. -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; -- Scan all segments. 
select * from test_hash_dist; DROP TABLE test_hash_dist; -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; -- Replicated table CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ @@ -55,14 +55,14 @@ $$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; INSERT INTO test_replicated SELECT 1; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SELECT COUNT(*) FROM test_replicated, force_segments(); DROP TABLE test_replicated; DROP FUNCTION force_segments(); -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; -- Partially distributed table (2 numsegments) SET allow_system_table_mods = ON; @@ -70,19 +70,19 @@ CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.logging_mode to 'TBL'; SELECT COUNT(*) FROM test_partial_dist; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; DROP TABLE test_partial_dist; RESET allow_system_table_mods; -- Should see 12 rows. 
-SELECT query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; +SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql b/gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql similarity index 58% rename from gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql rename to gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql index 9e6de69d61e..6aff2ad5cf6 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_guc_cache.sql +++ b/gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql @@ -8,36 +8,36 @@ -- have its DONE event rejected, creating orphaned SUBMIT entries. -- This is due to query being actually executed between SUBMIT and DONE. 
-- start_ignore -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; -SELECT yagpcc.truncate_log(); +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +SELECT gpsc.truncate_log(); -- end_ignore CREATE OR REPLACE FUNCTION print_last_query(query text) RETURNS TABLE(query_status text) AS $$ SELECT query_status - FROM yagpcc.log + FROM gpsc.log WHERE segid = -1 AND query_text = query ORDER BY ccnt DESC $$ LANGUAGE sql; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.logging_mode TO 'TBL'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.logging_mode TO 'TBL'; -- SET below disables utility logging and DONE must still be logged. -SET yagpcc.enable_utility TO FALSE; -SELECT * FROM print_last_query('SET yagpcc.enable_utility TO FALSE;'); +SET gpsc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); -- SELECT below adds current user to ignore list and DONE must still be logged. 
-- start_ignore -SELECT set_config('yagpcc.ignored_users_list', current_user, false); +SELECT set_config('gpsc.ignored_users_list', current_user, false); -- end_ignore -SELECT * FROM print_last_query('SELECT set_config(''yagpcc.ignored_users_list'', current_user, false);'); +SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); DROP FUNCTION print_last_query(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; -RESET yagpcc.logging_mode; +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql b/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql new file mode 100644 index 00000000000..6321c93f5ab --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql @@ -0,0 +1,29 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+ +-- start_ignore +DROP DATABASE IF EXISTS gpsc_test_locale; +-- end_ignore + +CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c gpsc_test_locale + +CREATE EXTENSION gp_stats_collector; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable_utility TO TRUE; +SET gpsc.enable TO TRUE; + +CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +DROP TABLE gpsc_hi_안녕세계; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_select.sql b/gpcontrib/gp_stats_collector/sql/gpsc_select.sql new file mode 100644 index 00000000000..673cbee0c10 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_select.sql @@ -0,0 +1,69 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; + +-- Basic SELECT tests +SET gpsc.logging_mode to 'TBL'; + +SELECT 1; +SELECT COUNT(*) FROM generate_series(1,10); + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Transaction test +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +SELECT 1; +COMMIT; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, 
gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- CTE test +SET gpsc.logging_mode to 'TBL'; + +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Prepared statement test +SET gpsc.logging_mode to 'TBL'; + +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; +DEALLOCATE test_stmt; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql b/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql new file mode 100644 index 00000000000..14377b15c8c --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql @@ -0,0 +1,31 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +-- end_ignore + +\set UDS_PATH '/tmp/gpsc_test.sock' + +-- Configure extension to send via UDS +SET gpsc.uds_path TO :'UDS_PATH'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.logging_mode TO 'UDS'; + +-- Start receiver +SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); + +-- Send +SELECT 1; + +-- Receive +SELECT gpsc.__test_uds_receive() > 0 as received; + +-- Stop receiver +SELECT gpsc.__test_uds_stop_server(); + +-- Cleanup +DROP EXTENSION gp_stats_collector; +RESET gpsc.uds_path; +RESET gpsc.ignored_users_list; +RESET gpsc.enable; +RESET gpsc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql b/gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql similarity index 58% rename from 
gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql rename to gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql index c3053e4af0c..a3f8a376d55 100644 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_utf8_trim.sql +++ b/gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql @@ -1,45 +1,45 @@ -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) RETURNS TEXT AS $$ SELECT query_text - FROM yagpcc.log + FROM gpsc.log WHERE query_text LIKE '%' || marker || '%' ORDER BY datetime DESC LIMIT 1 $$ LANGUAGE sql VOLATILE; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; -- Test 1: 1 byte chars -SET yagpcc.max_text_size to 19; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; SELECT /*test1*/ 'HelloWorld'; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; -- Test 2: 2 byte chars -SET yagpcc.max_text_size to 19; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; SELECT /*test2*/ 'РУССКИЙЯЗЫК'; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Character 'Р' has two bytes and cut in the middle => not included. SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; -- Test 3: 4 byte chars -SET yagpcc.max_text_size to 21; -SET yagpcc.logging_mode to 'TBL'; +SET gpsc.max_text_size to 21; +SET gpsc.logging_mode to 'TBL'; SELECT /*test3*/ '😀'; -RESET yagpcc.logging_mode; +RESET gpsc.logging_mode; -- Emoji has 4 bytes and cut before the last byte => not included. 
SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; -- Cleanup DROP FUNCTION get_marked_query(TEXT); -RESET yagpcc.max_text_size; -RESET yagpcc.logging_mode; -RESET yagpcc.enable; -RESET yagpcc.ignored_users_list; +RESET gpsc.max_text_size; +RESET gpsc.logging_mode; +RESET gpsc.enable; +RESET gpsc.ignored_users_list; -DROP EXTENSION yagp_hooks_collector; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql b/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql new file mode 100644 index 00000000000..9abb965db37 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql @@ -0,0 +1,135 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; + +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE test_table (a int, b text); +CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Partitioning +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Views 
and Functions +SET gpsc.logging_mode to 'TBL'; + +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Transaction Operations +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; + +BEGIN; +SAVEPOINT sp2; +ABORT; + +BEGIN; +ROLLBACK; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- DML Operations +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE dml_test (a int, b text); +INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- COPY Operations +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE copy_test (a int); +COPY (SELECT 1) TO STDOUT; +DROP TABLE copy_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Prepared Statements and error during execute +SET gpsc.logging_mode to 'TBL'; + +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +DEALLOCATE test_prep; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY 
segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- GUC Settings +SET gpsc.logging_mode to 'TBL'; + +SET gpsc.report_nested_queries TO FALSE; +RESET gpsc.report_nested_queries; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/src/Config.cpp b/gpcontrib/gp_stats_collector/src/Config.cpp similarity index 79% rename from gpcontrib/yagp_hooks_collector/src/Config.cpp rename to gpcontrib/gp_stats_collector/src/Config.cpp index 62c16e91d1f..e117aa941fd 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.cpp +++ b/gpcontrib/gp_stats_collector/src/Config.cpp @@ -20,7 +20,7 @@ * Config.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/Config.cpp + * gpcontrib/gp_stats_collector/src/Config.cpp * *------------------------------------------------------------------------- */ @@ -62,63 +62,63 @@ static void assign_ignored_users_hook(const char *, void *) { void Config::init_gucs() { DefineCustomStringVariable( - "yagpcc.uds_path", "Sets filesystem path of the agent socket", 0LL, - &guc_uds_path, "/tmp/yagpcc_agent.sock", PGC_SUSET, + "gpsc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/gpsc_agent.sock", PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomBoolVariable( - "yagpcc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, + "gpsc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomBoolVariable( - "yagpcc.enable_analyze", "Collect 
analyze metrics in yagpcc", 0LL, + "gpsc.enable_analyze", "Collect analyze metrics in gpsc", 0LL, &guc_enable_analyze, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomBoolVariable( - "yagpcc.enable_cdbstats", "Collect CDB metrics in yagpcc", 0LL, + "gpsc.enable_cdbstats", "Collect CDB metrics in gpsc", 0LL, &guc_enable_cdbstats, true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomBoolVariable( - "yagpcc.report_nested_queries", "Collect stats on nested queries", 0LL, + "gpsc.report_nested_queries", "Collect stats on nested queries", 0LL, &guc_report_nested_queries, true, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - DefineCustomStringVariable("yagpcc.ignored_users_list", - "Make yagpcc ignore queries issued by given users", + DefineCustomStringVariable("gpsc.ignored_users_list", + "Make gpsc ignore queries issued by given users", 0LL, &guc_ignored_users, "gpadmin,repl,gpperfmon,monitor", PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, assign_ignored_users_hook, 0LL); DefineCustomIntVariable( - "yagpcc.max_text_size", - "Make yagpcc trim query texts longer than configured size in bytes", NULL, + "gpsc.max_text_size", + "Make gpsc trim query texts longer than configured size in bytes", NULL, &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); DefineCustomIntVariable( - "yagpcc.max_plan_size", - "Make yagpcc trim plan longer than configured size", NULL, + "gpsc.max_plan_size", + "Make gpsc trim plan longer than configured size", NULL, &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); DefineCustomIntVariable( - "yagpcc.min_analyze_time", + "gpsc.min_analyze_time", "Sets the minimum execution time above which plans will be logged.", "Zero prints all plans. 
-1 turns this feature off.", &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); DefineCustomEnumVariable( - "yagpcc.logging_mode", "Logging mode: UDS or PG Table", NULL, + "gpsc.logging_mode", "Logging mode: UDS or PG Table", NULL, &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, NULL); DefineCustomBoolVariable( - "yagpcc.enable_utility", "Collect utility statement stats", NULL, + "gpsc.enable_utility", "Collect utility statement stats", NULL, &guc_enable_utility, false, PGC_USERSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } @@ -127,27 +127,27 @@ void Config::update_ignored_users(const char *new_guc_ignored_users) { auto new_ignored_users_set = std::make_unique(); if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { /* Need a modifiable copy of string */ - char *rawstring = ya_gpdb::pstrdup(new_guc_ignored_users); + char *rawstring = gpdb::pstrdup(new_guc_ignored_users); List *elemlist; ListCell *l; /* Parse string into list of identifiers */ - if (!ya_gpdb::split_identifier_string(rawstring, ',', &elemlist)) { + if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { /* syntax error in list */ - ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); ereport( LOG, (errcode(ERRCODE_SYNTAX_ERROR), errmsg( - "invalid list syntax in parameter yagpcc.ignored_users_list"))); + "invalid list syntax in parameter gpsc.ignored_users_list"))); return; } foreach (l, elemlist) { new_ignored_users_set->insert((char *)lfirst(l)); } - ya_gpdb::pfree(rawstring); - ya_gpdb::list_free(elemlist); + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); } ignored_users_ = std::move(new_ignored_users_set); } diff --git a/gpcontrib/yagp_hooks_collector/src/Config.h b/gpcontrib/gp_stats_collector/src/Config.h 
similarity index 97% rename from gpcontrib/yagp_hooks_collector/src/Config.h rename to gpcontrib/gp_stats_collector/src/Config.h index 01ae5ea328e..91a1ffe44f2 100644 --- a/gpcontrib/yagp_hooks_collector/src/Config.h +++ b/gpcontrib/gp_stats_collector/src/Config.h @@ -20,7 +20,7 @@ * Config.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/Config.h + * gpcontrib/gp_stats_collector/src/Config.h * *------------------------------------------------------------------------- */ diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp b/gpcontrib/gp_stats_collector/src/EventSender.cpp similarity index 86% rename from gpcontrib/yagp_hooks_collector/src/EventSender.cpp rename to gpcontrib/gp_stats_collector/src/EventSender.cpp index 6993814ffbf..b28ceba175a 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.cpp +++ b/gpcontrib/gp_stats_collector/src/EventSender.cpp @@ -20,7 +20,7 @@ * EventSender.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/EventSender.cpp + * gpcontrib/gp_stats_collector/src/EventSender.cpp * *------------------------------------------------------------------------- */ @@ -106,7 +106,7 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, return true; } -bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, +bool EventSender::log_query_req(const gpsc::SetQueryReq &req, const std::string &event, bool utility) { bool clear_big_fields = false; switch (config.logging_mode()) { @@ -114,7 +114,7 @@ bool EventSender::log_query_req(const yagpcc::SetQueryReq &req, clear_big_fields = UDSConnector::report_query(req, event, config); break; case LOG_MODE_TBL: - ya_gpdb::insert_log(req, utility); + gpdb::insert_log(req, utility); clear_big_fields = false; break; default: @@ -170,7 +170,7 @@ void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { instr_time starttime; INSTR_TIME_SET_CURRENT(starttime); query_desc->showstatctx = - 
ya_gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); } } } @@ -192,12 +192,12 @@ void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { // context so it will go away at executor_end. if (query_desc->totaltime == NULL) { MemoryContext oldcxt = - ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = ya_gpdb::instr_alloc(1, INSTRUMENT_ALL, false); - ya_gpdb::mem_ctx_switch_to(oldcxt); + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL, false); + gpdb::mem_ctx_switch_to(oldcxt); } } - yagpcc::GPMetrics stats; + gpsc::GPMetrics stats; std::swap(stats, *query_msg->mutable_query_metrics()); if (log_query_req(*query_msg, "started", false /* utility */)) { clear_big_fields(query_msg); @@ -233,7 +233,7 @@ void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { submit_query(query_desc); auto &query = get_query(query_desc); auto *query_msg = query.message.get(); - *query_msg = create_query_req(yagpcc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg = create_query_req(gpsc::QueryStatus::QUERY_STATUS_SUBMIT); *query_msg->mutable_submit_time() = current_ts(); set_query_info(query_msg); set_qi_nesting_level(query_msg, nesting_level); @@ -256,27 +256,27 @@ void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, QueryMetricsStatus status, bool utility, ErrorData *edata) { - yagpcc::QueryStatus query_status; + gpsc::QueryStatus query_status; std::string msg; switch (status) { case METRICS_QUERY_DONE: case METRICS_INNER_QUERY_DONE: - query_status = yagpcc::QueryStatus::QUERY_STATUS_DONE; + query_status = gpsc::QueryStatus::QUERY_STATUS_DONE; msg = "done"; break; case METRICS_QUERY_ERROR: - query_status = yagpcc::QueryStatus::QUERY_STATUS_ERROR; + query_status = 
gpsc::QueryStatus::QUERY_STATUS_ERROR; msg = "error"; break; case METRICS_QUERY_CANCELING: // at the moment we don't track this event, but I`ll leave this code // here just in case Assert(false); - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELLING; + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELLING; msg = "cancelling"; break; case METRICS_QUERY_CANCELED: - query_status = yagpcc::QueryStatus::QUERY_STATUS_CANCELED; + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELED; msg = "cancelled"; break; default: @@ -285,15 +285,15 @@ void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, } auto prev_state = query.state; update_query_state(query, QueryState::DONE, utility, - query_status == yagpcc::QueryStatus::QUERY_STATUS_DONE); + query_status == gpsc::QueryStatus::QUERY_STATUS_DONE); auto query_msg = query.message.get(); query_msg->set_query_status(query_status); if (status == METRICS_QUERY_ERROR) { bool error_flushed = elog_message() == NULL; if (error_flushed && (edata == NULL || edata->message == NULL)) { - ereport(WARNING, (errmsg("YAGPCC missing error message"))); + ereport(WARNING, (errmsg("GPSC missing error message"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); } else { set_qi_error_message( query_msg, error_flushed ? edata->message : elog_message(), config); @@ -324,18 +324,18 @@ void EventSender::collect_query_done(QueryDesc *query_desc, bool utility, // Skip sending done message if query errored before submit. 
if (!qdesc_submitted(query_desc)) { if (status != METRICS_QUERY_ERROR) { - ereport(WARNING, (errmsg("YAGPCC trying to process DONE hook for " + ereport(WARNING, (errmsg("GPSC trying to process DONE hook for " "unsubmitted and unerrored query"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); } return; } if (queries.empty()) { - ereport(WARNING, (errmsg("YAGPCC cannot find query to process DONE hook"))); + ereport(WARNING, (errmsg("GPSC cannot find query to process DONE hook"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); return; } auto &query = get_query(query_desc); @@ -346,8 +346,8 @@ void EventSender::collect_query_done(QueryDesc *query_desc, bool utility, update_nested_counters(query_desc); queries.erase(QueryKey::from_qdesc(query_desc)); - pfree(query_desc->yagp_query_key); - query_desc->yagp_query_key = NULL; + pfree(query_desc->gpsc_query_key); + query_desc->gpsc_query_key = NULL; } void EventSender::ic_metrics_collect() { @@ -395,7 +395,7 @@ void EventSender::analyze_stats_collect(QueryDesc *query_desc) { } // Make sure stats accumulation is done. // (Note: it's okay if several levels of hook all do this.) 
- ya_gpdb::instr_end_loop(query_desc->totaltime); + gpdb::instr_end_loop(query_desc->totaltime); double ms = query_desc->totaltime->total * 1000.0; if (ms >= config.min_analyze_time()) { @@ -424,7 +424,7 @@ EventSender::EventSender() { EventSender::~EventSender() { for (const auto &[qkey, _] : queries) { - ereport(LOG, (errmsg("YAGPCC query with missing done event: " + ereport(LOG, (errmsg("GPSC query with missing done event: " "tmid=%d ssid=%d ccnt=%d nlvl=%d", qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); } @@ -440,7 +440,7 @@ void EventSender::update_query_state(QueryItem &query, QueryState new_state, break; case QueryState::START: if (query.state == QueryState::SUBMIT) { - query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_START); + query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_START); } else { Assert(false); } @@ -449,11 +449,11 @@ void EventSender::update_query_state(QueryItem &query, QueryState new_state, // Example of below assert triggering: CURSOR closes before ever being // executed Assert(query->state == QueryState::START || // IsAbortInProgress()); - query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_END); + query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_END); break; case QueryState::DONE: Assert(query.state == QueryState::END || !success || utility); - query.message->set_query_status(yagpcc::QueryStatus::QUERY_STATUS_DONE); + query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_DONE); break; default: Assert(false); @@ -464,28 +464,28 @@ void EventSender::update_query_state(QueryItem &query, QueryState new_state, EventSender::QueryItem &EventSender::get_query(QueryDesc *query_desc) { if (!qdesc_submitted(query_desc)) { ereport(WARNING, - (errmsg("YAGPCC attempting to get query that was not submitted"))); + (errmsg("GPSC attempting to get query that was not submitted"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + 
(errmsg("GPSC query sourceText: %s", query_desc->sourceText))); throw std::runtime_error("Attempting to get query that was not submitted"); } return queries.find(QueryKey::from_qdesc(query_desc))->second; } void EventSender::submit_query(QueryDesc *query_desc) { - if (query_desc->yagp_query_key) { + if (query_desc->gpsc_query_key) { ereport(WARNING, - (errmsg("YAGPCC trying to submit already submitted query"))); + (errmsg("GPSC trying to submit already submitted query"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); } QueryKey::register_qkey(query_desc, nesting_level); auto key = QueryKey::from_qdesc(query_desc); auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); if (!inserted) { - ereport(WARNING, (errmsg("YAGPCC duplicate query submit detected"))); + ereport(WARNING, (errmsg("GPSC duplicate query submit detected"))); ereport(DEBUG3, - (errmsg("YAGPCC query sourceText: %s", query_desc->sourceText))); + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); } } @@ -498,16 +498,16 @@ void EventSender::update_nested_counters(QueryDesc *query_desc) { if (end_time >= start_time) { nested_timing += end_time - start_time; } else { - ereport(WARNING, (errmsg("YAGPCC query start_time > end_time (%f > %f)", + ereport(WARNING, (errmsg("GPSC query start_time > end_time (%f > %f)", start_time, end_time))); ereport(DEBUG3, - (errmsg("YAGPCC nested query text %s", query_desc->sourceText))); + (errmsg("GPSC nested query text %s", query_desc->sourceText))); } } } bool EventSender::qdesc_submitted(QueryDesc *query_desc) { - if (query_desc->yagp_query_key == NULL) { + if (query_desc->gpsc_query_key == NULL) { return false; } return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); @@ -528,4 +528,4 @@ bool EventSender::filter_query(QueryDesc *query_desc) { } EventSender::QueryItem::QueryItem(QueryState st) - : 
message(std::make_unique()), state(st) {} + : message(std::make_unique()), state(st) {} diff --git a/gpcontrib/yagp_hooks_collector/src/EventSender.h b/gpcontrib/gp_stats_collector/src/EventSender.h similarity index 84% rename from gpcontrib/yagp_hooks_collector/src/EventSender.h rename to gpcontrib/gp_stats_collector/src/EventSender.h index ef7dcb0bf8c..154c2c0dceb 100644 --- a/gpcontrib/yagp_hooks_collector/src/EventSender.h +++ b/gpcontrib/gp_stats_collector/src/EventSender.h @@ -20,7 +20,7 @@ * EventSender.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/EventSender.h + * gpcontrib/gp_stats_collector/src/EventSender.h * *------------------------------------------------------------------------- */ @@ -45,7 +45,7 @@ extern "C" { class UDSConnector; struct QueryDesc; -namespace yagpcc { +namespace gpsc { class SetQueryReq; } @@ -67,24 +67,24 @@ struct QueryKey { } static void register_qkey(QueryDesc *query_desc, size_t nesting_level) { - query_desc->yagp_query_key = - (YagpQueryKey *)ya_gpdb::palloc0(sizeof(YagpQueryKey)); + query_desc->gpsc_query_key = + (GpscQueryKey *)gpdb::palloc0(sizeof(GpscQueryKey)); int32 tmid; gp_gettmid(&tmid); - query_desc->yagp_query_key->tmid = tmid; - query_desc->yagp_query_key->ssid = gp_session_id; - query_desc->yagp_query_key->ccnt = gp_command_count; - query_desc->yagp_query_key->nesting_level = nesting_level; - query_desc->yagp_query_key->query_desc_addr = (uintptr_t)query_desc; + query_desc->gpsc_query_key->tmid = tmid; + query_desc->gpsc_query_key->ssid = gp_session_id; + query_desc->gpsc_query_key->ccnt = gp_command_count; + query_desc->gpsc_query_key->nesting_level = nesting_level; + query_desc->gpsc_query_key->query_desc_addr = (uintptr_t)query_desc; } static QueryKey from_qdesc(QueryDesc *query_desc) { return { - .tmid = query_desc->yagp_query_key->tmid, - .ssid = query_desc->yagp_query_key->ssid, - .ccnt = query_desc->yagp_query_key->ccnt, - .nesting_level = query_desc->yagp_query_key->nesting_level, - 
.query_desc_addr = query_desc->yagp_query_key->query_desc_addr, + .tmid = query_desc->gpsc_query_key->tmid, + .ssid = query_desc->gpsc_query_key->ssid, + .ccnt = query_desc->gpsc_query_key->ccnt, + .nesting_level = query_desc->gpsc_query_key->nesting_level, + .query_desc_addr = query_desc->gpsc_query_key->query_desc_addr, }; } }; @@ -130,13 +130,13 @@ class EventSender { enum QueryState { SUBMIT, START, END, DONE }; struct QueryItem { - std::unique_ptr message; + std::unique_ptr message; QueryState state; explicit QueryItem(QueryState st); }; - bool log_query_req(const yagpcc::SetQueryReq &req, const std::string &event, + bool log_query_req(const gpsc::SetQueryReq &req, const std::string &event, bool utility); bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); void update_query_state(QueryItem &query, QueryState new_state, bool utility, diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp b/gpcontrib/gp_stats_collector/src/GpscStat.cpp similarity index 78% rename from gpcontrib/yagp_hooks_collector/src/YagpStat.cpp rename to gpcontrib/gp_stats_collector/src/GpscStat.cpp index 3a760b6ea97..c4029f085cf 100644 --- a/gpcontrib/yagp_hooks_collector/src/YagpStat.cpp +++ b/gpcontrib/gp_stats_collector/src/GpscStat.cpp @@ -17,15 +17,15 @@ * specific language governing permissions and limitations * under the License. 
* - * YagpStat.cpp + * GpscStat.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/YagpStat.cpp + * gpcontrib/gp_stats_collector/src/GpscStat.cpp * *------------------------------------------------------------------------- */ -#include "YagpStat.h" +#include "GpscStat.h" #include @@ -41,21 +41,21 @@ extern "C" { namespace { struct ProtectedData { slock_t mutex; - YagpStat::Data data; + GpscStat::Data data; }; shmem_startup_hook_type prev_shmem_startup_hook = NULL; ProtectedData *data = nullptr; -void yagp_shmem_startup() { +void gpsc_shmem_startup() { if (prev_shmem_startup_hook) prev_shmem_startup_hook(); LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); bool found; data = reinterpret_cast( - ShmemInitStruct("yagp_stat_messages", sizeof(ProtectedData), &found)); + ShmemInitStruct("gpsc_stat_messages", sizeof(ProtectedData), &found)); if (!found) { SpinLockInit(&data->mutex); - data->data = YagpStat::Data(); + data->data = GpscStat::Data(); } LWLockRelease(AddinShmemInitLock); } @@ -70,49 +70,49 @@ class LockGuard { }; } // namespace -void YagpStat::init() { +void GpscStat::init() { if (!process_shared_preload_libraries_in_progress) return; RequestAddinShmemSpace(sizeof(ProtectedData)); prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = yagp_shmem_startup; + shmem_startup_hook = gpsc_shmem_startup; } -void YagpStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; } +void GpscStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; } -void YagpStat::reset() { +void GpscStat::reset() { LockGuard lg(&data->mutex); - data->data = YagpStat::Data(); + data->data = GpscStat::Data(); } -void YagpStat::report_send(int32_t msg_size) { +void GpscStat::report_send(int32_t msg_size) { LockGuard lg(&data->mutex); data->data.total++; data->data.max_message_size = std::max(msg_size, data->data.max_message_size); } -void YagpStat::report_bad_connection() { +void GpscStat::report_bad_connection() { LockGuard lg(&data->mutex); 
data->data.total++; data->data.failed_connects++; } -void YagpStat::report_bad_send(int32_t msg_size) { +void GpscStat::report_bad_send(int32_t msg_size) { LockGuard lg(&data->mutex); data->data.total++; data->data.failed_sends++; data->data.max_message_size = std::max(msg_size, data->data.max_message_size); } -void YagpStat::report_error() { +void GpscStat::report_error() { LockGuard lg(&data->mutex); data->data.total++; data->data.failed_other++; } -YagpStat::Data YagpStat::get_stats() { +GpscStat::Data GpscStat::get_stats() { LockGuard lg(&data->mutex); return data->data; } -bool YagpStat::loaded() { return data != nullptr; } +bool GpscStat::loaded() { return data != nullptr; } diff --git a/gpcontrib/yagp_hooks_collector/src/YagpStat.h b/gpcontrib/gp_stats_collector/src/GpscStat.h similarity index 94% rename from gpcontrib/yagp_hooks_collector/src/YagpStat.h rename to gpcontrib/gp_stats_collector/src/GpscStat.h index 57fc90cd4d1..af1a1261776 100644 --- a/gpcontrib/yagp_hooks_collector/src/YagpStat.h +++ b/gpcontrib/gp_stats_collector/src/GpscStat.h @@ -17,10 +17,10 @@ * specific language governing permissions and limitations * under the License. 
* - * YagpStat.h + * GpscStat.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/YagpStat.h + * gpcontrib/gp_stats_collector/src/GpscStat.h * *------------------------------------------------------------------------- */ @@ -29,7 +29,7 @@ #include -class YagpStat { +class GpscStat { public: struct Data { int64_t total, failed_sends, failed_connects, failed_other; diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp b/gpcontrib/gp_stats_collector/src/PgUtils.cpp similarity index 83% rename from gpcontrib/yagp_hooks_collector/src/PgUtils.cpp rename to gpcontrib/gp_stats_collector/src/PgUtils.cpp index ed4bf4d7e64..3dbee97061b 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.cpp +++ b/gpcontrib/gp_stats_collector/src/PgUtils.cpp @@ -20,7 +20,7 @@ * PgUtils.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/PgUtils.cpp + * gpcontrib/gp_stats_collector/src/PgUtils.cpp * *------------------------------------------------------------------------- */ @@ -37,31 +37,31 @@ extern "C" { std::string get_user_name() { // username is allocated on stack, we don't need to pfree it. const char *username = - ya_gpdb::get_config_option("session_authorization", false, false); + gpdb::get_config_option("session_authorization", false, false); return username ? 
std::string(username) : ""; } std::string get_db_name() { - char *dbname = ya_gpdb::get_database_name(MyDatabaseId); + char *dbname = gpdb::get_database_name(MyDatabaseId); if (dbname) { std::string result(dbname); - ya_gpdb::pfree(dbname); + gpdb::pfree(dbname); return result; } return ""; } std::string get_rg_name() { - auto groupId = ya_gpdb::get_rg_id_by_session_id(MySessionState->sessionId); + auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); if (!OidIsValid(groupId)) return ""; - char *rgname = ya_gpdb::get_rg_name_for_id(groupId); + char *rgname = gpdb::get_rg_name_for_id(groupId); if (rgname == nullptr) return ""; std::string result(rgname); - ya_gpdb::pfree(rgname); + gpdb::pfree(rgname); return result; } @@ -77,7 +77,7 @@ std::string get_rg_name() { * segment. An example would be `select a from tbl where is_good_value(b);`. In * this case master will issue one top-level statement, but segments will change * contexts for UDF execution and execute is_good_value(b) once for each tuple - * as a nested query. Creating massive load on gpcc agent. + * as a nested query. Creating massive load on external agent. 
* * Hence, here is a decision: * 1) ignore all queries that are nested on segments @@ -87,8 +87,8 @@ std::string get_rg_name() { */ bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - if (query_desc->yagp_query_key == NULL) { + if (query_desc->gpsc_query_key == NULL) { return nesting_level == 0; } - return query_desc->yagp_query_key->nesting_level == 0; + return query_desc->gpsc_query_key->nesting_level == 0; } diff --git a/gpcontrib/yagp_hooks_collector/src/PgUtils.h b/gpcontrib/gp_stats_collector/src/PgUtils.h similarity index 96% rename from gpcontrib/yagp_hooks_collector/src/PgUtils.h rename to gpcontrib/gp_stats_collector/src/PgUtils.h index 5113fadbff2..d9f673e7cbc 100644 --- a/gpcontrib/yagp_hooks_collector/src/PgUtils.h +++ b/gpcontrib/gp_stats_collector/src/PgUtils.h @@ -20,7 +20,7 @@ * PgUtils.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/PgUtils.h + * gpcontrib/gp_stats_collector/src/PgUtils.h * *------------------------------------------------------------------------- */ diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp b/gpcontrib/gp_stats_collector/src/ProcStats.cpp similarity index 92% rename from gpcontrib/yagp_hooks_collector/src/ProcStats.cpp rename to gpcontrib/gp_stats_collector/src/ProcStats.cpp index 72a12e8ca00..9c557879fc6 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProcStats.cpp +++ b/gpcontrib/gp_stats_collector/src/ProcStats.cpp @@ -20,13 +20,13 @@ * ProcStats.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/ProcStats.cpp + * gpcontrib/gp_stats_collector/src/ProcStats.cpp * *------------------------------------------------------------------------- */ #include "ProcStats.h" -#include "yagpcc_metrics.pb.h" +#include "gpsc_metrics.pb.h" #include #include #include @@ -42,7 +42,7 @@ namespace { proc_stat >> tmp >> stat_name; \ stats->set_##stat_name(stat_name - stats->stat_name()); -void fill_io_stats(yagpcc::SystemStat *stats) { +void fill_io_stats(gpsc::SystemStat *stats) { 
std::ifstream proc_stat("/proc/self/io"); std::string tmp; FILL_IO_STAT(rchar); @@ -54,7 +54,7 @@ void fill_io_stats(yagpcc::SystemStat *stats) { FILL_IO_STAT(cancelled_write_bytes); } -void fill_cpu_stats(yagpcc::SystemStat *stats) { +void fill_cpu_stats(gpsc::SystemStat *stats) { static const int UTIME_ID = 13; static const int STIME_ID = 14; static const int VSIZE_ID = 22; @@ -92,7 +92,7 @@ void fill_cpu_stats(yagpcc::SystemStat *stats) { } } -void fill_status_stats(yagpcc::SystemStat *stats) { +void fill_status_stats(gpsc::SystemStat *stats) { std::ifstream proc_stat("/proc/self/status"); std::string key, measure; while (proc_stat >> key) { @@ -118,7 +118,7 @@ void fill_status_stats(yagpcc::SystemStat *stats) { } } // namespace -void fill_self_stats(yagpcc::SystemStat *stats) { +void fill_self_stats(gpsc::SystemStat *stats) { fill_io_stats(stats); fill_cpu_stats(stats); fill_status_stats(stats); diff --git a/gpcontrib/yagp_hooks_collector/src/ProcStats.h b/gpcontrib/gp_stats_collector/src/ProcStats.h similarity index 89% rename from gpcontrib/yagp_hooks_collector/src/ProcStats.h rename to gpcontrib/gp_stats_collector/src/ProcStats.h index 7629edd0aea..4473125f875 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProcStats.h +++ b/gpcontrib/gp_stats_collector/src/ProcStats.h @@ -20,15 +20,15 @@ * ProcStats.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/ProcStats.h + * gpcontrib/gp_stats_collector/src/ProcStats.h * *------------------------------------------------------------------------- */ #pragma once -namespace yagpcc { +namespace gpsc { class SystemStat; } -void fill_self_stats(yagpcc::SystemStat *stats); \ No newline at end of file +void fill_self_stats(gpsc::SystemStat *stats); \ No newline at end of file diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp similarity index 85% rename from gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp rename to 
gpcontrib/gp_stats_collector/src/ProtoUtils.cpp index b449ae20900..c9ceff4739b 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp +++ b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp @@ -20,7 +20,7 @@ * ProtoUtils.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/ProtoUtils.cpp + * gpcontrib/gp_stats_collector/src/ProtoUtils.cpp * *------------------------------------------------------------------------- */ @@ -74,7 +74,7 @@ google::protobuf::Timestamp current_ts() { return current_ts; } -void set_query_key(yagpcc::QueryKey *key) { +void set_query_key(gpsc::QueryKey *key) { key->set_ccnt(gp_command_count); key->set_ssid(gp_session_id); int32 tmid = 0; @@ -82,7 +82,7 @@ void set_query_key(yagpcc::QueryKey *key) { key->set_tmid(tmid); } -void set_segment_key(yagpcc::SegmentKey *key) { +void set_segment_key(gpsc::SegmentKey *key) { key->set_dbid(GpIdentity.dbid); key->set_segindex(GpIdentity.segindex); } @@ -109,51 +109,51 @@ std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { return std::string(str, cut_pos); } -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, +void set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { auto qi = req->mutable_query_info(); qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? yagpcc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : yagpcc::PlanGenerator::PLAN_GENERATOR_PLANNER); + ? 
gpsc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : gpsc::PlanGenerator::PLAN_GENERATOR_PLANNER); MemoryContext oldcxt = - ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = ya_gpdb::get_explain_state(query_desc, true); + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_explain_state(query_desc, true); if (es.str) { *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); - StringInfo norm_plan = ya_gpdb::gen_normplan(es.str->data); + StringInfo norm_plan = gpdb::gen_normplan(es.str->data); if (norm_plan) { *qi->mutable_template_plan_text() = trim_str_shrink_utf8( norm_plan->data, norm_plan->len, config.max_plan_size()); qi->set_plan_id( hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - ya_gpdb::pfree(norm_plan->data); + gpdb::pfree(norm_plan->data); } qi->set_query_id(query_desc->plannedstmt->queryId); - ya_gpdb::pfree(es.str->data); + gpdb::pfree(es.str->data); } - ya_gpdb::mem_ctx_switch_to(oldcxt); + gpdb::mem_ctx_switch_to(oldcxt); } } -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, +void set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, const Config &config) { if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { auto qi = req->mutable_query_info(); *qi->mutable_query_text() = trim_str_shrink_utf8( query_desc->sourceText, strlen(query_desc->sourceText), config.max_text_size()); - char *norm_query = ya_gpdb::gen_normquery(query_desc->sourceText); + char *norm_query = gpdb::gen_normquery(query_desc->sourceText); if (norm_query) { *qi->mutable_template_query_text() = trim_str_shrink_utf8( norm_query, strlen(norm_query), config.max_text_size()); - ya_gpdb::pfree(norm_query); + gpdb::pfree(norm_query); } } } -void clear_big_fields(yagpcc::SetQueryReq *req) { +void clear_big_fields(gpsc::SetQueryReq *req) { if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->clear_plan_text(); 
@@ -164,7 +164,7 @@ void clear_big_fields(yagpcc::SetQueryReq *req) { } } -void set_query_info(yagpcc::SetQueryReq *req) { +void set_query_info(gpsc::SetQueryReq *req) { if (Gp_role == GP_ROLE_DISPATCH) { auto qi = req->mutable_query_info(); qi->set_username(get_user_name()); @@ -174,24 +174,24 @@ void set_query_info(yagpcc::SetQueryReq *req) { } } -void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level) { +void set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level) { auto aqi = req->mutable_add_info(); aqi->set_nested_level(nesting_level); } -void set_qi_slice_id(yagpcc::SetQueryReq *req) { +void set_qi_slice_id(gpsc::SetQueryReq *req) { auto aqi = req->mutable_add_info(); aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, +void set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, const Config &config) { auto aqi = req->mutable_add_info(); *aqi->mutable_error_message() = trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); } -void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, +void set_metric_instrumentation(gpsc::MetricInstrumentation *metrics, QueryDesc *query_desc, int nested_calls, double nested_time) { auto instrument = query_desc->planstate->instrument; @@ -233,7 +233,7 @@ void set_metric_instrumentation(yagpcc::MetricInstrumentation *metrics, metrics->set_inherited_time(nested_time); } -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, +void set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time) { if (query_desc->planstate && query_desc->planstate->instrument) { set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, @@ -256,7 +256,7 @@ void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, metrics->mutable_interconnect()->proto_name() <= \ ic_statistics->stat_name) -void set_ic_stats(yagpcc::MetricInstrumentation 
*metrics, +void set_ic_stats(gpsc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics) { #ifdef IC_TEARDOWN_HOOK UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); @@ -279,8 +279,8 @@ void set_ic_stats(yagpcc::MetricInstrumentation *metrics, #endif } -yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status) { - yagpcc::SetQueryReq req; +gpsc::SetQueryReq create_query_req(gpsc::QueryStatus status) { + gpsc::SetQueryReq req; req.set_query_status(status); *req.mutable_datetime() = current_ts(); set_query_key(req.mutable_query_key()); @@ -292,7 +292,7 @@ double protots_to_double(const google::protobuf::Timestamp &ts) { return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, +void set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *req, const Config &config) { // Make sure it is a valid txn and it is not an utility // statement for ExplainPrintPlan() later. @@ -300,10 +300,10 @@ void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, return; } MemoryContext oldcxt = - ya_gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = ya_gpdb::get_analyze_state( + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_analyze_state( query_desc, query_desc->instrument_options && config.enable_analyze()); - ya_gpdb::mem_ctx_switch_to(oldcxt); + gpdb::mem_ctx_switch_to(oldcxt); if (es.str) { // Remove last line break. 
if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { @@ -312,6 +312,6 @@ void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *req, auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); req->mutable_query_info()->set_analyze_text(trimmed_analyze); - ya_gpdb::pfree(es.str->data); + gpdb::pfree(es.str->data); } } diff --git a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h b/gpcontrib/gp_stats_collector/src/ProtoUtils.h similarity index 65% rename from gpcontrib/yagp_hooks_collector/src/ProtoUtils.h rename to gpcontrib/gp_stats_collector/src/ProtoUtils.h index c954545494f..5ddcd42d308 100644 --- a/gpcontrib/yagp_hooks_collector/src/ProtoUtils.h +++ b/gpcontrib/gp_stats_collector/src/ProtoUtils.h @@ -20,35 +20,35 @@ * ProtoUtils.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/ProtoUtils.h + * gpcontrib/gp_stats_collector/src/ProtoUtils.h * *------------------------------------------------------------------------- */ #pragma once -#include "protos/yagpcc_set_service.pb.h" +#include "protos/gpsc_set_service.pb.h" struct QueryDesc; struct ICStatistics; class Config; google::protobuf::Timestamp current_ts(); -void set_query_plan(yagpcc::SetQueryReq *req, QueryDesc *query_desc, +void set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, const Config &config); -void set_query_text(yagpcc::SetQueryReq *req, QueryDesc *query_desc, +void set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, const Config &config); -void clear_big_fields(yagpcc::SetQueryReq *req); -void set_query_info(yagpcc::SetQueryReq *req); -void set_qi_nesting_level(yagpcc::SetQueryReq *req, int nesting_level); -void set_qi_slice_id(yagpcc::SetQueryReq *req); -void set_qi_error_message(yagpcc::SetQueryReq *req, const char *err_msg, +void clear_big_fields(gpsc::SetQueryReq *req); +void set_query_info(gpsc::SetQueryReq *req); +void set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level); +void 
set_qi_slice_id(gpsc::SetQueryReq *req); +void set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, const Config &config); -void set_gp_metrics(yagpcc::GPMetrics *metrics, QueryDesc *query_desc, +void set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, int nested_calls, double nested_time); -void set_ic_stats(yagpcc::MetricInstrumentation *metrics, +void set_ic_stats(gpsc::MetricInstrumentation *metrics, const ICStatistics *ic_statistics); -yagpcc::SetQueryReq create_query_req(yagpcc::QueryStatus status); +gpsc::SetQueryReq create_query_req(gpsc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); -void set_analyze_plan_text(QueryDesc *query_desc, yagpcc::SetQueryReq *message, +void set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *message, const Config &config); diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp similarity index 87% rename from gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp rename to gpcontrib/gp_stats_collector/src/UDSConnector.cpp index d13a82a5ca9..9a01d4033d0 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp @@ -20,14 +20,14 @@ * UDSConnector.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/UDSConnector.cpp + * gpcontrib/gp_stats_collector/src/UDSConnector.cpp * *------------------------------------------------------------------------- */ #include "UDSConnector.h" #include "Config.h" -#include "YagpStat.h" +#include "GpscStat.h" #include "memory/gpdbwrappers.h" #include "log/LogOps.h" @@ -44,14 +44,14 @@ extern "C" { #include "postgres.h" } -static void inline log_tracing_failure(const yagpcc::SetQueryReq &req, +static void inline log_tracing_failure(const gpsc::SetQueryReq &req, const std::string &event) { ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", req.query_key().tmid(), 
req.query_key().ssid(), req.query_key().ccnt(), event.c_str()))); } -bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, +bool UDSConnector::report_query(const gpsc::SetQueryReq &req, const std::string &event, const Config &config) { sockaddr_un address{}; @@ -60,7 +60,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (uds_path.size() >= sizeof(address.sun_path)) { ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); - YagpStat::report_error(); + GpscStat::report_error(); return false; } strcpy(address.sun_path, uds_path.c_str()); @@ -68,7 +68,7 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd == -1) { log_tracing_failure(req, event); - YagpStat::report_error(); + GpscStat::report_error(); return false; } @@ -83,24 +83,24 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, // visible to an end-user and admins. ereport(WARNING, (errmsg("Unable to create non-blocking socket connection %m"))); - YagpStat::report_error(); + GpscStat::report_error(); return false; } if (connect(sockfd, reinterpret_cast(&address), sizeof(address)) == -1) { log_tracing_failure(req, event); - YagpStat::report_bad_connection(); + GpscStat::report_bad_connection(); return false; } const auto data_size = req.ByteSizeLong(); const auto total_size = data_size + sizeof(uint32_t); - auto *buf = static_cast(ya_gpdb::palloc(total_size)); + auto *buf = static_cast(gpdb::palloc(total_size)); // Free buf automatically on error path. 
struct BufGuard { void *p; - ~BufGuard() { ya_gpdb::pfree(p); } + ~BufGuard() { gpdb::pfree(p); } } buf_guard{buf}; *reinterpret_cast(buf) = data_size; @@ -121,10 +121,10 @@ bool UDSConnector::report_query(const yagpcc::SetQueryReq &req, if (sent < 0) { log_tracing_failure(req, event); - YagpStat::report_bad_send(total_size); + GpscStat::report_bad_send(total_size); return false; } - YagpStat::report_send(total_size); + GpscStat::report_send(total_size); return true; } diff --git a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h b/gpcontrib/gp_stats_collector/src/UDSConnector.h similarity index 88% rename from gpcontrib/yagp_hooks_collector/src/UDSConnector.h rename to gpcontrib/gp_stats_collector/src/UDSConnector.h index be5ab1ef413..a91d22f9df1 100644 --- a/gpcontrib/yagp_hooks_collector/src/UDSConnector.h +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.h @@ -20,19 +20,19 @@ * UDSConnector.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/UDSConnector.h + * gpcontrib/gp_stats_collector/src/UDSConnector.h * *------------------------------------------------------------------------- */ #pragma once -#include "protos/yagpcc_set_service.pb.h" +#include "protos/gpsc_set_service.pb.h" class Config; class UDSConnector { public: - bool static report_query(const yagpcc::SetQueryReq &req, + bool static report_query(const gpsc::SetQueryReq &req, const std::string &event, const Config &config); }; diff --git a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c similarity index 79% rename from gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c rename to gpcontrib/gp_stats_collector/src/gp_stats_collector.c index 271bceee178..d930f72246d 100644 --- a/gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c +++ b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c @@ -17,10 +17,10 @@ * specific language governing permissions and limitations * under the License. 
* - * yagp_hooks_collector.c + * gp_stats_collector.c * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/yagp_hooks_collector.c + * gpcontrib/gp_stats_collector/src/gp_stats_collector.c * *------------------------------------------------------------------------- */ @@ -36,14 +36,14 @@ PG_MODULE_MAGIC; void _PG_init(void); void _PG_fini(void); -PG_FUNCTION_INFO_V1(yagp_stat_messages_reset); -PG_FUNCTION_INFO_V1(yagp_stat_messages); -PG_FUNCTION_INFO_V1(yagp_init_log); -PG_FUNCTION_INFO_V1(yagp_truncate_log); +PG_FUNCTION_INFO_V1(gpsc_stat_messages_reset); +PG_FUNCTION_INFO_V1(gpsc_stat_messages); +PG_FUNCTION_INFO_V1(gpsc_init_log); +PG_FUNCTION_INFO_V1(gpsc_truncate_log); -PG_FUNCTION_INFO_V1(yagp_test_uds_start_server); -PG_FUNCTION_INFO_V1(yagp_test_uds_receive); -PG_FUNCTION_INFO_V1(yagp_test_uds_stop_server); +PG_FUNCTION_INFO_V1(gpsc_test_uds_start_server); +PG_FUNCTION_INFO_V1(gpsc_test_uds_receive); +PG_FUNCTION_INFO_V1(gpsc_test_uds_stop_server); void _PG_init(void) { if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) @@ -55,23 +55,23 @@ void _PG_fini(void) { hooks_deinit(); } -Datum yagp_stat_messages_reset(PG_FUNCTION_ARGS) { +Datum gpsc_stat_messages_reset(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { funcctx = SRF_FIRSTCALL_INIT(); - yagp_functions_reset(); + gpsc_functions_reset(); } funcctx = SRF_PERCALL_SETUP(); SRF_RETURN_DONE(funcctx); } -Datum yagp_stat_messages(PG_FUNCTION_ARGS) { - return yagp_functions_get(fcinfo); +Datum gpsc_stat_messages(PG_FUNCTION_ARGS) { + return gpsc_functions_get(fcinfo); } -Datum yagp_init_log(PG_FUNCTION_ARGS) { +Datum gpsc_init_log(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { @@ -83,7 +83,7 @@ Datum yagp_init_log(PG_FUNCTION_ARGS) { SRF_RETURN_DONE(funcctx); } -Datum yagp_truncate_log(PG_FUNCTION_ARGS) { +Datum gpsc_truncate_log(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { @@ -95,7 +95,7 @@ Datum 
yagp_truncate_log(PG_FUNCTION_ARGS) { SRF_RETURN_DONE(funcctx); } -Datum yagp_test_uds_start_server(PG_FUNCTION_ARGS) { +Datum gpsc_test_uds_start_server(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { @@ -109,7 +109,7 @@ Datum yagp_test_uds_start_server(PG_FUNCTION_ARGS) { SRF_RETURN_DONE(funcctx); } -Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { +Datum gpsc_test_uds_receive(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; int64 *result; @@ -137,7 +137,7 @@ Datum yagp_test_uds_receive(PG_FUNCTION_ARGS) { SRF_RETURN_DONE(funcctx); } -Datum yagp_test_uds_stop_server(PG_FUNCTION_ARGS) { +Datum gpsc_test_uds_stop_server(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; if (SRF_IS_FIRSTCALL()) { diff --git a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp similarity index 84% rename from gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp rename to gpcontrib/gp_stats_collector/src/hook_wrappers.cpp index cb4970d60d9..0a40b4cb359 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp @@ -20,7 +20,7 @@ * hook_wrappers.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/hook_wrappers.cpp + * gpcontrib/gp_stats_collector/src/hook_wrappers.cpp * *------------------------------------------------------------------------- */ @@ -48,7 +48,7 @@ extern "C" { #undef typeid #include "Config.h" -#include "YagpStat.h" +#include "GpscStat.h" #include "EventSender.h" #include "hook_wrappers.h" #include "memory/gpdbwrappers.h" @@ -67,20 +67,20 @@ static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; #endif static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; -static void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags); -static void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, +static void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void 
gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, uint64 count, bool execute_once); -static void ya_ExecutorFinish_hook(QueryDesc *query_desc); -static void ya_ExecutorEnd_hook(QueryDesc *query_desc); -static void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg); +static void gpsc_ExecutorFinish_hook(QueryDesc *query_desc); +static void gpsc_ExecutorEnd_hook(QueryDesc *query_desc); +static void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg); #ifdef IC_TEARDOWN_HOOK -static void ya_ic_teardown_hook(ChunkTransportState *transportStates, +static void gpsc_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors); #endif #ifdef ANALYZE_STATS_COLLECT_HOOK -static void ya_analyze_stats_collect_hook(QueryDesc *query_desc); +static void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc); #endif -static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, +static void gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, @@ -108,34 +108,34 @@ R cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) { try { return (obj->*func)(args...); } catch (const std::exception &e) { - ereport(FATAL, (errmsg("Unexpected exception in yagpcc %s", e.what()))); + ereport(FATAL, (errmsg("Unexpected exception in gpsc %s", e.what()))); } } void hooks_init() { Config::init_gucs(); - YagpStat::init(); + GpscStat::init(); previous_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = ya_ExecutorStart_hook; + ExecutorStart_hook = gpsc_ExecutorStart_hook; previous_ExecutorRun_hook = ExecutorRun_hook; - ExecutorRun_hook = ya_ExecutorRun_hook; + ExecutorRun_hook = gpsc_ExecutorRun_hook; previous_ExecutorFinish_hook = ExecutorFinish_hook; - ExecutorFinish_hook = ya_ExecutorFinish_hook; + ExecutorFinish_hook = gpsc_ExecutorFinish_hook; previous_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = ya_ExecutorEnd_hook; + ExecutorEnd_hook = gpsc_ExecutorEnd_hook; previous_query_info_collect_hook = query_info_collect_hook; - query_info_collect_hook = ya_query_info_collect_hook; + query_info_collect_hook = gpsc_query_info_collect_hook; #ifdef IC_TEARDOWN_HOOK previous_ic_teardown_hook = ic_teardown_hook; - ic_teardown_hook = ya_ic_teardown_hook; + ic_teardown_hook = gpsc_ic_teardown_hook; #endif #ifdef ANALYZE_STATS_COLLECT_HOOK previous_analyze_stats_collect_hook = analyze_stats_collect_hook; - analyze_stats_collect_hook = ya_analyze_stats_collect_hook; + analyze_stats_collect_hook = gpsc_analyze_stats_collect_hook; #endif stat_statements_parser_init(); previous_ProcessUtility_hook = ProcessUtility_hook; - ProcessUtility_hook = ya_process_utility_hook; + ProcessUtility_hook = gpsc_process_utility_hook; } void hooks_deinit() { @@ -154,11 +154,11 @@ void hooks_deinit() { if (sender) { delete sender; } - YagpStat::deinit(); + GpscStat::deinit(); ProcessUtility_hook = previous_ProcessUtility_hook; } -void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { +void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { cpp_call(get_sender(), 
&EventSender::executor_before_start, query_desc, eflags); if (previous_ExecutorStart_hook) { @@ -170,7 +170,7 @@ void ya_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { eflags); } -void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, +void gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, uint64 count, bool execute_once) { get_sender()->incr_depth(); PG_TRY(); @@ -189,7 +189,7 @@ void ya_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, PG_END_TRY(); } -void ya_ExecutorFinish_hook(QueryDesc *query_desc) { +void gpsc_ExecutorFinish_hook(QueryDesc *query_desc) { get_sender()->incr_depth(); PG_TRY(); { @@ -207,7 +207,7 @@ void ya_ExecutorFinish_hook(QueryDesc *query_desc) { PG_END_TRY(); } -void ya_ExecutorEnd_hook(QueryDesc *query_desc) { +void gpsc_ExecutorEnd_hook(QueryDesc *query_desc) { cpp_call(get_sender(), &EventSender::executor_end, query_desc); if (previous_ExecutorEnd_hook) { (*previous_ExecutorEnd_hook)(query_desc); @@ -216,7 +216,7 @@ void ya_ExecutorEnd_hook(QueryDesc *query_desc) { } } -void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { +void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg) { cpp_call(get_sender(), &EventSender::query_metrics_collect, status, arg /* queryDesc */, false /* utility */, (ErrorData *)NULL); if (previous_query_info_collect_hook) { @@ -225,7 +225,7 @@ void ya_query_info_collect_hook(QueryMetricsStatus status, void *arg) { } #ifdef IC_TEARDOWN_HOOK -void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { +void gpsc_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { cpp_call(get_sender(), &EventSender::ic_metrics_collect); if (previous_ic_teardown_hook) { (*previous_ic_teardown_hook)(transportStates, hasErrors); @@ -234,7 +234,7 @@ void ya_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { #endif #ifdef ANALYZE_STATS_COLLECT_HOOK -void 
ya_analyze_stats_collect_hook(QueryDesc *query_desc) { +void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc) { cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc); if (previous_analyze_stats_collect_hook) { (*previous_analyze_stats_collect_hook)(query_desc); @@ -242,7 +242,7 @@ void ya_analyze_stats_collect_hook(QueryDesc *query_desc) { } #endif -static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, +static void gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, @@ -296,22 +296,22 @@ static void ya_process_utility_hook(PlannedStmt *pstmt, const char *queryString, } static void check_stats_loaded() { - if (!YagpStat::loaded()) { + if (!GpscStat::loaded()) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("yagp_hooks_collector must be loaded via " + errmsg("gp_stats_collector must be loaded via " "shared_preload_libraries"))); } } -void yagp_functions_reset() { +void gpsc_functions_reset() { check_stats_loaded(); - YagpStat::reset(); + GpscStat::reset(); } -Datum yagp_functions_get(FunctionCallInfo fcinfo) { +Datum gpsc_functions_get(FunctionCallInfo fcinfo) { const int ATTNUM = 6; check_stats_loaded(); - auto stats = YagpStat::get_stats(); + auto stats = GpscStat::get_stats(); TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM); TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, 0 /* attdim */); @@ -335,7 +335,7 @@ Datum yagp_functions_get(FunctionCallInfo fcinfo) { values[3] = Int64GetDatum(stats.failed_connects); values[4] = Int64GetDatum(stats.failed_other); values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = ya_gpdb::heap_form_tuple(tupdesc, values, nulls); + HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); Datum result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); } diff --git 
a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h b/gpcontrib/gp_stats_collector/src/hook_wrappers.h similarity index 89% rename from gpcontrib/yagp_hooks_collector/src/hook_wrappers.h rename to gpcontrib/gp_stats_collector/src/hook_wrappers.h index 443406a5259..06c8d064404 100644 --- a/gpcontrib/yagp_hooks_collector/src/hook_wrappers.h +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.h @@ -20,7 +20,7 @@ * hook_wrappers.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/hook_wrappers.h + * gpcontrib/gp_stats_collector/src/hook_wrappers.h * *------------------------------------------------------------------------- */ @@ -33,8 +33,8 @@ extern "C" { extern void hooks_init(); extern void hooks_deinit(); -extern void yagp_functions_reset(); -extern Datum yagp_functions_get(FunctionCallInfo fcinfo); +extern void gpsc_functions_reset(); +extern Datum gpsc_functions_get(FunctionCallInfo fcinfo); extern void init_log(); extern void truncate_log(); diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp similarity index 91% rename from gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp rename to gpcontrib/gp_stats_collector/src/log/LogOps.cpp index e8c927ece84..ef4f39c0749 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp @@ -20,12 +20,12 @@ * LogOps.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/log/LogOps.cpp + * gpcontrib/gp_stats_collector/src/log/LogOps.cpp * *------------------------------------------------------------------------- */ -#include "protos/yagpcc_set_service.pb.h" +#include "protos/gpsc_set_service.pb.h" #include "LogOps.h" #include "LogSchema.h" @@ -82,14 +82,14 @@ void init_log() { /* Table can be dropped only via DROP EXTENSION */ recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION); } else { - ereport(NOTICE, (errmsg("YAGPCC failed to create log table or schema"))); + ereport(NOTICE, 
(errmsg("GPSC failed to create log table or schema"))); } /* Make changes visible */ CommandCounterIncrement(); } -void insert_log(const yagpcc::SetQueryReq &req, bool utility) { +void insert_log(const gpsc::SetQueryReq &req, bool utility) { Oid namespaceId; Oid relationId; Relation rel; @@ -112,15 +112,15 @@ void insert_log(const yagpcc::SetQueryReq &req, bool utility) { return; } - bool nulls[natts_yagp_log]; - Datum values[natts_yagp_log]; + bool nulls[natts_gpsc_log]; + Datum values[natts_gpsc_log]; memset(nulls, true, sizeof(nulls)); memset(values, 0, sizeof(values)); extract_query_req(req, "", values, nulls); - nulls[attnum_yagp_log_utility] = false; - values[attnum_yagp_log_utility] = BoolGetDatum(utility); + nulls[attnum_gpsc_log_utility] = false; + values[attnum_gpsc_log_utility] = BoolGetDatum(utility); rel = heap_open(relationId, RowExclusiveLock); diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogOps.h b/gpcontrib/gp_stats_collector/src/log/LogOps.h similarity index 83% rename from gpcontrib/yagp_hooks_collector/src/log/LogOps.h rename to gpcontrib/gp_stats_collector/src/log/LogOps.h index 1fc30c21030..f784270bb8f 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogOps.h +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.h @@ -20,7 +20,7 @@ * LogOps.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/log/LogOps.h + * gpcontrib/gp_stats_collector/src/log/LogOps.h * *------------------------------------------------------------------------- */ @@ -35,12 +35,12 @@ extern "C" { } extern "C" { -/* CREATE TABLE yagpcc.__log (...); */ +/* CREATE TABLE gpsc.__log (...); */ void init_log(); -/* TRUNCATE yagpcc.__log */ +/* TRUNCATE gpsc.__log */ void truncate_log(); } -/* INSERT INTO yagpcc.__log VALUES (...) */ -void insert_log(const yagpcc::SetQueryReq &req, bool utility); +/* INSERT INTO gpsc.__log VALUES (...) 
*/ +void insert_log(const gpsc::SetQueryReq &req, bool utility); diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp similarity index 94% rename from gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp rename to gpcontrib/gp_stats_collector/src/log/LogSchema.cpp index a391b1a2209..f9f43fac2fd 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp @@ -20,7 +20,7 @@ * LogSchema.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/log/LogSchema.cpp + * gpcontrib/gp_stats_collector/src/log/LogSchema.cpp * *------------------------------------------------------------------------- */ @@ -36,7 +36,7 @@ const std::unordered_map &proto_name_to_col_idx() { std::unordered_map map; map.reserve(log_tbl_desc.size()); - for (size_t idx = 0; idx < natts_yagp_log; ++idx) { + for (size_t idx = 0; idx < natts_gpsc_log; ++idx) { map.emplace(log_tbl_desc[idx].proto_field_name, idx); } @@ -46,9 +46,9 @@ const std::unordered_map &proto_name_to_col_idx() { } TupleDesc DescribeTuple() { - TupleDesc tupdesc = CreateTemplateTupleDesc(natts_yagp_log); + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_gpsc_log); - for (size_t anum = 1; anum <= natts_yagp_log; ++anum) { + for (size_t anum = 1; anum <= natts_gpsc_log; ++anum) { TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, 0 /* attdim */); @@ -104,7 +104,7 @@ void process_field(const google::protobuf::FieldDescriptor *field, if (it == proto_idx_map.end()) { ereport(NOTICE, - (errmsg("YAGPCC protobuf field %s is not registered in log table", + (errmsg("GPSC protobuf field %s is not registered in log table", field_name.c_str()))); return; } diff --git a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h b/gpcontrib/gp_stats_collector/src/log/LogSchema.h similarity index 98% rename from 
gpcontrib/yagp_hooks_collector/src/log/LogSchema.h rename to gpcontrib/gp_stats_collector/src/log/LogSchema.h index f78acec7ce9..8754741823a 100644 --- a/gpcontrib/yagp_hooks_collector/src/log/LogSchema.h +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.h @@ -20,7 +20,7 @@ * LogSchema.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/log/LogSchema.h + * gpcontrib/gp_stats_collector/src/log/LogSchema.h * *------------------------------------------------------------------------- */ @@ -50,7 +50,7 @@ class Timestamp; } // namespace protobuf } // namespace google -inline constexpr std::string_view schema_name = "yagpcc"; +inline constexpr std::string_view schema_name = "gpsc"; inline constexpr std::string_view log_relname = "__log"; struct LogDesc { @@ -165,8 +165,8 @@ inline constexpr std::array log_tbl_desc = { }; /* clang-format on */ -inline constexpr size_t natts_yagp_log = log_tbl_desc.size(); -inline constexpr size_t attnum_yagp_log_utility = natts_yagp_log - 1; +inline constexpr size_t natts_gpsc_log = log_tbl_desc.size(); +inline constexpr size_t attnum_gpsc_log_utility = natts_gpsc_log - 1; const std::unordered_map &proto_name_to_col_idx(); diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp similarity index 81% rename from gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp rename to gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp index 22083e8bdaf..4e3f6dae99f 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp +++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp @@ -20,7 +20,7 @@ * gpdbwrappers.cpp * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.cpp + * gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp * *------------------------------------------------------------------------- */ @@ -125,22 +125,22 @@ auto wrap_noexcept(Func &&func, Args &&...args) noexcept } } // namespace -void 
*ya_gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } +void *gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } -void *ya_gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } +void *gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } -char *ya_gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } +char *gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } -char *ya_gpdb::get_database_name(Oid dbid) noexcept { +char *gpdb::get_database_name(Oid dbid) noexcept { return wrap_noexcept(::get_database_name, dbid); } -bool ya_gpdb::split_identifier_string(char *rawstring, char separator, +bool gpdb::split_identifier_string(char *rawstring, char separator, List **namelist) noexcept { return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); } -ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, +ExplainState gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept { return wrap_noexcept([&]() { ExplainState *es = NewExplainState(); @@ -154,7 +154,7 @@ ExplainState ya_gpdb::get_explain_state(QueryDesc *query_desc, }); } -ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, +ExplainState gpdb::get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept { return wrap_noexcept([&]() { ExplainState *es = NewExplainState(); @@ -174,12 +174,12 @@ ExplainState ya_gpdb::get_analyze_state(QueryDesc *query_desc, }); } -Instrumentation *ya_gpdb::instr_alloc(size_t n, int instrument_options, +Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options, bool async_mode) { return wrap_throw(InstrAlloc, n, instrument_options, async_mode); } -HeapTuple ya_gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, +HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull) { if (!tupleDescriptor || !values || !isnull) throw std::runtime_error( @@ -188,7 +188,7 @@ HeapTuple ya_gpdb::heap_form_tuple(TupleDesc 
tupleDescriptor, Datum *values, return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } -void ya_gpdb::pfree(void *pointer) noexcept { +void gpdb::pfree(void *pointer) noexcept { // Note that ::pfree asserts that pointer != NULL. if (!pointer) return; @@ -196,11 +196,11 @@ void ya_gpdb::pfree(void *pointer) noexcept { wrap_noexcept(::pfree, pointer); } -MemoryContext ya_gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { +MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { return MemoryContextSwitchTo(context); } -const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, +const char *gpdb::get_config_option(const char *name, bool missing_ok, bool restrict_superuser) noexcept { if (!name) return nullptr; @@ -208,7 +208,7 @@ const char *ya_gpdb::get_config_option(const char *name, bool missing_ok, return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); } -void ya_gpdb::list_free(List *list) noexcept { +void gpdb::list_free(List *list) noexcept { if (!list) return; @@ -216,7 +216,7 @@ void ya_gpdb::list_free(List *list) noexcept { } CdbExplain_ShowStatCtx * -ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, +gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime) { if (!query_desc) throw std::runtime_error("Invalid query descriptor"); @@ -224,29 +224,29 @@ ya_gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); } -void ya_gpdb::instr_end_loop(Instrumentation *instr) { +void gpdb::instr_end_loop(Instrumentation *instr) { if (!instr) throw std::runtime_error("Invalid instrumentation pointer"); wrap_throw(::InstrEndLoop, instr); } -char *ya_gpdb::gen_normquery(const char *query) noexcept { +char *gpdb::gen_normquery(const char *query) noexcept { return wrap_noexcept(::gen_normquery, query); } -StringInfo ya_gpdb::gen_normplan(const char *exec_plan) noexcept { +StringInfo 
gpdb::gen_normplan(const char *exec_plan) noexcept { return wrap_noexcept(::gen_normplan, exec_plan); } -char *ya_gpdb::get_rg_name_for_id(Oid group_id) { +char *gpdb::get_rg_name_for_id(Oid group_id) { return wrap_throw(GetResGroupNameForId, group_id); } -Oid ya_gpdb::get_rg_id_by_session_id(int session_id) { +Oid gpdb::get_rg_id_by_session_id(int session_id) { return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } -void ya_gpdb::insert_log(const yagpcc::SetQueryReq &req, bool utility) { +void gpdb::insert_log(const gpsc::SetQueryReq &req, bool utility) { return wrap_throw(::insert_log, req, utility); } diff --git a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h similarity index 92% rename from gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h rename to gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h index fe9b3ba0487..576007f6c7c 100644 --- a/gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h +++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h @@ -20,7 +20,7 @@ * gpdbwrappers.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/memory/gpdbwrappers.h + * gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h * *------------------------------------------------------------------------- */ @@ -43,11 +43,11 @@ extern "C" { #include #include -namespace yagpcc { +namespace gpsc { class SetQueryReq; -} // namespace yagpcc +} // namespace gpsc -namespace ya_gpdb { +namespace gpdb { // Functions that call palloc(). // Make sure correct memory context is set. @@ -68,7 +68,7 @@ void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query) noexcept; StringInfo gen_normplan(const char *executionPlan) noexcept; char *get_rg_name_for_id(Oid group_id); -void insert_log(const yagpcc::SetQueryReq &req, bool utility); +void insert_log(const gpsc::SetQueryReq &req, bool utility); // Palloc-free functions. 
void pfree(void *pointer) noexcept; @@ -78,4 +78,4 @@ const char *get_config_option(const char *name, bool missing_ok, void list_free(List *list) noexcept; Oid get_rg_id_by_session_id(int session_id); -} // namespace ya_gpdb +} // namespace gpdb diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md b/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md new file mode 100644 index 00000000000..927189474fe --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md @@ -0,0 +1,20 @@ + + +This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c similarity index 99% rename from gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c rename to gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c index 7404208055f..e24f53536a4 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c @@ -20,7 +20,7 @@ * pg_stat_statements_ya_parser.c * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c + * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c * *------------------------------------------------------------------------- */ diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h similarity index 93% rename from gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h rename to 
gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h index 96c6a776dba..a613ba04259 100644 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h @@ -20,7 +20,7 @@ * pg_stat_statements_ya_parser.h * * IDENTIFICATION - * gpcontrib/yagp_hooks_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h + * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h * *------------------------------------------------------------------------- */ diff --git a/gpcontrib/yagp_hooks_collector/README.md b/gpcontrib/yagp_hooks_collector/README.md deleted file mode 100644 index 9f465a190cb..00000000000 --- a/gpcontrib/yagp_hooks_collector/README.md +++ /dev/null @@ -1,28 +0,0 @@ -## YAGP Hooks Collector - -An extension for collecting greenplum query execution metrics and reporting them to an external agent. - -### Collected Statistics - -#### 1. Query Lifecycle -- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. -- **GUC:** `yagpcc.enable`. - -#### 2. `EXPLAIN` data -- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. -- **GUC:** `yagpcc.enable`. - -#### 3. `EXPLAIN ANALYZE` data -- **What:** Triggers generation of the `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. -- **GUCs:** `yagpcc.enable`, `yagpcc.min_analyze_time`, `yagpcc.enable_cdbstats`(ANALYZE), `yagpcc.enable_analyze`(BUFFERS, TIMING, VERBOSE). - -#### 4. Other Metrics -- **What:** Captures Instrument, Greenplum, System, Network, Interconnect, Spill metrics. -- **GUC:** `yagpcc.enable`. 
- -### General Configuration -- **Nested Queries:** When `yagpcc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. -- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `yagpcc.uds_path`. -- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `yagpcc.ignored_users_list`. -- **Trimming plans:** Query texts and execution plans are trimmed based on `yagpcc.max_text_size` and `yagpcc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. -- **Analyze collection:** Analyze is sent if execution time exceeds `yagpcc.min_analyze_time`, which is 10 seconds by default. Analyze is collected if `yagpcc.enable_analyze` is true. diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out b/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out deleted file mode 100644 index 6689b6a4ed3..00000000000 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_locale.out +++ /dev/null @@ -1,23 +0,0 @@ --- The extension generates normalized query text and plan using jumbling functions. --- Those functions may fail when translating to wide character if the current locale --- cannot handle the character set. This test checks that even when those functions --- fail, the plan is still generated and executed. This test is partially taken from --- gp_locale. 
--- start_ignore -DROP DATABASE IF EXISTS yagp_test_locale; --- end_ignore -CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; -\c yagp_test_locale -CREATE EXTENSION yagp_hooks_collector; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.enable TO TRUE; -CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); -INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); --- Should not see error here -UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; -RESET yagpcc.enable; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; -DROP TABLE yagp_hi_안녕세계; -DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out b/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out deleted file mode 100644 index d04929ffb4a..00000000000 --- a/gpcontrib/yagp_hooks_collector/expected/yagp_uds.out +++ /dev/null @@ -1,42 +0,0 @@ --- Test UDS socket --- start_ignore -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; --- end_ignore -\set UDS_PATH '/tmp/yagpcc_test.sock' --- Configure extension to send via UDS -SET yagpcc.uds_path TO :'UDS_PATH'; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.logging_mode TO 'UDS'; --- Start receiver -SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); - __test_uds_start_server -------------------------- -(0 rows) - --- Send -SELECT 1; - ?column? 
----------- - 1 -(1 row) - --- Receive -SELECT yagpcc.__test_uds_receive() > 0 as received; - received ----------- - t -(1 row) - --- Stop receiver -SELECT yagpcc.__test_uds_stop_server(); - __test_uds_stop_server ------------------------- -(0 rows) - --- Cleanup -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.uds_path; -RESET yagpcc.ignored_users_list; -RESET yagpcc.enable; -RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql deleted file mode 100644 index f56351e0d43..00000000000 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_cursors.sql +++ /dev/null @@ -1,85 +0,0 @@ -CREATE EXTENSION yagp_hooks_collector; - -CREATE FUNCTION yagp_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; - -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; - --- DECLARE -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -DECLARE cursor_stats_0 CURSOR FOR SELECT 0; -CLOSE cursor_stats_0; -COMMIT; - -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- DECLARE WITH HOLD -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; -CLOSE cursor_stats_1; -DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; -CLOSE cursor_stats_2; -COMMIT; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- 
ROLLBACK -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -DECLARE cursor_stats_3 CURSOR FOR SELECT 1; -CLOSE cursor_stats_3; -DECLARE cursor_stats_4 CURSOR FOR SELECT 1; -ROLLBACK; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- FETCH -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; -DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; -FETCH 1 IN cursor_stats_5; -FETCH 1 IN cursor_stats_6; -CLOSE cursor_stats_5; -CLOSE cursor_stats_6; -COMMIT; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql deleted file mode 100644 index 65d867d1680..00000000000 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_locale.sql +++ /dev/null @@ -1,29 +0,0 @@ --- The extension generates normalized query text and plan using jumbling functions. --- Those functions may fail when translating to wide character if the current locale --- cannot handle the character set. This test checks that even when those functions --- fail, the plan is still generated and executed. This test is partially taken from --- gp_locale. 
- --- start_ignore -DROP DATABASE IF EXISTS yagp_test_locale; --- end_ignore - -CREATE DATABASE yagp_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; -\c yagp_test_locale - -CREATE EXTENSION yagp_hooks_collector; - -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.enable TO TRUE; - -CREATE TABLE yagp_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); -INSERT INTO yagp_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); --- Should not see error here -UPDATE yagp_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; - -RESET yagpcc.enable; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; -DROP TABLE yagp_hi_안녕세계; -DROP EXTENSION yagp_hooks_collector; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql deleted file mode 100644 index 90e972ae4c1..00000000000 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_select.sql +++ /dev/null @@ -1,69 +0,0 @@ -CREATE EXTENSION yagp_hooks_collector; - -CREATE OR REPLACE FUNCTION yagp_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; - -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; -SET yagpcc.enable_utility TO FALSE; - --- Basic SELECT tests -SET yagpcc.logging_mode to 'TBL'; - -SELECT 1; -SELECT COUNT(*) FROM generate_series(1,10); - -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- Transaction test -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -SELECT 1; -COMMIT; - -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM 
yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- CTE test -SET yagpcc.logging_mode to 'TBL'; - -WITH t AS (VALUES (1), (2)) -SELECT * FROM t; - -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- Prepared statement test -SET yagpcc.logging_mode to 'TBL'; - -PREPARE test_stmt AS SELECT 1; -EXECUTE test_stmt; -DEALLOCATE test_stmt; - -RESET yagpcc.logging_mode; -SELECT segid, query_text, query_status FROM yagpcc.log ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql b/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql deleted file mode 100644 index 3eef697a4e7..00000000000 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_uds.sql +++ /dev/null @@ -1,31 +0,0 @@ --- Test UDS socket --- start_ignore -CREATE EXTENSION IF NOT EXISTS yagp_hooks_collector; --- end_ignore - -\set UDS_PATH '/tmp/yagpcc_test.sock' - --- Configure extension to send via UDS -SET yagpcc.uds_path TO :'UDS_PATH'; -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.logging_mode TO 'UDS'; - --- Start receiver -SELECT yagpcc.__test_uds_start_server(:'UDS_PATH'); - --- Send -SELECT 1; - --- Receive -SELECT yagpcc.__test_uds_receive() > 0 as received; - --- Stop receiver -SELECT yagpcc.__test_uds_stop_server(); - --- Cleanup -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.uds_path; -RESET yagpcc.ignored_users_list; -RESET yagpcc.enable; -RESET yagpcc.logging_mode; diff --git a/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql 
b/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql deleted file mode 100644 index cf9c1d253d0..00000000000 --- a/gpcontrib/yagp_hooks_collector/sql/yagp_utility.sql +++ /dev/null @@ -1,135 +0,0 @@ -CREATE EXTENSION yagp_hooks_collector; - -CREATE OR REPLACE FUNCTION yagp_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; - -SET yagpcc.ignored_users_list TO ''; -SET yagpcc.enable TO TRUE; -SET yagpcc.enable_utility TO TRUE; -SET yagpcc.report_nested_queries TO TRUE; - -SET yagpcc.logging_mode to 'TBL'; - -CREATE TABLE test_table (a int, b text); -CREATE INDEX test_idx ON test_table(a); -ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; -DROP TABLE test_table; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- Partitioning -SET yagpcc.logging_mode to 'TBL'; - -CREATE TABLE pt_test (a int, b int) -DISTRIBUTED BY (a) -PARTITION BY RANGE (a) -(START (0) END (100) EVERY (50)); -DROP TABLE pt_test; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- Views and Functions -SET yagpcc.logging_mode to 'TBL'; - -CREATE VIEW test_view AS SELECT 1 AS a; -CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; -DROP VIEW test_view; -DROP FUNCTION test_func(int); - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT 
yagpcc.truncate_log() IS NOT NULL AS t; - --- Transaction Operations -SET yagpcc.logging_mode to 'TBL'; - -BEGIN; -SAVEPOINT sp1; -ROLLBACK TO sp1; -COMMIT; - -BEGIN; -SAVEPOINT sp2; -ABORT; - -BEGIN; -ROLLBACK; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- DML Operations -SET yagpcc.logging_mode to 'TBL'; - -CREATE TABLE dml_test (a int, b text); -INSERT INTO dml_test VALUES (1, 'test'); -UPDATE dml_test SET b = 'updated' WHERE a = 1; -DELETE FROM dml_test WHERE a = 1; -DROP TABLE dml_test; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- COPY Operations -SET yagpcc.logging_mode to 'TBL'; - -CREATE TABLE copy_test (a int); -COPY (SELECT 1) TO STDOUT; -DROP TABLE copy_test; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- Prepared Statements and error during execute -SET yagpcc.logging_mode to 'TBL'; - -PREPARE test_prep(int) AS SELECT $1/0 AS value; -EXECUTE test_prep(0::int); -DEALLOCATE test_prep; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - --- GUC Settings -SET yagpcc.logging_mode to 'TBL'; - -SET yagpcc.report_nested_queries TO FALSE; -RESET yagpcc.report_nested_queries; - -RESET yagpcc.logging_mode; - -SELECT segid, query_text, query_status FROM yagpcc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, 
yagp_status_order(query_status) ASC; -SELECT yagpcc.truncate_log() IS NOT NULL AS t; - -DROP FUNCTION yagp_status_order(text); -DROP EXTENSION yagp_hooks_collector; -RESET yagpcc.enable; -RESET yagpcc.report_nested_queries; -RESET yagpcc.enable_utility; -RESET yagpcc.ignored_users_list; diff --git a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md b/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md deleted file mode 100644 index 291e31a3099..00000000000 --- a/gpcontrib/yagp_hooks_collector/src/stat_statements_parser/README.md +++ /dev/null @@ -1 +0,0 @@ -This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation. diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql deleted file mode 100644 index 8684ca73915..00000000000 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0--1.1.sql +++ /dev/null @@ -1,113 +0,0 @@ -/* yagp_hooks_collector--1.0--1.1.sql */ - --- complain if script is sourced in psql, rather than via ALTER EXTENSION -\echo Use "ALTER EXTENSION yagp_hooks_collector UPDATE TO '1.1'" to load this file. \quit - -CREATE SCHEMA yagpcc; - --- Unlink existing objects from extension. -ALTER EXTENSION yagp_hooks_collector DROP VIEW yagp_stat_messages; -ALTER EXTENSION yagp_hooks_collector DROP FUNCTION yagp_stat_messages_reset(); -ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_segments(); -ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_f_on_master(); -ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); -ALTER EXTENSION yagp_hooks_collector DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); - --- Now drop the objects. 
-DROP VIEW yagp_stat_messages; -DROP FUNCTION yagp_stat_messages_reset(); -DROP FUNCTION __yagp_stat_messages_f_on_segments(); -DROP FUNCTION __yagp_stat_messages_f_on_master(); -DROP FUNCTION __yagp_stat_messages_reset_f_on_segments(); -DROP FUNCTION __yagp_stat_messages_reset_f_on_master(); - --- Recreate functions and view in new schema. -CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON ALL SEGMENTS; - -CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS SETOF void -AS -$$ - SELECT yagpcc.__stat_messages_reset_f_on_master(); - SELECT yagpcc.__stat_messages_reset_f_on_segments(); -$$ -LANGUAGE SQL EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_f_on_master() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - -CREATE VIEW yagpcc.stat_messages AS - SELECT C.* - FROM yagpcc.__stat_messages_f_on_master() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) - UNION ALL - SELECT C.* - FROM yagpcc.__stat_messages_f_on_segments() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) -ORDER BY segid; - --- Create new objects. 
-CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - --- Creates log table inside yagpcc schema. -SELECT yagpcc.__init_log_on_master(); -SELECT yagpcc.__init_log_on_segments(); - -CREATE VIEW yagpcc.log AS - SELECT * FROM yagpcc.__log -- master - UNION ALL - SELECT * FROM gp_dist_random('yagpcc.__log') -- segments - ORDER BY tmid, ssid, ccnt; - -CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__truncate_log_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - -CREATE FUNCTION yagpcc.truncate_log() -RETURNS SETOF void AS $$ -BEGIN - PERFORM yagpcc.__truncate_log_on_master(); - PERFORM yagpcc.__truncate_log_on_segments(); -END; -$$ LANGUAGE plpgsql VOLATILE; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql deleted file mode 100644 index 270cab92382..00000000000 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.0.sql +++ /dev/null @@ -1,55 +0,0 @@ -/* yagp_hooks_collector--1.0.sql */ - --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit - -CREATE FUNCTION __yagp_stat_messages_reset_f_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; - -CREATE FUNCTION __yagp_stat_messages_reset_f_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON ALL SEGMENTS; - -CREATE FUNCTION yagp_stat_messages_reset() -RETURNS SETOF void -AS -$$ - SELECT __yagp_stat_messages_reset_f_on_master(); - SELECT __yagp_stat_messages_reset_f_on_segments(); -$$ -LANGUAGE SQL EXECUTE ON MASTER; - -CREATE FUNCTION __yagp_stat_messages_f_on_master() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION __yagp_stat_messages_f_on_segments() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - -CREATE VIEW yagp_stat_messages AS - SELECT C.* - FROM __yagp_stat_messages_f_on_master() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) - UNION ALL - SELECT C.* - FROM __yagp_stat_messages_f_on_segments() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) -ORDER BY segid; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql deleted file mode 100644 index 83bfb553638..00000000000 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector--1.1.sql +++ /dev/null @@ -1,110 +0,0 @@ -/* yagp_hooks_collector--1.1.sql */ - --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use "CREATE EXTENSION yagp_hooks_collector" to load this file. 
\quit - -CREATE SCHEMA yagpcc; - -CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_reset_f_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_stat_messages_reset' -LANGUAGE C EXECUTE ON ALL SEGMENTS; - -CREATE FUNCTION yagpcc.stat_messages_reset() -RETURNS SETOF void -AS -$$ - SELECT yagpcc.__stat_messages_reset_f_on_master(); - SELECT yagpcc.__stat_messages_reset_f_on_segments(); -$$ -LANGUAGE SQL EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_f_on_master() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__stat_messages_f_on_segments() -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'yagp_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - -CREATE VIEW yagpcc.stat_messages AS - SELECT C.* - FROM yagpcc.__stat_messages_f_on_master() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) - UNION ALL - SELECT C.* - FROM yagpcc.__stat_messages_f_on_segments() as C ( - segid int, - total_messages bigint, - send_failures bigint, - connection_failures bigint, - other_errors bigint, - max_message_size int - ) -ORDER BY segid; - -CREATE FUNCTION yagpcc.__init_log_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__init_log_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - --- Creates log table inside yagpcc schema. 
-SELECT yagpcc.__init_log_on_master(); -SELECT yagpcc.__init_log_on_segments(); - -CREATE VIEW yagpcc.log AS - SELECT * FROM yagpcc.__log -- master - UNION ALL - SELECT * FROM gp_dist_random('yagpcc.__log') -- segments -ORDER BY tmid, ssid, ccnt; - -CREATE FUNCTION yagpcc.__truncate_log_on_master() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__truncate_log_on_segments() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; - -CREATE FUNCTION yagpcc.truncate_log() -RETURNS SETOF void AS $$ -BEGIN - PERFORM yagpcc.__truncate_log_on_master(); - PERFORM yagpcc.__truncate_log_on_segments(); -END; -$$ LANGUAGE plpgsql VOLATILE; - -CREATE FUNCTION yagpcc.__test_uds_start_server(path text) -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_test_uds_start_server' -LANGUAGE C STRICT EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__test_uds_receive(timeout_ms int DEFAULT 2000) -RETURNS SETOF bigint -AS 'MODULE_PATHNAME', 'yagp_test_uds_receive' -LANGUAGE C STRICT EXECUTE ON MASTER; - -CREATE FUNCTION yagpcc.__test_uds_stop_server() -RETURNS SETOF void -AS 'MODULE_PATHNAME', 'yagp_test_uds_stop_server' -LANGUAGE C EXECUTE ON MASTER; diff --git a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control b/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control deleted file mode 100644 index cb5906a1302..00000000000 --- a/gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control +++ /dev/null @@ -1,5 +0,0 @@ -# yagp_hooks_collector extension -comment = 'Intercept query and plan execution hooks and report them to Yandex GPCC agents' -default_version = '1.1' -module_pathname = '$libdir/yagp_hooks_collector' -superuser = true diff --git a/pom.xml b/pom.xml index aada6870658..b1825dec460 100644 --- a/pom.xml +++ b/pom.xml @@ -154,12 +154,6 @@ code or new licensing patterns. 
gpcontrib/gp_exttable_fdw/gp_exttable_fdw.control gpcontrib/diskquota/** - gpcontrib/yagp_hooks_collector/yagp_hooks_collector.control - gpcontrib/yagp_hooks_collector/protos/yagpcc_set_service.proto - gpcontrib/yagp_hooks_collector/protos/yagpcc_plan.proto - gpcontrib/yagp_hooks_collector/protos/yagpcc_metrics.proto - gpcontrib/yagp_hooks_collector/.clang-format - gpcontrib/yagp_hooks_collector/Makefile getversion .git-blame-ignore-revs @@ -1275,6 +1269,16 @@ code or new licensing patterns. src/include/task/task_states.h src/include/task/job_metadata.h + + gpcontrib/gp_stats_collector/gp_stats_collector.control + gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto + gpcontrib/gp_stats_collector/protos/gpsc_plan.proto + gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto + gpcontrib/gp_stats_collector/.clang-format + gpcontrib/gp_stats_collector/Makefile + diff --git a/src/Makefile.global.in b/src/Makefile.global.in index edc49b72e05..234d09a56b2 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -271,7 +271,7 @@ with_zstd = @with_zstd@ ZSTD_CFLAGS = @ZSTD_CFLAGS@ ZSTD_LIBS = @ZSTD_LIBS@ EVENT_LIBS = @EVENT_LIBS@ -with_yagp_hooks_collector = @with_yagp_hooks_collector@ +with_gp_stats_collector = @with_gp_stats_collector@ ########################################################################## # diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 553830e8599..0ea5874e884 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -376,7 +376,7 @@ PortalCleanup(Portal portal) CurrentResourceOwner = saveResourceOwner; } else { /* GPDB hook for collecting query info */ - if (queryDesc->yagp_query_key && query_info_collect_hook) + if (queryDesc->gpsc_query_key && query_info_collect_hook) (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); } } diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 7c1dbc480bc..e5512bb8271 100644 --- 
a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -127,8 +127,8 @@ CreateQueryDesc(PlannedStmt *plannedstmt, if (Gp_role != GP_ROLE_EXECUTE) increment_command_count(); - /* null this field until set by YAGP Hooks collector */ - qd->yagp_query_key = NULL; + /* null this field until set by GP Stats Collector */ + qd->gpsc_query_key = NULL; return qd; } diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e469945a4c5..d50d3e48f6b 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -22,14 +22,14 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */ -typedef struct YagpQueryKey +typedef struct GpscQueryKey { int tmid; /* transaction time */ int ssid; /* session id */ int ccnt; /* command count */ int nesting_level; uintptr_t query_desc_addr; -} YagpQueryKey; +} GpscQueryKey; /* * SerializedParams is used to serialize external query parameters @@ -339,8 +339,8 @@ typedef struct QueryDesc /* This is always set NULL by the core system, but plugins can change it */ struct Instrumentation *totaltime; /* total time spent in ExecutorRun */ - /* YAGP Hooks collector */ - YagpQueryKey *yagp_query_key; + /* GP Stats Collector */ + GpscQueryKey *gpsc_query_key; } QueryDesc; /* in pquery.c */ From 5a981621bafb305d73db7633a351fcde42f9eda6 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Thu, 26 Mar 2026 11:21:35 +0300 Subject: [PATCH 44/49] [gp_stats_collector] Simplify Makefile and add -Wno-unused-but-set-variable --- gpcontrib/gp_stats_collector/Makefile | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/gpcontrib/gp_stats_collector/Makefile b/gpcontrib/gp_stats_collector/Makefile index c8f7b3c30fe..43255ca1955 100644 --- a/gpcontrib/gp_stats_collector/Makefile +++ b/gpcontrib/gp_stats_collector/Makefile @@ -10,13 +10,8 @@ C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/*/*.cpp)) OBJS = 
$(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) -override CXXFLAGS = -Werror -fPIC -g3 -Wall -Wpointer-arith -Wendif-labels \ - -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv \ - -Wno-unused-but-set-variable -Wno-address -Wno-format-truncation \ - -Wno-stringop-truncation -g -ggdb -std=c++17 -Iinclude -Isrc/protos -Isrc -DGPBUILD - -PG_CXXFLAGS += -Isrc -Iinclude -SHLIB_LINK += -lprotobuf -lpthread -lstdc++ +PG_CXXFLAGS += -Werror -Wall -Wno-unused-but-set-variable -std=c++17 -Isrc/protos -Isrc -Iinclude -DGPBUILD +SHLIB_LINK += -lprotobuf -lstdc++ EXTRA_CLEAN = src/protos ifdef USE_PGXS @@ -30,10 +25,11 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif -src/protos/%.pb.cpp src/protos/%.pb.h: protos/%.proto +src/protos/.done: $(wildcard protos/*.proto) @mkdir -p src/protos protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^ - mv src/protos/$*.pb.cc src/protos/$*.pb.cpp + for f in src/protos/*.pb.cc; do mv "$$f" "$${f%.cc}.cpp"; done + touch $@ -$(CPP_OBJS): src/protos/gpsc_metrics.pb.h src/protos/gpsc_plan.pb.h src/protos/gpsc_set_service.pb.h -src/protos/gpsc_set_service.pb.o: src/protos/gpsc_metrics.pb.h +src/protos/%.pb.cpp src/protos/%.pb.h: src/protos/.done ; +$(CPP_OBJS): src/protos/.done From b8ae0fc9abcd9caf081be3848eaa5bbfb78e0d4f Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 31 Mar 2026 08:34:53 +0300 Subject: [PATCH 45/49] [gp_stats_collector] Build by default with extension disabled via GUCs Enable building gp_stats_collector by default in configure. Add missing check in verify_query() to ensure the extension does not execute main code while disabled. Always verify protobuf version once the shared library is preloaded. 
--- .github/workflows/build-cloudberry-rocky8.yml | 3 +-- .github/workflows/build-cloudberry.yml | 3 +-- .github/workflows/build-deb-cloudberry.yml | 3 +-- .../cloudberry/scripts/configure-cloudberry.sh | 1 + gpcontrib/gp_stats_collector/src/Config.cpp | 6 +++--- gpcontrib/gp_stats_collector/src/EventSender.cpp | 16 +++++++++------- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-cloudberry-rocky8.yml b/.github/workflows/build-cloudberry-rocky8.yml index 39175753a99..dd4d10ab115 100644 --- a/.github/workflows/build-cloudberry-rocky8.yml +++ b/.github/workflows/build-cloudberry-rocky8.yml @@ -544,11 +544,10 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} - CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index cbd4fd753dc..cc99a997c3f 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -539,11 +539,10 @@ jobs: if: needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} - CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi diff --git a/.github/workflows/build-deb-cloudberry.yml b/.github/workflows/build-deb-cloudberry.yml index bf85a107b31..52db1819194 100644 --- a/.github/workflows/build-deb-cloudberry.yml +++ b/.github/workflows/build-deb-cloudberry.yml @@ -452,14 +452,13 @@ jobs: shell: bash env: SRC_DIR: ${{ github.workspace }} - CONFIGURE_EXTRA_OPTS: --with-gp-stats-collector run: | set -eo pipefail export BUILD_DESTINATION=${SRC_DIR}/debian/build chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh - if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} CONFIGURE_EXTRA_OPTS=${{ env.CONFIGURE_EXTRA_OPTS }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then echo "::error::Configure script failed" exit 1 fi diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index d30a0b794f0..a9086a434fb 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -163,6 +163,7 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --disable-pxf \ --enable-tap-tests \ ${CONFIGURE_DEBUG_OPTS} \ + --with-gp-stats-collector \ --with-gssapi \ --with-ldap \ --with-libxml \ diff --git a/gpcontrib/gp_stats_collector/src/Config.cpp b/gpcontrib/gp_stats_collector/src/Config.cpp index e117aa941fd..2f40b30e922 100644 --- a/gpcontrib/gp_stats_collector/src/Config.cpp +++ b/gpcontrib/gp_stats_collector/src/Config.cpp @@ -40,7 +40,7 @@ extern "C" { static char *guc_uds_path = nullptr; static bool guc_enable_analyze = true; static bool guc_enable_cdbstats = true; -static bool guc_enable_collector = true; +static bool guc_enable_collector = false; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; static int guc_max_text_size = 1 << 20; // in bytes (1MB) @@ -68,7 +68,7 @@ void Config::init_gucs() { DefineCustomBoolVariable( "gpsc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, - true, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + false, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); DefineCustomBoolVariable( "gpsc.enable_analyze", "Collect analyze metrics in gpsc", 0LL, @@ -88,7 +88,7 @@ void Config::init_gucs() { DefineCustomStringVariable("gpsc.ignored_users_list", "Make gpsc ignore queries issued by given users", 0LL, &guc_ignored_users, - 
"gpadmin,repl,gpperfmon,monitor", PGC_SUSET, + "", PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, assign_ignored_users_hook, 0LL); diff --git a/gpcontrib/gp_stats_collector/src/EventSender.cpp b/gpcontrib/gp_stats_collector/src/EventSender.cpp index b28ceba175a..c0faaf0ad0e 100644 --- a/gpcontrib/gp_stats_collector/src/EventSender.cpp +++ b/gpcontrib/gp_stats_collector/src/EventSender.cpp @@ -68,6 +68,10 @@ bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, // not executed yet, causing DONE to be skipped/added. config.sync(); + if (!config.enable_collector()) { + return false; + } + if (utility && !config.enable_utility()) { return false; } @@ -409,13 +413,11 @@ EventSender::EventSender() { // Perform initial sync to get default GUC values config.sync(); - if (config.enable_collector()) { - try { - GOOGLE_PROTOBUF_VERIFY_VERSION; - proto_verified = true; - } catch (const std::exception &e) { - ereport(INFO, (errmsg("Unable to start query tracing %s", e.what()))); - } + try { + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; + } catch (const std::exception &e) { + ereport(INFO, (errmsg("GPSC protobuf version mismatch is detected %s", e.what()))); } #ifdef IC_TEARDOWN_HOOK memset(&ic_statistics, 0, sizeof(ICStatistics)); From 1c25757c3596d31f7748c8f788947ef3222fc98d Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 31 Mar 2026 08:47:13 +0300 Subject: [PATCH 46/49] [gp_stats_collector] Code quality cleanup Delete stale .gitignore. Add Apache headers to .proto files. Change #pragma once to #ifndef guards. Remove test result files from tree. Change ereport(FATAL) to ereport(ERROR). Remove internal naming suffixes. Apply clang-format from gporca. 
--- .gitignore | 2 +- gpcontrib/gp_stats_collector/.clang-format | 180 +++- gpcontrib/gp_stats_collector/.gitignore | 5 - .../protos/gpsc_metrics.proto | 18 + .../gp_stats_collector/protos/gpsc_plan.proto | 18 + .../protos/gpsc_set_service.proto | 18 + .../results/gpsc_cursors.out | 163 --- .../gp_stats_collector/results/gpsc_dist.out | 175 ---- .../results/gpsc_guc_cache.out | 61 -- .../results/gpsc_locale.out | 23 - .../results/gpsc_select.out | 136 --- .../gp_stats_collector/results/gpsc_uds.out | 42 - .../results/gpsc_utf8_trim.out | 68 -- .../results/gpsc_utility.out | 248 ----- gpcontrib/gp_stats_collector/src/Config.cpp | 248 ++--- gpcontrib/gp_stats_collector/src/Config.h | 98 +- .../gp_stats_collector/src/EventSender.cpp | 976 ++++++++++-------- .../gp_stats_collector/src/EventSender.h | 243 +++-- gpcontrib/gp_stats_collector/src/GpscStat.cpp | 142 ++- gpcontrib/gp_stats_collector/src/GpscStat.h | 36 +- gpcontrib/gp_stats_collector/src/PgUtils.cpp | 68 +- .../gp_stats_collector/src/ProcStats.cpp | 179 ++-- gpcontrib/gp_stats_collector/src/ProcStats.h | 9 +- .../gp_stats_collector/src/ProtoUtils.cpp | 492 +++++---- gpcontrib/gp_stats_collector/src/ProtoUtils.h | 17 +- .../gp_stats_collector/src/UDSConnector.cpp | 166 +-- .../gp_stats_collector/src/UDSConnector.h | 12 +- .../src/gp_stats_collector.c | 169 +-- .../gp_stats_collector/src/hook_wrappers.cpp | 627 ++++++----- .../gp_stats_collector/src/hook_wrappers.h | 6 +- .../gp_stats_collector/src/log/LogOps.cpp | 199 ++-- gpcontrib/gp_stats_collector/src/log/LogOps.h | 5 +- .../gp_stats_collector/src/log/LogSchema.cpp | 261 ++--- .../gp_stats_collector/src/log/LogSchema.h | 38 +- .../src/memory/gpdbwrappers.cpp | 386 ++++--- .../src/memory/gpdbwrappers.h | 33 +- ...a_parser.c => pg_stat_statements_parser.c} | 94 +- ...a_parser.h => pg_stat_statements_parser.h} | 12 +- pom.xml | 3 - 39 files changed, 2774 insertions(+), 2902 deletions(-) delete mode 100644 gpcontrib/gp_stats_collector/.gitignore delete mode 
100644 gpcontrib/gp_stats_collector/results/gpsc_cursors.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_dist.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_locale.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_select.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_uds.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out delete mode 100644 gpcontrib/gp_stats_collector/results/gpsc_utility.out rename gpcontrib/gp_stats_collector/src/stat_statements_parser/{pg_stat_statements_ya_parser.c => pg_stat_statements_parser.c} (82%) rename gpcontrib/gp_stats_collector/src/stat_statements_parser/{pg_stat_statements_ya_parser.h => pg_stat_statements_parser.h} (87%) diff --git a/.gitignore b/.gitignore index 7f5110d5c8e..5c21989c4ab 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,4 @@ lib*.pc /compile_commands.json /tmp_install/ /.cache/ -/install/ +/install/ \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/.clang-format b/gpcontrib/gp_stats_collector/.clang-format index 99130575c9a..eb90ff33671 100644 --- a/gpcontrib/gp_stats_collector/.clang-format +++ b/gpcontrib/gp_stats_collector/.clang-format @@ -1,2 +1,178 @@ -BasedOnStyle: LLVM -SortIncludes: false +--- +Language: Cpp +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveBitFields: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortLambdasOnASingleLine: All 
+AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: All +AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: Always + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^<.*' + Priority: 1 + - Regex: '"protos/.*\.pb\.h"' + Priority: 2 + - Regex: '"postgres\.h"' + Priority: 3 + - Regex: '.*' + Priority: 4 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave 
+JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 3 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: '' + BasedOnStyle: google +ReflowComments: false +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseCRLF: false +UseTab: Always +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE 
+... + + diff --git a/gpcontrib/gp_stats_collector/.gitignore b/gpcontrib/gp_stats_collector/.gitignore deleted file mode 100644 index e8dfe855dad..00000000000 --- a/gpcontrib/gp_stats_collector/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.o -*.so -src/protos/ -.vscode -compile_commands.json diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto index a9e26471839..7853dc58db7 100644 --- a/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package gpsc; diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto index 5a7269edd20..c1632478464 100644 --- a/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package gpsc; diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto index 4cd795424ab..bcf09074ed7 100644 --- a/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; import "google/protobuf/timestamp.proto"; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_cursors.out b/gpcontrib/gp_stats_collector/results/gpsc_cursors.out deleted file mode 100644 index 282d9ac49e1..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_cursors.out +++ /dev/null @@ -1,163 +0,0 @@ -CREATE EXTENSION gp_stats_collector; -CREATE FUNCTION gpsc_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.enable_utility TO TRUE; -SET gpsc.report_nested_queries TO TRUE; --- DECLARE -SET gpsc.logging_mode to 'TBL'; -BEGIN; -DECLARE cursor_stats_0 CURSOR FOR SELECT 0; -CLOSE cursor_stats_0; -COMMIT; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(10 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- DECLARE WITH HOLD -SET gpsc.logging_mode to 'TBL'; -BEGIN; -DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; -CLOSE cursor_stats_1; -DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; 
-CLOSE cursor_stats_2; -COMMIT; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-------------------------------------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(14 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- ROLLBACK -SET gpsc.logging_mode to 'TBL'; -BEGIN; -DECLARE cursor_stats_3 CURSOR FOR SELECT 1; -CLOSE cursor_stats_3; -DECLARE cursor_stats_4 CURSOR FOR SELECT 1; -ROLLBACK; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_4 
CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(12 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- FETCH -SET gpsc.logging_mode to 'TBL'; -BEGIN; -DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; -DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; -FETCH 1 IN cursor_stats_5; - ?column? ----------- - 2 -(1 row) - -FETCH 1 IN cursor_stats_6; - ?column? ----------- - 3 -(1 row) - -CLOSE cursor_stats_5; -CLOSE cursor_stats_6; -COMMIT; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-------------------------------------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE - -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT - -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE - -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT - -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE - -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT - -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE - -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT - -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | 
QUERY_STATUS_DONE -(18 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - -DROP FUNCTION gpsc_status_order(text); -DROP EXTENSION gp_stats_collector; -RESET gpsc.enable; -RESET gpsc.report_nested_queries; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_dist.out b/gpcontrib/gp_stats_collector/results/gpsc_dist.out deleted file mode 100644 index 92e8678767b..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_dist.out +++ /dev/null @@ -1,175 +0,0 @@ -CREATE EXTENSION gp_stats_collector; -CREATE OR REPLACE FUNCTION gpsc_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.report_nested_queries TO TRUE; -SET gpsc.enable_utility TO FALSE; --- Hash distributed table -CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); -INSERT INTO test_hash_dist SELECT 1; -SET gpsc.logging_mode to 'TBL'; -SET optimizer_enable_direct_dispatch TO TRUE; --- Direct dispatch is used here, only one segment is scanned. -select * from test_hash_dist where id = 1; - id ----- - 1 -(1 row) - -RESET optimizer_enable_direct_dispatch; -RESET gpsc.logging_mode; --- Should see 8 rows. 
-SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+--------------------------------------------+--------------------- - -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT - -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START - -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END - -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE - 1 | | QUERY_STATUS_SUBMIT - 1 | | QUERY_STATUS_START - 1 | | QUERY_STATUS_END - 1 | | QUERY_STATUS_DONE -(8 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - -SET gpsc.logging_mode to 'TBL'; --- Scan all segments. -select * from test_hash_dist; - id ----- - 1 -(1 row) - -DROP TABLE test_hash_dist; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-------------------------------+--------------------- - -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT - -1 | select * from test_hash_dist; | QUERY_STATUS_START - -1 | select * from test_hash_dist; | QUERY_STATUS_END - -1 | select * from test_hash_dist; | QUERY_STATUS_DONE - 1 | | QUERY_STATUS_SUBMIT - 1 | | QUERY_STATUS_START - 1 | | QUERY_STATUS_END - 1 | | QUERY_STATUS_DONE - 2 | | QUERY_STATUS_SUBMIT - 2 | | QUERY_STATUS_START - 2 | | QUERY_STATUS_END - 2 | | QUERY_STATUS_DONE - | | QUERY_STATUS_SUBMIT - | | QUERY_STATUS_START - | | QUERY_STATUS_END - | | QUERY_STATUS_DONE -(16 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Replicated table -CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ -BEGIN - RETURN NEXT 'seg'; -END; -$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; -CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; -INSERT INTO test_replicated SELECT 1; -SET gpsc.logging_mode to 'TBL'; 
-SELECT COUNT(*) FROM test_replicated, force_segments(); - count -------- - 3 -(1 row) - -DROP TABLE test_replicated; -DROP FUNCTION force_segments(); -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------------------------------+--------------------- - -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT - -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START - -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END - -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE - 1 | | QUERY_STATUS_SUBMIT - 1 | | QUERY_STATUS_START - 1 | | QUERY_STATUS_END - 1 | | QUERY_STATUS_DONE - 2 | | QUERY_STATUS_SUBMIT - 2 | | QUERY_STATUS_START - 2 | | QUERY_STATUS_END - 2 | | QUERY_STATUS_DONE - | | QUERY_STATUS_SUBMIT - | | QUERY_STATUS_START - | | QUERY_STATUS_END - | | QUERY_STATUS_DONE -(16 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Partially distributed table (2 numsegments) -SET allow_system_table_mods = ON; -CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); -UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; -INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); -SET gpsc.logging_mode to 'TBL'; -SELECT COUNT(*) FROM test_partial_dist; - count -------- - 100 -(1 row) - -RESET gpsc.logging_mode; -DROP TABLE test_partial_dist; -RESET allow_system_table_mods; --- Should see 12 rows. 
-SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - query_text | query_status ------------------------------------------+--------------------- - SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT - SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START - SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END - SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE - | QUERY_STATUS_SUBMIT - | QUERY_STATUS_START - | QUERY_STATUS_END - | QUERY_STATUS_DONE - | QUERY_STATUS_SUBMIT - | QUERY_STATUS_START - | QUERY_STATUS_END - | QUERY_STATUS_DONE -(12 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - -DROP FUNCTION gpsc_status_order(text); -DROP EXTENSION gp_stats_collector; -RESET gpsc.enable; -RESET gpsc.report_nested_queries; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out b/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out deleted file mode 100644 index 19c4774575d..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_guc_cache.out +++ /dev/null @@ -1,61 +0,0 @@ --- --- Test GUC caching for query lifecycle consistency. --- --- The extension logs SUBMIT and DONE events for each query. --- GUC values that control logging (enable_utility, ignored_users_list, ...) --- must be cached at SUBMIT time to ensure DONE uses the same filtering --- criteria. Otherwise, a SET command that modifies these GUCs would --- have its DONE event rejected, creating orphaned SUBMIT entries. --- This is due to query being actually executed between SUBMIT and DONE. 
--- start_ignore -CREATE EXTENSION IF NOT EXISTS gp_stats_collector; -SELECT gpsc.truncate_log(); - truncate_log --------------- -(0 rows) - --- end_ignore -CREATE OR REPLACE FUNCTION print_last_query(query text) -RETURNS TABLE(query_status text) AS $$ - SELECT query_status - FROM gpsc.log - WHERE segid = -1 AND query_text = query - ORDER BY ccnt DESC -$$ LANGUAGE sql; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.enable_utility TO TRUE; -SET gpsc.logging_mode TO 'TBL'; --- SET below disables utility logging and DONE must still be logged. -SET gpsc.enable_utility TO FALSE; -SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); - query_status ---------------------- - QUERY_STATUS_SUBMIT - QUERY_STATUS_DONE -(2 rows) - --- SELECT below adds current user to ignore list and DONE must still be logged. --- start_ignore -SELECT set_config('gpsc.ignored_users_list', current_user, false); - set_config ------------- - gpadmin -(1 row) - --- end_ignore -SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); - query_status ---------------------- - QUERY_STATUS_SUBMIT - QUERY_STATUS_START - QUERY_STATUS_END - QUERY_STATUS_DONE -(4 rows) - -DROP FUNCTION print_last_query(text); -DROP EXTENSION gp_stats_collector; -RESET gpsc.enable; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; -RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_locale.out b/gpcontrib/gp_stats_collector/results/gpsc_locale.out deleted file mode 100644 index a01fe0648b9..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_locale.out +++ /dev/null @@ -1,23 +0,0 @@ --- The extension generates normalized query text and plan using jumbling functions. --- Those functions may fail when translating to wide character if the current locale --- cannot handle the character set. This test checks that even when those functions --- fail, the plan is still generated and executed. 
This test is partially taken from --- gp_locale. --- start_ignore -DROP DATABASE IF EXISTS gpsc_test_locale; --- end_ignore -CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; -\c gpsc_test_locale -CREATE EXTENSION gp_stats_collector; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable_utility TO TRUE; -SET gpsc.enable TO TRUE; -CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); -INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); --- Should not see error here -UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; -RESET gpsc.enable; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; -DROP TABLE gpsc_hi_안녕세계; -DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_select.out b/gpcontrib/gp_stats_collector/results/gpsc_select.out deleted file mode 100644 index 3008c8f6d55..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_select.out +++ /dev/null @@ -1,136 +0,0 @@ -CREATE EXTENSION gp_stats_collector; -CREATE OR REPLACE FUNCTION gpsc_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.report_nested_queries TO TRUE; -SET gpsc.enable_utility TO FALSE; --- Basic SELECT tests -SET gpsc.logging_mode to 'TBL'; -SELECT 1; - ?column? 
----------- - 1 -(1 row) - -SELECT COUNT(*) FROM generate_series(1,10); - count -------- - 10 -(1 row) - -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------------------+--------------------- - -1 | SELECT 1; | QUERY_STATUS_SUBMIT - -1 | SELECT 1; | QUERY_STATUS_START - -1 | SELECT 1; | QUERY_STATUS_END - -1 | SELECT 1; | QUERY_STATUS_DONE - -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT - -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START - -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END - -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE -(8 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Transaction test -SET gpsc.logging_mode to 'TBL'; -BEGIN; -SELECT 1; - ?column? ----------- - 1 -(1 row) - -COMMIT; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+------------+--------------------- - -1 | SELECT 1; | QUERY_STATUS_SUBMIT - -1 | SELECT 1; | QUERY_STATUS_START - -1 | SELECT 1; | QUERY_STATUS_END - -1 | SELECT 1; | QUERY_STATUS_DONE -(4 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- CTE test -SET gpsc.logging_mode to 'TBL'; -WITH t AS (VALUES (1), (2)) -SELECT * FROM t; - column1 ---------- - 1 - 2 -(2 rows) - -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-----------------------------+--------------------- - -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT - | SELECT * FROM t; | - -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START - | SELECT * FROM t; | - -1 | WITH t AS (VALUES (1), 
(2))+| QUERY_STATUS_END - | SELECT * FROM t; | - -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE - | SELECT * FROM t; | -(4 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Prepared statement test -SET gpsc.logging_mode to 'TBL'; -PREPARE test_stmt AS SELECT 1; -EXECUTE test_stmt; - ?column? ----------- - 1 -(1 row) - -DEALLOCATE test_stmt; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+--------------------------------+--------------------- - -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT - -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START - -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END - -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE -(4 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - -DROP FUNCTION gpsc_status_order(text); -DROP EXTENSION gp_stats_collector; -RESET gpsc.enable; -RESET gpsc.report_nested_queries; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_uds.out b/gpcontrib/gp_stats_collector/results/gpsc_uds.out deleted file mode 100644 index e8bca79e669..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_uds.out +++ /dev/null @@ -1,42 +0,0 @@ --- Test UDS socket --- start_ignore -CREATE EXTENSION IF NOT EXISTS gp_stats_collector; --- end_ignore -\set UDS_PATH '/tmp/gpsc_test.sock' --- Configure extension to send via UDS -SET gpsc.uds_path TO :'UDS_PATH'; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.logging_mode TO 'UDS'; --- Start receiver -SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); - __test_uds_start_server -------------------------- -(0 rows) - --- Send -SELECT 1; - ?column? 
----------- - 1 -(1 row) - --- Receive -SELECT gpsc.__test_uds_receive() > 0 as received; - received ----------- - t -(1 row) - --- Stop receiver -SELECT gpsc.__test_uds_stop_server(); - __test_uds_stop_server ------------------------- -(0 rows) - --- Cleanup -DROP EXTENSION gp_stats_collector; -RESET gpsc.uds_path; -RESET gpsc.ignored_users_list; -RESET gpsc.enable; -RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out b/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out deleted file mode 100644 index db3949f3152..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_utf8_trim.out +++ /dev/null @@ -1,68 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS gp_stats_collector; -CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) -RETURNS TEXT AS $$ - SELECT query_text - FROM gpsc.log - WHERE query_text LIKE '%' || marker || '%' - ORDER BY datetime DESC - LIMIT 1 -$$ LANGUAGE sql VOLATILE; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; --- Test 1: 1 byte chars -SET gpsc.max_text_size to 19; -SET gpsc.logging_mode to 'TBL'; -SELECT /*test1*/ 'HelloWorld'; - ?column? ------------- - HelloWorld -(1 row) - -RESET gpsc.logging_mode; -SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; - correct_length ----------------- - t -(1 row) - --- Test 2: 2 byte chars -SET gpsc.max_text_size to 19; -SET gpsc.logging_mode to 'TBL'; -SELECT /*test2*/ 'РУССКИЙЯЗЫК'; - ?column? -------------- - РУССКИЙЯЗЫК -(1 row) - -RESET gpsc.logging_mode; --- Character 'Р' has two bytes and cut in the middle => not included. -SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; - correct_length ----------------- - t -(1 row) - --- Test 3: 4 byte chars -SET gpsc.max_text_size to 21; -SET gpsc.logging_mode to 'TBL'; -SELECT /*test3*/ '😀'; - ?column? ----------- - 😀 -(1 row) - -RESET gpsc.logging_mode; --- Emoji has 4 bytes and cut before the last byte => not included. 
-SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; - correct_length ----------------- - t -(1 row) - --- Cleanup -DROP FUNCTION get_marked_query(TEXT); -RESET gpsc.max_text_size; -RESET gpsc.logging_mode; -RESET gpsc.enable; -RESET gpsc.ignored_users_list; -DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/results/gpsc_utility.out b/gpcontrib/gp_stats_collector/results/gpsc_utility.out deleted file mode 100644 index e8e28614370..00000000000 --- a/gpcontrib/gp_stats_collector/results/gpsc_utility.out +++ /dev/null @@ -1,248 +0,0 @@ -CREATE EXTENSION gp_stats_collector; -CREATE OR REPLACE FUNCTION gpsc_status_order(status text) -RETURNS integer -AS $$ -BEGIN - RETURN CASE status - WHEN 'QUERY_STATUS_SUBMIT' THEN 1 - WHEN 'QUERY_STATUS_START' THEN 2 - WHEN 'QUERY_STATUS_END' THEN 3 - WHEN 'QUERY_STATUS_DONE' THEN 4 - ELSE 999 - END; -END; -$$ LANGUAGE plpgsql IMMUTABLE; -SET gpsc.ignored_users_list TO ''; -SET gpsc.enable TO TRUE; -SET gpsc.enable_utility TO TRUE; -SET gpsc.report_nested_queries TO TRUE; -SET gpsc.logging_mode to 'TBL'; -CREATE TABLE test_table (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-CREATE INDEX test_idx ON test_table(a); -ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; -DROP TABLE test_table; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+----------------------------------------------------+--------------------- - -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE - -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT - -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE - -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT - -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE - -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE test_table; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(10 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Partitioning -SET gpsc.logging_mode to 'TBL'; -CREATE TABLE pt_test (a int, b int) -DISTRIBUTED BY (a) -PARTITION BY RANGE (a) -(START (0) END (100) EVERY (50)); -DROP TABLE pt_test; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-------------------------------------+--------------------- - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE - | DISTRIBUTED BY (a) +| - | PARTITION BY RANGE (a) +| - | (START (0) END (100) EVERY (50)); | - -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE pt_test; | 
QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(6 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Views and Functions -SET gpsc.logging_mode to 'TBL'; -CREATE VIEW test_view AS SELECT 1 AS a; -CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; -DROP VIEW test_view; -DROP FUNCTION test_func(int); -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+------------------------------------------------------------------------------------+--------------------- - -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT - -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE - -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT - -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE - -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT - -1 | DROP VIEW test_view; | QUERY_STATUS_DONE - -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT - -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(10 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Transaction Operations -SET gpsc.logging_mode to 'TBL'; -BEGIN; -SAVEPOINT sp1; -ROLLBACK TO sp1; -COMMIT; -BEGIN; -SAVEPOINT sp2; -ABORT; -BEGIN; -ROLLBACK; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+--------------------------+--------------------- - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | 
BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE - -1 | COMMIT; | QUERY_STATUS_SUBMIT - -1 | COMMIT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_SUBMIT - -1 | ABORT; | QUERY_STATUS_DONE - -1 | BEGIN; | QUERY_STATUS_SUBMIT - -1 | BEGIN; | QUERY_STATUS_DONE - -1 | ROLLBACK; | QUERY_STATUS_SUBMIT - -1 | ROLLBACK; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(18 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- DML Operations -SET gpsc.logging_mode to 'TBL'; -CREATE TABLE dml_test (a int, b text); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-INSERT INTO dml_test VALUES (1, 'test'); -UPDATE dml_test SET b = 'updated' WHERE a = 1; -DELETE FROM dml_test WHERE a = 1; -DROP TABLE dml_test; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+----------------------------------------+--------------------- - -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE - -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(6 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- COPY Operations -SET gpsc.logging_mode to 'TBL'; -CREATE TABLE copy_test (a int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-COPY (SELECT 1) TO STDOUT; -1 -DROP TABLE copy_test; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+---------------------------------+--------------------- - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT - -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT - -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE - -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT - -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(8 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- Prepared Statements and error during execute -SET gpsc.logging_mode to 'TBL'; -PREPARE test_prep(int) AS SELECT $1/0 AS value; -EXECUTE test_prep(0::int); -ERROR: division by zero -DEALLOCATE test_prep; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+-------------------------------------------------+--------------------- - -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT - -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE - -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT - -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR - -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT - -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(8 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - --- GUC Settings -SET gpsc.logging_mode to 'TBL'; -SET gpsc.report_nested_queries TO FALSE; -RESET 
gpsc.report_nested_queries; -RESET gpsc.logging_mode; -SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; - segid | query_text | query_status --------+------------------------------------------+--------------------- - -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT - -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE - -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_DONE - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT - -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE -(6 rows) - -SELECT gpsc.truncate_log() IS NOT NULL AS t; - t ---- -(0 rows) - -DROP FUNCTION gpsc_status_order(text); -DROP EXTENSION gp_stats_collector; -RESET gpsc.enable; -RESET gpsc.report_nested_queries; -RESET gpsc.enable_utility; -RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/src/Config.cpp b/gpcontrib/gp_stats_collector/src/Config.cpp index 2f40b30e922..08a8d8cff86 100644 --- a/gpcontrib/gp_stats_collector/src/Config.cpp +++ b/gpcontrib/gp_stats_collector/src/Config.cpp @@ -26,11 +26,11 @@ */ #include "Config.h" -#include "memory/gpdbwrappers.h" #include #include #include #include +#include "memory/gpdbwrappers.h" extern "C" { #include "postgres.h" @@ -43,135 +43,149 @@ static bool guc_enable_cdbstats = true; static bool guc_enable_collector = false; static bool guc_report_nested_queries = true; static char *guc_ignored_users = nullptr; -static int guc_max_text_size = 1 << 20; // in bytes (1MB) -static int guc_max_plan_size = 1024; // in KB -static int guc_min_analyze_time = 10000; // in ms +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms static int guc_logging_mode = LOG_MODE_UDS; static bool guc_enable_utility = false; static const struct 
config_enum_entry logging_mode_options[] = { - {"uds", LOG_MODE_UDS, false /* hidden */}, - {"tbl", LOG_MODE_TBL, false}, - {NULL, 0, false}}; + {"uds", LOG_MODE_UDS, false /* hidden */}, + {"tbl", LOG_MODE_TBL, false}, + {NULL, 0, false}}; static bool ignored_users_guc_dirty = false; -static void assign_ignored_users_hook(const char *, void *) { - ignored_users_guc_dirty = true; +static void +assign_ignored_users_hook(const char *, void *) +{ + ignored_users_guc_dirty = true; } -void Config::init_gucs() { - DefineCustomStringVariable( - "gpsc.uds_path", "Sets filesystem path of the agent socket", 0LL, - &guc_uds_path, "/tmp/gpsc_agent.sock", PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - - DefineCustomBoolVariable( - "gpsc.enable", "Enable metrics collector", 0LL, &guc_enable_collector, - false, PGC_SUSET, GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - - DefineCustomBoolVariable( - "gpsc.enable_analyze", "Collect analyze metrics in gpsc", 0LL, - &guc_enable_analyze, true, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - - DefineCustomBoolVariable( - "gpsc.enable_cdbstats", "Collect CDB metrics in gpsc", 0LL, - &guc_enable_cdbstats, true, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - - DefineCustomBoolVariable( - "gpsc.report_nested_queries", "Collect stats on nested queries", 0LL, - &guc_report_nested_queries, true, PGC_USERSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); - - DefineCustomStringVariable("gpsc.ignored_users_list", - "Make gpsc ignore queries issued by given users", - 0LL, &guc_ignored_users, - "", PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, - assign_ignored_users_hook, 0LL); - - DefineCustomIntVariable( - "gpsc.max_text_size", - "Make gpsc trim query texts longer than configured size in bytes", NULL, - &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); - - 
DefineCustomIntVariable( - "gpsc.max_plan_size", - "Make gpsc trim plan longer than configured size", NULL, - &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); - - DefineCustomIntVariable( - "gpsc.min_analyze_time", - "Sets the minimum execution time above which plans will be logged.", - "Zero prints all plans. -1 turns this feature off.", - &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); - - DefineCustomEnumVariable( - "gpsc.logging_mode", "Logging mode: UDS or PG Table", NULL, - &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, - NULL); - - DefineCustomBoolVariable( - "gpsc.enable_utility", "Collect utility statement stats", NULL, - &guc_enable_utility, false, PGC_USERSET, - GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); +void +Config::init_gucs() +{ + DefineCustomStringVariable( + "gpsc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/gpsc_agent.sock", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable("gpsc.enable", "Enable metrics collector", 0LL, + &guc_enable_collector, false, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, + 0LL); + + DefineCustomBoolVariable( + "gpsc.enable_analyze", "Collect analyze metrics in gpsc", 0LL, + &guc_enable_analyze, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "gpsc.enable_cdbstats", "Collect CDB metrics in gpsc", 0LL, + &guc_enable_cdbstats, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "gpsc.report_nested_queries", "Collect stats on nested queries", 0LL, + &guc_report_nested_queries, true, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 
0LL, 0LL); + + DefineCustomStringVariable("gpsc.ignored_users_list", + "Make gpsc ignore queries issued by given users", + 0LL, &guc_ignored_users, "", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, + assign_ignored_users_hook, 0LL); + + DefineCustomIntVariable( + "gpsc.max_text_size", + "Make gpsc trim query texts longer than configured size in bytes", NULL, + &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); + + DefineCustomIntVariable( + "gpsc.max_plan_size", "Make gpsc trim plan longer than configured size", + NULL, &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "gpsc.min_analyze_time", + "Sets the minimum execution time above which plans will be logged.", + "Zero prints all plans. -1 turns this feature off.", + &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomEnumVariable( + "gpsc.logging_mode", "Logging mode: UDS or PG Table", NULL, + &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, + NULL); + + DefineCustomBoolVariable( + "gpsc.enable_utility", "Collect utility statement stats", NULL, + &guc_enable_utility, false, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); } -void Config::update_ignored_users(const char *new_guc_ignored_users) { - auto new_ignored_users_set = std::make_unique(); - if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') { - /* Need a modifiable copy of string */ - char *rawstring = gpdb::pstrdup(new_guc_ignored_users); - List *elemlist; - ListCell *l; - - /* Parse string into list of identifiers */ - if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) { - /* syntax error in list */ - 
gpdb::pfree(rawstring); - gpdb::list_free(elemlist); - ereport( - LOG, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg( - "invalid list syntax in parameter gpsc.ignored_users_list"))); - return; - } - foreach (l, elemlist) { - new_ignored_users_set->insert((char *)lfirst(l)); - } - gpdb::pfree(rawstring); - gpdb::list_free(elemlist); - } - ignored_users_ = std::move(new_ignored_users_set); +void +Config::update_ignored_users(const char *new_guc_ignored_users) +{ + auto new_ignored_users_set = std::make_unique(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') + { + /* Need a modifiable copy of string */ + char *rawstring = gpdb::pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter gpsc.ignored_users_list"))); + return; + } + foreach (l, elemlist) + { + new_ignored_users_set->insert((char *) lfirst(l)); + } + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); + } + ignored_users_ = std::move(new_ignored_users_set); } -bool Config::filter_user(const std::string &username) const { - if (!ignored_users_) { - return true; - } - return ignored_users_->find(username) != ignored_users_->end(); +bool +Config::filter_user(const std::string &username) const +{ + if (!ignored_users_) + { + return true; + } + return ignored_users_->find(username) != ignored_users_->end(); } -void Config::sync() { - if (ignored_users_guc_dirty) { - update_ignored_users(guc_ignored_users); - ignored_users_guc_dirty = false; - } - uds_path_ = guc_uds_path; - enable_analyze_ = guc_enable_analyze; - enable_cdbstats_ = guc_enable_cdbstats; - enable_collector_ = guc_enable_collector; - enable_utility_ = guc_enable_utility; - report_nested_queries_ = 
guc_report_nested_queries; - max_text_size_ = guc_max_text_size; - max_plan_size_ = guc_max_plan_size; - min_analyze_time_ = guc_min_analyze_time; - logging_mode_ = guc_logging_mode; +void +Config::sync() +{ + if (ignored_users_guc_dirty) + { + update_ignored_users(guc_ignored_users); + ignored_users_guc_dirty = false; + } + uds_path_ = guc_uds_path; + enable_analyze_ = guc_enable_analyze; + enable_cdbstats_ = guc_enable_cdbstats; + enable_collector_ = guc_enable_collector; + enable_utility_ = guc_enable_utility; + report_nested_queries_ = guc_report_nested_queries; + max_text_size_ = guc_max_text_size; + max_plan_size_ = guc_max_plan_size; + min_analyze_time_ = guc_min_analyze_time; + logging_mode_ = guc_logging_mode; } diff --git a/gpcontrib/gp_stats_collector/src/Config.h b/gpcontrib/gp_stats_collector/src/Config.h index 91a1ffe44f2..259799e5135 100644 --- a/gpcontrib/gp_stats_collector/src/Config.h +++ b/gpcontrib/gp_stats_collector/src/Config.h @@ -25,7 +25,8 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef CONFIG_H +#define CONFIG_H #include #include @@ -36,36 +37,79 @@ using IgnoredUsers = std::unordered_set; -class Config { +class Config +{ public: - static void init_gucs(); + static void init_gucs(); - void sync(); + void sync(); - const std::string &uds_path() const { return uds_path_; } - bool enable_analyze() const { return enable_analyze_; } - bool enable_cdbstats() const { return enable_cdbstats_; } - bool enable_collector() const { return enable_collector_; } - bool enable_utility() const { return enable_utility_; } - bool report_nested_queries() const { return report_nested_queries_; } - int max_text_size() const { return max_text_size_; } - int max_plan_size() const { return max_plan_size_ * 1024; } - int min_analyze_time() const { return min_analyze_time_; } - int logging_mode() const { return logging_mode_; } - bool filter_user(const std::string &username) const; + const std::string & + 
uds_path() const + { + return uds_path_; + } + bool + enable_analyze() const + { + return enable_analyze_; + } + bool + enable_cdbstats() const + { + return enable_cdbstats_; + } + bool + enable_collector() const + { + return enable_collector_; + } + bool + enable_utility() const + { + return enable_utility_; + } + bool + report_nested_queries() const + { + return report_nested_queries_; + } + int + max_text_size() const + { + return max_text_size_; + } + int + max_plan_size() const + { + return max_plan_size_ * 1024; + } + int + min_analyze_time() const + { + return min_analyze_time_; + } + int + logging_mode() const + { + return logging_mode_; + } + bool filter_user(const std::string &username) const; private: - void update_ignored_users(const char *new_guc_ignored_users); + void update_ignored_users(const char *new_guc_ignored_users); - std::unique_ptr ignored_users_; - std::string uds_path_; - bool enable_analyze_; - bool enable_cdbstats_; - bool enable_collector_; - bool enable_utility_; - bool report_nested_queries_; - int max_text_size_; - int max_plan_size_; - int min_analyze_time_; - int logging_mode_; + std::unique_ptr ignored_users_; + std::string uds_path_; + bool enable_analyze_; + bool enable_cdbstats_; + bool enable_collector_; + bool enable_utility_; + bool report_nested_queries_; + int max_text_size_; + int max_plan_size_; + int min_analyze_time_; + int logging_mode_; }; + +#endif /* CONFIG_H */ diff --git a/gpcontrib/gp_stats_collector/src/EventSender.cpp b/gpcontrib/gp_stats_collector/src/EventSender.cpp index c0faaf0ad0e..0bc44c1198d 100644 --- a/gpcontrib/gp_stats_collector/src/EventSender.cpp +++ b/gpcontrib/gp_stats_collector/src/EventSender.cpp @@ -26,8 +26,8 @@ */ #include "UDSConnector.h" -#include "memory/gpdbwrappers.h" #include "log/LogOps.h" +#include "memory/gpdbwrappers.h" #define typeid __typeid extern "C" { @@ -47,487 +47,599 @@ extern "C" { #include "PgUtils.h" #include "ProtoUtils.h" -#define need_collect_analyze() \ - (Gp_role 
== GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ - config.enable_analyze()) - -bool EventSender::verify_query(QueryDesc *query_desc, QueryState state, - bool utility) { - if (!proto_verified) { - return false; - } - if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) { - return false; - } - - switch (state) { - case QueryState::SUBMIT: - // Cache GUCs once at SUBMIT. Synced GUCs are visible to all subsequent - // states. Without caching, a query that unsets/sets filtering GUCs would - // see different filter criteria at DONE, because at SUBMIT the query was - // not executed yet, causing DONE to be skipped/added. - config.sync(); - - if (!config.enable_collector()) { - return false; - } - - if (utility && !config.enable_utility()) { - return false; - } - - // Register qkey for a nested query we won't report, - // so we can detect nesting_level > 0 and skip reporting at end/done. - if (!need_report_nested_query() && nesting_level > 0) { - QueryKey::register_qkey(query_desc, nesting_level); - return false; - } - if (is_top_level_query(query_desc, nesting_level)) { - nested_timing = 0; - nested_calls = 0; - } - break; - case QueryState::START: - if (!qdesc_submitted(query_desc)) { - collect_query_submit(query_desc, false /* utility */); - } - break; - case QueryState::DONE: - if (utility && !config.enable_utility()) { - return false; - } - default: - break; - } - - if (filter_query(query_desc)) { - return false; - } - if (!nesting_is_valid(query_desc, nesting_level)) { - return false; - } - - return true; +#define need_collect_analyze() \ + (Gp_role == GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ + config.enable_analyze()) + +bool +EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool utility) +{ + if (!proto_verified) + { + return false; + } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) + { + return false; + } + + switch (state) + { + case QueryState::SUBMIT: + // Cache GUCs once at SUBMIT. 
Synced GUCs are visible to all subsequent + // states. Without caching, a query that unsets/sets filtering GUCs would + // see different filter criteria at DONE, because at SUBMIT the query was + // not executed yet, causing DONE to be skipped/added. + config.sync(); + + if (!config.enable_collector()) + { + return false; + } + + if (utility && !config.enable_utility()) + { + return false; + } + + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. + if (!need_report_nested_query() && nesting_level > 0) + { + QueryKey::register_qkey(query_desc, nesting_level); + return false; + } + if (is_top_level_query(query_desc, nesting_level)) + { + nested_timing = 0; + nested_calls = 0; + } + break; + case QueryState::START: + if (!qdesc_submitted(query_desc)) + { + collect_query_submit(query_desc, false /* utility */); + } + break; + case QueryState::DONE: + if (utility && !config.enable_utility()) + { + return false; + } + default: + break; + } + + if (filter_query(query_desc)) + { + return false; + } + if (!nesting_is_valid(query_desc, nesting_level)) + { + return false; + } + + return true; } -bool EventSender::log_query_req(const gpsc::SetQueryReq &req, - const std::string &event, bool utility) { - bool clear_big_fields = false; - switch (config.logging_mode()) { - case LOG_MODE_UDS: - clear_big_fields = UDSConnector::report_query(req, event, config); - break; - case LOG_MODE_TBL: - gpdb::insert_log(req, utility); - clear_big_fields = false; - break; - default: - Assert(false); - } - return clear_big_fields; +bool +EventSender::log_query_req(const gpsc::SetQueryReq &req, + const std::string &event, bool utility) +{ + bool clear_big_fields = false; + switch (config.logging_mode()) + { + case LOG_MODE_UDS: + clear_big_fields = UDSConnector::report_query(req, event, config); + break; + case LOG_MODE_TBL: + gpdb::insert_log(req, utility); + clear_big_fields = false; + break; + default: + Assert(false); 
+ } + return clear_big_fields; } -void EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg, - bool utility, ErrorData *edata) { - auto *query_desc = reinterpret_cast(arg); - switch (status) { - case METRICS_PLAN_NODE_INITIALIZE: - case METRICS_PLAN_NODE_EXECUTING: - case METRICS_PLAN_NODE_FINISHED: - // TODO - break; - case METRICS_QUERY_SUBMIT: - collect_query_submit(query_desc, utility); - break; - case METRICS_QUERY_START: - // no-op: executor_after_start is enough - break; - case METRICS_QUERY_CANCELING: - // it appears we're only interested in the actual CANCELED event. - // for now we will ignore CANCELING state unless otherwise requested from - // end users - break; - case METRICS_QUERY_DONE: - case METRICS_QUERY_ERROR: - case METRICS_QUERY_CANCELED: - case METRICS_INNER_QUERY_DONE: - collect_query_done(query_desc, utility, status, edata); - break; - default: - ereport(FATAL, (errmsg("Unknown query status: %d", status))); - } +void +EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData *edata) +{ + auto *query_desc = reinterpret_cast(arg); + switch (status) + { + case METRICS_PLAN_NODE_INITIALIZE: + case METRICS_PLAN_NODE_EXECUTING: + case METRICS_PLAN_NODE_FINISHED: + // TODO + break; + case METRICS_QUERY_SUBMIT: + collect_query_submit(query_desc, utility); + break; + case METRICS_QUERY_START: + // no-op: executor_after_start is enough + break; + case METRICS_QUERY_CANCELING: + // it appears we're only interested in the actual CANCELED event. 
+ // for now we will ignore CANCELING state unless otherwise requested from + // end users + break; + case METRICS_QUERY_DONE: + case METRICS_QUERY_ERROR: + case METRICS_QUERY_CANCELED: + case METRICS_INNER_QUERY_DONE: + collect_query_done(query_desc, utility, status, edata); + break; + default: + ereport(ERROR, (errmsg("Unknown query status: %d", status))); + } } -void EventSender::executor_before_start(QueryDesc *query_desc, int eflags) { - if (!verify_query(query_desc, QueryState::START, false /* utility*/)) { - return; - } - - if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && - (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { - query_desc->instrument_options |= INSTRUMENT_BUFFERS; - query_desc->instrument_options |= INSTRUMENT_ROWS; - query_desc->instrument_options |= INSTRUMENT_TIMER; - if (config.enable_cdbstats()) { - query_desc->instrument_options |= INSTRUMENT_CDB; - if (!query_desc->showstatctx) { - instr_time starttime; - INSTR_TIME_SET_CURRENT(starttime); - query_desc->showstatctx = - gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); - } - } - } +void +EventSender::executor_before_start(QueryDesc *query_desc, int eflags) +{ + if (!verify_query(query_desc, QueryState::START, false /* utility*/)) + { + return; + } + + if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && + (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) + { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + if (config.enable_cdbstats()) + { + query_desc->instrument_options |= INSTRUMENT_CDB; + if (!query_desc->showstatctx) + { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + } + } + } } -void EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) { - if (!verify_query(query_desc, QueryState::START, false /* utility */)) { - return; - } - - 
auto &query = get_query(query_desc); - auto query_msg = query.message.get(); - *query_msg->mutable_start_time() = current_ts(); - update_query_state(query, QueryState::START, false /* utility */); - set_query_plan(query_msg, query_desc, config); - if (need_collect_analyze()) { - // Set up to track total elapsed time during query run. - // Make sure the space is allocated in the per-query - // context so it will go away at executor_end. - if (query_desc->totaltime == NULL) { - MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL, false); - gpdb::mem_ctx_switch_to(oldcxt); - } - } - gpsc::GPMetrics stats; - std::swap(stats, *query_msg->mutable_query_metrics()); - if (log_query_req(*query_msg, "started", false /* utility */)) { - clear_big_fields(query_msg); - } - std::swap(stats, *query_msg->mutable_query_metrics()); +void +EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) +{ + if (!verify_query(query_desc, QueryState::START, false /* utility */)) + { + return; + } + + auto &query = get_query(query_desc); + auto query_msg = query.message.get(); + *query_msg->mutable_start_time() = current_ts(); + update_query_state(query, QueryState::START, false /* utility */); + set_query_plan(query_msg, query_desc, config); + if (need_collect_analyze()) + { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. 
+ if (query_desc->totaltime == NULL) + { + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL, false); + gpdb::mem_ctx_switch_to(oldcxt); + } + } + gpsc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (log_query_req(*query_msg, "started", false /* utility */)) + { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); } -void EventSender::executor_end(QueryDesc *query_desc) { - if (!verify_query(query_desc, QueryState::END, false /* utility */)) { - return; - } - - auto &query = get_query(query_desc); - auto *query_msg = query.message.get(); - *query_msg->mutable_end_time() = current_ts(); - update_query_state(query, QueryState::END, false /* utility */); - if (is_top_level_query(query_desc, nesting_level)) { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, - nested_timing); - } else { - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); - } - if (log_query_req(*query_msg, "ended", false /* utility */)) { - clear_big_fields(query_msg); - } +void +EventSender::executor_end(QueryDesc *query_desc) +{ + if (!verify_query(query_desc, QueryState::END, false /* utility */)) + { + return; + } + + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg->mutable_end_time() = current_ts(); + update_query_state(query, QueryState::END, false /* utility */); + if (is_top_level_query(query_desc, nesting_level)) + { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } + else + { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (log_query_req(*query_msg, "ended", false /* utility */)) + { + clear_big_fields(query_msg); + } } -void EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) { - if (!verify_query(query_desc, QueryState::SUBMIT, 
utility)) { - return; - } - - submit_query(query_desc); - auto &query = get_query(query_desc); - auto *query_msg = query.message.get(); - *query_msg = create_query_req(gpsc::QueryStatus::QUERY_STATUS_SUBMIT); - *query_msg->mutable_submit_time() = current_ts(); - set_query_info(query_msg); - set_qi_nesting_level(query_msg, nesting_level); - set_qi_slice_id(query_msg); - set_query_text(query_msg, query_desc, config); - if (log_query_req(*query_msg, "submit", utility)) { - clear_big_fields(query_msg); - } - // take initial metrics snapshot so that we can safely take diff afterwards - // in END or DONE events. - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); +void +EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) +{ + if (!verify_query(query_desc, QueryState::SUBMIT, utility)) + { + return; + } + + submit_query(query_desc); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg = create_query_req(gpsc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg); + set_qi_nesting_level(query_msg, nesting_level); + set_qi_slice_id(query_msg); + set_query_text(query_msg, query_desc, config); + if (log_query_req(*query_msg, "submit", utility)) + { + clear_big_fields(query_msg); + } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. 
+ set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); #ifdef IC_TEARDOWN_HOOK - // same for interconnect statistics - ic_metrics_collect(); - set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); #endif } -void EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status, bool utility, - ErrorData *edata) { - gpsc::QueryStatus query_status; - std::string msg; - switch (status) { - case METRICS_QUERY_DONE: - case METRICS_INNER_QUERY_DONE: - query_status = gpsc::QueryStatus::QUERY_STATUS_DONE; - msg = "done"; - break; - case METRICS_QUERY_ERROR: - query_status = gpsc::QueryStatus::QUERY_STATUS_ERROR; - msg = "error"; - break; - case METRICS_QUERY_CANCELING: - // at the moment we don't track this event, but I`ll leave this code - // here just in case - Assert(false); - query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELLING; - msg = "cancelling"; - break; - case METRICS_QUERY_CANCELED: - query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELED; - msg = "cancelled"; - break; - default: - ereport(FATAL, - (errmsg("Unexpected query status in query_done hook: %d", status))); - } - auto prev_state = query.state; - update_query_state(query, QueryState::DONE, utility, - query_status == gpsc::QueryStatus::QUERY_STATUS_DONE); - auto query_msg = query.message.get(); - query_msg->set_query_status(query_status); - if (status == METRICS_QUERY_ERROR) { - bool error_flushed = elog_message() == NULL; - if (error_flushed && (edata == NULL || edata->message == NULL)) { - ereport(WARNING, (errmsg("GPSC missing error message"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - } else { - set_qi_error_message( - query_msg, error_flushed ? 
edata->message : elog_message(), config); - } - } - if (prev_state == START) { - // We've missed ExecutorEnd call due to query cancel or error. It's - // fine, but now we need to collect and report execution stats - *query_msg->mutable_end_time() = current_ts(); - set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, nested_calls, - nested_timing); - } +void +EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status, bool utility, + ErrorData *edata) +{ + gpsc::QueryStatus query_status; + std::string msg; + switch (status) + { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = gpsc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = gpsc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(ERROR, + (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + auto prev_state = query.state; + update_query_state(query, QueryState::DONE, utility, + query_status == gpsc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query.message.get(); + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) + { + bool error_flushed = elog_message() == NULL; + if (error_flushed && (edata == NULL || edata->message == NULL)) + { + ereport(WARNING, (errmsg("GPSC missing error message"))); + ereport(DEBUG3, (errmsg("GPSC query sourceText: %s", + query_desc->sourceText))); + } + else + { + set_qi_error_message( + query_msg, error_flushed ? 
edata->message : elog_message(), + config); + } + } + if (prev_state == START) + { + // We've missed ExecutorEnd call due to query cancel or error. It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } #ifdef IC_TEARDOWN_HOOK - ic_metrics_collect(); - set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), - &ic_statistics); + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); #endif - (void)log_query_req(*query_msg, msg, utility); + (void) log_query_req(*query_msg, msg, utility); } -void EventSender::collect_query_done(QueryDesc *query_desc, bool utility, - QueryMetricsStatus status, - ErrorData *edata) { - if (!verify_query(query_desc, QueryState::DONE, utility)) { - return; - } - - // Skip sending done message if query errored before submit. 
- if (!qdesc_submitted(query_desc)) { - if (status != METRICS_QUERY_ERROR) { - ereport(WARNING, (errmsg("GPSC trying to process DONE hook for " - "unsubmitted and unerrored query"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - } - return; - } - - if (queries.empty()) { - ereport(WARNING, (errmsg("GPSC cannot find query to process DONE hook"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - return; - } - auto &query = get_query(query_desc); - - report_query_done(query_desc, query, status, utility, edata); - - if (need_report_nested_query()) - update_nested_counters(query_desc); - - queries.erase(QueryKey::from_qdesc(query_desc)); - pfree(query_desc->gpsc_query_key); - query_desc->gpsc_query_key = NULL; +void +EventSender::collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata) +{ + if (!verify_query(query_desc, QueryState::DONE, utility)) + { + return; + } + + // Skip sending done message if query errored before submit. 
+ if (!qdesc_submitted(query_desc)) + { + if (status != METRICS_QUERY_ERROR) + { + ereport(WARNING, (errmsg("GPSC trying to process DONE hook for " + "unsubmitted and unerrored query"))); + ereport(DEBUG3, (errmsg("GPSC query sourceText: %s", + query_desc->sourceText))); + } + return; + } + + if (queries.empty()) + { + ereport(WARNING, + (errmsg("GPSC cannot find query to process DONE hook"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + return; + } + auto &query = get_query(query_desc); + + report_query_done(query_desc, query, status, utility, edata); + + if (need_report_nested_query()) + update_nested_counters(query_desc); + + queries.erase(QueryKey::from_qdesc(query_desc)); + pfree(query_desc->gpsc_query_key); + query_desc->gpsc_query_key = NULL; } -void EventSender::ic_metrics_collect() { +void +EventSender::ic_metrics_collect() +{ #ifdef IC_TEARDOWN_HOOK - if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) { - return; - } - if (!proto_verified || gp_command_count == 0 || !config.enable_collector() || - config.filter_user(get_user_name())) { - return; - } - // we also would like to know nesting level here and filter queries BUT we - // don't have this kind of information from this callback. 
Will have to - // collect stats anyways and throw it away later, if necessary - auto metrics = UDPIFCGetICStats(); - ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; - ic_statistics.recvQueueSizeCountingTime += metrics.recvQueueSizeCountingTime; - ic_statistics.totalCapacity += metrics.totalCapacity; - ic_statistics.capacityCountingTime += metrics.capacityCountingTime; - ic_statistics.totalBuffers += metrics.totalBuffers; - ic_statistics.bufferCountingTime += metrics.bufferCountingTime; - ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; - ic_statistics.retransmits += metrics.retransmits; - ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; - ic_statistics.mismatchNum += metrics.mismatchNum; - ic_statistics.crcErrors += metrics.crcErrors; - ic_statistics.sndPktNum += metrics.sndPktNum; - ic_statistics.recvPktNum += metrics.recvPktNum; - ic_statistics.disorderedPktNum += metrics.disorderedPktNum; - ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; - ic_statistics.recvAckNum += metrics.recvAckNum; - ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; + if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) + { + return; + } + if (!proto_verified || gp_command_count == 0 || + !config.enable_collector() || config.filter_user(get_user_name())) + { + return; + } + // we also would like to know nesting level here and filter queries BUT we + // don't have this kind of information from this callback. 
Will have to + // collect stats anyways and throw it away later, if necessary + auto metrics = UDPIFCGetICStats(); + ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; + ic_statistics.recvQueueSizeCountingTime += + metrics.recvQueueSizeCountingTime; + ic_statistics.totalCapacity += metrics.totalCapacity; + ic_statistics.capacityCountingTime += metrics.capacityCountingTime; + ic_statistics.totalBuffers += metrics.totalBuffers; + ic_statistics.bufferCountingTime += metrics.bufferCountingTime; + ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; + ic_statistics.retransmits += metrics.retransmits; + ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; + ic_statistics.mismatchNum += metrics.mismatchNum; + ic_statistics.crcErrors += metrics.crcErrors; + ic_statistics.sndPktNum += metrics.sndPktNum; + ic_statistics.recvPktNum += metrics.recvPktNum; + ic_statistics.disorderedPktNum += metrics.disorderedPktNum; + ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; + ic_statistics.recvAckNum += metrics.recvAckNum; + ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; #endif } -void EventSender::analyze_stats_collect(QueryDesc *query_desc) { - if (!verify_query(query_desc, QueryState::END, false /* utility */)) { - return; - } - if (Gp_role != GP_ROLE_DISPATCH) { - return; - } - if (!query_desc->totaltime || !need_collect_analyze()) { - return; - } - // Make sure stats accumulation is done. - // (Note: it's okay if several levels of hook all do this.) 
- gpdb::instr_end_loop(query_desc->totaltime); - - double ms = query_desc->totaltime->total * 1000.0; - if (ms >= config.min_analyze_time()) { - auto &query = get_query(query_desc); - auto *query_msg = query.message.get(); - set_analyze_plan_text(query_desc, query_msg, config); - } +void +EventSender::analyze_stats_collect(QueryDesc *query_desc) +{ + if (!verify_query(query_desc, QueryState::END, false /* utility */)) + { + return; + } + if (Gp_role != GP_ROLE_DISPATCH) + { + return; + } + if (!query_desc->totaltime || !need_collect_analyze()) + { + return; + } + // Make sure stats accumulation is done. + // (Note: it's okay if several levels of hook all do this.) + gpdb::instr_end_loop(query_desc->totaltime); + + double ms = query_desc->totaltime->total * 1000.0; + if (ms >= config.min_analyze_time()) + { + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + set_analyze_plan_text(query_desc, query_msg, config); + } } -EventSender::EventSender() { - // Perform initial sync to get default GUC values - config.sync(); - - try { - GOOGLE_PROTOBUF_VERIFY_VERSION; - proto_verified = true; - } catch (const std::exception &e) { - ereport(INFO, (errmsg("GPSC protobuf version mismatch is detected %s", e.what()))); - } +EventSender::EventSender() +{ + // Perform initial sync to get default GUC values + config.sync(); + + try + { + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; + } + catch (const std::exception &e) + { + ereport(INFO, (errmsg("GPSC protobuf version mismatch is detected %s", + e.what()))); + } #ifdef IC_TEARDOWN_HOOK - memset(&ic_statistics, 0, sizeof(ICStatistics)); + memset(&ic_statistics, 0, sizeof(ICStatistics)); #endif } -EventSender::~EventSender() { - for (const auto &[qkey, _] : queries) { - ereport(LOG, (errmsg("GPSC query with missing done event: " - "tmid=%d ssid=%d ccnt=%d nlvl=%d", - qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); - } +EventSender::~EventSender() +{ + for (const auto &[qkey, _] : 
queries) + { + ereport(LOG, + (errmsg("GPSC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + } } // That's basically a very simplistic state machine to fix or highlight any bugs // coming from GP -void EventSender::update_query_state(QueryItem &query, QueryState new_state, - bool utility, bool success) { - switch (new_state) { - case QueryState::SUBMIT: - Assert(false); - break; - case QueryState::START: - if (query.state == QueryState::SUBMIT) { - query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_START); - } else { - Assert(false); - } - break; - case QueryState::END: - // Example of below assert triggering: CURSOR closes before ever being - // executed Assert(query->state == QueryState::START || - // IsAbortInProgress()); - query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_END); - break; - case QueryState::DONE: - Assert(query.state == QueryState::END || !success || utility); - query.message->set_query_status(gpsc::QueryStatus::QUERY_STATUS_DONE); - break; - default: - Assert(false); - } - query.state = new_state; +void +EventSender::update_query_state(QueryItem &query, QueryState new_state, + bool utility, bool success) +{ + switch (new_state) + { + case QueryState::SUBMIT: + Assert(false); + break; + case QueryState::START: + if (query.state == QueryState::SUBMIT) + { + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_START); + } + else + { + Assert(false); + } + break; + case QueryState::END: + // Example of below assert triggering: CURSOR closes before ever being + // executed Assert(query->state == QueryState::START || + // IsAbortInProgress()); + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_END); + break; + case QueryState::DONE: + Assert(query.state == QueryState::END || !success || utility); + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_DONE); + break; + default: + Assert(false); + } + 
query.state = new_state; } -EventSender::QueryItem &EventSender::get_query(QueryDesc *query_desc) { - if (!qdesc_submitted(query_desc)) { - ereport(WARNING, - (errmsg("GPSC attempting to get query that was not submitted"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - throw std::runtime_error("Attempting to get query that was not submitted"); - } - return queries.find(QueryKey::from_qdesc(query_desc))->second; +EventSender::QueryItem & +EventSender::get_query(QueryDesc *query_desc) +{ + if (!qdesc_submitted(query_desc)) + { + ereport( + WARNING, + (errmsg("GPSC attempting to get query that was not submitted"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + throw std::runtime_error( + "Attempting to get query that was not submitted"); + } + return queries.find(QueryKey::from_qdesc(query_desc))->second; } -void EventSender::submit_query(QueryDesc *query_desc) { - if (query_desc->gpsc_query_key) { - ereport(WARNING, - (errmsg("GPSC trying to submit already submitted query"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - } - QueryKey::register_qkey(query_desc, nesting_level); - auto key = QueryKey::from_qdesc(query_desc); - auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); - if (!inserted) { - ereport(WARNING, (errmsg("GPSC duplicate query submit detected"))); - ereport(DEBUG3, - (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); - } +void +EventSender::submit_query(QueryDesc *query_desc) +{ + if (query_desc->gpsc_query_key) + { + ereport(WARNING, + (errmsg("GPSC trying to submit already submitted query"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + } + QueryKey::register_qkey(query_desc, nesting_level); + auto key = QueryKey::from_qdesc(query_desc); + auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); + if (!inserted) + { + ereport(WARNING, 
(errmsg("GPSC duplicate query submit detected"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + } } -void EventSender::update_nested_counters(QueryDesc *query_desc) { - if (!is_top_level_query(query_desc, nesting_level)) { - auto &query = get_query(query_desc); - nested_calls++; - double end_time = protots_to_double(query.message->end_time()); - double start_time = protots_to_double(query.message->start_time()); - if (end_time >= start_time) { - nested_timing += end_time - start_time; - } else { - ereport(WARNING, (errmsg("GPSC query start_time > end_time (%f > %f)", - start_time, end_time))); - ereport(DEBUG3, - (errmsg("GPSC nested query text %s", query_desc->sourceText))); - } - } +void +EventSender::update_nested_counters(QueryDesc *query_desc) +{ + if (!is_top_level_query(query_desc, nesting_level)) + { + auto &query = get_query(query_desc); + nested_calls++; + double end_time = protots_to_double(query.message->end_time()); + double start_time = protots_to_double(query.message->start_time()); + if (end_time >= start_time) + { + nested_timing += end_time - start_time; + } + else + { + ereport(WARNING, + (errmsg("GPSC query start_time > end_time (%f > %f)", + start_time, end_time))); + ereport(DEBUG3, (errmsg("GPSC nested query text %s", + query_desc->sourceText))); + } + } } -bool EventSender::qdesc_submitted(QueryDesc *query_desc) { - if (query_desc->gpsc_query_key == NULL) { - return false; - } - return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); +bool +EventSender::qdesc_submitted(QueryDesc *query_desc) +{ + if (query_desc->gpsc_query_key == NULL) + { + return false; + } + return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); } -bool EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) { - return need_report_nested_query() || - is_top_level_query(query_desc, nesting_level); +bool +EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) +{ + 
return need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); } -bool EventSender::need_report_nested_query() { - return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; +bool +EventSender::need_report_nested_query() +{ + return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; } -bool EventSender::filter_query(QueryDesc *query_desc) { - return gp_command_count == 0 || query_desc->sourceText == nullptr || - !config.enable_collector() || config.filter_user(get_user_name()); +bool +EventSender::filter_query(QueryDesc *query_desc) +{ + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !config.enable_collector() || config.filter_user(get_user_name()); } EventSender::QueryItem::QueryItem(QueryState st) - : message(std::make_unique()), state(st) {} + : message(std::make_unique()), state(st) +{ +} diff --git a/gpcontrib/gp_stats_collector/src/EventSender.h b/gpcontrib/gp_stats_collector/src/EventSender.h index 154c2c0dceb..2651a020593 100644 --- a/gpcontrib/gp_stats_collector/src/EventSender.h +++ b/gpcontrib/gp_stats_collector/src/EventSender.h @@ -25,11 +25,12 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef EVENTSENDER_H +#define EVENTSENDER_H #include -#include #include +#include #define typeid __typeid extern "C" { @@ -40,12 +41,13 @@ extern "C" { } #undef typeid -#include "memory/gpdbwrappers.h" #include "Config.h" +#include "memory/gpdbwrappers.h" class UDSConnector; struct QueryDesc; -namespace gpsc { +namespace gpsc +{ class SetQueryReq; } @@ -53,116 +55,149 @@ class SetQueryReq; extern void gp_gettmid(int32 *); -struct QueryKey { - int tmid; - int ssid; - int ccnt; - int nesting_level; - uintptr_t query_desc_addr; - - bool operator==(const QueryKey &other) const { - return std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == - std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, - other.query_desc_addr); - } - - 
static void register_qkey(QueryDesc *query_desc, size_t nesting_level) { - query_desc->gpsc_query_key = - (GpscQueryKey *)gpdb::palloc0(sizeof(GpscQueryKey)); - int32 tmid; - gp_gettmid(&tmid); - query_desc->gpsc_query_key->tmid = tmid; - query_desc->gpsc_query_key->ssid = gp_session_id; - query_desc->gpsc_query_key->ccnt = gp_command_count; - query_desc->gpsc_query_key->nesting_level = nesting_level; - query_desc->gpsc_query_key->query_desc_addr = (uintptr_t)query_desc; - } - - static QueryKey from_qdesc(QueryDesc *query_desc) { - return { - .tmid = query_desc->gpsc_query_key->tmid, - .ssid = query_desc->gpsc_query_key->ssid, - .ccnt = query_desc->gpsc_query_key->ccnt, - .nesting_level = query_desc->gpsc_query_key->nesting_level, - .query_desc_addr = query_desc->gpsc_query_key->query_desc_addr, - }; - } +struct QueryKey +{ + int tmid; + int ssid; + int ccnt; + int nesting_level; + uintptr_t query_desc_addr; + + bool + operator==(const QueryKey &other) const + { + return std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == + std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, + other.query_desc_addr); + } + + static void + register_qkey(QueryDesc *query_desc, size_t nesting_level) + { + query_desc->gpsc_query_key = + (GpscQueryKey *) gpdb::palloc0(sizeof(GpscQueryKey)); + int32 tmid; + gp_gettmid(&tmid); + query_desc->gpsc_query_key->tmid = tmid; + query_desc->gpsc_query_key->ssid = gp_session_id; + query_desc->gpsc_query_key->ccnt = gp_command_count; + query_desc->gpsc_query_key->nesting_level = nesting_level; + query_desc->gpsc_query_key->query_desc_addr = (uintptr_t) query_desc; + } + + static QueryKey + from_qdesc(QueryDesc *query_desc) + { + return { + .tmid = query_desc->gpsc_query_key->tmid, + .ssid = query_desc->gpsc_query_key->ssid, + .ccnt = query_desc->gpsc_query_key->ccnt, + .nesting_level = query_desc->gpsc_query_key->nesting_level, + .query_desc_addr = query_desc->gpsc_query_key->query_desc_addr, + }; + } }; // 
https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html -template inline void hash_combine(std::size_t &seed, const T &v) { - std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +template +inline void +hash_combine(std::size_t &seed, const T &v) +{ + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } -namespace std { -template <> struct hash { - size_t operator()(const QueryKey &k) const noexcept { - size_t seed = hash{}(k.tmid); - hash_combine(seed, k.ssid); - hash_combine(seed, k.ccnt); - hash_combine(seed, k.nesting_level); - uintptr_t addr = k.query_desc_addr; - if constexpr (SIZE_MAX < UINTPTR_MAX) { - addr %= SIZE_MAX; - } - hash_combine(seed, addr); - return seed; - } +namespace std +{ +template <> +struct hash +{ + size_t + operator()(const QueryKey &k) const noexcept + { + size_t seed = hash{}(k.tmid); + hash_combine(seed, k.ssid); + hash_combine(seed, k.ccnt); + hash_combine(seed, k.nesting_level); + uintptr_t addr = k.query_desc_addr; + if constexpr (SIZE_MAX < UINTPTR_MAX) + { + addr %= SIZE_MAX; + } + hash_combine(seed, addr); + return seed; + } }; -} // namespace std +} // namespace std -class EventSender { +class EventSender +{ public: - void executor_before_start(QueryDesc *query_desc, int eflags); - void executor_after_start(QueryDesc *query_desc, int eflags); - void executor_end(QueryDesc *query_desc); - void query_metrics_collect(QueryMetricsStatus status, void *arg, bool utility, - ErrorData *edata = NULL); - void ic_metrics_collect(); - void analyze_stats_collect(QueryDesc *query_desc); - void incr_depth() { nesting_level++; } - void decr_depth() { nesting_level--; } - EventSender(); - ~EventSender(); + void executor_before_start(QueryDesc *query_desc, int eflags); + void executor_after_start(QueryDesc *query_desc, int eflags); + void executor_end(QueryDesc *query_desc); + void query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData 
*edata = NULL); + void ic_metrics_collect(); + void analyze_stats_collect(QueryDesc *query_desc); + void + incr_depth() + { + nesting_level++; + } + void + decr_depth() + { + nesting_level--; + } + EventSender(); + ~EventSender(); private: - enum QueryState { SUBMIT, START, END, DONE }; - - struct QueryItem { - std::unique_ptr message; - QueryState state; - - explicit QueryItem(QueryState st); - }; - - bool log_query_req(const gpsc::SetQueryReq &req, const std::string &event, - bool utility); - bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); - void update_query_state(QueryItem &query, QueryState new_state, bool utility, - bool success = true); - QueryItem &get_query(QueryDesc *query_desc); - void submit_query(QueryDesc *query_desc); - void collect_query_submit(QueryDesc *query_desc, bool utility); - void report_query_done(QueryDesc *query_desc, QueryItem &query, - QueryMetricsStatus status, bool utility, - ErrorData *edata = NULL); - void collect_query_done(QueryDesc *query_desc, bool utility, - QueryMetricsStatus status, ErrorData *edata = NULL); - void update_nested_counters(QueryDesc *query_desc); - bool qdesc_submitted(QueryDesc *query_desc); - bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); - bool need_report_nested_query(); - bool filter_query(QueryDesc *query_desc); - - bool proto_verified = false; - int nesting_level = 0; - int64_t nested_calls = 0; - double nested_timing = 0; + enum QueryState + { + SUBMIT, + START, + END, + DONE + }; + + struct QueryItem + { + std::unique_ptr message; + QueryState state; + + explicit QueryItem(QueryState st); + }; + + bool log_query_req(const gpsc::SetQueryReq &req, const std::string &event, + bool utility); + bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); + void update_query_state(QueryItem &query, QueryState new_state, + bool utility, bool success = true); + QueryItem &get_query(QueryDesc *query_desc); + void submit_query(QueryDesc *query_desc); + 
void collect_query_submit(QueryDesc *query_desc, bool utility); + void report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status, bool utility, + ErrorData *edata = NULL); + void collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata = NULL); + void update_nested_counters(QueryDesc *query_desc); + bool qdesc_submitted(QueryDesc *query_desc); + bool nesting_is_valid(QueryDesc *query_desc, int nesting_level); + bool need_report_nested_query(); + bool filter_query(QueryDesc *query_desc); + + bool proto_verified = false; + int nesting_level = 0; + int64_t nested_calls = 0; + double nested_timing = 0; #ifdef IC_TEARDOWN_HOOK - ICStatistics ic_statistics; + ICStatistics ic_statistics; #endif - std::unordered_map queries; + std::unordered_map queries; - Config config; -}; \ No newline at end of file + Config config; +}; +#endif /* EVENTSENDER_H */ diff --git a/gpcontrib/gp_stats_collector/src/GpscStat.cpp b/gpcontrib/gp_stats_collector/src/GpscStat.cpp index c4029f085cf..151cfd87c02 100644 --- a/gpcontrib/gp_stats_collector/src/GpscStat.cpp +++ b/gpcontrib/gp_stats_collector/src/GpscStat.cpp @@ -38,81 +38,117 @@ extern "C" { #include "storage/spin.h" } -namespace { -struct ProtectedData { - slock_t mutex; - GpscStat::Data data; +namespace +{ +struct ProtectedData +{ + slock_t mutex; + GpscStat::Data data; }; shmem_startup_hook_type prev_shmem_startup_hook = NULL; ProtectedData *data = nullptr; -void gpsc_shmem_startup() { - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); - LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - bool found; - data = reinterpret_cast( - ShmemInitStruct("gpsc_stat_messages", sizeof(ProtectedData), &found)); - if (!found) { - SpinLockInit(&data->mutex); - data->data = GpscStat::Data(); - } - LWLockRelease(AddinShmemInitLock); +void +gpsc_shmem_startup() +{ + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + LWLockAcquire(AddinShmemInitLock, 
LW_EXCLUSIVE); + bool found; + data = reinterpret_cast( + ShmemInitStruct("gpsc_stat_messages", sizeof(ProtectedData), &found)); + if (!found) + { + SpinLockInit(&data->mutex); + data->data = GpscStat::Data(); + } + LWLockRelease(AddinShmemInitLock); } -class LockGuard { +class LockGuard +{ public: - LockGuard(slock_t *mutex) : mutex_(mutex) { SpinLockAcquire(mutex_); } - ~LockGuard() { SpinLockRelease(mutex_); } + LockGuard(slock_t *mutex) : mutex_(mutex) + { + SpinLockAcquire(mutex_); + } + ~LockGuard() + { + SpinLockRelease(mutex_); + } private: - slock_t *mutex_; + slock_t *mutex_; }; -} // namespace - -void GpscStat::init() { - if (!process_shared_preload_libraries_in_progress) - return; - RequestAddinShmemSpace(sizeof(ProtectedData)); - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = gpsc_shmem_startup; +} // namespace + +void +GpscStat::init() +{ + if (!process_shared_preload_libraries_in_progress) + return; + RequestAddinShmemSpace(sizeof(ProtectedData)); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = gpsc_shmem_startup; } -void GpscStat::deinit() { shmem_startup_hook = prev_shmem_startup_hook; } +void +GpscStat::deinit() +{ + shmem_startup_hook = prev_shmem_startup_hook; +} -void GpscStat::reset() { - LockGuard lg(&data->mutex); - data->data = GpscStat::Data(); +void +GpscStat::reset() +{ + LockGuard lg(&data->mutex); + data->data = GpscStat::Data(); } -void GpscStat::report_send(int32_t msg_size) { - LockGuard lg(&data->mutex); - data->data.total++; - data->data.max_message_size = std::max(msg_size, data->data.max_message_size); +void +GpscStat::report_send(int32_t msg_size) +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.max_message_size = + std::max(msg_size, data->data.max_message_size); } -void GpscStat::report_bad_connection() { - LockGuard lg(&data->mutex); - data->data.total++; - data->data.failed_connects++; +void +GpscStat::report_bad_connection() +{ + LockGuard lg(&data->mutex); + 
data->data.total++; + data->data.failed_connects++; } -void GpscStat::report_bad_send(int32_t msg_size) { - LockGuard lg(&data->mutex); - data->data.total++; - data->data.failed_sends++; - data->data.max_message_size = std::max(msg_size, data->data.max_message_size); +void +GpscStat::report_bad_send(int32_t msg_size) +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_sends++; + data->data.max_message_size = + std::max(msg_size, data->data.max_message_size); } -void GpscStat::report_error() { - LockGuard lg(&data->mutex); - data->data.total++; - data->data.failed_other++; +void +GpscStat::report_error() +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_other++; } -GpscStat::Data GpscStat::get_stats() { - LockGuard lg(&data->mutex); - return data->data; +GpscStat::Data +GpscStat::get_stats() +{ + LockGuard lg(&data->mutex); + return data->data; } -bool GpscStat::loaded() { return data != nullptr; } +bool +GpscStat::loaded() +{ + return data != nullptr; +} diff --git a/gpcontrib/gp_stats_collector/src/GpscStat.h b/gpcontrib/gp_stats_collector/src/GpscStat.h index af1a1261776..d82930c7b5b 100644 --- a/gpcontrib/gp_stats_collector/src/GpscStat.h +++ b/gpcontrib/gp_stats_collector/src/GpscStat.h @@ -25,24 +25,28 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef GPSCSTAT_H +#define GPSCSTAT_H #include -class GpscStat { +class GpscStat +{ public: - struct Data { - int64_t total, failed_sends, failed_connects, failed_other; - int32_t max_message_size; - }; + struct Data + { + int64_t total, failed_sends, failed_connects, failed_other; + int32_t max_message_size; + }; - static void init(); - static void deinit(); - static void reset(); - static void report_send(int32_t msg_size); - static void report_bad_connection(); - static void report_bad_send(int32_t msg_size); - static void report_error(); - static Data get_stats(); - static bool loaded(); -}; \ No newline at end of 
file + static void init(); + static void deinit(); + static void reset(); + static void report_send(int32_t msg_size); + static void report_bad_connection(); + static void report_bad_send(int32_t msg_size); + static void report_error(); + static Data get_stats(); + static bool loaded(); +}; +#endif /* GPSCSTAT_H */ diff --git a/gpcontrib/gp_stats_collector/src/PgUtils.cpp b/gpcontrib/gp_stats_collector/src/PgUtils.cpp index 3dbee97061b..c473cc383f2 100644 --- a/gpcontrib/gp_stats_collector/src/PgUtils.cpp +++ b/gpcontrib/gp_stats_collector/src/PgUtils.cpp @@ -30,39 +30,46 @@ #include "memory/gpdbwrappers.h" extern "C" { -#include "commands/resgroupcmds.h" #include "cdb/cdbvars.h" +#include "commands/resgroupcmds.h" } -std::string get_user_name() { - // username is allocated on stack, we don't need to pfree it. - const char *username = - gpdb::get_config_option("session_authorization", false, false); - return username ? std::string(username) : ""; +std::string +get_user_name() +{ + // username is allocated on stack, we don't need to pfree it. + const char *username = + gpdb::get_config_option("session_authorization", false, false); + return username ? 
std::string(username) : ""; } -std::string get_db_name() { - char *dbname = gpdb::get_database_name(MyDatabaseId); - if (dbname) { - std::string result(dbname); - gpdb::pfree(dbname); - return result; - } - return ""; +std::string +get_db_name() +{ + char *dbname = gpdb::get_database_name(MyDatabaseId); + if (dbname) + { + std::string result(dbname); + gpdb::pfree(dbname); + return result; + } + return ""; } -std::string get_rg_name() { - auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); - if (!OidIsValid(groupId)) - return ""; +std::string +get_rg_name() +{ + auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return ""; - char *rgname = gpdb::get_rg_name_for_id(groupId); - if (rgname == nullptr) - return ""; + char *rgname = gpdb::get_rg_name_for_id(groupId); + if (rgname == nullptr) + return ""; - std::string result(rgname); - gpdb::pfree(rgname); - return result; + std::string result(rgname); + gpdb::pfree(rgname); + return result; } /** @@ -86,9 +93,12 @@ std::string get_rg_name() { * segment sees those as top-level. 
*/ -bool is_top_level_query(QueryDesc *query_desc, int nesting_level) { - if (query_desc->gpsc_query_key == NULL) { - return nesting_level == 0; - } - return query_desc->gpsc_query_key->nesting_level == 0; +bool +is_top_level_query(QueryDesc *query_desc, int nesting_level) +{ + if (query_desc->gpsc_query_key == NULL) + { + return nesting_level == 0; + } + return query_desc->gpsc_query_key->nesting_level == 0; } diff --git a/gpcontrib/gp_stats_collector/src/ProcStats.cpp b/gpcontrib/gp_stats_collector/src/ProcStats.cpp index 9c557879fc6..e308b30dfa5 100644 --- a/gpcontrib/gp_stats_collector/src/ProcStats.cpp +++ b/gpcontrib/gp_stats_collector/src/ProcStats.cpp @@ -26,100 +26,119 @@ */ #include "ProcStats.h" -#include "gpsc_metrics.pb.h" -#include #include +#include #include +#include "gpsc_metrics.pb.h" extern "C" { #include "postgres.h" #include "utils/elog.h" } -namespace { -#define FILL_IO_STAT(stat_name) \ - uint64_t stat_name; \ - proc_stat >> tmp >> stat_name; \ - stats->set_##stat_name(stat_name - stats->stat_name()); +namespace +{ +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name - stats->stat_name()); -void fill_io_stats(gpsc::SystemStat *stats) { - std::ifstream proc_stat("/proc/self/io"); - std::string tmp; - FILL_IO_STAT(rchar); - FILL_IO_STAT(wchar); - FILL_IO_STAT(syscr); - FILL_IO_STAT(syscw); - FILL_IO_STAT(read_bytes); - FILL_IO_STAT(write_bytes); - FILL_IO_STAT(cancelled_write_bytes); +void +fill_io_stats(gpsc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); } -void fill_cpu_stats(gpsc::SystemStat *stats) { - static const int UTIME_ID = 13; - static const int STIME_ID = 14; - static const int VSIZE_ID = 22; - static const int RSS_ID = 23; - static 
const double tps = sysconf(_SC_CLK_TCK); +void +fill_cpu_stats(gpsc::SystemStat *stats) +{ + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); - std::ifstream proc_stat("/proc/self/stat"); - std::string trash; - for (int i = 0; i <= RSS_ID; ++i) { - switch (i) { - case UTIME_ID: - double utime; - proc_stat >> utime; - stats->set_usertimeseconds(utime / tps - stats->usertimeseconds()); - break; - case STIME_ID: - double stime; - proc_stat >> stime; - stats->set_kerneltimeseconds(stime / tps - stats->kerneltimeseconds()); - break; - case VSIZE_ID: - uint64_t vsize; - proc_stat >> vsize; - stats->set_vsize(vsize); - break; - case RSS_ID: - uint64_t rss; - proc_stat >> rss; - // NOTE: this is a double AFAIU, need to double-check - stats->set_rss(rss); - break; - default: - proc_stat >> trash; - } - } + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + for (int i = 0; i <= RSS_ID; ++i) + { + switch (i) + { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps - + stats->usertimeseconds()); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps - + stats->kerneltimeseconds()); + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; + } + } } -void fill_status_stats(gpsc::SystemStat *stats) { - std::ifstream proc_stat("/proc/self/status"); - std::string key, measure; - while (proc_stat >> key) { - if (key == "VmPeak:") { - uint64_t value; - proc_stat >> value; - stats->set_vmpeakkb(value); - proc_stat >> measure; - if (measure != "kB") { - throw std::runtime_error("Expected memory sizes in kB, but got in " + - measure); 
- } - } else if (key == "VmSize:") { - uint64_t value; - proc_stat >> value; - stats->set_vmsizekb(value); - if (measure != "kB") { - throw std::runtime_error("Expected memory sizes in kB, but got in " + - measure); - } - } - } +void +fill_status_stats(gpsc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) + { + if (key == "VmPeak:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> measure; + if (measure != "kB") + { + throw std::runtime_error( + "Expected memory sizes in kB, but got in " + measure); + } + } + else if (key == "VmSize:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") + { + throw std::runtime_error( + "Expected memory sizes in kB, but got in " + measure); + } + } + } } -} // namespace +} // namespace -void fill_self_stats(gpsc::SystemStat *stats) { - fill_io_stats(stats); - fill_cpu_stats(stats); - fill_status_stats(stats); +void +fill_self_stats(gpsc::SystemStat *stats) +{ + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); } \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/ProcStats.h b/gpcontrib/gp_stats_collector/src/ProcStats.h index 4473125f875..8b83dbfef02 100644 --- a/gpcontrib/gp_stats_collector/src/ProcStats.h +++ b/gpcontrib/gp_stats_collector/src/ProcStats.h @@ -25,10 +25,13 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef PROCSTATS_H +#define PROCSTATS_H -namespace gpsc { +namespace gpsc +{ class SystemStat; } -void fill_self_stats(gpsc::SystemStat *stats); \ No newline at end of file +void fill_self_stats(gpsc::SystemStat *stats); +#endif /* PROCSTATS_H */ diff --git a/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp index c9ceff4739b..b22f580303e 100644 --- a/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp +++ 
b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp @@ -26,9 +26,9 @@ */ #include "ProtoUtils.h" +#include "Config.h" #include "PgUtils.h" #include "ProcStats.h" -#include "Config.h" #include "memory/gpdbwrappers.h" #define typeid __typeid @@ -53,265 +53,323 @@ extern "C" { extern void gp_gettmid(int32 *); -namespace { +namespace +{ constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = (1 << 7) | (1 << 6); constexpr uint8_t UTF8_CONTINUATION_BYTE = (1 << 7); constexpr uint8_t UTF8_MAX_SYMBOL_BYTES = 4; // Returns true if byte is the starting byte of utf8 // character, false if byte is the continuation (10xxxxxx). -inline bool utf8_start_byte(uint8_t byte) { - return (byte & UTF8_CONTINUATION_BYTE_MASK) != UTF8_CONTINUATION_BYTE; +inline bool +utf8_start_byte(uint8_t byte) +{ + return (byte & UTF8_CONTINUATION_BYTE_MASK) != UTF8_CONTINUATION_BYTE; } -} // namespace +} // namespace -google::protobuf::Timestamp current_ts() { - google::protobuf::Timestamp current_ts; - struct timeval tv; - gettimeofday(&tv, nullptr); - current_ts.set_seconds(tv.tv_sec); - current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); - return current_ts; +google::protobuf::Timestamp +current_ts() +{ + google::protobuf::Timestamp current_ts; + struct timeval tv; + gettimeofday(&tv, nullptr); + current_ts.set_seconds(tv.tv_sec); + current_ts.set_nanos(static_cast(tv.tv_usec * 1000)); + return current_ts; } -void set_query_key(gpsc::QueryKey *key) { - key->set_ccnt(gp_command_count); - key->set_ssid(gp_session_id); - int32 tmid = 0; - gp_gettmid(&tmid); - key->set_tmid(tmid); +void +set_query_key(gpsc::QueryKey *key) +{ + key->set_ccnt(gp_command_count); + key->set_ssid(gp_session_id); + int32 tmid = 0; + gp_gettmid(&tmid); + key->set_tmid(tmid); } -void set_segment_key(gpsc::SegmentKey *key) { - key->set_dbid(GpIdentity.dbid); - key->set_segindex(GpIdentity.segindex); +void +set_segment_key(gpsc::SegmentKey *key) +{ + key->set_dbid(GpIdentity.dbid); + key->set_segindex(GpIdentity.segindex); } 
-std::string trim_str_shrink_utf8(const char *str, size_t len, size_t lim) { - if (unlikely(str == nullptr)) { - return std::string(); - } - if (likely(len <= lim || GetDatabaseEncoding() != PG_UTF8)) { - return std::string(str, std::min(len, lim)); - } +std::string +trim_str_shrink_utf8(const char *str, size_t len, size_t lim) +{ + if (unlikely(str == nullptr)) + { + return std::string(); + } + if (likely(len <= lim || GetDatabaseEncoding() != PG_UTF8)) + { + return std::string(str, std::min(len, lim)); + } - // Handle trimming of utf8 correctly, do not cut multi-byte characters. - size_t cut_pos = lim; - size_t visited_bytes = 1; - while (visited_bytes < UTF8_MAX_SYMBOL_BYTES && cut_pos > 0) { - if (utf8_start_byte(static_cast(str[cut_pos]))) { - break; - } - ++visited_bytes; - --cut_pos; - } + // Handle trimming of utf8 correctly, do not cut multi-byte characters. + size_t cut_pos = lim; + size_t visited_bytes = 1; + while (visited_bytes < UTF8_MAX_SYMBOL_BYTES && cut_pos > 0) + { + if (utf8_start_byte(static_cast(str[cut_pos]))) + { + break; + } + ++visited_bytes; + --cut_pos; + } - return std::string(str, cut_pos); + return std::string(str, cut_pos); } -void set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, - const Config &config) { - if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) { - auto qi = req->mutable_query_info(); - qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER - ? 
gpsc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER - : gpsc::PlanGenerator::PLAN_GENERATOR_PLANNER); - MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = gpdb::get_explain_state(query_desc, true); - if (es.str) { - *qi->mutable_plan_text() = trim_str_shrink_utf8(es.str->data, es.str->len, - config.max_plan_size()); - StringInfo norm_plan = gpdb::gen_normplan(es.str->data); - if (norm_plan) { - *qi->mutable_template_plan_text() = trim_str_shrink_utf8( - norm_plan->data, norm_plan->len, config.max_plan_size()); - qi->set_plan_id( - hash_any((unsigned char *)norm_plan->data, norm_plan->len)); - gpdb::pfree(norm_plan->data); - } - qi->set_query_id(query_desc->plannedstmt->queryId); - gpdb::pfree(es.str->data); - } - gpdb::mem_ctx_switch_to(oldcxt); - } +void +set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) +{ + if (Gp_role == GP_ROLE_DISPATCH && query_desc->plannedstmt) + { + auto qi = req->mutable_query_info(); + qi->set_generator(query_desc->plannedstmt->planGen == PLANGEN_OPTIMIZER + ? 
gpsc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : gpsc::PlanGenerator::PLAN_GENERATOR_PLANNER); + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_explain_state(query_desc, true); + if (es.str) + { + *qi->mutable_plan_text() = trim_str_shrink_utf8( + es.str->data, es.str->len, config.max_plan_size()); + StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + if (norm_plan) + { + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( + norm_plan->data, norm_plan->len, config.max_plan_size()); + qi->set_plan_id(hash_any((unsigned char *) norm_plan->data, + norm_plan->len)); + gpdb::pfree(norm_plan->data); + } + qi->set_query_id(query_desc->plannedstmt->queryId); + gpdb::pfree(es.str->data); + } + gpdb::mem_ctx_switch_to(oldcxt); + } } -void set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, - const Config &config) { - if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) { - auto qi = req->mutable_query_info(); - *qi->mutable_query_text() = trim_str_shrink_utf8( - query_desc->sourceText, strlen(query_desc->sourceText), - config.max_text_size()); - char *norm_query = gpdb::gen_normquery(query_desc->sourceText); - if (norm_query) { - *qi->mutable_template_query_text() = trim_str_shrink_utf8( - norm_query, strlen(norm_query), config.max_text_size()); - gpdb::pfree(norm_query); - } - } +void +set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) +{ + if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) + { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = trim_str_shrink_utf8( + query_desc->sourceText, strlen(query_desc->sourceText), + config.max_text_size()); + char *norm_query = gpdb::gen_normquery(query_desc->sourceText); + if (norm_query) + { + *qi->mutable_template_query_text() = trim_str_shrink_utf8( + norm_query, strlen(norm_query), config.max_text_size()); + gpdb::pfree(norm_query); + } + } } -void 
clear_big_fields(gpsc::SetQueryReq *req) { - if (Gp_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->clear_plan_text(); - qi->clear_template_plan_text(); - qi->clear_query_text(); - qi->clear_template_query_text(); - qi->clear_analyze_text(); - } +void +clear_big_fields(gpsc::SetQueryReq *req) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_template_plan_text(); + qi->clear_query_text(); + qi->clear_template_query_text(); + qi->clear_analyze_text(); + } } -void set_query_info(gpsc::SetQueryReq *req) { - if (Gp_role == GP_ROLE_DISPATCH) { - auto qi = req->mutable_query_info(); - qi->set_username(get_user_name()); - if (IsTransactionState()) - qi->set_databasename(get_db_name()); - qi->set_rsgname(get_rg_name()); - } +void +set_query_info(gpsc::SetQueryReq *req) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + auto qi = req->mutable_query_info(); + qi->set_username(get_user_name()); + if (IsTransactionState()) + qi->set_databasename(get_db_name()); + qi->set_rsgname(get_rg_name()); + } } -void set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level) { - auto aqi = req->mutable_add_info(); - aqi->set_nested_level(nesting_level); +void +set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level) +{ + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); } -void set_qi_slice_id(gpsc::SetQueryReq *req) { - auto aqi = req->mutable_add_info(); - aqi->set_slice_id(currentSliceId); +void +set_qi_slice_id(gpsc::SetQueryReq *req) +{ + auto aqi = req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); } -void set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, - const Config &config) { - auto aqi = req->mutable_add_info(); - *aqi->mutable_error_message() = - trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); +void +set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, + const Config &config) +{ + auto aqi 
= req->mutable_add_info(); + *aqi->mutable_error_message() = + trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); } -void set_metric_instrumentation(gpsc::MetricInstrumentation *metrics, - QueryDesc *query_desc, int nested_calls, - double nested_time) { - auto instrument = query_desc->planstate->instrument; - if (instrument) { - metrics->set_ntuples(instrument->ntuples); - metrics->set_nloops(instrument->nloops); - metrics->set_tuplecount(instrument->tuplecount); - metrics->set_firsttuple(instrument->firsttuple); - metrics->set_startup(instrument->startup); - metrics->set_total(instrument->total); - auto &buffusage = instrument->bufusage; - metrics->set_shared_blks_hit(buffusage.shared_blks_hit); - metrics->set_shared_blks_read(buffusage.shared_blks_read); - metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); - metrics->set_shared_blks_written(buffusage.shared_blks_written); - metrics->set_local_blks_hit(buffusage.local_blks_hit); - metrics->set_local_blks_read(buffusage.local_blks_read); - metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); - metrics->set_local_blks_written(buffusage.local_blks_written); - metrics->set_temp_blks_read(buffusage.temp_blks_read); - metrics->set_temp_blks_written(buffusage.temp_blks_written); - metrics->set_blk_read_time(INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); - metrics->set_blk_write_time( - INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); - } - if (query_desc->estate && query_desc->estate->motionlayer_context) { - MotionLayerState *mlstate = - (MotionLayerState *)query_desc->estate->motionlayer_context; - metrics->mutable_sent()->set_total_bytes(mlstate->stat_total_bytes_sent); - metrics->mutable_sent()->set_tuple_bytes(mlstate->stat_tuple_bytes_sent); - metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); - metrics->mutable_received()->set_total_bytes( - mlstate->stat_total_bytes_recvd); - metrics->mutable_received()->set_tuple_bytes( - 
mlstate->stat_tuple_bytes_recvd); - metrics->mutable_received()->set_chunks(mlstate->stat_total_chunks_recvd); - } - metrics->set_inherited_calls(nested_calls); - metrics->set_inherited_time(nested_time); +void +set_metric_instrumentation(gpsc::MetricInstrumentation *metrics, + QueryDesc *query_desc, int nested_calls, + double nested_time) +{ + auto instrument = query_desc->planstate->instrument; + if (instrument) + { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } + if (query_desc->estate && query_desc->estate->motionlayer_context) + { + MotionLayerState *mlstate = + (MotionLayerState *) query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes( + mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes( + mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + metrics->mutable_received()->set_total_bytes( + 
mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks( + mlstate->stat_total_chunks_recvd); + } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); } -void set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, - int nested_calls, double nested_time) { - if (query_desc->planstate && query_desc->planstate->instrument) { - set_metric_instrumentation(metrics->mutable_instrumentation(), query_desc, - nested_calls, nested_time); - } - fill_self_stats(metrics->mutable_systemstat()); - metrics->mutable_systemstat()->set_runningtimeseconds( - time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); - metrics->mutable_spill()->set_filecount( - WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); - metrics->mutable_spill()->set_totalbytes( - WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); +void +set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) +{ + if (query_desc->planstate && query_desc->planstate->instrument) + { + set_metric_instrumentation(metrics->mutable_instrumentation(), + query_desc, nested_calls, nested_time); + } + fill_self_stats(metrics->mutable_systemstat()); + metrics->mutable_systemstat()->set_runningtimeseconds( + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); } -#define UPDATE_IC_STATS(proto_name, stat_name) \ - metrics->mutable_interconnect()->set_##proto_name( \ - ic_statistics->stat_name - \ - metrics->mutable_interconnect()->proto_name()); \ - Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ - metrics->mutable_interconnect()->proto_name() 
<= \ - ic_statistics->stat_name) +#define UPDATE_IC_STATS(proto_name, stat_name) \ + metrics->mutable_interconnect()->set_##proto_name( \ + ic_statistics->stat_name - \ + metrics->mutable_interconnect()->proto_name()); \ + Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ + metrics->mutable_interconnect()->proto_name() <= \ + ic_statistics->stat_name) -void set_ic_stats(gpsc::MetricInstrumentation *metrics, - const ICStatistics *ic_statistics) { +void +set_ic_stats(gpsc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics) +{ #ifdef IC_TEARDOWN_HOOK - UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); - UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); - UPDATE_IC_STATS(total_capacity, totalCapacity); - UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); - UPDATE_IC_STATS(total_buffers, totalBuffers); - UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); - UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); - UPDATE_IC_STATS(retransmits, retransmits); - UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); - UPDATE_IC_STATS(mismatch_num, mismatchNum); - UPDATE_IC_STATS(crc_errors, crcErrors); - UPDATE_IC_STATS(snd_pkt_num, sndPktNum); - UPDATE_IC_STATS(recv_pkt_num, recvPktNum); - UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); - UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); - UPDATE_IC_STATS(recv_ack_num, recvAckNum); - UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); + UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); + UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); + UPDATE_IC_STATS(total_capacity, totalCapacity); + UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); + UPDATE_IC_STATS(total_buffers, totalBuffers); + UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); + UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); + UPDATE_IC_STATS(retransmits, retransmits); + 
UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); + UPDATE_IC_STATS(mismatch_num, mismatchNum); + UPDATE_IC_STATS(crc_errors, crcErrors); + UPDATE_IC_STATS(snd_pkt_num, sndPktNum); + UPDATE_IC_STATS(recv_pkt_num, recvPktNum); + UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); + UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); + UPDATE_IC_STATS(recv_ack_num, recvAckNum); + UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); #endif } -gpsc::SetQueryReq create_query_req(gpsc::QueryStatus status) { - gpsc::SetQueryReq req; - req.set_query_status(status); - *req.mutable_datetime() = current_ts(); - set_query_key(req.mutable_query_key()); - set_segment_key(req.mutable_segment_key()); - return req; +gpsc::SetQueryReq +create_query_req(gpsc::QueryStatus status) +{ + gpsc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key()); + set_segment_key(req.mutable_segment_key()); + return req; } -double protots_to_double(const google::protobuf::Timestamp &ts) { - return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +double +protots_to_double(const google::protobuf::Timestamp &ts) +{ + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; } -void set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *req, - const Config &config) { - // Make sure it is a valid txn and it is not an utility - // statement for ExplainPrintPlan() later. - if (!IsTransactionState() || !query_desc->plannedstmt) { - return; - } - MemoryContext oldcxt = - gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); - ExplainState es = gpdb::get_analyze_state( - query_desc, query_desc->instrument_options && config.enable_analyze()); - gpdb::mem_ctx_switch_to(oldcxt); - if (es.str) { - // Remove last line break. 
- if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') { - es.str->data[--es.str->len] = '\0'; - } - auto trimmed_analyze = - trim_str_shrink_utf8(es.str->data, es.str->len, config.max_plan_size()); - req->mutable_query_info()->set_analyze_text(trimmed_analyze); - gpdb::pfree(es.str->data); - } +void +set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *req, + const Config &config) +{ + // Make sure it is a valid txn and it is not an utility + // statement for ExplainPrintPlan() later. + if (!IsTransactionState() || !query_desc->plannedstmt) + { + return; + } + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_analyze_state( + query_desc, query_desc->instrument_options && config.enable_analyze()); + gpdb::mem_ctx_switch_to(oldcxt); + if (es.str) + { + // Remove last line break. + if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') + { + es.str->data[--es.str->len] = '\0'; + } + auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, + config.max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + gpdb::pfree(es.str->data); + } } diff --git a/gpcontrib/gp_stats_collector/src/ProtoUtils.h b/gpcontrib/gp_stats_collector/src/ProtoUtils.h index 5ddcd42d308..6b38097fbcc 100644 --- a/gpcontrib/gp_stats_collector/src/ProtoUtils.h +++ b/gpcontrib/gp_stats_collector/src/ProtoUtils.h @@ -25,7 +25,8 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef PROTOUTILS_H +#define PROTOUTILS_H #include "protos/gpsc_set_service.pb.h" @@ -35,20 +36,22 @@ class Config; google::protobuf::Timestamp current_ts(); void set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, - const Config &config); + const Config &config); void set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, - const Config &config); + const Config &config); void clear_big_fields(gpsc::SetQueryReq *req); void 
set_query_info(gpsc::SetQueryReq *req); void set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level); void set_qi_slice_id(gpsc::SetQueryReq *req); void set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, - const Config &config); + const Config &config); void set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, - int nested_calls, double nested_time); + int nested_calls, double nested_time); void set_ic_stats(gpsc::MetricInstrumentation *metrics, - const ICStatistics *ic_statistics); + const ICStatistics *ic_statistics); gpsc::SetQueryReq create_query_req(gpsc::QueryStatus status); double protots_to_double(const google::protobuf::Timestamp &ts); void set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *message, - const Config &config); + const Config &config); + +#endif /* PROTOUTILS_H */ diff --git a/gpcontrib/gp_stats_collector/src/UDSConnector.cpp b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp index 9a01d4033d0..16344366456 100644 --- a/gpcontrib/gp_stats_collector/src/UDSConnector.cpp +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp @@ -28,103 +28,119 @@ #include "UDSConnector.h" #include "Config.h" #include "GpscStat.h" -#include "memory/gpdbwrappers.h" #include "log/LogOps.h" +#include "memory/gpdbwrappers.h" +#include #include -#include +#include #include -#include #include -#include -#include +#include #include +#include extern "C" { #include "postgres.h" } static void inline log_tracing_failure(const gpsc::SetQueryReq &req, - const std::string &event) { - ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", - req.query_key().tmid(), req.query_key().ssid(), - req.query_key().ccnt(), event.c_str()))); + const std::string &event) +{ + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str()))); } -bool UDSConnector::report_query(const gpsc::SetQueryReq &req, - const std::string 
&event, - const Config &config) { - sockaddr_un address{}; - address.sun_family = AF_UNIX; - const auto &uds_path = config.uds_path(); +bool +UDSConnector::report_query(const gpsc::SetQueryReq &req, + const std::string &event, const Config &config) +{ + sockaddr_un address{}; + address.sun_family = AF_UNIX; + const auto &uds_path = config.uds_path(); - if (uds_path.size() >= sizeof(address.sun_path)) { - ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); - GpscStat::report_error(); - return false; - } - strcpy(address.sun_path, uds_path.c_str()); + if (uds_path.size() >= sizeof(address.sun_path)) + { + ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); + GpscStat::report_error(); + return false; + } + strcpy(address.sun_path, uds_path.c_str()); - const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (sockfd == -1) { - log_tracing_failure(req, event); - GpscStat::report_error(); - return false; - } + const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd == -1) + { + log_tracing_failure(req, event); + GpscStat::report_error(); + return false; + } - // Close socket automatically on error path. - struct SockGuard { - int fd; - ~SockGuard() { close(fd); } - } sock_guard{sockfd}; + // Close socket automatically on error path. + struct SockGuard + { + int fd; + ~SockGuard() + { + close(fd); + } + } sock_guard{sockfd}; - if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) { - // That's a very important error that should never happen, so make it - // visible to an end-user and admins. - ereport(WARNING, - (errmsg("Unable to create non-blocking socket connection %m"))); - GpscStat::report_error(); - return false; - } + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) + { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. 
+ ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %m"))); + GpscStat::report_error(); + return false; + } - if (connect(sockfd, reinterpret_cast(&address), - sizeof(address)) == -1) { - log_tracing_failure(req, event); - GpscStat::report_bad_connection(); - return false; - } + if (connect(sockfd, reinterpret_cast(&address), + sizeof(address)) == -1) + { + log_tracing_failure(req, event); + GpscStat::report_bad_connection(); + return false; + } - const auto data_size = req.ByteSizeLong(); - const auto total_size = data_size + sizeof(uint32_t); - auto *buf = static_cast(gpdb::palloc(total_size)); - // Free buf automatically on error path. - struct BufGuard { - void *p; - ~BufGuard() { gpdb::pfree(p); } - } buf_guard{buf}; + const auto data_size = req.ByteSizeLong(); + const auto total_size = data_size + sizeof(uint32_t); + auto *buf = static_cast(gpdb::palloc(total_size)); + // Free buf automatically on error path. + struct BufGuard + { + void *p; + ~BufGuard() + { + gpdb::pfree(p); + } + } buf_guard{buf}; - *reinterpret_cast(buf) = data_size; - req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + *reinterpret_cast(buf) = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); - int64_t sent = 0, sent_total = 0; - do { - sent = - send(sockfd, buf + sent_total, total_size - sent_total, MSG_DONTWAIT); - if (sent > 0) - sent_total += sent; - } while (sent > 0 && size_t(sent_total) != total_size && - // the line below is a small throttling hack: - // if a message does not fit a single packet, we take a nap - // before sending the next one. 
- // Otherwise, MSG_DONTWAIT send might overflow the UDS - (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + int64_t sent = 0, sent_total = 0; + do + { + sent = send(sockfd, buf + sent_total, total_size - sent_total, + MSG_DONTWAIT); + if (sent > 0) + sent_total += sent; + } while (sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. + // Otherwise, MSG_DONTWAIT send might overflow the UDS + (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); - if (sent < 0) { - log_tracing_failure(req, event); - GpscStat::report_bad_send(total_size); - return false; - } + if (sent < 0) + { + log_tracing_failure(req, event); + GpscStat::report_bad_send(total_size); + return false; + } - GpscStat::report_send(total_size); - return true; + GpscStat::report_send(total_size); + return true; } diff --git a/gpcontrib/gp_stats_collector/src/UDSConnector.h b/gpcontrib/gp_stats_collector/src/UDSConnector.h index a91d22f9df1..ac56dd54f44 100644 --- a/gpcontrib/gp_stats_collector/src/UDSConnector.h +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.h @@ -25,14 +25,18 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef UDSCONNECTOR_H +#define UDSCONNECTOR_H #include "protos/gpsc_set_service.pb.h" class Config; -class UDSConnector { +class UDSConnector +{ public: - bool static report_query(const gpsc::SetQueryReq &req, - const std::string &event, const Config &config); + bool static report_query(const gpsc::SetQueryReq &req, + const std::string &event, const Config &config); }; + +#endif /* UDSCONNECTOR_H */ diff --git a/gpcontrib/gp_stats_collector/src/gp_stats_collector.c b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c index d930f72246d..d295e37b396 100644 --- a/gpcontrib/gp_stats_collector/src/gp_stats_collector.c +++ 
b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c @@ -45,106 +45,131 @@ PG_FUNCTION_INFO_V1(gpsc_test_uds_start_server); PG_FUNCTION_INFO_V1(gpsc_test_uds_receive); PG_FUNCTION_INFO_V1(gpsc_test_uds_stop_server); -void _PG_init(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) - hooks_init(); +void +_PG_init(void) +{ + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_init(); } -void _PG_fini(void) { - if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) - hooks_deinit(); +void +_PG_fini(void) +{ + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_deinit(); } -Datum gpsc_stat_messages_reset(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; +Datum +gpsc_stat_messages_reset(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; - if (SRF_IS_FIRSTCALL()) { - funcctx = SRF_FIRSTCALL_INIT(); - gpsc_functions_reset(); - } + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + gpsc_functions_reset(); + } - funcctx = SRF_PERCALL_SETUP(); - SRF_RETURN_DONE(funcctx); + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } -Datum gpsc_stat_messages(PG_FUNCTION_ARGS) { - return gpsc_functions_get(fcinfo); +Datum +gpsc_stat_messages(PG_FUNCTION_ARGS) +{ + return gpsc_functions_get(fcinfo); } -Datum gpsc_init_log(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; +Datum +gpsc_init_log(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; - if (SRF_IS_FIRSTCALL()) { - funcctx = SRF_FIRSTCALL_INIT(); - init_log(); - } + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + init_log(); + } - funcctx = SRF_PERCALL_SETUP(); - SRF_RETURN_DONE(funcctx); + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } -Datum gpsc_truncate_log(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; +Datum +gpsc_truncate_log(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; - if (SRF_IS_FIRSTCALL()) { - funcctx = SRF_FIRSTCALL_INIT(); - truncate_log(); - } + if (SRF_IS_FIRSTCALL()) + { + funcctx = 
SRF_FIRSTCALL_INIT(); + truncate_log(); + } - funcctx = SRF_PERCALL_SETUP(); - SRF_RETURN_DONE(funcctx); + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } -Datum gpsc_test_uds_start_server(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - - if (SRF_IS_FIRSTCALL()) { - funcctx = SRF_FIRSTCALL_INIT(); - char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); - test_uds_start_server(path); - pfree(path); - } - - funcctx = SRF_PERCALL_SETUP(); - SRF_RETURN_DONE(funcctx); +Datum +gpsc_test_uds_start_server(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); + test_uds_start_server(path); + pfree(path); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } -Datum gpsc_test_uds_receive(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - int64 *result; +Datum +gpsc_test_uds_receive(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int64 *result; - if (SRF_IS_FIRSTCALL()) { - MemoryContext oldcontext; + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - result = (int64 *)palloc(sizeof(int64)); - funcctx->user_fctx = result; - funcctx->max_calls = 1; - MemoryContextSwitchTo(oldcontext); + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + result = (int64 *) palloc(sizeof(int64)); + funcctx->user_fctx = result; + funcctx->max_calls = 1; + MemoryContextSwitchTo(oldcontext); - int timeout_ms = PG_GETARG_INT32(0); - *result = test_uds_receive(timeout_ms); - } + int timeout_ms = PG_GETARG_INT32(0); + *result = test_uds_receive(timeout_ms); + } - funcctx = SRF_PERCALL_SETUP(); + funcctx = SRF_PERCALL_SETUP(); - if (funcctx->call_cntr < funcctx->max_calls) { - result = (int64 *)funcctx->user_fctx; - SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); - } + if (funcctx->call_cntr < 
funcctx->max_calls) + { + result = (int64 *) funcctx->user_fctx; + SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); + } - SRF_RETURN_DONE(funcctx); + SRF_RETURN_DONE(funcctx); } -Datum gpsc_test_uds_stop_server(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; +Datum +gpsc_test_uds_stop_server(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; - if (SRF_IS_FIRSTCALL()) { - funcctx = SRF_FIRSTCALL_INIT(); - test_uds_stop_server(); - } + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + test_uds_stop_server(); + } - funcctx = SRF_PERCALL_SETUP(); - SRF_RETURN_DONE(funcctx); + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); } diff --git a/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp index 0a40b4cb359..3f19d4d9930 100644 --- a/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp @@ -28,28 +28,28 @@ #define typeid __typeid extern "C" { #include "postgres.h" -#include "funcapi.h" -#include "executor/executor.h" -#include "executor/execUtils.h" -#include "utils/elog.h" -#include "utils/builtins.h" -#include "utils/metrics_utils.h" #include "cdb/cdbvars.h" #include "cdb/ml_ipc.h" +#include "executor/execUtils.h" +#include "executor/executor.h" +#include "funcapi.h" +#include "stat_statements_parser/pg_stat_statements_parser.h" #include "tcop/utility.h" -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "utils/builtins.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" +#include +#include #include #include #include -#include -#include } #undef typeid #include "Config.h" -#include "GpscStat.h" #include "EventSender.h" +#include "GpscStat.h" #include "hook_wrappers.h" #include "memory/gpdbwrappers.h" @@ -60,7 +60,7 @@ static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; #ifdef ANALYZE_STATS_COLLECT_HOOK static 
analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = - nullptr; + nullptr; #endif #ifdef IC_TEARDOWN_HOOK static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; @@ -68,24 +68,23 @@ static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; static void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags); -static void gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - uint64 count, bool execute_once); +static void gpsc_ExecutorRun_hook(QueryDesc *query_desc, + ScanDirection direction, uint64 count, + bool execute_once); static void gpsc_ExecutorFinish_hook(QueryDesc *query_desc); static void gpsc_ExecutorEnd_hook(QueryDesc *query_desc); static void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg); #ifdef IC_TEARDOWN_HOOK static void gpsc_ic_teardown_hook(ChunkTransportState *transportStates, - bool hasErrors); + bool hasErrors); #endif #ifdef ANALYZE_STATS_COLLECT_HOOK static void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc); #endif -static void gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, - bool readOnlyTree, - ProcessUtilityContext context, - ParamListInfo params, - QueryEnvironment *queryEnv, - DestReceiver *dest, QueryCompletion *qc); +static void gpsc_process_utility_hook( + PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, + ProcessUtilityContext context, ParamListInfo params, + QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc); #define TEST_MAX_CONNECTIONS 4 #define TEST_RCV_BUF_SIZE 8192 @@ -96,319 +95,379 @@ static char *test_sock_path = NULL; static EventSender *sender = nullptr; -static inline EventSender *get_sender() { - if (!sender) { - sender = new EventSender(); - } - return sender; +static inline EventSender * +get_sender() +{ + if (!sender) + { + sender = new EventSender(); + } + return sender; } template -R cpp_call(T *obj, R 
(T::*func)(Args...), Args... args) { - try { - return (obj->*func)(args...); - } catch (const std::exception &e) { - ereport(FATAL, (errmsg("Unexpected exception in gpsc %s", e.what()))); - } +R +cpp_call(T *obj, R (T::*func)(Args...), Args... args) +{ + try + { + return (obj->*func)(args...); + } + catch (const std::exception &e) + { + ereport(ERROR, (errmsg("Unexpected exception in gpsc %s", e.what()))); + } } -void hooks_init() { - Config::init_gucs(); - GpscStat::init(); - previous_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = gpsc_ExecutorStart_hook; - previous_ExecutorRun_hook = ExecutorRun_hook; - ExecutorRun_hook = gpsc_ExecutorRun_hook; - previous_ExecutorFinish_hook = ExecutorFinish_hook; - ExecutorFinish_hook = gpsc_ExecutorFinish_hook; - previous_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = gpsc_ExecutorEnd_hook; - previous_query_info_collect_hook = query_info_collect_hook; - query_info_collect_hook = gpsc_query_info_collect_hook; +void +hooks_init() +{ + Config::init_gucs(); + GpscStat::init(); + previous_ExecutorStart_hook = ExecutorStart_hook; + ExecutorStart_hook = gpsc_ExecutorStart_hook; + previous_ExecutorRun_hook = ExecutorRun_hook; + ExecutorRun_hook = gpsc_ExecutorRun_hook; + previous_ExecutorFinish_hook = ExecutorFinish_hook; + ExecutorFinish_hook = gpsc_ExecutorFinish_hook; + previous_ExecutorEnd_hook = ExecutorEnd_hook; + ExecutorEnd_hook = gpsc_ExecutorEnd_hook; + previous_query_info_collect_hook = query_info_collect_hook; + query_info_collect_hook = gpsc_query_info_collect_hook; #ifdef IC_TEARDOWN_HOOK - previous_ic_teardown_hook = ic_teardown_hook; - ic_teardown_hook = gpsc_ic_teardown_hook; + previous_ic_teardown_hook = ic_teardown_hook; + ic_teardown_hook = gpsc_ic_teardown_hook; #endif #ifdef ANALYZE_STATS_COLLECT_HOOK - previous_analyze_stats_collect_hook = analyze_stats_collect_hook; - analyze_stats_collect_hook = gpsc_analyze_stats_collect_hook; + previous_analyze_stats_collect_hook = 
analyze_stats_collect_hook; + analyze_stats_collect_hook = gpsc_analyze_stats_collect_hook; #endif - stat_statements_parser_init(); - previous_ProcessUtility_hook = ProcessUtility_hook; - ProcessUtility_hook = gpsc_process_utility_hook; + stat_statements_parser_init(); + previous_ProcessUtility_hook = ProcessUtility_hook; + ProcessUtility_hook = gpsc_process_utility_hook; } -void hooks_deinit() { - ExecutorStart_hook = previous_ExecutorStart_hook; - ExecutorEnd_hook = previous_ExecutorEnd_hook; - ExecutorRun_hook = previous_ExecutorRun_hook; - ExecutorFinish_hook = previous_ExecutorFinish_hook; - query_info_collect_hook = previous_query_info_collect_hook; +void +hooks_deinit() +{ + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; + ExecutorRun_hook = previous_ExecutorRun_hook; + ExecutorFinish_hook = previous_ExecutorFinish_hook; + query_info_collect_hook = previous_query_info_collect_hook; #ifdef IC_TEARDOWN_HOOK - ic_teardown_hook = previous_ic_teardown_hook; + ic_teardown_hook = previous_ic_teardown_hook; #endif #ifdef ANALYZE_STATS_COLLECT_HOOK - analyze_stats_collect_hook = previous_analyze_stats_collect_hook; + analyze_stats_collect_hook = previous_analyze_stats_collect_hook; #endif - stat_statements_parser_deinit(); - if (sender) { - delete sender; - } - GpscStat::deinit(); - ProcessUtility_hook = previous_ProcessUtility_hook; + stat_statements_parser_deinit(); + if (sender) + { + delete sender; + } + GpscStat::deinit(); + ProcessUtility_hook = previous_ProcessUtility_hook; } -void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags) { - cpp_call(get_sender(), &EventSender::executor_before_start, query_desc, - eflags); - if (previous_ExecutorStart_hook) { - (*previous_ExecutorStart_hook)(query_desc, eflags); - } else { - standard_ExecutorStart(query_desc, eflags); - } - cpp_call(get_sender(), &EventSender::executor_after_start, query_desc, - eflags); +void +gpsc_ExecutorStart_hook(QueryDesc *query_desc, 
int eflags) +{ + cpp_call(get_sender(), &EventSender::executor_before_start, query_desc, + eflags); + if (previous_ExecutorStart_hook) + { + (*previous_ExecutorStart_hook)(query_desc, eflags); + } + else + { + standard_ExecutorStart(query_desc, eflags); + } + cpp_call(get_sender(), &EventSender::executor_after_start, query_desc, + eflags); } -void gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, - uint64 count, bool execute_once) { - get_sender()->incr_depth(); - PG_TRY(); - { - if (previous_ExecutorRun_hook) - previous_ExecutorRun_hook(query_desc, direction, count, execute_once); - else - standard_ExecutorRun(query_desc, direction, count, execute_once); - get_sender()->decr_depth(); - } - PG_CATCH(); - { - get_sender()->decr_depth(); - PG_RE_THROW(); - } - PG_END_TRY(); +void +gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + uint64 count, bool execute_once) +{ + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorRun_hook) + previous_ExecutorRun_hook(query_desc, direction, count, + execute_once); + else + standard_ExecutorRun(query_desc, direction, count, execute_once); + get_sender()->decr_depth(); + } + PG_CATCH(); + { + get_sender()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); } -void gpsc_ExecutorFinish_hook(QueryDesc *query_desc) { - get_sender()->incr_depth(); - PG_TRY(); - { - if (previous_ExecutorFinish_hook) - previous_ExecutorFinish_hook(query_desc); - else - standard_ExecutorFinish(query_desc); - get_sender()->decr_depth(); - } - PG_CATCH(); - { - get_sender()->decr_depth(); - PG_RE_THROW(); - } - PG_END_TRY(); +void +gpsc_ExecutorFinish_hook(QueryDesc *query_desc) +{ + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorFinish_hook) + previous_ExecutorFinish_hook(query_desc); + else + standard_ExecutorFinish(query_desc); + get_sender()->decr_depth(); + } + PG_CATCH(); + { + get_sender()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); } -void 
gpsc_ExecutorEnd_hook(QueryDesc *query_desc) { - cpp_call(get_sender(), &EventSender::executor_end, query_desc); - if (previous_ExecutorEnd_hook) { - (*previous_ExecutorEnd_hook)(query_desc); - } else { - standard_ExecutorEnd(query_desc); - } +void +gpsc_ExecutorEnd_hook(QueryDesc *query_desc) +{ + cpp_call(get_sender(), &EventSender::executor_end, query_desc); + if (previous_ExecutorEnd_hook) + { + (*previous_ExecutorEnd_hook)(query_desc); + } + else + { + standard_ExecutorEnd(query_desc); + } } -void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg) { - cpp_call(get_sender(), &EventSender::query_metrics_collect, status, - arg /* queryDesc */, false /* utility */, (ErrorData *)NULL); - if (previous_query_info_collect_hook) { - (*previous_query_info_collect_hook)(status, arg); - } +void +gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg) +{ + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, + arg /* queryDesc */, false /* utility */, (ErrorData *) NULL); + if (previous_query_info_collect_hook) + { + (*previous_query_info_collect_hook)(status, arg); + } } #ifdef IC_TEARDOWN_HOOK -void gpsc_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) { - cpp_call(get_sender(), &EventSender::ic_metrics_collect); - if (previous_ic_teardown_hook) { - (*previous_ic_teardown_hook)(transportStates, hasErrors); - } +void +gpsc_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) +{ + cpp_call(get_sender(), &EventSender::ic_metrics_collect); + if (previous_ic_teardown_hook) + { + (*previous_ic_teardown_hook)(transportStates, hasErrors); + } } #endif #ifdef ANALYZE_STATS_COLLECT_HOOK -void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc) { - cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc); - if (previous_analyze_stats_collect_hook) { - (*previous_analyze_stats_collect_hook)(query_desc); - } +void +gpsc_analyze_stats_collect_hook(QueryDesc *query_desc) +{ + 
cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc); + if (previous_analyze_stats_collect_hook) + { + (*previous_analyze_stats_collect_hook)(query_desc); + } } #endif -static void gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, - bool readOnlyTree, - ProcessUtilityContext context, - ParamListInfo params, - QueryEnvironment *queryEnv, - DestReceiver *dest, QueryCompletion *qc) { - /* Project utility data on QueryDesc to use existing logic */ - QueryDesc *query_desc = (QueryDesc *)palloc0(sizeof(QueryDesc)); - query_desc->sourceText = queryString; - - cpp_call(get_sender(), &EventSender::query_metrics_collect, - METRICS_QUERY_SUBMIT, (void *)query_desc, true /* utility */, - (ErrorData *)NULL); - - get_sender()->incr_depth(); - PG_TRY(); - { - if (previous_ProcessUtility_hook) { - (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, context, - params, queryEnv, dest, qc); - } else { - standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, params, - queryEnv, dest, qc); - } - - get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, - METRICS_QUERY_DONE, (void *)query_desc, true /* utility */, - (ErrorData *)NULL); - - pfree(query_desc); - } - PG_CATCH(); - { - ErrorData *edata; - MemoryContext oldctx; - - oldctx = MemoryContextSwitchTo(TopMemoryContext); - edata = CopyErrorData(); - FlushErrorState(); - MemoryContextSwitchTo(oldctx); - - get_sender()->decr_depth(); - cpp_call(get_sender(), &EventSender::query_metrics_collect, - METRICS_QUERY_ERROR, (void *)query_desc, true /* utility */, - edata); - - pfree(query_desc); - ReThrowError(edata); - } - PG_END_TRY(); +static void +gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, + ParamListInfo params, QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc) +{ + /* Project utility data on QueryDesc to use existing logic */ + QueryDesc 
*query_desc = (QueryDesc *) palloc0(sizeof(QueryDesc)); + query_desc->sourceText = queryString; + + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_SUBMIT, (void *) query_desc, true /* utility */, + (ErrorData *) NULL); + + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ProcessUtility_hook) + { + (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, + context, params, queryEnv, dest, + qc); + } + else + { + standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, + params, queryEnv, dest, qc); + } + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_DONE, (void *) query_desc, true /* utility */, + (ErrorData *) NULL); + + pfree(query_desc); + } + PG_CATCH(); + { + ErrorData *edata; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + FlushErrorState(); + MemoryContextSwitchTo(oldctx); + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_ERROR, (void *) query_desc, true /* utility */, + edata); + + pfree(query_desc); + ReThrowError(edata); + } + PG_END_TRY(); } -static void check_stats_loaded() { - if (!GpscStat::loaded()) { - ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("gp_stats_collector must be loaded via " - "shared_preload_libraries"))); - } +static void +check_stats_loaded() +{ + if (!GpscStat::loaded()) + { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("gp_stats_collector must be loaded via " + "shared_preload_libraries"))); + } } -void gpsc_functions_reset() { - check_stats_loaded(); - GpscStat::reset(); +void +gpsc_functions_reset() +{ + check_stats_loaded(); + GpscStat::reset(); } -Datum gpsc_functions_get(FunctionCallInfo fcinfo) { - const int ATTNUM = 6; - check_stats_loaded(); - auto stats = GpscStat::get_stats(); - TupleDesc tupdesc = 
CreateTemplateTupleDesc(ATTNUM); - TupleDescInitEntry(tupdesc, (AttrNumber)1, "segid", INT4OID, -1 /* typmod */, - 0 /* attdim */); - TupleDescInitEntry(tupdesc, (AttrNumber)2, "total_messages", INT8OID, - -1 /* typmod */, 0 /* attdim */); - TupleDescInitEntry(tupdesc, (AttrNumber)3, "send_failures", INT8OID, - -1 /* typmod */, 0 /* attdim */); - TupleDescInitEntry(tupdesc, (AttrNumber)4, "connection_failures", INT8OID, - -1 /* typmod */, 0 /* attdim */); - TupleDescInitEntry(tupdesc, (AttrNumber)5, "other_errors", INT8OID, - -1 /* typmod */, 0 /* attdim */); - TupleDescInitEntry(tupdesc, (AttrNumber)6, "max_message_size", INT4OID, - -1 /* typmod */, 0 /* attdim */); - tupdesc = BlessTupleDesc(tupdesc); - Datum values[ATTNUM]; - bool nulls[ATTNUM]; - MemSet(nulls, 0, sizeof(nulls)); - values[0] = Int32GetDatum(GpIdentity.segindex); - values[1] = Int64GetDatum(stats.total); - values[2] = Int64GetDatum(stats.failed_sends); - values[3] = Int64GetDatum(stats.failed_connects); - values[4] = Int64GetDatum(stats.failed_other); - values[5] = Int32GetDatum(stats.max_message_size); - HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); - Datum result = HeapTupleGetDatum(tuple); - PG_RETURN_DATUM(result); +Datum +gpsc_functions_get(FunctionCallInfo fcinfo) +{ + const int ATTNUM = 6; + check_stats_loaded(); + auto stats = GpscStat::get_stats(); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid", INT4OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "total_messages", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "send_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "connection_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "other_errors", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, 
(AttrNumber) 6, "max_message_size", INT4OID, + -1 /* typmod */, 0 /* attdim */); + tupdesc = BlessTupleDesc(tupdesc); + Datum values[ATTNUM]; + bool nulls[ATTNUM]; + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(GpIdentity.segindex); + values[1] = Int64GetDatum(stats.total); + values[2] = Int64GetDatum(stats.failed_sends); + values[3] = Int64GetDatum(stats.failed_connects); + values[4] = Int64GetDatum(stats.failed_other); + values[5] = Int32GetDatum(stats.max_message_size); + HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); + Datum result = HeapTupleGetDatum(tuple); + PG_RETURN_DATUM(result); } -void test_uds_stop_server() { - if (test_server_fd >= 0) { - close(test_server_fd); - test_server_fd = -1; - } - if (test_sock_path) { - unlink(test_sock_path); - pfree(test_sock_path); - test_sock_path = NULL; - } +void +test_uds_stop_server() +{ + if (test_server_fd >= 0) + { + close(test_server_fd); + test_server_fd = -1; + } + if (test_sock_path) + { + unlink(test_sock_path); + pfree(test_sock_path); + test_sock_path = NULL; + } } -void test_uds_start_server(const char *path) { - struct sockaddr_un addr = {.sun_family = AF_UNIX}; +void +test_uds_start_server(const char *path) +{ + struct sockaddr_un addr = {.sun_family = AF_UNIX}; - if (strlen(path) >= sizeof(addr.sun_path)) - ereport(ERROR, (errmsg("path too long"))); + if (strlen(path) >= sizeof(addr.sun_path)) + ereport(ERROR, (errmsg("path too long"))); - test_uds_stop_server(); + test_uds_stop_server(); - strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); - test_sock_path = MemoryContextStrdup(TopMemoryContext, path); - unlink(path); + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + test_sock_path = MemoryContextStrdup(TopMemoryContext, path); + unlink(path); - if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || - bind(test_server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 || - listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) { - test_uds_stop_server(); - 
ereport(ERROR, (errmsg("socket setup failed: %m"))); - } + if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(test_server_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0 || + listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) + { + test_uds_stop_server(); + ereport(ERROR, (errmsg("socket setup failed: %m"))); + } } -int64 test_uds_receive(int timeout_ms) { - char buf[TEST_RCV_BUF_SIZE]; - int rc; - struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN}; - int64 total = 0; - - if (test_server_fd < 0) - ereport(ERROR, (errmsg("server not started"))); - - for (;;) { - CHECK_FOR_INTERRUPTS(); - rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS)); - if (rc > 0) - break; - if (rc < 0 && errno != EINTR) - ereport(ERROR, (errmsg("poll: %m"))); - timeout_ms -= TEST_POLL_TIMEOUT_MS; - if (timeout_ms <= 0) - return total; - } - - if (pfd.revents & POLLIN) { - int client = accept(test_server_fd, NULL, NULL); - ssize_t n; - - if (client < 0) - ereport(ERROR, (errmsg("accept: %m"))); - - while ((n = recv(client, buf, sizeof(buf), 0)) != 0) { - if (n > 0) - total += n; - else if (errno != EINTR) - break; - } - - close(client); - } - - return total; +int64 +test_uds_receive(int timeout_ms) +{ + char buf[TEST_RCV_BUF_SIZE]; + int rc; + struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN}; + int64 total = 0; + + if (test_server_fd < 0) + ereport(ERROR, (errmsg("server not started"))); + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS)); + if (rc > 0) + break; + if (rc < 0 && errno != EINTR) + ereport(ERROR, (errmsg("poll: %m"))); + timeout_ms -= TEST_POLL_TIMEOUT_MS; + if (timeout_ms <= 0) + return total; + } + + if (pfd.revents & POLLIN) + { + int client = accept(test_server_fd, NULL, NULL); + ssize_t n; + + if (client < 0) + ereport(ERROR, (errmsg("accept: %m"))); + + while ((n = recv(client, buf, sizeof(buf), 0)) != 0) + { + if (n > 0) + total += n; + else if (errno != EINTR) + break; + } + + 
close(client); + } + + return total; } \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/hook_wrappers.h b/gpcontrib/gp_stats_collector/src/hook_wrappers.h index 06c8d064404..a04f5a95144 100644 --- a/gpcontrib/gp_stats_collector/src/hook_wrappers.h +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.h @@ -25,7 +25,8 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef HOOK_WRAPPERS_H +#define HOOK_WRAPPERS_H #ifdef __cplusplus extern "C" { @@ -45,4 +46,5 @@ extern void test_uds_stop_server(); #ifdef __cplusplus } -#endif \ No newline at end of file +#endif +#endif /* HOOK_WRAPPERS_H */ diff --git a/gpcontrib/gp_stats_collector/src/log/LogOps.cpp b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp index ef4f39c0749..865e0f6ce3f 100644 --- a/gpcontrib/gp_stats_collector/src/log/LogOps.cpp +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp @@ -43,8 +43,8 @@ extern "C" { #include "catalog/pg_type.h" #include "cdb/cdbvars.h" #include "commands/tablecmds.h" -#include "funcapi.h" #include "fmgr.h" +#include "funcapi.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -52,107 +52,122 @@ extern "C" { #include "utils/timestamp.h" } -void init_log() { - Oid namespaceId; - Oid relationId; - ObjectAddress tableAddr; - ObjectAddress schemaAddr; - - namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); - - /* Create table */ - relationId = heap_create_with_catalog( - log_relname.data() /* relname */, namespaceId /* namespace */, - 0 /* tablespace */, InvalidOid /* relid */, InvalidOid /* reltype oid */, - InvalidOid /* reloftypeid */, GetUserId() /* owner */, HEAP_TABLE_AM_OID, - DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, RELKIND_RELATION, - RELPERSISTENCE_PERMANENT, false /* shared_relation */, false /* mapped_relation */, ONCOMMIT_NOOP, - NULL /* GP Policy */, (Datum)0 /* reloptions */, false /* use_user_acl */, true /* 
allow_system_table_mods */, true /* is_internal */, - InvalidOid /* relrewrite */, NULL /* typaddress */, - false /* valid_opts */); - - /* Make the table visible */ - CommandCounterIncrement(); - - /* Record dependency of the table on the schema */ - if (OidIsValid(relationId) && OidIsValid(namespaceId)) { - ObjectAddressSet(tableAddr, RelationRelationId, relationId); - ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId); - - /* Table can be dropped only via DROP EXTENSION */ - recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION); - } else { - ereport(NOTICE, (errmsg("GPSC failed to create log table or schema"))); - } - - /* Make changes visible */ - CommandCounterIncrement(); +void +init_log() +{ + Oid namespaceId; + Oid relationId; + ObjectAddress tableAddr; + ObjectAddress schemaAddr; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + + /* Create table */ + relationId = heap_create_with_catalog( + log_relname.data() /* relname */, namespaceId /* namespace */, + 0 /* tablespace */, InvalidOid /* relid */, + InvalidOid /* reltype oid */, InvalidOid /* reloftypeid */, + GetUserId() /* owner */, HEAP_TABLE_AM_OID, + DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, + RELKIND_RELATION, RELPERSISTENCE_PERMANENT, false /* shared_relation */, + false /* mapped_relation */, ONCOMMIT_NOOP, NULL /* GP Policy */, + (Datum) 0 /* reloptions */, false /* use_user_acl */, + true /* allow_system_table_mods */, true /* is_internal */, + InvalidOid /* relrewrite */, NULL /* typaddress */, + false /* valid_opts */); + + /* Make the table visible */ + CommandCounterIncrement(); + + /* Record dependency of the table on the schema */ + if (OidIsValid(relationId) && OidIsValid(namespaceId)) + { + ObjectAddressSet(tableAddr, RelationRelationId, relationId); + ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId); + + /* Table can be dropped only via DROP EXTENSION */ + recordDependencyOn(&tableAddr, 
&schemaAddr, DEPENDENCY_EXTENSION); + } + else + { + ereport(NOTICE, (errmsg("GPSC failed to create log table or schema"))); + } + + /* Make changes visible */ + CommandCounterIncrement(); } -void insert_log(const gpsc::SetQueryReq &req, bool utility) { - Oid namespaceId; - Oid relationId; - Relation rel; - HeapTuple tuple; - - /* Return if xact is not valid (needed for catalog lookups). */ - if (!IsTransactionState()) { - return; - } - - /* Return if extension was not loaded */ - namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */); - if (!OidIsValid(namespaceId)) { - return; - } - - /* Return if the table was not created yet */ - relationId = get_relname_relid(log_relname.data(), namespaceId); - if (!OidIsValid(relationId)) { - return; - } - - bool nulls[natts_gpsc_log]; - Datum values[natts_gpsc_log]; - - memset(nulls, true, sizeof(nulls)); - memset(values, 0, sizeof(values)); - - extract_query_req(req, "", values, nulls); - nulls[attnum_gpsc_log_utility] = false; - values[attnum_gpsc_log_utility] = BoolGetDatum(utility); - - rel = heap_open(relationId, RowExclusiveLock); - - /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls +void +insert_log(const gpsc::SetQueryReq &req, bool utility) +{ + Oid namespaceId; + Oid relationId; + Relation rel; + HeapTuple tuple; + + /* Return if xact is not valid (needed for catalog lookups). 
*/ + if (!IsTransactionState()) + { + return; + } + + /* Return if extension was not loaded */ + namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */); + if (!OidIsValid(namespaceId)) + { + return; + } + + /* Return if the table was not created yet */ + relationId = get_relname_relid(log_relname.data(), namespaceId); + if (!OidIsValid(relationId)) + { + return; + } + + bool nulls[natts_gpsc_log]; + Datum values[natts_gpsc_log]; + + memset(nulls, true, sizeof(nulls)); + memset(values, 0, sizeof(values)); + + extract_query_req(req, "", values, nulls); + nulls[attnum_gpsc_log_utility] = false; + values[attnum_gpsc_log_utility] = BoolGetDatum(utility); + + rel = heap_open(relationId, RowExclusiveLock); + + /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls * back or aborts */ - tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); - frozen_heap_insert(rel, tuple); + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + frozen_heap_insert(rel, tuple); - heap_freetuple(tuple); - /* Keep lock on rel until end of xact */ - heap_close(rel, NoLock); + heap_freetuple(tuple); + /* Keep lock on rel until end of xact */ + heap_close(rel, NoLock); - /* Make changes visible */ - CommandCounterIncrement(); + /* Make changes visible */ + CommandCounterIncrement(); } -void truncate_log() { - Oid namespaceId; - Oid relationId; - Relation relation; +void +truncate_log() +{ + Oid namespaceId; + Oid relationId; + Relation relation; - namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); - relationId = get_relname_relid(log_relname.data(), namespaceId); + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + relationId = get_relname_relid(log_relname.data(), namespaceId); - relation = heap_open(relationId, AccessExclusiveLock); + relation = heap_open(relationId, AccessExclusiveLock); - /* Truncate the main table */ - heap_truncate_one_rel(relation); + /* Truncate the main 
table */ + heap_truncate_one_rel(relation); - /* Keep lock on rel until end of xact */ - heap_close(relation, NoLock); + /* Keep lock on rel until end of xact */ + heap_close(relation, NoLock); - /* Make changes visible */ - CommandCounterIncrement(); + /* Make changes visible */ + CommandCounterIncrement(); } \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/log/LogOps.h b/gpcontrib/gp_stats_collector/src/log/LogOps.h index f784270bb8f..45d79cd4560 100644 --- a/gpcontrib/gp_stats_collector/src/log/LogOps.h +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.h @@ -25,7 +25,8 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef LOGOPS_H +#define LOGOPS_H #include @@ -44,3 +45,5 @@ void truncate_log(); /* INSERT INTO gpsc.__log VALUES (...) */ void insert_log(const gpsc::SetQueryReq &req, bool utility); + +#endif /* LOGOPS_H */ diff --git a/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp index f9f43fac2fd..254b1b04af4 100644 --- a/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp @@ -25,138 +25,165 @@ *------------------------------------------------------------------------- */ -#include "google/protobuf/reflection.h" #include "google/protobuf/descriptor.h" +#include "google/protobuf/reflection.h" #include "google/protobuf/timestamp.pb.h" #include "LogSchema.h" -const std::unordered_map &proto_name_to_col_idx() { - static const auto name_col_idx = [] { - std::unordered_map map; - map.reserve(log_tbl_desc.size()); - - for (size_t idx = 0; idx < natts_gpsc_log; ++idx) { - map.emplace(log_tbl_desc[idx].proto_field_name, idx); - } - - return map; - }(); - return name_col_idx; +const std::unordered_map & +proto_name_to_col_idx() +{ + static const auto name_col_idx = [] { + std::unordered_map map; + map.reserve(log_tbl_desc.size()); + + for (size_t idx = 0; idx < natts_gpsc_log; ++idx) + { 
+ map.emplace(log_tbl_desc[idx].proto_field_name, idx); + } + + return map; + }(); + return name_col_idx; } -TupleDesc DescribeTuple() { - TupleDesc tupdesc = CreateTemplateTupleDesc(natts_gpsc_log); +TupleDesc +DescribeTuple() +{ + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_gpsc_log); - for (size_t anum = 1; anum <= natts_gpsc_log; ++anum) { - TupleDescInitEntry(tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), - log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, - 0 /* attdim */); - } + for (size_t anum = 1; anum <= natts_gpsc_log; ++anum) + { + TupleDescInitEntry( + tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), + log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, 0 /* attdim */); + } - return tupdesc; + return tupdesc; } -Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts) { - TimestampTz pgtimestamp = - (TimestampTz)ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000); - pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY; - return TimestampTzGetDatum(pgtimestamp); +Datum +protots_to_timestamptz(const google::protobuf::Timestamp &ts) +{ + TimestampTz pgtimestamp = + (TimestampTz) ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000); + pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY; + return TimestampTzGetDatum(pgtimestamp); } -Datum field_to_datum(const google::protobuf::FieldDescriptor *field, - const google::protobuf::Reflection *reflection, - const google::protobuf::Message &msg) { - using namespace google::protobuf; - - switch (field->cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - return Int32GetDatum(reflection->GetInt32(msg, field)); - case FieldDescriptor::CPPTYPE_INT64: - return Int64GetDatum(reflection->GetInt64(msg, field)); - case FieldDescriptor::CPPTYPE_UINT32: - return Int64GetDatum(reflection->GetUInt32(msg, field)); - case FieldDescriptor::CPPTYPE_UINT64: - return Int64GetDatum( - static_cast(reflection->GetUInt64(msg, field))); - case 
FieldDescriptor::CPPTYPE_DOUBLE: - return Float8GetDatum(reflection->GetDouble(msg, field)); - case FieldDescriptor::CPPTYPE_FLOAT: - return Float4GetDatum(reflection->GetFloat(msg, field)); - case FieldDescriptor::CPPTYPE_BOOL: - return BoolGetDatum(reflection->GetBool(msg, field)); - case FieldDescriptor::CPPTYPE_ENUM: - return CStringGetTextDatum(reflection->GetEnum(msg, field)->name().data()); - case FieldDescriptor::CPPTYPE_STRING: - return CStringGetTextDatum(reflection->GetString(msg, field).c_str()); - default: - return (Datum)0; - } +Datum +field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg) +{ + using namespace google::protobuf; + + switch (field->cpp_type()) + { + case FieldDescriptor::CPPTYPE_INT32: + return Int32GetDatum(reflection->GetInt32(msg, field)); + case FieldDescriptor::CPPTYPE_INT64: + return Int64GetDatum(reflection->GetInt64(msg, field)); + case FieldDescriptor::CPPTYPE_UINT32: + return Int64GetDatum(reflection->GetUInt32(msg, field)); + case FieldDescriptor::CPPTYPE_UINT64: + return Int64GetDatum( + static_cast(reflection->GetUInt64(msg, field))); + case FieldDescriptor::CPPTYPE_DOUBLE: + return Float8GetDatum(reflection->GetDouble(msg, field)); + case FieldDescriptor::CPPTYPE_FLOAT: + return Float4GetDatum(reflection->GetFloat(msg, field)); + case FieldDescriptor::CPPTYPE_BOOL: + return BoolGetDatum(reflection->GetBool(msg, field)); + case FieldDescriptor::CPPTYPE_ENUM: + return CStringGetTextDatum( + reflection->GetEnum(msg, field)->name().data()); + case FieldDescriptor::CPPTYPE_STRING: + return CStringGetTextDatum( + reflection->GetString(msg, field).c_str()); + default: + return (Datum) 0; + } } -void process_field(const google::protobuf::FieldDescriptor *field, - const google::protobuf::Reflection *reflection, - const google::protobuf::Message &msg, - const std::string &field_name, Datum *values, bool *nulls) { - - auto proto_idx_map 
= proto_name_to_col_idx(); - auto it = proto_idx_map.find(field_name); - - if (it == proto_idx_map.end()) { - ereport(NOTICE, - (errmsg("GPSC protobuf field %s is not registered in log table", - field_name.c_str()))); - return; - } - - int idx = it->second; - - if (!reflection->HasField(msg, field)) { - nulls[idx] = true; - return; - } - - if (field->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE && - field->message_type()->full_name() == "google.protobuf.Timestamp") { - const auto &ts = static_cast( - reflection->GetMessage(msg, field)); - values[idx] = protots_to_timestamptz(ts); - } else { - values[idx] = field_to_datum(field, reflection, msg); - } - nulls[idx] = false; - - return; +void +process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls) +{ + auto proto_idx_map = proto_name_to_col_idx(); + auto it = proto_idx_map.find(field_name); + + if (it == proto_idx_map.end()) + { + ereport(NOTICE, + (errmsg("GPSC protobuf field %s is not registered in log table", + field_name.c_str()))); + return; + } + + int idx = it->second; + + if (!reflection->HasField(msg, field)) + { + nulls[idx] = true; + return; + } + + if (field->cpp_type() == + google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() == "google.protobuf.Timestamp") + { + const auto &ts = static_cast( + reflection->GetMessage(msg, field)); + values[idx] = protots_to_timestamptz(ts); + } + else + { + values[idx] = field_to_datum(field, reflection, msg); + } + nulls[idx] = false; + + return; } -void extract_query_req(const google::protobuf::Message &msg, - const std::string &prefix, Datum *values, bool *nulls) { - using namespace google::protobuf; - - const Descriptor *descriptor = msg.GetDescriptor(); - const Reflection *reflection = msg.GetReflection(); - - for (int i = 0; i < descriptor->field_count(); 
++i) { - const FieldDescriptor *field = descriptor->field(i); - - // For now, we do not log any repeated fields plus they need special - // treatment. - if (field->is_repeated()) { - continue; - } - - std::string curr_pref = prefix.empty() ? "" : prefix + "."; - std::string field_name = curr_pref + field->name().data(); - - if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && - field->message_type()->full_name() != "google.protobuf.Timestamp") { - - if (reflection->HasField(msg, field)) { - const Message &nested = reflection->GetMessage(msg, field); - extract_query_req(nested, field_name, values, nulls); - } - } else { - process_field(field, reflection, msg, field_name, values, nulls); - } - } +void +extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls) +{ + using namespace google::protobuf; + + const Descriptor *descriptor = msg.GetDescriptor(); + const Reflection *reflection = msg.GetReflection(); + + for (int i = 0; i < descriptor->field_count(); ++i) + { + const FieldDescriptor *field = descriptor->field(i); + + // For now, we do not log any repeated fields plus they need special + // treatment. + if (field->is_repeated()) + { + continue; + } + + std::string curr_pref = prefix.empty() ? 
"" : prefix + "."; + std::string field_name = curr_pref + field->name().data(); + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() != "google.protobuf.Timestamp") + { + if (reflection->HasField(msg, field)) + { + const Message &nested = reflection->GetMessage(msg, field); + extract_query_req(nested, field_name, values, nulls); + } + } + else + { + process_field(field, reflection, msg, field_name, values, nulls); + } + } } diff --git a/gpcontrib/gp_stats_collector/src/log/LogSchema.h b/gpcontrib/gp_stats_collector/src/log/LogSchema.h index 8754741823a..f6c2247370a 100644 --- a/gpcontrib/gp_stats_collector/src/log/LogSchema.h +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.h @@ -25,7 +25,8 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef LOGSCHEMA_H +#define LOGSCHEMA_H #include #include @@ -37,26 +38,29 @@ extern "C" { #include "access/htup_details.h" #include "access/tupdesc.h" #include "catalog/pg_type.h" -#include "utils/timestamp.h" #include "utils/builtins.h" +#include "utils/timestamp.h" } -namespace google { -namespace protobuf { +namespace google +{ +namespace protobuf +{ class FieldDescriptor; class Message; class Reflection; class Timestamp; -} // namespace protobuf -} // namespace google +} // namespace protobuf +} // namespace google inline constexpr std::string_view schema_name = "gpsc"; inline constexpr std::string_view log_relname = "__log"; -struct LogDesc { - std::string_view pg_att_name; - std::string_view proto_field_name; - Oid type_oid; +struct LogDesc +{ + std::string_view pg_att_name; + std::string_view proto_field_name; + Oid type_oid; }; /* @@ -175,14 +179,14 @@ TupleDesc DescribeTuple(); Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts); Datum field_to_datum(const google::protobuf::FieldDescriptor *field, - const google::protobuf::Reflection *reflection, - const google::protobuf::Message &msg); + const 
google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg); /* Process a single proto field and store in values/nulls arrays */ void process_field(const google::protobuf::FieldDescriptor *field, - const google::protobuf::Reflection *reflection, - const google::protobuf::Message &msg, - const std::string &field_name, Datum *values, bool *nulls); + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls); /* * Extracts values from msg into values/nulls arrays. Caller must @@ -190,4 +194,6 @@ void process_field(const google::protobuf::FieldDescriptor *field, * to true for nested messages if parent message is missing). */ void extract_query_req(const google::protobuf::Message &msg, - const std::string &prefix, Datum *values, bool *nulls); + const std::string &prefix, Datum *values, bool *nulls); + +#endif /* LOGSCHEMA_H */ diff --git a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp index 4e3f6dae99f..de54a716016 100644 --- a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp +++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp @@ -30,223 +30,287 @@ extern "C" { #include "postgres.h" -#include "utils/guc.h" +#include "access/htup.h" +#include "access/tupdesc.h" +#include "cdb/cdbexplain.h" #include "commands/dbcommands.h" -#include "commands/resgroupcmds.h" -#include "utils/builtins.h" -#include "utils/varlena.h" -#include "nodes/pg_list.h" #include "commands/explain.h" +#include "commands/resgroupcmds.h" #include "executor/instrument.h" -#include "access/tupdesc.h" -#include "access/htup.h" +#include "nodes/pg_list.h" +#include "stat_statements_parser/pg_stat_statements_parser.h" +#include "utils/builtins.h" #include "utils/elog.h" -#include "cdb/cdbexplain.h" -#include "stat_statements_parser/pg_stat_statements_ya_parser.h" +#include "utils/guc.h" +#include 
"utils/varlena.h" } -namespace { +namespace +{ template -auto wrap(Func &&func, Args &&...args) noexcept(!Throws) - -> decltype(func(std::forward(args)...)) { - - using RetType = decltype(func(std::forward(args)...)); - - // Empty struct for void return type. - struct VoidResult {}; - using ResultHolder = std::conditional_t, VoidResult, - std::optional>; - - bool success; - ErrorData *edata; - ResultHolder result_holder; - - PG_TRY(); - { - if constexpr (!std::is_void_v) { - result_holder.emplace(func(std::forward(args)...)); - } else { - func(std::forward(args)...); - } - edata = NULL; - success = true; - } - PG_CATCH(); - { - MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); - edata = CopyErrorData(); - MemoryContextSwitchTo(oldctx); - FlushErrorState(); - success = false; - } - PG_END_TRY(); - - if (!success) { - std::string err; - if (edata && edata->message) { - err = std::string(edata->message); - } else { - err = "Unknown error occurred"; - } - - if (edata) { - FreeErrorData(edata); - } - - if constexpr (Throws) { - throw std::runtime_error(err); - } - - if constexpr (!std::is_void_v) { - return RetType{}; - } else { - return; - } - } - - if constexpr (!std::is_void_v) { - return *std::move(result_holder); - } else { - return; - } +auto +wrap(Func &&func, Args &&...args) noexcept(!Throws) + -> decltype(func(std::forward(args)...)) +{ + using RetType = decltype(func(std::forward(args)...)); + + // Empty struct for void return type. 
+ struct VoidResult + { + }; + using ResultHolder = std::conditional_t, VoidResult, + std::optional>; + + bool success; + ErrorData *edata; + ResultHolder result_holder; + + PG_TRY(); + { + if constexpr (!std::is_void_v) + { + result_holder.emplace(func(std::forward(args)...)); + } + else + { + func(std::forward(args)...); + } + edata = NULL; + success = true; + } + PG_CATCH(); + { + MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + MemoryContextSwitchTo(oldctx); + FlushErrorState(); + success = false; + } + PG_END_TRY(); + + if (!success) + { + std::string err; + if (edata && edata->message) + { + err = std::string(edata->message); + } + else + { + err = "Unknown error occurred"; + } + + if (edata) + { + FreeErrorData(edata); + } + + if constexpr (Throws) + { + throw std::runtime_error(err); + } + + if constexpr (!std::is_void_v) + { + return RetType{}; + } + else + { + return; + } + } + + if constexpr (!std::is_void_v) + { + return *std::move(result_holder); + } + else + { + return; + } } template -auto wrap_throw(Func &&func, Args &&...args) - -> decltype(func(std::forward(args)...)) { - return wrap(std::forward(func), std::forward(args)...); +auto +wrap_throw(Func &&func, Args &&...args) + -> decltype(func(std::forward(args)...)) +{ + return wrap(std::forward(func), std::forward(args)...); } template -auto wrap_noexcept(Func &&func, Args &&...args) noexcept - -> decltype(func(std::forward(args)...)) { - return wrap(std::forward(func), std::forward(args)...); +auto +wrap_noexcept(Func &&func, Args &&...args) noexcept + -> decltype(func(std::forward(args)...)) +{ + return wrap(std::forward(func), std::forward(args)...); } -} // namespace +} // namespace -void *gpdb::palloc(Size size) { return wrap_throw(::palloc, size); } +void * +gpdb::palloc(Size size) +{ + return wrap_throw(::palloc, size); +} -void *gpdb::palloc0(Size size) { return wrap_throw(::palloc0, size); } +void * +gpdb::palloc0(Size size) +{ + return 
wrap_throw(::palloc0, size); +} -char *gpdb::pstrdup(const char *str) { return wrap_throw(::pstrdup, str); } +char * +gpdb::pstrdup(const char *str) +{ + return wrap_throw(::pstrdup, str); +} -char *gpdb::get_database_name(Oid dbid) noexcept { - return wrap_noexcept(::get_database_name, dbid); +char * +gpdb::get_database_name(Oid dbid) noexcept +{ + return wrap_noexcept(::get_database_name, dbid); } -bool gpdb::split_identifier_string(char *rawstring, char separator, - List **namelist) noexcept { - return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); +bool +gpdb::split_identifier_string(char *rawstring, char separator, + List **namelist) noexcept +{ + return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist); } -ExplainState gpdb::get_explain_state(QueryDesc *query_desc, - bool costs) noexcept { - return wrap_noexcept([&]() { - ExplainState *es = NewExplainState(); - es->costs = costs; - es->verbose = true; - es->format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(es); - ExplainPrintPlan(es, query_desc); - ExplainEndOutput(es); - return *es; - }); +ExplainState +gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept +{ + return wrap_noexcept([&]() { + ExplainState *es = NewExplainState(); + es->costs = costs; + es->verbose = true; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + ExplainPrintPlan(es, query_desc); + ExplainEndOutput(es); + return *es; + }); } -ExplainState gpdb::get_analyze_state(QueryDesc *query_desc, - bool analyze) noexcept { - return wrap_noexcept([&]() { - ExplainState *es = NewExplainState(); - es->analyze = analyze; - es->verbose = true; - es->buffers = es->analyze; - es->timing = es->analyze; - es->summary = es->analyze; - es->format = EXPLAIN_FORMAT_TEXT; - ExplainBeginOutput(es); - if (analyze) { - ExplainPrintPlan(es, query_desc); - ExplainPrintExecStatsEnd(es, query_desc); - } - ExplainEndOutput(es); - return *es; - }); +ExplainState +gpdb::get_analyze_state(QueryDesc 
*query_desc, bool analyze) noexcept +{ + return wrap_noexcept([&]() { + ExplainState *es = NewExplainState(); + es->analyze = analyze; + es->verbose = true; + es->buffers = es->analyze; + es->timing = es->analyze; + es->summary = es->analyze; + es->format = EXPLAIN_FORMAT_TEXT; + ExplainBeginOutput(es); + if (analyze) + { + ExplainPrintPlan(es, query_desc); + ExplainPrintExecStatsEnd(es, query_desc); + } + ExplainEndOutput(es); + return *es; + }); } -Instrumentation *gpdb::instr_alloc(size_t n, int instrument_options, - bool async_mode) { - return wrap_throw(InstrAlloc, n, instrument_options, async_mode); +Instrumentation * +gpdb::instr_alloc(size_t n, int instrument_options, bool async_mode) +{ + return wrap_throw(InstrAlloc, n, instrument_options, async_mode); } -HeapTuple gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, - bool *isnull) { - if (!tupleDescriptor || !values || !isnull) - throw std::runtime_error( - "Invalid input parameters for heap tuple formation"); +HeapTuple +gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull) +{ + if (!tupleDescriptor || !values || !isnull) + throw std::runtime_error( + "Invalid input parameters for heap tuple formation"); - return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); + return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull); } -void gpdb::pfree(void *pointer) noexcept { - // Note that ::pfree asserts that pointer != NULL. - if (!pointer) - return; +void +gpdb::pfree(void *pointer) noexcept +{ + // Note that ::pfree asserts that pointer != NULL. 
+ if (!pointer) + return; - wrap_noexcept(::pfree, pointer); + wrap_noexcept(::pfree, pointer); } -MemoryContext gpdb::mem_ctx_switch_to(MemoryContext context) noexcept { - return MemoryContextSwitchTo(context); +MemoryContext +gpdb::mem_ctx_switch_to(MemoryContext context) noexcept +{ + return MemoryContextSwitchTo(context); } -const char *gpdb::get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept { - if (!name) - return nullptr; +const char * +gpdb::get_config_option(const char *name, bool missing_ok, + bool restrict_superuser) noexcept +{ + if (!name) + return nullptr; - return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); + return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser); } -void gpdb::list_free(List *list) noexcept { - if (!list) - return; +void +gpdb::list_free(List *list) noexcept +{ + if (!list) + return; - wrap_noexcept(::list_free, list); + wrap_noexcept(::list_free, list); } CdbExplain_ShowStatCtx * -gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime) { - if (!query_desc) - throw std::runtime_error("Invalid query descriptor"); +gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime) +{ + if (!query_desc) + throw std::runtime_error("Invalid query descriptor"); - return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); + return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime); } -void gpdb::instr_end_loop(Instrumentation *instr) { - if (!instr) - throw std::runtime_error("Invalid instrumentation pointer"); +void +gpdb::instr_end_loop(Instrumentation *instr) +{ + if (!instr) + throw std::runtime_error("Invalid instrumentation pointer"); - wrap_throw(::InstrEndLoop, instr); + wrap_throw(::InstrEndLoop, instr); } -char *gpdb::gen_normquery(const char *query) noexcept { - return wrap_noexcept(::gen_normquery, query); +char * +gpdb::gen_normquery(const char *query) noexcept +{ + 
return wrap_noexcept(::gen_normquery, query); } -StringInfo gpdb::gen_normplan(const char *exec_plan) noexcept { - return wrap_noexcept(::gen_normplan, exec_plan); +StringInfo +gpdb::gen_normplan(const char *exec_plan) noexcept +{ + return wrap_noexcept(::gen_normplan, exec_plan); } -char *gpdb::get_rg_name_for_id(Oid group_id) { - return wrap_throw(GetResGroupNameForId, group_id); +char * +gpdb::get_rg_name_for_id(Oid group_id) +{ + return wrap_throw(GetResGroupNameForId, group_id); } -Oid gpdb::get_rg_id_by_session_id(int session_id) { - return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); +Oid +gpdb::get_rg_id_by_session_id(int session_id) +{ + return wrap_throw(ResGroupGetGroupIdBySessionId, session_id); } -void gpdb::insert_log(const gpsc::SetQueryReq &req, bool utility) { - return wrap_throw(::insert_log, req, utility); +void +gpdb::insert_log(const gpsc::SetQueryReq &req, bool utility) +{ + return wrap_throw(::insert_log, req, utility); } diff --git a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h index 576007f6c7c..5237b6be68a 100644 --- a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h +++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h @@ -25,29 +25,32 @@ *------------------------------------------------------------------------- */ -#pragma once +#ifndef GPDBWRAPPERS_H +#define GPDBWRAPPERS_H extern "C" { #include "postgres.h" -#include "nodes/pg_list.h" +#include "access/htup.h" #include "commands/explain.h" #include "executor/instrument.h" -#include "access/htup.h" +#include "nodes/pg_list.h" #include "utils/elog.h" #include "utils/memutils.h" } -#include -#include #include -#include +#include #include +#include +#include -namespace gpsc { +namespace gpsc +{ class SetQueryReq; -} // namespace gpsc +} // namespace gpsc -namespace gpdb { +namespace gpdb +{ // Functions that call palloc(). // Make sure correct memory context is set. 
@@ -56,14 +59,14 @@ void *palloc0(Size size); char *pstrdup(const char *str); char *get_database_name(Oid dbid) noexcept; bool split_identifier_string(char *rawstring, char separator, - List **namelist) noexcept; + List **namelist) noexcept; ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept; ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept; Instrumentation *instr_alloc(size_t n, int instrument_options, bool async_mode); HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, - bool *isnull); + bool *isnull); CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc, - instr_time starttime); + instr_time starttime); void instr_end_loop(Instrumentation *instr); char *gen_normquery(const char *query) noexcept; StringInfo gen_normplan(const char *executionPlan) noexcept; @@ -74,8 +77,10 @@ void insert_log(const gpsc::SetQueryReq &req, bool utility); void pfree(void *pointer) noexcept; MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept; const char *get_config_option(const char *name, bool missing_ok, - bool restrict_superuser) noexcept; + bool restrict_superuser) noexcept; void list_free(List *list) noexcept; Oid get_rg_id_by_session_id(int session_id); -} // namespace gpdb +} // namespace gpdb + +#endif /* GPDBWRAPPERS_H */ diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c similarity index 82% rename from gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c rename to gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c index e24f53536a4..8e7bd917541 100644 --- a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c @@ -17,10 +17,10 @@ * specific language 
governing permissions and limitations * under the License. * - * pg_stat_statements_ya_parser.c + * pg_stat_statements_parser.c * * IDENTIFICATION - * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.c + * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c * *------------------------------------------------------------------------- */ @@ -42,7 +42,7 @@ #include "utils/memutils.h" #include "utils/queryjumble.h" -#include "pg_stat_statements_ya_parser.h" +#include "pg_stat_statements_parser.h" #ifndef FCONST #define FCONST 260 @@ -67,12 +67,14 @@ static bool need_replace(int token); static char *generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding); -void stat_statements_parser_init(void) +void +stat_statements_parser_init(void) { EnableQueryId(); } -void stat_statements_parser_deinit(void) +void +stat_statements_parser_deinit(void) { /* NO-OP */ } @@ -81,7 +83,8 @@ void stat_statements_parser_deinit(void) static bool need_replace(int token) { - return (token == FCONST) || (token == ICONST) || (token == SCONST) || (token == BCONST) || (token == XCONST); + return (token == FCONST) || (token == ICONST) || (token == SCONST) || + (token == BCONST) || (token == XCONST); } /* @@ -103,14 +106,11 @@ gen_normplan(const char *execution_plan) StringInfo plan_out = makeStringInfo(); ; - yyscanner = scanner_init(execution_plan, - &yyextra, + yyscanner = scanner_init(execution_plan, &yyextra, #if PG_VERSION_NUM >= 120000 - &ScanKeywords, - ScanKeywordTokens + &ScanKeywords, ScanKeywordTokens #else - ScanKeywords, - NumScanKeywords + ScanKeywords, NumScanKeywords #endif ); @@ -137,7 +137,8 @@ gen_normplan(const char *execution_plan) else { /* do not change - just copy as-is */ - tmp_str = strndup((char *)execution_plan + last_yylloc, yylloc - last_yylloc); + tmp_str = strndup((char *) execution_plan + last_yylloc, + yylloc - last_yylloc); appendStringInfoString(plan_out, 
tmp_str); free(tmp_str); } @@ -159,8 +160,8 @@ gen_normplan(const char *execution_plan) static int comp_location(const void *a, const void *b) { - int l = ((const LocationLen *) a)->location; - int r = ((const LocationLen *) b)->location; + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; if (l < r) return -1; @@ -199,35 +200,32 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query) core_yyscan_t yyscanner; core_yy_extra_type yyextra; core_YYSTYPE yylval; - YYLTYPE yylloc; - int last_loc = -1; - int i; + YYLTYPE yylloc; + int last_loc = -1; + int i; /* * Sort the records by location so that we can process them in order while * scanning the query text. */ if (jstate->clocations_count > 1) - qsort(jstate->clocations, jstate->clocations_count, - sizeof(LocationLen), comp_location); + qsort(jstate->clocations, jstate->clocations_count, sizeof(LocationLen), + comp_location); locs = jstate->clocations; /* initialize the flex scanner --- should match raw_parser() */ - yyscanner = scanner_init(query, - &yyextra, - &ScanKeywords, - ScanKeywordTokens); + yyscanner = scanner_init(query, &yyextra, &ScanKeywords, ScanKeywordTokens); /* Search for each constant, in sequence */ for (i = 0; i < jstate->clocations_count; i++) { - int loc = locs[i].location; - int tok; + int loc = locs[i].location; + int tok; Assert(loc >= 0); if (loc <= last_loc) - continue; /* Duplicate constant, ignore */ + continue; /* Duplicate constant, ignore */ /* Lex tokens until we find the desired constant */ for (;;) @@ -236,7 +234,7 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query) /* We should not hit end-of-string, but if we do, behave sanely */ if (tok == 0) - break; /* out of inner for-loop */ + break; /* out of inner for-loop */ /* * We should find the token position exactly, but if we somehow @@ -260,7 +258,7 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query) */ tok = core_yylex(&yylval, &yylloc, yyscanner); 
if (tok == 0) - break; /* out of inner for-loop */ + break; /* out of inner for-loop */ } /* @@ -268,7 +266,7 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query) * byte after the text of the current token in scanbuf. */ locs[i].length = strlen(yyextra.scanbuf + loc); - break; /* out of inner for-loop */ + break; /* out of inner for-loop */ } } @@ -299,14 +297,13 @@ static char * generate_normalized_query(JumbleState *jstate, const char *query, int *query_len_p, int encoding) { - char *norm_query; - int query_len = *query_len_p; - int i, - len_to_wrt, /* Length (in bytes) to write */ - quer_loc = 0, /* Source query byte location */ - n_quer_loc = 0, /* Normalized query byte location */ - last_off = 0, /* Offset from start for previous tok */ - last_tok_len = 0; /* Length (in bytes) of that tok */ + char *norm_query; + int query_len = *query_len_p; + int i, len_to_wrt, /* Length (in bytes) to write */ + quer_loc = 0, /* Source query byte location */ + n_quer_loc = 0, /* Normalized query byte location */ + last_off = 0, /* Offset from start for previous tok */ + last_tok_len = 0; /* Length (in bytes) of that tok */ /* * Get constants' lengths (core system only gives us locations). 
Note @@ -319,14 +316,14 @@ generate_normalized_query(JumbleState *jstate, const char *query, for (i = 0; i < jstate->clocations_count; i++) { - int off, /* Offset from start for cur tok */ - tok_len; /* Length (in bytes) of that tok */ + int off, /* Offset from start for cur tok */ + tok_len; /* Length (in bytes) of that tok */ off = jstate->clocations[i].location; tok_len = jstate->clocations[i].length; if (tok_len < 0) - continue; /* ignore any duplicates */ + continue; /* ignore any duplicates */ /* Copy next chunk (what precedes the next constant) */ len_to_wrt = off - last_off; @@ -361,18 +358,21 @@ generate_normalized_query(JumbleState *jstate, const char *query, return norm_query; } -char *gen_normquery(const char *query) +char * +gen_normquery(const char *query) { - if (!query) { + if (!query) + { return NULL; } JumbleState jstate; - jstate.jumble = (unsigned char *)palloc(JUMBLE_SIZE); + jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE); jstate.jumble_len = 0; jstate.clocations_buf_size = 32; - jstate.clocations = (LocationLen *) - palloc(jstate.clocations_buf_size * sizeof(LocationLen)); + jstate.clocations = (LocationLen *) palloc(jstate.clocations_buf_size * + sizeof(LocationLen)); jstate.clocations_count = 0; int query_len = strlen(query); - return generate_normalized_query(&jstate, query, &query_len, GetDatabaseEncoding()); + return generate_normalized_query(&jstate, query, &query_len, + GetDatabaseEncoding()); } \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h similarity index 87% rename from gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h rename to gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h index a613ba04259..b6c5dea7b36 100644 --- 
a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h @@ -17,19 +17,19 @@ * specific language governing permissions and limitations * under the License. * - * pg_stat_statements_ya_parser.h + * pg_stat_statements_parser.h * * IDENTIFICATION - * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_ya_parser.h + * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h * *------------------------------------------------------------------------- */ -#pragma once +#ifndef PG_STAT_STATEMENTS_PARSER_H +#define PG_STAT_STATEMENTS_PARSER_H #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif extern void stat_statements_parser_init(void); @@ -41,3 +41,5 @@ char *gen_normquery(const char *query); #ifdef __cplusplus } #endif + +#endif /* PG_STAT_STATEMENTS_PARSER_H */ diff --git a/pom.xml b/pom.xml index b1825dec460..bf1bd47b22d 100644 --- a/pom.xml +++ b/pom.xml @@ -1273,9 +1273,6 @@ code or new licensing patterns. introduced by Cloudberry. --> gpcontrib/gp_stats_collector/gp_stats_collector.control - gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto - gpcontrib/gp_stats_collector/protos/gpsc_plan.proto - gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto gpcontrib/gp_stats_collector/.clang-format gpcontrib/gp_stats_collector/Makefile From 6a845282894167744655924b78534225eec690a6 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 31 Mar 2026 14:15:58 +0300 Subject: [PATCH 47/49] [gp_stats_collector] Wrap hook call in try/catch on error path Add PG_TRY/PG_CATCH around query_info_collect_hook in PortalCleanup error path to prevent exceptions from propagating during cleanup. 
--- src/backend/commands/portalcmds.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 0ea5874e884..e23dc1d9c43 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -374,10 +374,22 @@ PortalCleanup(Portal portal) FreeQueryDesc(queryDesc); CurrentResourceOwner = saveResourceOwner; - } else { + } + else + { /* GPDB hook for collecting query info */ if (queryDesc->gpsc_query_key && query_info_collect_hook) - (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); + { + PG_TRY(); + { + (*query_info_collect_hook)(METRICS_QUERY_ERROR, queryDesc); + } + PG_CATCH(); + { + FlushErrorState(); + } + PG_END_TRY(); + } } } From 157f1da0854589ec3afc220d1ed4db7b09b00763 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Tue, 31 Mar 2026 14:33:20 +0300 Subject: [PATCH 48/49] [gp_stats_collector] Adapt namings for Cloudberry Rename ON MASTER to ON COORDINATOR in test SQL. Prefer pg_usleep() over std::this_thread::sleep_for(). Add pg_unreachable() after ereport(ERROR). Widen motion stats fields to uint64. 
--- .../gp_stats_collector--1.0--1.1.sql | 10 +++++----- .../gp_stats_collector--1.0.sql | 6 +++--- .../gp_stats_collector--1.1.sql | 16 ++++++++-------- .../gp_stats_collector/protos/gpsc_metrics.proto | 6 +++--- .../gp_stats_collector/src/UDSConnector.cpp | 4 +--- .../gp_stats_collector/src/hook_wrappers.cpp | 1 + 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql index 4e0157117e9..398f03b4fa9 100644 --- a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql @@ -25,7 +25,7 @@ DROP FUNCTION __gpsc_stat_messages_reset_f_on_master(); CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; +LANGUAGE C EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() RETURNS SETOF void @@ -39,12 +39,12 @@ $$ SELECT gpsc.__stat_messages_reset_f_on_master(); SELECT gpsc.__stat_messages_reset_f_on_segments(); $$ -LANGUAGE SQL EXECUTE ON MASTER; +LANGUAGE SQL EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__stat_messages_f_on_master() RETURNS SETOF record AS 'MODULE_PATHNAME', 'gpsc_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__stat_messages_f_on_segments() RETURNS SETOF record @@ -77,7 +77,7 @@ ORDER BY segid; CREATE FUNCTION gpsc.__init_log_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__init_log_on_segments() RETURNS SETOF void @@ -97,7 +97,7 @@ CREATE VIEW gpsc.log AS CREATE FUNCTION gpsc.__truncate_log_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON 
MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__truncate_log_on_segments() RETURNS SETOF void diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql index ec902b02e02..e4a50aa2133 100644 --- a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql @@ -6,7 +6,7 @@ CREATE FUNCTION __gpsc_stat_messages_reset_f_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; +LANGUAGE C EXECUTE ON COORDINATOR; CREATE FUNCTION __gpsc_stat_messages_reset_f_on_segments() RETURNS SETOF void @@ -20,12 +20,12 @@ $$ SELECT __gpsc_stat_messages_reset_f_on_master(); SELECT __gpsc_stat_messages_reset_f_on_segments(); $$ -LANGUAGE SQL EXECUTE ON MASTER; +LANGUAGE SQL EXECUTE ON COORDINATOR; CREATE FUNCTION __gpsc_stat_messages_f_on_master() RETURNS SETOF record AS 'MODULE_PATHNAME', 'gpsc_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION __gpsc_stat_messages_f_on_segments() RETURNS SETOF record diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql index 6e24207e913..3ebdad14b06 100644 --- a/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql @@ -8,7 +8,7 @@ CREATE SCHEMA gpsc; CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' -LANGUAGE C EXECUTE ON MASTER; +LANGUAGE C EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() RETURNS SETOF void @@ -22,12 +22,12 @@ $$ SELECT gpsc.__stat_messages_reset_f_on_master(); SELECT gpsc.__stat_messages_reset_f_on_segments(); $$ -LANGUAGE SQL EXECUTE ON MASTER; +LANGUAGE SQL EXECUTE ON COORDINATOR; CREATE 
FUNCTION gpsc.__stat_messages_f_on_master() RETURNS SETOF record AS 'MODULE_PATHNAME', 'gpsc_stat_messages' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__stat_messages_f_on_segments() RETURNS SETOF record @@ -59,7 +59,7 @@ ORDER BY segid; CREATE FUNCTION gpsc.__init_log_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_init_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__init_log_on_segments() RETURNS SETOF void @@ -79,7 +79,7 @@ ORDER BY tmid, ssid, ccnt; CREATE FUNCTION gpsc.__truncate_log_on_master() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_truncate_log' -LANGUAGE C STRICT VOLATILE EXECUTE ON MASTER; +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__truncate_log_on_segments() RETURNS SETOF void @@ -97,14 +97,14 @@ $$ LANGUAGE plpgsql VOLATILE; CREATE FUNCTION gpsc.__test_uds_start_server(path text) RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_test_uds_start_server' -LANGUAGE C STRICT EXECUTE ON MASTER; +LANGUAGE C STRICT EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__test_uds_receive(timeout_ms int DEFAULT 2000) RETURNS SETOF bigint AS 'MODULE_PATHNAME', 'gpsc_test_uds_receive' -LANGUAGE C STRICT EXECUTE ON MASTER; +LANGUAGE C STRICT EXECUTE ON COORDINATOR; CREATE FUNCTION gpsc.__test_uds_stop_server() RETURNS SETOF void AS 'MODULE_PATHNAME', 'gpsc_test_uds_stop_server' -LANGUAGE C EXECUTE ON MASTER; +LANGUAGE C EXECUTE ON COORDINATOR; diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto index 7853dc58db7..10991301557 100644 --- a/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto +++ b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto @@ -113,9 +113,9 @@ message SystemStat { } message NetworkStat { - uint32 total_bytes = 1; - uint32 tuple_bytes = 2; - uint32 chunks = 3; + uint64 
total_bytes = 1; + uint64 tuple_bytes = 2; + uint64 chunks = 3; } message InterconnectStat { diff --git a/gpcontrib/gp_stats_collector/src/UDSConnector.cpp b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp index 16344366456..056fa9071a5 100644 --- a/gpcontrib/gp_stats_collector/src/UDSConnector.cpp +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp @@ -31,13 +31,11 @@ #include "log/LogOps.h" #include "memory/gpdbwrappers.h" -#include #include #include #include #include #include -#include #include extern "C" { @@ -132,7 +130,7 @@ UDSConnector::report_query(const gpsc::SetQueryReq &req, // if a message does not fit a single packet, we take a nap // before sending the next one. // Otherwise, MSG_DONTWAIT send might overflow the UDS - (std::this_thread::sleep_for(std::chrono::milliseconds(1)), true)); + (pg_usleep(1000), true)); if (sent < 0) { diff --git a/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp index 3f19d4d9930..38ea117bda2 100644 --- a/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp @@ -116,6 +116,7 @@ cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) catch (const std::exception &e) { ereport(ERROR, (errmsg("Unexpected exception in gpsc %s", e.what()))); + pg_unreachable(); } } From d5b88c6e32ad6276176050ee5c42c28eb8d2db30 Mon Sep 17 00:00:00 2001 From: NJrslv Date: Wed, 1 Apr 2026 10:39:45 +0300 Subject: [PATCH 49/49] [gp_stats_collector] Remove unnecessary CONFIGURE_EXTRA_OPTS param --- .../automation/cloudberry/scripts/configure-cloudberry.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index a9086a434fb..90f0614bfe8 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -53,7 +53,6 @@ # # Optional Environment Variables: # LOG_DIR - Directory for logs (defaults to ${SRC_DIR}/build-logs) -# CONFIGURE_EXTRA_OPTS - Args to pass to configure command # ENABLE_DEBUG - Enable debug build options (true/false, defaults to # false) # @@ -179,8 +178,7 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --with-uuid=e2fs \ ${CONFIGURE_MDBLOCALES_OPTS} \ --with-includes=/usr/local/xerces-c/include \ - --with-libraries=${BUILD_DESTINATION}/lib \ - ${CONFIGURE_EXTRA_OPTS:-""} || exit 4 + --with-libraries=${BUILD_DESTINATION}/lib || exit 4 log_section_end "Configure" # Capture version information