From 75cc3aaea1ed3e81971fc455576bbbb4f417e39e Mon Sep 17 00:00:00 2001 From: Sergey Pershin Date: Mon, 21 Apr 2025 16:33:28 -0700 Subject: [PATCH] [native] Track CPU & Memory overload in native worker. --- .../src/main/sphinx/presto_cpp/properties.rst | 27 +++++++++++ .../presto_cpp/main/PrestoServer.cpp | 47 +++++++++++++++++++ .../presto_cpp/main/PrestoServer.h | 4 ++ .../presto_cpp/main/common/Configs.cpp | 10 ++++ .../presto_cpp/main/common/Configs.h | 15 +++++- .../presto_cpp/main/common/Counters.cpp | 2 + .../presto_cpp/main/common/Counters.h | 7 +++ 7 files changed, 111 insertions(+), 1 deletion(-) diff --git a/presto-docs/src/main/sphinx/presto_cpp/properties.rst b/presto-docs/src/main/sphinx/presto_cpp/properties.rst index 6aa03b8fb8d91..31cfcc99b9830 100644 --- a/presto-docs/src/main/sphinx/presto_cpp/properties.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/properties.rst @@ -578,6 +578,33 @@ The default value of 60 gb is calculated based on available machine memory of 64 Specifies the amount of memory to shrink when the memory pushback is triggered. This only applies if ``system-mem-pushback-enabled`` is ``true``. +``system-mem-pushback-abort-enabled`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``boolean`` +* **Default value:** ``false`` + +If true, memory pushback will abort queries with the largest memory usage under +low memory condition. This only applies if ``system-mem-pushback-enabled`` is ``true``. + +``worker-overloaded-threshold-mem-gb`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``integer`` +* **Default value:** ``0`` + +Memory threshold in GB above which the worker is considered overloaded in terms of +memory use. Ignored if zero. + +``worker-overloaded-threshold-cpu-pct`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``integer`` +* **Default value:** ``0`` + +CPU threshold in % above which the worker is considered overloaded in terms of +CPU use. Ignored if zero. 
+
 Environment Variables As Values For Worker Properties
 -----------------------------------------------------
 
diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp
index 29427f4671b77..80ca50a0cfa1b 100644
--- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp
+++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp
@@ -1426,9 +1426,56 @@ void PrestoServer::populateMemAndCPUInfo() {
   });
   RECORD_METRIC_VALUE(kCounterNumQueryContexts, numContexts);
   cpuMon_.update();
+  checkOverload();
   **memoryInfo_.wlock() = std::move(memoryInfo);
 }
 
+// Exports the memory and CPU overload gauges (100 = overloaded, 0 = not) and
+// logs when either state flips. A threshold of zero disables that check: no
+// gauge is exported and nothing is logged for it.
+void PrestoServer::checkOverload() {
+  auto systemConfig = SystemConfig::instance();
+
+  const auto thresholdMemBytes =
+      systemConfig->workerOverloadedThresholdMemGb() * 1024 * 1024 * 1024;
+  if (thresholdMemBytes > 0) {
+    // Without a memory checker usage reads as 0, so memory never trips.
+    const auto usedMemBytes = (memoryChecker_ != nullptr)
+        ? memoryChecker_->cachedSystemUsedMemoryBytes()
+        : 0;
+    const bool isMemOverloaded = (usedMemBytes > thresholdMemBytes);
+    // Log on transitions only: checkOverload() runs on every
+    // populateMemAndCPUInfo() pass, so warning each time would flood the log.
+    if (isMemOverloaded && !isMemOverloaded_) {
+      LOG(WARNING) << "Server memory is overloaded. Currently used: "
+                   << velox::succinctBytes(usedMemBytes) << ", threshold: "
+                   << velox::succinctBytes(thresholdMemBytes);
+    } else if (!isMemOverloaded && isMemOverloaded_) {
+      LOG(INFO) << "Server memory is no longer overloaded. Currently used: "
+                << velox::succinctBytes(usedMemBytes) << ", threshold: "
+                << velox::succinctBytes(thresholdMemBytes);
+    }
+    RECORD_METRIC_VALUE(kCounterOverloadedMem, isMemOverloaded ? 100 : 0);
+    isMemOverloaded_ = isMemOverloaded;
+  }
+
+  const auto thresholdCpuPct = systemConfig->workerOverloadedThresholdCpuPct();
+  if (thresholdCpuPct > 0) {
+    const auto usedCpuPct = cpuMon_.getCPULoadPct();
+    const bool isCpuOverloaded = (usedCpuPct > thresholdCpuPct);
+    // Same transition-only logging as for memory above.
+    if (isCpuOverloaded && !isCpuOverloaded_) {
+      LOG(WARNING) << "Server CPU is overloaded. Currently used: "
+                   << usedCpuPct << "%, threshold: " << thresholdCpuPct << "%";
+    } else if (!isCpuOverloaded && isCpuOverloaded_) {
+      LOG(INFO) << "Server CPU is no longer overloaded. Currently used: "
+                << usedCpuPct << "%, threshold: " << thresholdCpuPct << "%";
+    }
+    RECORD_METRIC_VALUE(kCounterOverloadedCpu, isCpuOverloaded ? 100 : 0);
+    isCpuOverloaded_ = isCpuOverloaded;
+  }
+}
+
 static protocol::Duration getUptime(
     std::chrono::steady_clock::time_point& start) {
   auto seconds = std::chrono::duration_cast(
diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.h b/presto-native-execution/presto_cpp/main/PrestoServer.h
index a2c25303725ca..272940c916798 100644
--- a/presto-native-execution/presto_cpp/main/PrestoServer.h
+++ b/presto-native-execution/presto_cpp/main/PrestoServer.h
@@ -221,6 +221,8 @@ class PrestoServer {
 
   std::unique_ptr setupSsdCache();
 
+  void checkOverload();
+
   const std::string configDirectoryPath_;
 
   std::shared_ptr coordinatorDiscoverer_;
@@ -273,6 +275,8 @@ class PrestoServer {
   std::unique_ptr periodicTaskManager_;
   std::unique_ptr prestoServerOperations_;
   std::unique_ptr memoryChecker_;
+  bool isMemOverloaded_{false};
+  bool isCpuOverloaded_{false};
 
   // We update these members asynchronously and return in http requests w/o
   // delay.
diff --git a/presto-native-execution/presto_cpp/main/common/Configs.cpp b/presto-native-execution/presto_cpp/main/common/Configs.cpp index 9de3bdd4941af..728d8c002111e 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.cpp +++ b/presto-native-execution/presto_cpp/main/common/Configs.cpp @@ -183,6 +183,8 @@ SystemConfig::SystemConfig() { NUM_PROP(kSystemMemShrinkGb, 8), BOOL_PROP(kMallocMemHeapDumpEnabled, false), BOOL_PROP(kSystemMemPushbackAbortEnabled, false), + NUM_PROP(kWorkerOverloadedThresholdMemGb, 0), + NUM_PROP(kWorkerOverloadedThresholdCpuPct, 0), NUM_PROP(kMallocHeapDumpThresholdGb, 20), NUM_PROP(kMallocMemMinHeapDumpInterval, 10), NUM_PROP(kMallocMemMaxHeapDumpFiles, 5), @@ -499,6 +501,14 @@ bool SystemConfig::systemMemPushBackAbortEnabled() const { return optionalProperty(kSystemMemPushbackAbortEnabled).value(); } +uint64_t SystemConfig::workerOverloadedThresholdMemGb() const { + return optionalProperty(kWorkerOverloadedThresholdMemGb).value(); +} + +uint32_t SystemConfig::workerOverloadedThresholdCpuPct() const { + return optionalProperty(kWorkerOverloadedThresholdCpuPct).value(); +} + bool SystemConfig::mallocMemHeapDumpEnabled() const { return optionalProperty(kMallocMemHeapDumpEnabled).value(); } diff --git a/presto-native-execution/presto_cpp/main/common/Configs.h b/presto-native-execution/presto_cpp/main/common/Configs.h index a405aaf1beeb2..2367cc9a99b6e 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.h +++ b/presto-native-execution/presto_cpp/main/common/Configs.h @@ -299,12 +299,21 @@ class SystemConfig : public ConfigBase { /// get the server out of low memory condition. This only applies if /// 'system-mem-pushback-enabled' is true. static constexpr std::string_view kSystemMemShrinkGb{"system-mem-shrink-gb"}; - /// If true, memory pushback will quickly abort queries with the most memory + /// If true, memory pushback will abort queries with the largest memory /// usage under low memory condition. 
This only applies if /// 'system-mem-pushback-enabled' is set. static constexpr std::string_view kSystemMemPushbackAbortEnabled{ "system-mem-pushback-abort-enabled"}; + /// Memory threshold in GB above which the worker is considered overloaded. + /// Ignored if zero. Default is zero. + static constexpr std::string_view kWorkerOverloadedThresholdMemGb{ + "worker-overloaded-threshold-mem-gb"}; + /// CPU threshold in % above which the worker is considered overloaded. + /// Ignored if zero. Default is zero. + static constexpr std::string_view kWorkerOverloadedThresholdCpuPct{ + "worker-overloaded-threshold-cpu-pct"}; + /// If true, memory allocated via malloc is periodically checked and a heap /// profile is dumped if usage exceeds 'malloc-heap-dump-gb-threshold'. static constexpr std::string_view kMallocMemHeapDumpEnabled{ @@ -828,6 +837,10 @@ class SystemConfig : public ConfigBase { bool systemMemPushBackAbortEnabled() const; + uint64_t workerOverloadedThresholdMemGb() const; + + uint32_t workerOverloadedThresholdCpuPct() const; + bool mallocMemHeapDumpEnabled() const; uint32_t mallocHeapDumpThresholdGb() const; diff --git a/presto-native-execution/presto_cpp/main/common/Counters.cpp b/presto-native-execution/presto_cpp/main/common/Counters.cpp index b19c8063faa09..7d4853278939b 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.cpp +++ b/presto-native-execution/presto_cpp/main/common/Counters.cpp @@ -84,6 +84,8 @@ void registerPrestoMetrics() { kCounterNumBlockedWaitForConnectorDrivers, facebook::velox::StatType::AVG); DEFINE_METRIC(kCounterNumBlockedYieldDrivers, facebook::velox::StatType::AVG); + DEFINE_METRIC(kCounterOverloadedMem, facebook::velox::StatType::AVG); + DEFINE_METRIC(kCounterOverloadedCpu, facebook::velox::StatType::AVG); DEFINE_METRIC(kCounterNumStuckDrivers, facebook::velox::StatType::AVG); DEFINE_METRIC( kCounterTotalPartitionedOutputBuffer, facebook::velox::StatType::AVG); diff --git 
a/presto-native-execution/presto_cpp/main/common/Counters.h b/presto-native-execution/presto_cpp/main/common/Counters.h index f2b031752f049..9252dd7096a03 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.h +++ b/presto-native-execution/presto_cpp/main/common/Counters.h @@ -108,6 +108,13 @@ constexpr folly::StringPiece kCounterNumBlockedYieldDrivers{ constexpr folly::StringPiece kCounterNumStuckDrivers{ "presto_cpp.num_stuck_drivers"}; +/// Worker exports 0 or 100 for this counter. 0 meaning not memory overloaded +/// and 100 meaning memory overloaded. +constexpr folly::StringPiece kCounterOverloadedMem{"presto_cpp.overloaded_mem"}; +/// Worker exports 0 or 100 for this counter. 0 meaning not CPU overloaded +/// and 100 meaning CPU overloaded. +constexpr folly::StringPiece kCounterOverloadedCpu{"presto_cpp.overloaded_cpu"}; + /// Number of total OutputBuffer managed by all /// OutputBufferManager constexpr folly::StringPiece kCounterTotalPartitionedOutputBuffer{