From d3b102fa3351952c19705585b8fcefc8f57e11b3 Mon Sep 17 00:00:00 2001 From: code Date: Thu, 25 Sep 2025 00:55:27 +0800 Subject: [PATCH 01/10] formatter: removed legacy router formatter support (#41039) Commit Message: formatter: removed legacy router formatter support Additional Description: Three years ago, at #21932, we unified all the formatters to the substitution formatter. And we added a warn log for the legacy UPSTREAM_METADATA and DYNAMIC_METADATA. Now, I think it's time to remove it finally. This PR removed legacy header formatter support for `%DYNAMIC_METADATA(["namespace", "key", ...])%` and `%UPSTREAM_METADATA(["namespace", "key", ...])%`. Please use `%DYNAMIC_METADATA(namespace:key:...)%` and `%UPSTREAM_METADATA(namespace:key:...)%` as alternatives. This change can be reverted temporarily by setting the runtime guard `envoy.reloadable_features.remove_legacy_route_formatter` to `false`. Risk Level: low. Testing: unit. Docs Changes: n/a. Release Notes: added. --------- Signed-off-by: WangBaiping --- changelogs/current.yaml | 9 +++ .../http/http_conn_man/headers.rst | 2 +- source/common/router/BUILD | 1 + source/common/router/header_parser.cc | 19 ++++--- source/common/router/header_parser_utils.cc | 18 ++++++ source/common/runtime/runtime_features.cc | 4 ++ .../formatter/substitution_formatter_test.cc | 12 +++- test/common/router/BUILD | 1 + test/common/router/config_impl_test.cc | 8 +-- test/common/router/header_formatter_test.cc | 56 +++++++++++++++---- ...e-header_parser_fuzz_test-5163306626580480 | 6 +- .../router/route_corpus/regex_parsing_error | 2 +- .../wrong_UPSTREAM_HEADER_BYTES_RECEIVED | 2 +- test/integration/header_integration_test.cc | 17 +++--- .../http_subset_lb_integration_test.cc | 3 +- 15 files changed, 120 insertions(+), 40 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 7901b280771ee..d89ec4b7c0169 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -76,6 +76,15 @@ 
minor_behavior_changes: This can be accessed through the ``%UPSTREAM_DECOMPRESSED_HEADER_BYTES_RECEIVED%``, ``%DOWNSTREAM_DECOMPRESSED_HEADER_BYTES_RECEIVED%``, ``%UPSTREAM_DECOMPRESSED_HEADER_BYTES_SENT%``, and ``%DOWNSTREAM_DECOMPRESSED_HEADER_BYTES_SENT%`` access log command operators. +- area: formatter + change: | + Deprecated legacy header formatter support for ``%DYNAMIC_METADATA(["namespace", "key", ...])%`` + , ``%UPSTREAM_METADATA(["namespace", "key", ...])%`` and ``%PER_REQUEST_STATE(key)%``. Please use + ``%DYNAMIC_METADATA(namespace:key:...)%``, ``%UPSTREAM_METADATA(namespace:key:...)%`` + and ``%FILTER_STATE(key:PLAIN)%`` as alternatives. + This change is guarded by the runtime flag + ``envoy.reloadable_features.remove_legacy_route_formatter`` and defaults to ``false`` for now + and will be flipped to ``true`` after two release periods. - area: oauth2 change: | Added response code details to ``401`` local responses generated by the OAuth2 filter. diff --git a/docs/root/configuration/http/http_conn_man/headers.rst b/docs/root/configuration/http/http_conn_man/headers.rst index df30d728f73c2..a84f992d4c9d7 100644 --- a/docs/root/configuration/http/http_conn_man/headers.rst +++ b/docs/root/configuration/http/http_conn_man/headers.rst @@ -702,7 +702,7 @@ headers are modified before the request is sent upstream and the response is not .. attention:: - The following legacy header formatters are still supported, but will be deprecated in the future. + The following legacy header formatters are deprecated and will be removed soon. The equivalent information can be accessed using indicated substitutes. 
``%DYNAMIC_METADATA(["namespace", "key", ...])%`` diff --git a/source/common/router/BUILD b/source/common/router/BUILD index fb5d8979aa5f3..ba6ce85acb9b3 100644 --- a/source/common/router/BUILD +++ b/source/common/router/BUILD @@ -496,6 +496,7 @@ envoy_cc_library( "//source/common/http:headers_lib", "//source/common/json:json_loader_lib", "//source/common/protobuf:utility_lib", + "//source/common/runtime:runtime_lib", "@envoy_api//envoy/config/core/v3:pkg_cc_proto", ], ) diff --git a/source/common/router/header_parser.cc b/source/common/router/header_parser.cc index a3f75e621223b..d0e2bbc27ddd6 100644 --- a/source/common/router/header_parser.cc +++ b/source/common/router/header_parser.cc @@ -13,6 +13,7 @@ #include "source/common/http/headers.h" #include "source/common/json/json_loader.h" #include "source/common/protobuf/utility.h" +#include "source/common/runtime/runtime_features.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_replace.h" @@ -38,14 +39,18 @@ parseHttpHeaderFormatter(const envoy::config::core::v3::HeaderValue& header_valu return absl::InvalidArgumentError(":-prefixed or host headers may not be modified"); } - // UPSTREAM_METADATA and DYNAMIC_METADATA must be translated from JSON ["a", "b"] format to colon - // format (a:b) - std::string final_header_value = HeaderParser::translateMetadataFormat(header_value.value()); - // Change PER_REQUEST_STATE to FILTER_STATE. - final_header_value = HeaderParser::translatePerRequestState(final_header_value); + if (!Runtime::runtimeFeatureEnabled("envoy.reloadable_features.remove_legacy_route_formatter")) { + // UPSTREAM_METADATA and DYNAMIC_METADATA must be translated from JSON ["a", "b"] format to + // colon format (a:b) + std::string final_header_value = HeaderParser::translateMetadataFormat(header_value.value()); + // Change PER_REQUEST_STATE to FILTER_STATE. 
+ final_header_value = HeaderParser::translatePerRequestState(final_header_value); + // Let the substitution formatter parse the final_header_value. + return Envoy::Formatter::FormatterImpl::create(final_header_value, true); + } - // Let the substitution formatter parse the final_header_value. - return Envoy::Formatter::FormatterImpl::create(final_header_value, true); + // Let the substitution formatter parse the header_value. + return Envoy::Formatter::FormatterImpl::create(header_value.value(), true); } } // namespace diff --git a/source/common/router/header_parser_utils.cc b/source/common/router/header_parser_utils.cc index db65eaedbb822..f9210482c1457 100644 --- a/source/common/router/header_parser_utils.cc +++ b/source/common/router/header_parser_utils.cc @@ -1,6 +1,8 @@ #include #include +#include "envoy/server/factory_context.h" + #include "source/common/common/assert.h" #include "source/common/json/json_loader.h" #include "source/common/router/header_parser.h" @@ -64,6 +66,13 @@ std::string HeaderParser::translateMetadataFormat(const std::string& header_valu "Header formatter: JSON format of {}_METADATA parameters has been obsoleted. " "Use colon format: {}", matches[1], new_format.c_str()); + // The parsing should only happen on the main thread and the singleton context should be + // available. In case it is not set in tests or other non-standard Envoy usage, we skip + // counting the deprecated feature usage instead of crashing. + auto* context = Server::Configuration::ServerFactoryContextInstance::getExisting(); + if (context != nullptr) { + context->runtime().countDeprecatedFeatureUse(); + } int subs = absl::StrReplaceAll({{matches[0], new_format}}, &new_header_value); ASSERT(subs > 0); @@ -94,6 +103,15 @@ std::string HeaderParser::translatePerRequestState(const std::string& header_val ENVOY_LOG_MISC(warn, "PER_REQUEST_STATE header formatter has been obsoleted. 
Use {}", new_format.c_str()); + + // The parsing should only happen on the main thread and the singleton context should be + // available. In case it is not set in tests or other non-standard Envoy usage, we skip + // counting the deprecated feature usage instead of crashing. + auto* context = Server::Configuration::ServerFactoryContextInstance::getExisting(); + if (context != nullptr) { + context->runtime().countDeprecatedFeatureUse(); + } + int subs = absl::StrReplaceAll({{matches[0], new_format}}, &new_header_value); ASSERT(subs > 0); } diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index cce48cee1c088..84eae43b26313 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -167,6 +167,10 @@ FALSE_RUNTIME_GUARD(envoy_reloadable_features_ext_proc_graceful_grpc_close); FALSE_RUNTIME_GUARD(envoy_reloadable_features_getaddrinfo_no_ai_flags); +// Flag to remove legacy route formatter support in header parser +// Flip to true after two release periods. +FALSE_RUNTIME_GUARD(envoy_reloadable_features_remove_legacy_route_formatter); + // TODO(grnmeira): // Enables the new DNS implementation, a merged implementation of // strict and logical DNS clusters. 
This new implementation will diff --git a/test/common/formatter/substitution_formatter_test.cc b/test/common/formatter/substitution_formatter_test.cc index a78b171c64661..b7d5f6247bd25 100644 --- a/test/common/formatter/substitution_formatter_test.cc +++ b/test/common/formatter/substitution_formatter_test.cc @@ -4153,7 +4153,7 @@ TEST(SubstitutionFormatterTest, FilterStateSpeciferTest) { Http::TestRequestHeaderMapImpl request_headers; Http::TestResponseHeaderMapImpl response_headers; Http::TestResponseTrailerMapImpl response_trailers; - StreamInfo::MockStreamInfo stream_info; + NiceMock stream_info; std::string body; HttpFormatterContext formatter_context(&request_headers, &response_headers, &response_trailers, @@ -4162,13 +4162,18 @@ TEST(SubstitutionFormatterTest, FilterStateSpeciferTest) { stream_info.filter_state_->setData( "test_key", std::make_unique("test_value"), StreamInfo::FilterState::StateType::ReadOnly); + stream_info.upstream_info_->setUpstreamFilterState( + stream_info.filter_state_); // Reuse the same filter state for test only. 
EXPECT_CALL(Const(stream_info), filterState()).Times(testing::AtLeast(1)); const std::string expected_json_map = R"EOF( { "test_key_plain": "test_value By PLAIN", "test_key_typed": "test_value By TYPED", - "test_key_field": "test_value" + "test_key_field": "test_value", + "upstream_test_key_plain": "test_value By PLAIN", + "upstream_test_key_typed": "test_value By TYPED", + "upstream_test_key_field": "test_value" } )EOF"; @@ -4177,6 +4182,9 @@ TEST(SubstitutionFormatterTest, FilterStateSpeciferTest) { test_key_plain: '%FILTER_STATE(test_key:PLAIN)%' test_key_typed: '%FILTER_STATE(test_key:TYPED)%' test_key_field: '%FILTER_STATE(test_key:FIELD:test_field)%' + upstream_test_key_plain: '%UPSTREAM_FILTER_STATE(test_key:PLAIN)%' + upstream_test_key_typed: '%UPSTREAM_FILTER_STATE(test_key:TYPED)%' + upstream_test_key_field: '%UPSTREAM_FILTER_STATE(test_key:FIELD:test_field)%' )EOF", key_mapping); JsonFormatterImpl formatter(key_mapping, false); diff --git a/test/common/router/BUILD b/test/common/router/BUILD index 6e9ff2a8cfe27..01afcd9cefa37 100644 --- a/test/common/router/BUILD +++ b/test/common/router/BUILD @@ -490,6 +490,7 @@ envoy_cc_test( "//test/common/stream_info:test_int_accessor_lib", "//test/mocks/api:api_mocks", "//test/mocks/http:http_mocks", + "//test/mocks/server:server_factory_context_mocks", "//test/mocks/ssl:ssl_mocks", "//test/mocks/stream_info:stream_info_mocks", "//test/mocks/upstream:host_mocks", diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index d6deff68f1267..70c9c2c027227 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -2245,7 +2245,7 @@ class HeaderTransformsDoFormattingTest : public RouteMatcherTest { {0}: - header: key: x-has-variable - value: "%PER_REQUEST_STATE(testing)%" + value: "%FILTER_STATE(testing:PLAIN)%" append_action: OVERWRITE_IF_EXISTS_OR_ADD )EOF"; const std::string yaml = @@ -2281,9 +2281,9 @@ class 
HeaderTransformsDoFormattingTest : public RouteMatcherTest { transforms = run_request_header_test ? route_entry->requestHeaderTransforms(stream_info, /*do_formatting=*/false) : route_entry->responseHeaderTransforms(stream_info, /*do_formatting=*/false); - EXPECT_THAT( - transforms.headers_to_overwrite_or_add, - ElementsAre(Pair(Http::LowerCaseString("x-has-variable"), "%PER_REQUEST_STATE(testing)%"))); + EXPECT_THAT(transforms.headers_to_overwrite_or_add, + ElementsAre(Pair(Http::LowerCaseString("x-has-variable"), + "%FILTER_STATE(testing:PLAIN)%"))); } }; diff --git a/test/common/router/header_formatter_test.cc b/test/common/router/header_formatter_test.cc index e843fd18a3e16..7056659980b51 100644 --- a/test/common/router/header_formatter_test.cc +++ b/test/common/router/header_formatter_test.cc @@ -17,6 +17,7 @@ #include "test/common/stream_info/test_int_accessor.h" #include "test/mocks/api/mocks.h" #include "test/mocks/http/mocks.h" +#include "test/mocks/server/server_factory_context.h" #include "test/mocks/ssl/mocks.h" #include "test/mocks/stream_info/mocks.h" #include "test/mocks/upstream/host.h" @@ -73,14 +74,11 @@ TEST(HeaderParserTest, TestParse) { {"%DOWNSTREAM_DIRECT_LOCAL_ADDRESS%", {"127.0.0.2:0"}, {}}, {"%DOWNSTREAM_DIRECT_LOCAL_PORT%", {"0"}, {}}, {"%DOWNSTREAM_DIRECT_LOCAL_ADDRESS_WITHOUT_PORT%", {"127.0.0.2"}, {}}, - {"%UPSTREAM_METADATA([\"ns\", \"key\"])%", {"value"}, {}}, - {"[%UPSTREAM_METADATA([\"ns\", \"key\"])%", {"[value"}, {}}, - {"%UPSTREAM_METADATA([\"ns\", \"key\"])%]", {"value]"}, {}}, - {"[%UPSTREAM_METADATA([\"ns\", \"key\"])%]", {"[value]"}, {}}, - {"%UPSTREAM_METADATA([\"ns\", \t \"key\"])%", {"value"}, {}}, - {"%UPSTREAM_METADATA([\"ns\", \n \"key\"])%", {"value"}, {}}, - {"%UPSTREAM_METADATA( \t [ \t \"ns\" \t , \t \"key\" \t ] \t )%", {"value"}, {}}, - {R"EOF(%UPSTREAM_METADATA(["\"quoted\"", "\"key\""])%)EOF", {"value"}, {}}, + {"%UPSTREAM_METADATA(ns:key)%", {"value"}, {}}, + {"[%UPSTREAM_METADATA(ns:key)%", {"[value"}, {}}, + 
{"%UPSTREAM_METADATA(ns:key)%]", {"value]"}, {}}, + {"[%UPSTREAM_METADATA(ns:key)%]", {"[value]"}, {}}, + {"%UPSTREAM_METADATA(ns:key)%", {"value"}, {}}, {"%UPSTREAM_REMOTE_ADDRESS%", {"10.0.0.1:443"}, {}}, {"%UPSTREAM_REMOTE_ADDRESS_WITHOUT_PORT%", {"10.0.0.1"}, {}}, {"%UPSTREAM_REMOTE_PORT%", {"443"}, {}}, @@ -242,6 +240,10 @@ TEST(HeaderParserTest, TestParse) { } TEST(HeaderParser, TestMetadataTranslator) { + NiceMock context; + ScopedThreadLocalServerContextSetter setter(context); + EXPECT_CALL(context.runtime_loader_, countDeprecatedFeatureUse()).Times(testing::AtLeast(1)); + struct TestCase { std::string input_; std::string expected_output_; @@ -281,6 +283,10 @@ TEST(HeaderParser, TestMetadataTranslatorExceptions) { } TEST(HeaderParser, TestPerFilterStateTranslator) { + NiceMock context; + ScopedThreadLocalServerContextSetter setter(context); + EXPECT_CALL(context.runtime_loader_, countDeprecatedFeatureUse()).Times(testing::AtLeast(1)); + struct TestCase { std::string input_; std::string expected_output_; @@ -431,7 +437,11 @@ TEST(HeaderParserTest, EvaluateHeaderValuesWithNullStreamInfo) { EXPECT_FALSE(header_map.has("empty")); } -TEST(HeaderParserTest, EvaluateEmptyHeaders) { +TEST(HeaderParserTest, EvaluateEmptyHeadersWithLegacyFormat) { + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.remove_legacy_route_formatter", "false"}}); + const std::string yaml = R"EOF( match: { prefix: "/new_endpoint" } route: @@ -457,6 +467,32 @@ match: { prefix: "/new_endpoint" } EXPECT_FALSE(header_map.has("x-key")); } +TEST(HeaderParserTest, EvaluateEmptyHeaders) { + const std::string yaml = R"EOF( +match: { prefix: "/new_endpoint" } +route: + cluster: "www2" + prefix_rewrite: "/api/new_endpoint" +request_headers_to_add: + - header: + key: "x-key" + value: "%UPSTREAM_METADATA(namespace:key)%" + append_action: APPEND_IF_EXISTS_OR_ADD +)EOF"; + + HeaderParserPtr req_header_parser = + 
HeaderParser::configure(parseRouteFromV3Yaml(yaml).request_headers_to_add()).value(); + Http::TestRequestHeaderMapImpl header_map{{":method", "POST"}}; + std::shared_ptr> host( + new NiceMock()); + NiceMock stream_info; + auto metadata = std::make_shared(); + stream_info.upstreamInfo()->setUpstreamHost(host); + ON_CALL(*host, metadata()).WillByDefault(Return(metadata)); + req_header_parser->evaluateHeaders(header_map, stream_info); + EXPECT_FALSE(header_map.has("x-key")); +} + TEST(HeaderParserTest, EvaluateStaticHeaders) { const std::string yaml = R"EOF( match: { prefix: "/new_endpoint" } @@ -508,7 +544,7 @@ match: { prefix: "/new_endpoint" } value: "%PROTOCOL%%DOWNSTREAM_REMOTE_ADDRESS_WITHOUT_PORT%" - header: key: "x-metadata" - value: "%UPSTREAM_METADATA([\"namespace\", \"%key%\"])%" + value: "%UPSTREAM_METADATA(namespace:%key%)%" - header: key: "x-per-request" value: "%PER_REQUEST_STATE(testing)%" diff --git a/test/common/router/header_parser_corpus/clusterfuzz-testcase-header_parser_fuzz_test-5163306626580480 b/test/common/router/header_parser_corpus/clusterfuzz-testcase-header_parser_fuzz_test-5163306626580480 index 57041ba397712..e4ba6b448326e 100644 --- a/test/common/router/header_parser_corpus/clusterfuzz-testcase-header_parser_fuzz_test-5163306626580480 +++ b/test/common/router/header_parser_corpus/clusterfuzz-testcase-header_parser_fuzz_test-5163306626580480 @@ -1,7 +1,7 @@ headers_to_add { header { key: "P" - value: "%PER_REQUEST_STATE(oB]$T)%" + value: "%FILTER_STATE(oB]$T)%" } } headers_to_add { @@ -13,13 +13,13 @@ headers_to_add { headers_to_add { header { key: "A" - value: "%PER_REQUEST_STATE(dB]$T)%" + value: "%FILTER_STATE(dB]$T)%" } } headers_to_add { header { key: "\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177\177" - value: "%PER_REQUEST_STATE(dB]$T)%" + value: "%FILTER_STATE(dB]$T)%" } append { 
value: true diff --git a/test/common/router/route_corpus/regex_parsing_error b/test/common/router/route_corpus/regex_parsing_error index 50caaf4f85e42..a551973008ef2 100644 --- a/test/common/router/route_corpus/regex_parsing_error +++ b/test/common/router/route_corpus/regex_parsing_error @@ -14,7 +14,7 @@ config { name: "." typed_config { type_url: "m/envoy.config.route.v3.Route" - value: "\n\002\n\000\022\t\n\001v*\0015\242\002\000J\005\n\003\n\0011JF\nB\n\001$\022=%START_TIME((%%%fenvoy.filters.http.router%\034f%256\\002\\0N\\ss)% \001J\005\n\003\n\001$J\205\001\n\202\001\n\001$\022}%START_TIME((%%%fenvoy%PER_REQUEST_STATE(%fenvoy.type.v3.Int64Ra%TUEST_STATE(%f%ss[%%s.filters.http.router%\034f%256\\002\\0N\\ss)%J\010\n\006\n\0011\022\001\003b\001?b\021x-forwarded-protob\021x-forwarded-protor\001v\202\001\000" + value: "\n\002\n\000\022\t\n\001v*\0015\242\002\000J\005\n\003\n\0011JF\nB\n\001$\022=%START_TIME((%%%fenvoy.filters.http.router%\034f%256\\002\\0N\\ss)% \001J\005\n\003\n\001$J\205\001\n\202\001\n\001$\022}%START_TIME((%%%fenvoy%FILTER_STATE(%fenvoy.type.v3.Int64Ra%TUEST_STATE(%f%ss[%%s.filters.http.router%\034f%256\\002\\0N\\ss)%J\010\n\006\n\0011\022\001\003b\001?b\021x-forwarded-protob\021x-forwarded-protor\001v\202\001\000" } } } diff --git a/test/common/router/route_corpus/wrong_UPSTREAM_HEADER_BYTES_RECEIVED b/test/common/router/route_corpus/wrong_UPSTREAM_HEADER_BYTES_RECEIVED index 268698223fa0d..07d36871db874 100644 --- a/test/common/router/route_corpus/wrong_UPSTREAM_HEADER_BYTES_RECEIVED +++ b/test/common/router/route_corpus/wrong_UPSTREAM_HEADER_BYTES_RECEIVED @@ -13,7 +13,7 @@ config { request_headers_to_add { header { key: "[" - value: "`%START_TIME()%%REQ(T_||?|STARTO2s)%%UPSTREAM_HEADER_BYTES_RECEIVED%%PER_REQUEST_STATE(%f(%f%sRESPONSE_259462C_Swwwwww`TART_TIME()%%REQ(T_||?|STARTOC_Swwwwww`%START_TIME()%%REQ(T_||?|STARTOC_SwwwwwwUB(UB(OD)%T%START_TIME()%%REQ(T_<|?|STARTOC_SwwwwwwUB(UB(OD)%TA" + value: 
"`%START_TIME()%%REQ(T_||?|STARTO2s)%%UPSTREAM_HEADER_BYTES_RECEIVED%%FILTER_STATE(%f(%f%sRESPONSE_259462C_Swwwwww`TART_TIME()%%REQ(T_||?|STARTOC_Swwwwww`%START_TIME()%%REQ(T_||?|STARTOC_SwwwwwwUB(UB(OD)%T%START_TIME()%%REQ(T_<|?|STARTOC_SwwwwwwUB(UB(OD)%TA" } } } diff --git a/test/integration/header_integration_test.cc b/test/integration/header_integration_test.cc index 8c1f0c007a662..5248983d35fd1 100644 --- a/test/integration/header_integration_test.cc +++ b/test/integration/header_integration_test.cc @@ -307,25 +307,24 @@ class HeaderIntegrationTest if (use_eds_) { addHeader(route_config->mutable_response_headers_to_add(), "x-routeconfig-dynamic", - R"(%UPSTREAM_METADATA(["test.namespace", "key"])%)", append); + R"(%UPSTREAM_METADATA(test.namespace:key)%)", append); // Iterate over VirtualHosts, nested Routes and WeightedClusters, adding a dynamic // response header. for (auto& vhost : *route_config->mutable_virtual_hosts()) { addHeader(vhost.mutable_response_headers_to_add(), "x-vhost-dynamic", - R"(vhost:%UPSTREAM_METADATA(["test.namespace", "key"])%)", append); + R"(vhost:%UPSTREAM_METADATA(test.namespace:key)%)", append); for (auto& route : *vhost.mutable_routes()) { addHeader(route.mutable_response_headers_to_add(), "x-route-dynamic", - R"(route:%UPSTREAM_METADATA(["test.namespace", "key"])%)", append); + R"(route:%UPSTREAM_METADATA(test.namespace:key)%)", append); if (route.has_route()) { auto* route_action = route.mutable_route(); if (route_action->has_weighted_clusters()) { for (auto& c : *route_action->mutable_weighted_clusters()->mutable_clusters()) { addHeader(c.mutable_response_headers_to_add(), "x-weighted-cluster-dynamic", - R"(weighted:%UPSTREAM_METADATA(["test.namespace", "key"])%)", - append); + R"(weighted:%UPSTREAM_METADATA(test.namespace:key)%)", append); } } } @@ -1390,24 +1389,24 @@ TEST_P(EmptyHeaderIntegrationTest, AllProtocolsPassEmptyHeaders) { *vhost.add_request_headers_to_add() = TestUtility::parseYaml(R"EOF( header: key: 
"x-ds-add-empty" - value: "%PER_REQUEST_STATE(does.not.exist)%" + value: "%FILTER_STATE(does.not.exist:PLAIN)%" keep_empty_value: true )EOF"); *vhost.add_request_headers_to_add() = TestUtility::parseYaml(R"EOF( header: key: "x-ds-no-add-empty" - value: "%PER_REQUEST_STATE(does.not.exist)%" + value: "%FILTER_STATE(does.not.exist:PLAIN)%" )EOF"); *vhost.add_response_headers_to_add() = TestUtility::parseYaml(R"EOF( header: key: "x-us-add-empty" - value: "%PER_REQUEST_STATE(does.not.exist)%" + value: "%FILTER_STATE(does.not.exist:PLAIN)%" keep_empty_value: true )EOF"); *vhost.add_response_headers_to_add() = TestUtility::parseYaml(R"EOF( header: key: "x-us-no-add-empty" - value: "%PER_REQUEST_STATE(does.not.exist)%" + value: "%FILTER_STATE(does.not.exist:PLAIN)%" )EOF"); config_helper_.addVirtualHost(vhost); diff --git a/test/integration/http_subset_lb_integration_test.cc b/test/integration/http_subset_lb_integration_test.cc index 11707c624851b..da7eafe04c67b 100644 --- a/test/integration/http_subset_lb_integration_test.cc +++ b/test/integration/http_subset_lb_integration_test.cc @@ -98,8 +98,7 @@ class HttpSubsetLbIntegrationTest auto* resp_header = vhost->add_response_headers_to_add(); auto* header = resp_header->mutable_header(); header->set_key(host_type_header_); - header->set_value( - fmt::format(R"EOF(%UPSTREAM_METADATA(["envoy.lb", "{}"])%)EOF", type_key_)); + header->set_value(fmt::format(R"EOF(%UPSTREAM_METADATA(envoy.lb:{})%)EOF", type_key_)); resp_header = vhost->add_response_headers_to_add(); header = resp_header->mutable_header(); From 1decf60fdf58e97dea069a801aa6f157b7b47f9c Mon Sep 17 00:00:00 2001 From: botengyao Date: Wed, 24 Sep 2025 13:13:39 -0400 Subject: [PATCH 02/10] network ext_proc: improve the test to consider TCP fragmentation (#41207) The TCP read can be partial, improve the integration tests to consider this to eliminate flakiness. 
Risk Level: low --------- Signed-off-by: Boteng Yao --- .../ext_proc/ext_proc_integration_test.cc | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/test/extensions/filters/network/ext_proc/ext_proc_integration_test.cc b/test/extensions/filters/network/ext_proc/ext_proc_integration_test.cc index a1d44f4bf9ac7..e339bd9fa6fb8 100644 --- a/test/extensions/filters/network/ext_proc/ext_proc_integration_test.cc +++ b/test/extensions/filters/network/ext_proc/ext_proc_integration_test.cc @@ -486,13 +486,41 @@ TEST_P(NetworkExtProcFilterIntegrationTest, TcpProxyUpstreamHalfCloseBothWays) { ProcessingRequest write_request; ASSERT_TRUE(processor_stream_->waitForGrpcMessage(*dispatcher_, write_request)); - EXPECT_EQ(write_request.has_write_data(), true); - EXPECT_EQ(write_request.write_data().data(), "server_response"); - EXPECT_EQ(write_request.write_data().end_of_stream(), true); - sendWriteGrpcMessage("server_data_inspected", true); + if (!write_request.write_data().end_of_stream()) { + size_t total_upstream_data = 0; + // We got partial data without end_of_stream + std::string partial_data = write_request.write_data().data(); + std::string partial_response = partial_data + "_inspected"; + sendWriteGrpcMessage(partial_response, false); - tcp_client->waitForData("server_data_inspected"); + // Wait for client to receive the partial data + total_upstream_data += partial_response.length(); + ASSERT_TRUE(tcp_client->waitForData(total_upstream_data)); + + // Wait for the final message with end_of_stream + ProcessingRequest final_request; + ASSERT_TRUE(processor_stream_->waitForGrpcMessage(*dispatcher_, final_request)); + EXPECT_EQ(final_request.has_write_data(), true); + EXPECT_EQ(final_request.write_data().end_of_stream(), true); + + // Respond to the final message + std::string final_data = final_request.write_data().data(); + std::string final_response = final_data.empty() ? 
"" : final_data + "_inspected"; + sendReadGrpcMessage(final_response, true); + + // Wait for the final data if non-empty + if (!final_response.empty()) { + total_upstream_data += final_response.length(); + ASSERT_TRUE(tcp_client->waitForData(total_upstream_data)); + } + } else { + // We got the complete data with end_of_stream in one message + EXPECT_EQ(write_request.write_data().data(), "server_response"); + EXPECT_EQ(write_request.write_data().end_of_stream(), true); + sendWriteGrpcMessage("server_data_inspected", true); + tcp_client->waitForData("server_data_inspected"); + } // Close everything ASSERT_TRUE(fake_upstream_connection->close()); From f1004fa4ee2203ca706a848ac9b7aa6d375756fa Mon Sep 17 00:00:00 2001 From: Greg Greenway Date: Wed, 24 Sep 2025 10:29:29 -0700 Subject: [PATCH 03/10] load_balancers: fix crash on invalid endpoint update with health checks (#41114) Signed-off-by: Greg Greenway --- envoy/upstream/load_balancer.h | 8 ++ envoy/upstream/upstream.h | 6 +- source/common/common/callback_impl.h | 33 +++++-- source/common/config/context_provider_impl.h | 2 +- source/common/router/rds_impl.h | 2 +- source/common/secret/sds_api.h | 8 +- .../common/upstream/cluster_manager_impl.cc | 4 +- .../common/upstream/outlier_detection_impl.cc | 3 +- source/common/upstream/upstream_impl.cc | 27 ++++-- source/common/upstream/upstream_impl.h | 28 +++--- .../extensions/clusters/aggregate/cluster.cc | 1 - source/extensions/clusters/dns/dns_cluster.cc | 11 +-- source/extensions/clusters/eds/eds.cc | 3 +- .../clusters/static/static_cluster.cc | 4 +- .../clusters/strict_dns/strict_dns_cluster.cc | 5 +- .../local_ratelimit/local_ratelimit_impl.cc | 6 +- .../network/redis_proxy/conn_pool_impl.cc | 3 +- .../filters/udp/udp_proxy/udp_proxy_filter.cc | 1 - .../common/health_checker_base_impl.cc | 3 +- .../client_side_weighted_round_robin_lb.cc | 11 +-- .../client_side_weighted_round_robin_lb.h | 2 +- .../common/load_balancer_impl.cc | 19 ++-- 
.../common/thread_aware_lb_impl.cc | 94 +++++++++++-------- .../common/thread_aware_lb_impl.h | 4 +- .../subset/subset_lb.cc | 1 - .../subset/subset_lb.h | 4 +- .../subset/subset_lb_config.h | 7 ++ source/server/drain_manager_impl.h | 2 +- test/common/common/callback_impl_test.cc | 4 +- test/coverage.yaml | 1 + test/integration/eds_integration_test.cc | 44 +++++++++ test/mocks/config/mocks.h | 2 +- test/mocks/upstream/host_set.h | 5 +- test/mocks/upstream/priority_set.cc | 4 +- test/mocks/upstream/priority_set.h | 4 +- 35 files changed, 227 insertions(+), 139 deletions(-) diff --git a/envoy/upstream/load_balancer.h b/envoy/upstream/load_balancer.h index ae252e524f3b6..65db38b85d586 100644 --- a/envoy/upstream/load_balancer.h +++ b/envoy/upstream/load_balancer.h @@ -333,6 +333,14 @@ using ThreadAwareLoadBalancerPtr = std::unique_ptr; class LoadBalancerConfig { public: virtual ~LoadBalancerConfig() = default; + + /** + * Optional method to allow a load balancer to validate endpoints before they're applied. If an + * error is returned from this method, the endpoints are rejected. If this method does not return + * an error, the load balancer must be able to use these endpoints in an update from the priority + * set. 
+ */ + virtual absl::Status validateEndpoints(const PriorityState&) const { return absl::OkStatus(); } }; using LoadBalancerConfigPtr = std::unique_ptr; diff --git a/envoy/upstream/upstream.h b/envoy/upstream/upstream.h index 5e269ecb58501..7389dc26cd68e 100644 --- a/envoy/upstream/upstream.h +++ b/envoy/upstream/upstream.h @@ -499,10 +499,10 @@ using HostSetPtr = std::unique_ptr; class PrioritySet { public: using MemberUpdateCb = - std::function; + std::function; - using PriorityUpdateCb = std::function; + using PriorityUpdateCb = std::function; virtual ~PrioritySet() = default; diff --git a/source/common/common/callback_impl.h b/source/common/common/callback_impl.h index 628043e11bdf6..c14aabe63b33a 100644 --- a/source/common/common/callback_impl.h +++ b/source/common/common/callback_impl.h @@ -21,9 +21,9 @@ namespace Common { * * @see ThreadSafeCallbackManager for dealing with callbacks across multiple threads */ -template class CallbackManager { +template class CallbackManager { public: - using Callback = std::function; + using Callback = std::function; /** * Add a callback. @@ -46,12 +46,16 @@ template class CallbackManager { * to change (specifically, it will crash if the next callback in the list is deleted). * @param args supplies the callback arguments. */ - absl::Status runCallbacks(CallbackArgs... args) { + ReturnType runCallbacks(CallbackArgs... args) { for (auto it = callbacks_.cbegin(); it != callbacks_.cend();) { auto current = *(it++); - RETURN_IF_NOT_OK(current->cb_(args...)); + if constexpr (std::is_same_v) { + RETURN_IF_NOT_OK(current->cb_(args...)); + } else { + current->cb_(args...); + } } - return absl::OkStatus(); + return defaultReturn(); } /** @@ -62,12 +66,16 @@ template class CallbackManager { * @param run_with function that is responsible for generating inputs to callbacks. This will be * executed once for each callback. 
*/ - absl::Status runCallbacksWith(std::function(void)> run_with) { + ReturnType runCallbacksWith(std::function(void)> run_with) { for (auto it = callbacks_.cbegin(); it != callbacks_.cend();) { auto cb = *(it++); - RETURN_IF_NOT_OK(std::apply(cb->cb_, run_with())); + if constexpr (std::is_same_v) { + RETURN_IF_NOT_OK(std::apply(cb->cb_, run_with())); + } else { + std::apply(cb->cb_, run_with()); + } } - return absl::OkStatus(); + return defaultReturn(); } size_t size() const noexcept { return callbacks_.size(); } @@ -100,6 +108,15 @@ template class CallbackManager { */ void remove(typename std::list::iterator& it) { callbacks_.erase(it); } + // Templating helper + ReturnType defaultReturn() { + if constexpr (std::is_same_v) { + return absl::OkStatus(); + } else { + return void(); + } + } + std::list callbacks_; // This is a sentinel shared_ptr used for keeping track of whether the manager is still alive. // It is only held by weak reference in the callback holder above. This is used versus having diff --git a/source/common/config/context_provider_impl.h b/source/common/config/context_provider_impl.h index 46156a73ed31e..69200d406ec92 100644 --- a/source/common/config/context_provider_impl.h +++ b/source/common/config/context_provider_impl.h @@ -52,7 +52,7 @@ class ContextProviderImpl : public ContextProvider { const xds::core::v3::ContextParams node_context_; // Map from resource type URL to dynamic context parameters. 
absl::flat_hash_map dynamic_context_; - mutable Common::CallbackManager update_cb_helper_; + mutable Common::CallbackManager update_cb_helper_; }; } // namespace Config diff --git a/source/common/router/rds_impl.h b/source/common/router/rds_impl.h index 0ffca155a3a94..cdfbfc1bd657f 100644 --- a/source/common/router/rds_impl.h +++ b/source/common/router/rds_impl.h @@ -94,7 +94,7 @@ class RdsRouteConfigSubscription : public Rds::RdsRouteConfigSubscription { VhdsSubscriptionPtr vhds_subscription_; RouteConfigUpdatePtr config_update_info_; - Common::CallbackManager<> update_callback_manager_; + Common::CallbackManager update_callback_manager_; // Access to addUpdateCallback friend class ScopedRdsConfigSubscription; diff --git a/source/common/secret/sds_api.h b/source/common/secret/sds_api.h index a596a03321124..2cd521ab83890 100644 --- a/source/common/secret/sds_api.h +++ b/source/common/secret/sds_api.h @@ -71,7 +71,7 @@ class SdsApi : public Envoy::Config::SubscriptionBase< // Refresh secrets, e.g. re-resolve symlinks in secret paths. virtual void resolveSecret(const FileContentMap& /*files*/) {}; virtual void validateConfig(const envoy::extensions::transport_sockets::tls::v3::Secret&) PURE; - Common::CallbackManager<> update_callback_manager_; + Common::CallbackManager update_callback_manager_; // Config::SubscriptionCallbacks absl::Status onConfigUpdate(const std::vector& resources, @@ -229,6 +229,7 @@ class CertificateValidationContextSdsApi : public SdsApi, CertificateValidationContextPtr resolved_certificate_validation_context_secrets_; // Path based certificates are inlined for future read consistency. 
Common::CallbackManager< + absl::Status, const envoy::extensions::transport_sockets::tls::v3::CertificateValidationContext&> validation_callback_manager_; }; @@ -282,7 +283,7 @@ class TlsSessionTicketKeysSdsApi : public SdsApi, public TlsSessionTicketKeysCon private: Secret::TlsSessionTicketKeysPtr tls_session_ticket_keys_; Common::CallbackManager< - const envoy::extensions::transport_sockets::tls::v3::TlsSessionTicketKeys&> + absl::Status, const envoy::extensions::transport_sockets::tls::v3::TlsSessionTicketKeys&> validation_callback_manager_; }; @@ -333,7 +334,8 @@ class GenericSecretSdsApi : public SdsApi, public GenericSecretConfigProvider { private: GenericSecretPtr generic_secret_; - Common::CallbackManager + Common::CallbackManager validation_callback_manager_; }; diff --git a/source/common/upstream/cluster_manager_impl.cc b/source/common/upstream/cluster_manager_impl.cc index ab8f102afec56..11cd469e0c67b 100644 --- a/source/common/upstream/cluster_manager_impl.cc +++ b/source/common/upstream/cluster_manager_impl.cc @@ -555,7 +555,7 @@ absl::Status ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster // This is used by cluster types such as EDS clusters to drain the connection pools of removed // hosts. cluster_data->second->member_update_cb_ = cluster.prioritySet().addMemberUpdateCb( - [&cluster, this](const HostVector&, const HostVector& hosts_removed) -> absl::Status { + [&cluster, this](const HostVector&, const HostVector& hosts_removed) { if (cluster.info()->lbConfig().close_connections_on_host_set_change()) { for (const auto& host_set : cluster.prioritySet().hostSetsPerPriority()) { // This will drain all tcp and http connection pools. 
@@ -572,7 +572,6 @@ absl::Status ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster postThreadLocalRemoveHosts(cluster, hosts_removed); } } - return absl::OkStatus(); }); // This is used by cluster types such as EDS clusters to update the cluster @@ -613,7 +612,6 @@ absl::Status ClusterManagerImpl::onClusterInit(ClusterManagerCluster& cm_cluster postThreadLocalClusterUpdate( cm_cluster, ThreadLocalClusterUpdateParams(priority, hosts_added, hosts_removed)); } - return absl::OkStatus(); }); // Finally, post updates cross-thread so the per-thread load balancers are ready. First we diff --git a/source/common/upstream/outlier_detection_impl.cc b/source/common/upstream/outlier_detection_impl.cc index 2135f8f016da5..1a8bed06adeb8 100644 --- a/source/common/upstream/outlier_detection_impl.cc +++ b/source/common/upstream/outlier_detection_impl.cc @@ -324,7 +324,7 @@ void DetectorImpl::initialize(Cluster& cluster) { }); } member_update_cb_ = cluster.prioritySet().addMemberUpdateCb( - [this](const HostVector& hosts_added, const HostVector& hosts_removed) -> absl::Status { + [this](const HostVector& hosts_added, const HostVector& hosts_removed) { for (const HostSharedPtr& host : hosts_added) { addHostMonitor(host); } @@ -338,7 +338,6 @@ void DetectorImpl::initialize(Cluster& cluster) { host_monitors_.erase(host); } - return absl::OkStatus(); }); armIntervalTimer(); diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index d1d9fdddcb007..02d3a076df239 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -748,7 +748,7 @@ void HostSetImpl::updateHosts(PrioritySet::UpdateHostsParams&& update_hosts_para excluded_hosts_per_locality_ = std::move(update_hosts_params.excluded_hosts_per_locality); locality_weights_ = std::move(locality_weights); - THROW_IF_NOT_OK(runUpdateCallbacks(hosts_added, hosts_removed)); + runUpdateCallbacks(hosts_added, hosts_removed); } 
PrioritySet::UpdateHostsParams @@ -829,7 +829,7 @@ void PrioritySetImpl::updateHosts(uint32_t priority, UpdateHostsParams&& update_ hosts_removed, weighted_priority_health, overprovisioning_factor); if (!batch_update_) { - THROW_IF_NOT_OK(runUpdateCallbacks(hosts_added, hosts_removed)); + runUpdateCallbacks(hosts_added, hosts_removed); } } @@ -843,7 +843,7 @@ void PrioritySetImpl::batchHostUpdate(BatchUpdateCb& callback) { HostVector net_hosts_added = filterHosts(scope.all_hosts_added_, scope.all_hosts_removed_); HostVector net_hosts_removed = filterHosts(scope.all_hosts_removed_, scope.all_hosts_added_); - THROW_IF_NOT_OK(runUpdateCallbacks(net_hosts_added, net_hosts_removed)); + runUpdateCallbacks(net_hosts_added, net_hosts_removed); } void PrioritySetImpl::BatchUpdateScope::updateHosts( @@ -1609,7 +1609,6 @@ ClusterImplBase::ClusterImplBase(const envoy::config::cluster::v3::Cluster& clus info_->endpointStats().membership_healthy_.set(healthy_hosts); info_->endpointStats().membership_degraded_.set(degraded_hosts); info_->endpointStats().membership_excluded_.set(excluded_hosts); - return absl::OkStatus(); }); // Drop overload configuration parsing. 
SET_AND_RETURN_IF_NOT_OK(parseDropOverloadConfig(cluster.load_assignment()), creation_status); @@ -1867,11 +1866,21 @@ ClusterImplBase::resolveProtoAddress(const envoy::config::core::v3::Address& add return resolve_status; } -absl::Status ClusterImplBase::validateEndpointsForZoneAwareRouting( - const envoy::config::endpoint::v3::LocalityLbEndpoints& endpoints) const { - if (local_cluster_ && endpoints.priority() > 0) { - return absl::InvalidArgumentError( - fmt::format("Unexpected non-zero priority for local cluster '{}'.", info()->name())); +absl::Status ClusterImplBase::validateEndpoints( + absl::Span localities, + OptRef priorities) const { + for (const auto* endpoints : localities) { + if (local_cluster_ && endpoints->priority() > 0) { + return absl::InvalidArgumentError( + fmt::format("Unexpected non-zero priority for local cluster '{}'.", info()->name())); + } + } + + if (priorities.has_value()) { + OptRef lb_config = info_->loadBalancerConfig(); + if (lb_config.has_value()) { + return lb_config->validateEndpoints(*priorities); + } } return absl::OkStatus(); } diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index 481c3602d2fe7..d91e610716679 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -621,9 +621,8 @@ class HostSetImpl : public HostSet { absl::optional overprovisioning_factor = absl::nullopt); protected: - virtual absl::Status runUpdateCallbacks(const HostVector& hosts_added, - const HostVector& hosts_removed) { - return member_update_cb_helper_.runCallbacks(priority_, hosts_added, hosts_removed); + virtual void runUpdateCallbacks(const HostVector& hosts_added, const HostVector& hosts_removed) { + member_update_cb_helper_.runCallbacks(priority_, hosts_added, hosts_removed); } private: @@ -639,7 +638,7 @@ class HostSetImpl : public HostSet { HostsPerLocalityConstSharedPtr degraded_hosts_per_locality_{HostsPerLocalityImpl::empty()}; 
HostsPerLocalityConstSharedPtr excluded_hosts_per_locality_{HostsPerLocalityImpl::empty()}; // TODO(mattklein123): Remove mutable. - mutable Common::CallbackManager + mutable Common::CallbackManager member_update_cb_helper_; // Locality weights. LocalityWeightsConstSharedPtr locality_weights_; @@ -693,13 +692,12 @@ class PrioritySetImpl : public PrioritySet { overprovisioning_factor); } - virtual absl::Status runUpdateCallbacks(const HostVector& hosts_added, - const HostVector& hosts_removed) { - return member_update_cb_helper_.runCallbacks(hosts_added, hosts_removed); + virtual void runUpdateCallbacks(const HostVector& hosts_added, const HostVector& hosts_removed) { + member_update_cb_helper_.runCallbacks(hosts_added, hosts_removed); } - virtual absl::Status runReferenceUpdateCallbacks(uint32_t priority, const HostVector& hosts_added, - const HostVector& hosts_removed) { - return priority_update_cb_helper_.runCallbacks(priority, hosts_added, hosts_removed); + virtual void runReferenceUpdateCallbacks(uint32_t priority, const HostVector& hosts_added, + const HostVector& hosts_removed) { + priority_update_cb_helper_.runCallbacks(priority, hosts_added, hosts_removed); } // This vector will generally have at least one member, for priority level 0. // It will expand as host sets are added but currently does not shrink to @@ -714,8 +712,9 @@ class PrioritySetImpl : public PrioritySet { // because host_sets_ is directly returned so we avoid translation. std::vector host_sets_priority_update_cbs_; // TODO(mattklein123): Remove mutable. 
- mutable Common::CallbackManager member_update_cb_helper_; - mutable Common::CallbackManager + mutable Common::CallbackManager + member_update_cb_helper_; + mutable Common::CallbackManager priority_update_cb_helper_; bool batch_update_ : 1; @@ -1234,8 +1233,9 @@ class ClusterImplBase : public Cluster, protected Logger::Loggable endpoints, + OptRef priorities) const; private: static const absl::string_view DoNotValidateAlpnRuntimeKey; diff --git a/source/extensions/clusters/aggregate/cluster.cc b/source/extensions/clusters/aggregate/cluster.cc index 83eee4420f30a..a4a82aa4176e3 100644 --- a/source/extensions/clusters/aggregate/cluster.cc +++ b/source/extensions/clusters/aggregate/cluster.cc @@ -49,7 +49,6 @@ void AggregateClusterLoadBalancer::addMemberUpdateCallbackForCluster( ENVOY_LOG(debug, "member update for cluster '{}' in aggregate cluster '{}'", target_cluster_info->name(), parent_info_->name()); refresh(); - return absl::OkStatus(); }); } diff --git a/source/extensions/clusters/dns/dns_cluster.cc b/source/extensions/clusters/dns/dns_cluster.cc index b9939cb52e718..9919fdb2cfd1e 100644 --- a/source/extensions/clusters/dns/dns_cluster.cc +++ b/source/extensions/clusters/dns/dns_cluster.cc @@ -184,13 +184,10 @@ DnsClusterImpl::DnsClusterImpl(const envoy::config::cluster::v3::Cluster& cluste return; } } else { // Strict DNS - for (const auto& locality_lb_endpoint : locality_lb_endpoints) { - // Strict DNS clusters must ensure that the priority for all localities - // are set to zero when using zone-aware routing. Zone-aware routing only - // works for localities with priority zero (the highest). - SET_AND_RETURN_IF_NOT_OK(validateEndpointsForZoneAwareRouting(locality_lb_endpoint), - creation_status); - } + // Strict DNS clusters must ensure that the priority for all localities + // are set to zero when using zone-aware routing. Zone-aware routing only + // works for localities with priority zero (the highest). 
+ SET_AND_RETURN_IF_NOT_OK(validateEndpoints(locality_lb_endpoints, {}), creation_status); } for (const auto& locality_lb_endpoint : locality_lb_endpoints) { diff --git a/source/extensions/clusters/eds/eds.cc b/source/extensions/clusters/eds/eds.cc index c86a270db4467..ae4eba177a231 100644 --- a/source/extensions/clusters/eds/eds.cc +++ b/source/extensions/clusters/eds/eds.cc @@ -77,8 +77,6 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h absl::flat_hash_set all_new_hosts; PriorityStateManager priority_state_manager(parent_, parent_.local_info_, &host_update_cb); for (const auto& locality_lb_endpoint : cluster_load_assignment_.endpoints()) { - THROW_IF_NOT_OK(parent_.validateEndpointsForZoneAwareRouting(locality_lb_endpoint)); - priority_state_manager.initializePriorityFor(locality_lb_endpoint); if (locality_lb_endpoint.has_leds_cluster_locality_config()) { @@ -120,6 +118,7 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h // Loop over all priorities that exist in the new configuration. 
auto& priority_state = priority_state_manager.priorityState(); + THROW_IF_NOT_OK(parent_.validateEndpoints(cluster_load_assignment_.endpoints(), priority_state)); for (size_t i = 0; i < priority_state.size(); ++i) { if (parent_.locality_weights_map_.size() <= i) { parent_.locality_weights_map_.resize(i + 1); diff --git a/source/extensions/clusters/static/static_cluster.cc b/source/extensions/clusters/static/static_cluster.cc index 42a4a8798c769..25f8e18537f36 100644 --- a/source/extensions/clusters/static/static_cluster.cc +++ b/source/extensions/clusters/static/static_cluster.cc @@ -20,7 +20,6 @@ StaticClusterImpl::StaticClusterImpl(const envoy::config::cluster::v3::Cluster& weighted_priority_health_ = cluster_load_assignment.policy().weighted_priority_health(); for (const auto& locality_lb_endpoint : cluster_load_assignment.endpoints()) { - THROW_IF_NOT_OK(validateEndpointsForZoneAwareRouting(locality_lb_endpoint)); priority_state_manager_->initializePriorityFor(locality_lb_endpoint); for (const auto& lb_endpoint : locality_lb_endpoint.lb_endpoints()) { std::vector address_list; @@ -47,6 +46,9 @@ StaticClusterImpl::StaticClusterImpl(const envoy::config::cluster::v3::Cluster& address_list, locality_lb_endpoint, lb_endpoint); } } + + THROW_IF_NOT_OK(validateEndpoints(cluster_load_assignment.endpoints(), + priority_state_manager_->priorityState())); } void StaticClusterImpl::startPreInit() { diff --git a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc index 5f2ab6ac8f36f..49f281be43ddb 100644 --- a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc +++ b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc @@ -42,15 +42,16 @@ StrictDnsClusterImpl::StrictDnsClusterImpl( respect_dns_ttl_(dns_cluster.respect_dns_ttl()), dns_lookup_family_( Envoy::DnsUtils::getDnsLookupFamilyFromEnum(dns_cluster.dns_lookup_family())) { + RETURN_ONLY_IF_NOT_OK_REF(creation_status); + 
failure_backoff_strategy_ = Config::Utility::prepareDnsRefreshStrategy( dns_cluster, dns_refresh_rate_ms_.count(), context.serverFactoryContext().api().randomGenerator()); std::list resolve_targets; const auto& locality_lb_endpoints = load_assignment_.endpoints(); + THROW_IF_NOT_OK(validateEndpoints(locality_lb_endpoints, {})); for (const auto& locality_lb_endpoint : locality_lb_endpoints) { - THROW_IF_NOT_OK(validateEndpointsForZoneAwareRouting(locality_lb_endpoint)); - for (const auto& lb_endpoint : locality_lb_endpoint.lb_endpoints()) { const auto& socket_address = lb_endpoint.endpoint().address().socket_address(); if (!socket_address.resolver_name().empty()) { diff --git a/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc b/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc index 1ff6b295b0d52..7b2f94da8cb25 100644 --- a/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc +++ b/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc @@ -37,10 +37,8 @@ ShareProviderManager::ShareProviderManager(Event::Dispatcher& main_dispatcher, : main_dispatcher_(main_dispatcher), cluster_(cluster) { // It's safe to capture the local cluster reference here because the local cluster is // guaranteed to be static cluster and should never be removed. 
- handle_ = cluster_.prioritySet().addMemberUpdateCb([this](const auto&, const auto&) { - share_monitor_->onLocalClusterUpdate(cluster_); - return absl::OkStatus(); - }); + handle_ = cluster_.prioritySet().addMemberUpdateCb( + [this](const auto&, const auto&) { share_monitor_->onLocalClusterUpdate(cluster_); }); share_monitor_ = std::make_shared(); share_monitor_->onLocalClusterUpdate(cluster_); } diff --git a/source/extensions/filters/network/redis_proxy/conn_pool_impl.cc b/source/extensions/filters/network/redis_proxy/conn_pool_impl.cc index 2facc0d8852e8..76eabcc5cf103 100644 --- a/source/extensions/filters/network/redis_proxy/conn_pool_impl.cc +++ b/source/extensions/filters/network/redis_proxy/conn_pool_impl.cc @@ -170,10 +170,9 @@ void InstanceImpl::ThreadLocalPool::onClusterAddOrUpdateNonVirtual( ASSERT(host_set_member_update_cb_handle_ == nullptr); host_set_member_update_cb_handle_ = cluster_->prioritySet().addMemberUpdateCb( [this](const std::vector& hosts_added, - const std::vector& hosts_removed) -> absl::Status { + const std::vector& hosts_removed) { onHostsAdded(hosts_added); onHostsRemoved(hosts_removed); - return absl::OkStatus(); }); ASSERT(host_address_map_.empty()); diff --git a/source/extensions/filters/udp/udp_proxy/udp_proxy_filter.cc b/source/extensions/filters/udp/udp_proxy/udp_proxy_filter.cc index 0d2899dc53385..f5c3322021cdf 100644 --- a/source/extensions/filters/udp/udp_proxy/udp_proxy_filter.cc +++ b/source/extensions/filters/udp/udp_proxy/udp_proxy_filter.cc @@ -225,7 +225,6 @@ UdpProxyFilter::ClusterInfo::ClusterInfo(UdpProxyFilter& filter, host_to_sessions_.erase(host_sessions_it); } } - return absl::OkStatus(); })) {} UdpProxyFilter::ClusterInfo::~ClusterInfo() { diff --git a/source/extensions/health_checkers/common/health_checker_base_impl.cc b/source/extensions/health_checkers/common/health_checker_base_impl.cc index 04eb57f7b6a20..3923e1eaba570 100644 --- a/source/extensions/health_checkers/common/health_checker_base_impl.cc +++ 
b/source/extensions/health_checkers/common/health_checker_base_impl.cc @@ -40,9 +40,8 @@ HealthCheckerImplBase::HealthCheckerImplBase(const Cluster& cluster, transport_socket_options_(initTransportSocketOptions(config)), transport_socket_match_metadata_(initTransportSocketMatchMetadata(config)), member_update_cb_{cluster_.prioritySet().addMemberUpdateCb( - [this](const HostVector& hosts_added, const HostVector& hosts_removed) -> absl::Status { + [this](const HostVector& hosts_added, const HostVector& hosts_removed) { onClusterMemberUpdate(hosts_added, hosts_removed); - return absl::OkStatus(); })} {} std::shared_ptr diff --git a/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.cc b/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.cc index 6d46954015e2a..44d8f7ed51379 100644 --- a/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.cc +++ b/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.cc @@ -63,10 +63,8 @@ ClientSideWeightedRoundRobinLoadBalancer::WorkerLocalLb::WorkerLocalLb( common_config, healthy_panic_threshold, 100, 50), getRoundRobinConfig(common_config), time_source) { if (tls_shim.has_value()) { - apply_weights_cb_handle_ = tls_shim->apply_weights_cb_helper_.add([this](uint32_t priority) { - refresh(priority); - return absl::OkStatus(); - }); + apply_weights_cb_handle_ = + tls_shim->apply_weights_cb_helper_.add([this](uint32_t priority) { refresh(priority); }); } } @@ -258,7 +256,7 @@ void ClientSideWeightedRoundRobinLoadBalancer::WorkerLocalLbFactory::applyWeight uint32_t priority) { tls_->runOnAllThreads([priority](OptRef tls_shim) -> void { if (tls_shim.has_value()) { - auto status = tls_shim->apply_weights_cb_helper_.runCallbacks(priority); + tls_shim->apply_weights_cb_helper_.runCallbacks(priority); } }); } 
@@ -295,10 +293,9 @@ absl::Status ClientSideWeightedRoundRobinLoadBalancer::initialize() { // Setup a callback to receive priority set updates. priority_update_cb_ = priority_set_.addPriorityUpdateCb( - [this](uint32_t, const HostVector& hosts_added, const HostVector&) -> absl::Status { + [this](uint32_t, const HostVector& hosts_added, const HostVector&) { addClientSideLbPolicyDataToHosts(hosts_added); updateWeightsOnMainThread(); - return absl::OkStatus(); }); weight_calculation_timer_->enableTimer(weight_update_period_); diff --git a/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.h b/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.h index b9652c0771bd5..2e65e71c519a8 100644 --- a/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.h +++ b/source/extensions/load_balancing_policies/client_side_weighted_round_robin/client_side_weighted_round_robin_lb.h @@ -138,7 +138,7 @@ class ClientSideWeightedRoundRobinLoadBalancer : public Upstream::ThreadAwareLoa // Thread local shim to store callbacks for weight updates of worker local lb. class ThreadLocalShim : public Envoy::ThreadLocal::ThreadLocalObject { public: - Common::CallbackManager apply_weights_cb_helper_; + Common::CallbackManager apply_weights_cb_helper_; }; // This class is used to handle the load balancing on the worker thread. 
diff --git a/source/extensions/load_balancing_policies/common/load_balancer_impl.cc b/source/extensions/load_balancing_policies/common/load_balancer_impl.cc index 60c31c890e348..11e8138df33aa 100644 --- a/source/extensions/load_balancing_policies/common/load_balancer_impl.cc +++ b/source/extensions/load_balancing_policies/common/load_balancer_impl.cc @@ -108,13 +108,12 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterLbSta recalculatePerPriorityPanic(); priority_update_cb_ = priority_set_.addPriorityUpdateCb( - [this](uint32_t priority, const HostVector&, const HostVector&) -> absl::Status { + [this](uint32_t priority, const HostVector&, const HostVector&) { recalculatePerPriorityState(priority, priority_set_, per_priority_load_, per_priority_health_, per_priority_degraded_, total_healthy_hosts_); recalculatePerPriorityPanic(); stashed_random_.clear(); - return absl::OkStatus(); }); } @@ -434,7 +433,7 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase( } priority_update_cb_ = priority_set_.addPriorityUpdateCb( - [this](uint32_t priority, const HostVector&, const HostVector&) -> absl::Status { + [this](uint32_t priority, const HostVector&, const HostVector&) { // Make sure per_priority_state_ is as large as priority_set_.hostSetsPerPriority() resizePerPriorityState(); // If P=0 changes, regenerate locality routing structures. Locality based routing is @@ -446,7 +445,6 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase( if (locality_weighted_balancing_) { rebuildLocalityWrrForPriority(priority); } - return absl::OkStatus(); }); if (local_priority_set_) { // Multiple priorities are unsupported for local priority sets. @@ -455,12 +453,11 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase( // the locality routing structure. 
ASSERT(local_priority_set_->hostSetsPerPriority().size() == 1); local_priority_set_member_update_cb_handle_ = local_priority_set_->addPriorityUpdateCb( - [this](uint32_t priority, const HostVector&, const HostVector&) -> absl::Status { + [this](uint32_t priority, const HostVector&, const HostVector&) { ASSERT(priority == 0); // If the set of local Envoys changes, regenerate routing for P=0 as it does priority // based routing. regenerateLocalityRoutingStructures(); - return absl::OkStatus(); }); } } @@ -936,16 +933,12 @@ EdfLoadBalancerBase::EdfLoadBalancerBase( // so we will need to do better at delta tracking to scale (see // https://github.com/envoyproxy/envoy/issues/2874). priority_update_cb_ = priority_set.addPriorityUpdateCb( - [this](uint32_t priority, const HostVector&, const HostVector&) { - refresh(priority); - return absl::OkStatus(); - }); - member_update_cb_ = priority_set.addMemberUpdateCb( - [this](const HostVector& hosts_added, const HostVector&) -> absl::Status { + [this](uint32_t priority, const HostVector&, const HostVector&) { refresh(priority); }); + member_update_cb_ = + priority_set.addMemberUpdateCb([this](const HostVector& hosts_added, const HostVector&) { if (isSlowStartEnabled()) { recalculateHostsInSlowStart(hosts_added); } - return absl::OkStatus(); }); } diff --git a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc index 4056b0aae5d82..01a3e2dd21841 100644 --- a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc +++ b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc @@ -14,18 +14,17 @@ namespace Upstream { // HostSetImpl::effectiveLocalityWeight. 
namespace { -absl::Status normalizeHostWeights(const HostVector& hosts, double normalized_locality_weight, - NormalizedHostWeightVector& normalized_host_weights, - double& min_normalized_weight, double& max_normalized_weight) { +void normalizeHostWeights(const HostVector& hosts, double normalized_locality_weight, + NormalizedHostWeightVector& normalized_host_weights, + double& min_normalized_weight, double& max_normalized_weight) { // sum should be at most uint32_t max value, so we can validate it by accumulating into unit64_t // and making sure there was no overflow uint64_t sum = 0; for (const auto& host : hosts) { sum += host->weight(); if (sum > std::numeric_limits::max()) { - return absl::InvalidArgumentError( - fmt::format("The sum of weights of all upstream hosts in a locality exceeds {}", - std::numeric_limits::max())); + IS_ENVOY_BUG("weights should have been previously validated in validateEndpoints()"); + return; } } @@ -35,14 +34,12 @@ absl::Status normalizeHostWeights(const HostVector& hosts, double normalized_loc min_normalized_weight = std::min(min_normalized_weight, weight); max_normalized_weight = std::max(max_normalized_weight, weight); } - return absl::OkStatus(); } -absl::Status normalizeLocalityWeights(const HostsPerLocality& hosts_per_locality, - const LocalityWeights& locality_weights, - NormalizedHostWeightVector& normalized_host_weights, - double& min_normalized_weight, - double& max_normalized_weight) { +void normalizeLocalityWeights(const HostsPerLocality& hosts_per_locality, + const LocalityWeights& locality_weights, + NormalizedHostWeightVector& normalized_host_weights, + double& min_normalized_weight, double& max_normalized_weight) { ASSERT(locality_weights.size() == hosts_per_locality.get().size()); // sum should be at most uint32_t max value, so we can validate it by accumulating into unit64_t @@ -51,15 +48,13 @@ absl::Status normalizeLocalityWeights(const HostsPerLocality& hosts_per_locality for (const auto weight : 
locality_weights) { sum += weight; if (sum > std::numeric_limits::max()) { - return absl::InvalidArgumentError( - fmt::format("The sum of weights of all localities at the same priority exceeds {}", - std::numeric_limits::max())); + IS_ENVOY_BUG("locality weights should have been validated in validateEndpoints"); } } // Locality weights (unlike host weights) may be 0. If _all_ locality weights were 0, bail out. if (sum == 0) { - return absl::OkStatus(); + return; } // Compute normalized weights for all hosts in each locality. If a locality was assigned zero @@ -68,33 +63,29 @@ absl::Status normalizeLocalityWeights(const HostsPerLocality& hosts_per_locality if (locality_weights[i] != 0) { const HostVector& hosts = hosts_per_locality.get()[i]; const double normalized_locality_weight = static_cast(locality_weights[i]) / sum; - RETURN_IF_NOT_OK(normalizeHostWeights(hosts, normalized_locality_weight, - normalized_host_weights, min_normalized_weight, - max_normalized_weight)); + normalizeHostWeights(hosts, normalized_locality_weight, normalized_host_weights, + min_normalized_weight, max_normalized_weight); } } - return absl::OkStatus(); } -absl::Status normalizeWeights(const HostSet& host_set, bool in_panic, - NormalizedHostWeightVector& normalized_host_weights, - double& min_normalized_weight, double& max_normalized_weight, - bool locality_weighted_balancing) { +void normalizeWeights(const HostSet& host_set, bool in_panic, + NormalizedHostWeightVector& normalized_host_weights, + double& min_normalized_weight, double& max_normalized_weight, + bool locality_weighted_balancing) { if (!locality_weighted_balancing || host_set.localityWeights() == nullptr || host_set.localityWeights()->empty()) { // If we're not dealing with locality weights, just normalize weights for the flat set of hosts. const auto& hosts = in_panic ? 
host_set.hosts() : host_set.healthyHosts(); - RETURN_IF_NOT_OK(normalizeHostWeights(hosts, 1.0, normalized_host_weights, - min_normalized_weight, max_normalized_weight)); + normalizeHostWeights(hosts, 1.0, normalized_host_weights, min_normalized_weight, + max_normalized_weight); } else { // Otherwise, normalize weights across all localities. const auto& hosts_per_locality = in_panic ? host_set.hostsPerLocality() : host_set.healthyHostsPerLocality(); - RETURN_IF_NOT_OK(normalizeLocalityWeights(hosts_per_locality, *(host_set.localityWeights()), - normalized_host_weights, min_normalized_weight, - max_normalized_weight)); + normalizeLocalityWeights(hosts_per_locality, *(host_set.localityWeights()), + normalized_host_weights, min_normalized_weight, max_normalized_weight); } - return absl::OkStatus(); } std::string generateCookie(LoadBalancerContext* context, absl::string_view name, @@ -136,12 +127,13 @@ absl::Status ThreadAwareLoadBalancerBase::initialize() { // complicated initialization as the load balancer would need its own initialized callback. I // think the synchronous/asynchronous split is probably the best option. 
priority_update_cb_ = priority_set_.addPriorityUpdateCb( - [this](uint32_t, const HostVector&, const HostVector&) -> absl::Status { return refresh(); }); + [this](uint32_t, const HostVector&, const HostVector&) { refresh(); }); - return refresh(); + refresh(); + return absl::OkStatus(); } -absl::Status ThreadAwareLoadBalancerBase::refresh() { +void ThreadAwareLoadBalancerBase::refresh() { auto per_priority_state_vector = std::make_shared>( priority_set_.hostSetsPerPriority().size()); auto healthy_per_priority_load = @@ -161,10 +153,8 @@ absl::Status ThreadAwareLoadBalancerBase::refresh() { NormalizedHostWeightVector normalized_host_weights; double min_normalized_weight = 1.0; double max_normalized_weight = 0.0; - absl::Status status = normalizeWeights(*host_set, per_priority_state->global_panic_, - normalized_host_weights, min_normalized_weight, - max_normalized_weight, locality_weighted_balancing_); - RETURN_IF_NOT_OK(status); + normalizeWeights(*host_set, per_priority_state->global_panic_, normalized_host_weights, + min_normalized_weight, max_normalized_weight, locality_weighted_balancing_); per_priority_state->current_lb_ = createLoadBalancer( std::move(normalized_host_weights), min_normalized_weight, max_normalized_weight); } @@ -175,7 +165,6 @@ absl::Status ThreadAwareLoadBalancerBase::refresh() { factory_->degraded_per_priority_load_ = degraded_per_priority_load; factory_->per_priority_state_ = per_priority_state_vector; } - return absl::OkStatus(); } HostSelectionResponse @@ -370,5 +359,34 @@ TypedHashLbConfigBase::TypedHashLbConfigBase(absl::Spanweight(); + if (host_sum > std::numeric_limits::max()) { + return absl::InvalidArgumentError( + fmt::format("The sum of weights of all upstream hosts in a locality exceeds {}", + std::numeric_limits::max())); + } + } + + uint64_t locality_sum = 0; + for (const auto& [_, weight] : locality_weights_map) { + locality_sum += weight; + if (locality_sum > std::numeric_limits::max()) { + return absl::InvalidArgumentError( 
+ fmt::format("The sum of weights of all localities at the same priority exceeds {}", + std::numeric_limits::max())); + } + } + } + + return absl::OkStatus(); +} + } // namespace Upstream } // namespace Envoy diff --git a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.h b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.h index e1b1373d82da0..9db1c2247a7df 100644 --- a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.h +++ b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.h @@ -176,7 +176,7 @@ class ThreadAwareLoadBalancerBase : public LoadBalancerBase, public ThreadAwareL virtual HashingLoadBalancerSharedPtr createLoadBalancer(const NormalizedHostWeightVector& normalized_host_weights, double min_normalized_weight, double max_normalized_weight) PURE; - absl::Status refresh(); + void refresh(); std::shared_ptr factory_; const bool locality_weighted_balancing_{}; @@ -189,6 +189,8 @@ class TypedHashLbConfigBase : public LoadBalancerConfig { TypedHashLbConfigBase(absl::Span hash_policy, Regex::Engine& regex_engine, absl::Status& creation_status); + absl::Status validateEndpoints(const PriorityState& priorities) const override; + HashPolicySharedPtr hash_policy_; }; diff --git a/source/extensions/load_balancing_policies/subset/subset_lb.cc b/source/extensions/load_balancing_policies/subset/subset_lb.cc index d56bffd4cae44..5fca592be3c8a 100644 --- a/source/extensions/load_balancing_policies/subset/subset_lb.cc +++ b/source/extensions/load_balancing_policies/subset/subset_lb.cc @@ -92,7 +92,6 @@ SubsetLoadBalancer::SubsetLoadBalancer(const SubsetLoadBalancerConfig& lb_config [this](uint32_t priority, const HostVector&, const HostVector&) { refreshSubsets(priority); purgeEmptySubsets(subsets_); - return absl::OkStatus(); }); } diff --git a/source/extensions/load_balancing_policies/subset/subset_lb.h b/source/extensions/load_balancing_policies/subset/subset_lb.h index 
9e79f84f1b4df..aa886c14fb0d9 100644 --- a/source/extensions/load_balancing_policies/subset/subset_lb.h +++ b/source/extensions/load_balancing_policies/subset/subset_lb.h @@ -109,7 +109,7 @@ class SubsetLoadBalancer : public LoadBalancer, Logger::Loggable(host_sets_[priority].get()) ->update(matching_hosts, hosts_added, hosts_removed); - THROW_IF_NOT_OK(runUpdateCallbacks(hosts_added, hosts_removed)); + runUpdateCallbacks(hosts_added, hosts_removed); } // Thread aware LB if applicable. diff --git a/source/extensions/load_balancing_policies/subset/subset_lb_config.h b/source/extensions/load_balancing_policies/subset/subset_lb_config.h index af6aac910386b..45d8054d92427 100644 --- a/source/extensions/load_balancing_policies/subset/subset_lb_config.h +++ b/source/extensions/load_balancing_policies/subset/subset_lb_config.h @@ -217,6 +217,13 @@ class SubsetLoadBalancerConfig : public Upstream::LoadBalancerConfig { TypedLoadBalancerFactory* child_factory, LoadBalancerConfigPtr child_config); + absl::Status validateEndpoints(const PriorityState& priorities) const override { + if (child_lb_config_ != nullptr) { + return child_lb_config_->validateEndpoints(priorities); + } + return absl::OkStatus(); + } + Upstream::ThreadAwareLoadBalancerPtr createLoadBalancer(const Upstream::ClusterInfo& cluster_info, const Upstream::PrioritySet& child_priority_set, Runtime::Loader& runtime, diff --git a/source/server/drain_manager_impl.h b/source/server/drain_manager_impl.h index 1fb01e8146679..143c4ea73220d 100644 --- a/source/server/drain_manager_impl.h +++ b/source/server/drain_manager_impl.h @@ -62,7 +62,7 @@ class DrainManagerImpl : Logger::Loggable, public DrainManager std::map drain_deadlines_ = { {Network::DrainDirection::InboundOnly, MonotonicTime()}, {Network::DrainDirection::All, MonotonicTime()}}; - mutable Common::CallbackManager cbs_{}; + mutable Common::CallbackManager cbs_{}; std::vector> drain_complete_cbs_; // Callbacks called by startDrainSequence to cascade/proxy to 
children diff --git a/test/common/common/callback_impl_test.cc b/test/common/common/callback_impl_test.cc index bf1410fa495da..995fb2b7506c0 100644 --- a/test/common/common/callback_impl_test.cc +++ b/test/common/common/callback_impl_test.cc @@ -21,7 +21,7 @@ class CallbackManagerTest : public testing::Test { TEST_F(CallbackManagerTest, All) { InSequence s; - CallbackManager manager; + CallbackManager manager; auto handle1 = manager.add([this](int arg) { called(arg); return absl::OkStatus(); @@ -55,7 +55,7 @@ TEST_F(CallbackManagerTest, All) { TEST_F(CallbackManagerTest, DestroyManagerBeforeHandle) { CallbackHandlePtr handle; { - CallbackManager manager; + CallbackManager manager; handle = manager.add([this](int arg) { called(arg); return absl::OkStatus(); diff --git a/test/coverage.yaml b/test/coverage.yaml index cf88685697bcd..af05a8249083e 100644 --- a/test/coverage.yaml +++ b/test/coverage.yaml @@ -60,6 +60,7 @@ directories: source/extensions/internal_redirect/safe_cross_scheme: 81.3 source/extensions/internal_redirect/allow_listed_routes: 85.7 source/extensions/internal_redirect/previous_routes: 89.3 + source/extensions/load_balancing_policies/common: 96.3 source/extensions/load_balancing_policies/maglev: 94.9 source/extensions/load_balancing_policies/round_robin: 96.4 source/extensions/load_balancing_policies/ring_hash: 96.9 diff --git a/test/integration/eds_integration_test.cc b/test/integration/eds_integration_test.cc index ecda792d408f5..e7484ab42aa81 100644 --- a/test/integration/eds_integration_test.cc +++ b/test/integration/eds_integration_test.cc @@ -88,6 +88,7 @@ class EdsIntegrationTest uint32_t healthy_endpoints = 0; uint32_t degraded_endpoints = 0; uint32_t disable_active_hc_endpoints = 0; + absl::optional load_balancing_weight = absl::nullopt; absl::optional weighted_priority_health = absl::nullopt; absl::optional overprovisioning_factor = absl::nullopt; absl::optional drop_overload_numerator = absl::nullopt; @@ -138,6 +139,10 @@ class 
EdsIntegrationTest ->mutable_health_check_config() ->set_disable_active_health_check(true); } + if (endpoint_setting.load_balancing_weight.has_value()) { + endpoint->mutable_load_balancing_weight()->set_value( + endpoint_setting.load_balancing_weight.value()); + } } if (await_update) { @@ -806,5 +811,44 @@ TEST_P(EdsIntegrationTest, DropOverloadTestForEdsClusterNoDrop) { dropOverloadTe TEST_P(EdsIntegrationTest, DropOverloadTestForEdsClusterAllDrop) { dropOverloadTest(100, "503"); } +TEST_P(EdsIntegrationTest, LoadBalancerRejectsEndpoints) { + autonomous_upstream_ = true; + initializeTest(false /* http_active_hc */, [](envoy::config::cluster::v3::Cluster& cluster) { + // Maglev (and all load balancers that inherit `ThreadAwareLoadBalancerBase`) has a + // constraint that the total endpoint weights must not exceed uint32_max. Use this to generate + // errors instead of writing a test load balancing extension. + cluster.set_lb_policy(::envoy::config::cluster::v3::Cluster::MAGLEV); + }); + EndpointSettingOptions options; + options.total_endpoints = 4; + options.healthy_endpoints = 4; + options.load_balancing_weight = UINT32_MAX - 1; + setEndpoints(options, true, false); + test_server_->waitForCounterGe("cluster.cluster_0.update_rejected", 1); +} + +TEST_P(EdsIntegrationTest, LoadBalancerRejectsEndpointsWithHealthcheck) { + cluster_.mutable_common_lb_config()->set_ignore_new_hosts_until_first_hc(true); + autonomous_upstream_ = true; + initializeTest(true /* http_active_hc */, [](envoy::config::cluster::v3::Cluster& cluster) { + // Disable the healthy panic threshold, which causes the initial update from the EDS update + // to not include any of the hosts so that there isn't an error detected for the weights + // until after a health check passes. 
+ cluster.mutable_common_lb_config()->mutable_healthy_panic_threshold(); + + // Maglev (and all load balancers that inherit `ThreadAwareLoadBalancerBase`) has a + // constraint that the total endpoint weights must not exceed uint32_max. Use this to generate + // errors instead of writing a test load balancing extension. + cluster.set_lb_policy(::envoy::config::cluster::v3::Cluster::MAGLEV); + }); + + EndpointSettingOptions options; + options.total_endpoints = 4; + options.healthy_endpoints = 4; + options.load_balancing_weight = UINT32_MAX - 1; + setEndpoints(options, true, false); + test_server_->waitForCounterGe("cluster.cluster_0.update_rejected", 1); +} + } // namespace } // namespace Envoy diff --git a/test/mocks/config/mocks.h b/test/mocks/config/mocks.h index 717c32117d25a..f4d3e60608de0 100644 --- a/test/mocks/config/mocks.h +++ b/test/mocks/config/mocks.h @@ -199,7 +199,7 @@ class MockContextProvider : public ContextProvider { MOCK_METHOD(Common::CallbackHandlePtr, addDynamicContextUpdateCallback, (UpdateNotificationCb callback), (const)); - Common::CallbackManager update_cb_handler_; + Common::CallbackManager update_cb_handler_; }; template diff --git a/test/mocks/upstream/host_set.h b/test/mocks/upstream/host_set.h index 16ced302a8af0..4e1d72dfe40b0 100644 --- a/test/mocks/upstream/host_set.h +++ b/test/mocks/upstream/host_set.h @@ -17,7 +17,7 @@ class MockHostSet : public HostSet { ~MockHostSet() override; void runCallbacks(const HostVector added, const HostVector removed) { - THROW_IF_NOT_OK(member_update_cb_helper_.runCallbacks(priority(), added, removed)); + member_update_cb_helper_.runCallbacks(priority(), added, removed); } ABSL_MUST_USE_RESULT Common::CallbackHandlePtr @@ -59,7 +59,8 @@ class MockHostSet : public HostSet { HostsPerLocalitySharedPtr degraded_hosts_per_locality_{new HostsPerLocalityImpl()}; HostsPerLocalitySharedPtr excluded_hosts_per_locality_{new HostsPerLocalityImpl()}; LocalityWeightsConstSharedPtr locality_weights_{{}}; - 
Common::CallbackManager member_update_cb_helper_; + Common::CallbackManager + member_update_cb_helper_; uint32_t priority_{}; uint32_t overprovisioning_factor_{}; bool weighted_priority_health_{false}; diff --git a/test/mocks/upstream/priority_set.cc b/test/mocks/upstream/priority_set.cc index 4da1d13f3bae3..2182a7a3c80c2 100644 --- a/test/mocks/upstream/priority_set.cc +++ b/test/mocks/upstream/priority_set.cc @@ -49,8 +49,8 @@ HostSet& MockPrioritySet::getHostSet(uint32_t priority) { void MockPrioritySet::runUpdateCallbacks(uint32_t priority, const HostVector& hosts_added, const HostVector& hosts_removed) { - THROW_IF_NOT_OK(member_update_cb_helper_.runCallbacks(hosts_added, hosts_removed)); - THROW_IF_NOT_OK(priority_update_cb_helper_.runCallbacks(priority, hosts_added, hosts_removed)); + member_update_cb_helper_.runCallbacks(hosts_added, hosts_removed); + priority_update_cb_helper_.runCallbacks(priority, hosts_added, hosts_removed); } } // namespace Upstream diff --git a/test/mocks/upstream/priority_set.h b/test/mocks/upstream/priority_set.h index a19e208d12580..9b2b3100d524a 100644 --- a/test/mocks/upstream/priority_set.h +++ b/test/mocks/upstream/priority_set.h @@ -38,8 +38,8 @@ class MockPrioritySet : public PrioritySet { std::vector host_sets_; std::vector member_update_cbs_; - Common::CallbackManager member_update_cb_helper_; - Common::CallbackManager + Common::CallbackManager member_update_cb_helper_; + Common::CallbackManager priority_update_cb_helper_; HostMapConstSharedPtr cross_priority_host_map_{std::make_shared()}; From c4d23a1e2c19b13d219524a93092e2e7f6aa5f14 Mon Sep 17 00:00:00 2001 From: phlax Date: Wed, 24 Sep 2025 19:49:29 +0100 Subject: [PATCH 04/10] mobile/coverage: Fix and remove awful hack (#41209) Signed-off-by: Ryan Northey --- .github/workflows/mobile-coverage.yml | 7 ------- mobile/.bazelrc | 9 --------- mobile/third_party/rbe_configs/cc/BUILD | 9 ++++++--- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git 
a/.github/workflows/mobile-coverage.yml b/.github/workflows/mobile-coverage.yml index b174a31073685..4c27a3b3e5ad6 100644 --- a/.github/workflows/mobile-coverage.yml +++ b/.github/workflows/mobile-coverage.yml @@ -60,13 +60,6 @@ jobs: run: | cd mobile tar -czf coverage.tar.gz generated/coverage - # TODO(phlax): This is a highly undesirable workaround - remove once - # https://github.com/bazelbuild/bazel/issues/23247 is resolved/available - steps-pre: | - - name: Inject bazel version - shell: bash - run: | - echo "7.1.2" > .bazelversion target: mobile-coverage timeout-minutes: 120 upload-name: coverage.tar.gz diff --git a/mobile/.bazelrc b/mobile/.bazelrc index 5c5131790e249..833394cf13ed3 100644 --- a/mobile/.bazelrc +++ b/mobile/.bazelrc @@ -203,15 +203,6 @@ test:mobile-remote-ci-linux-tsan --test_env=ENVOY_IP_TEST_VERSIONS=v4only ############################################################################# # Clang environment variables (keep in sync with //third_party/rbe_configs) # Coverage environment variables (keep in sync with //third_party/rbe_configs) -build:mobile-ci-linux-coverage --action_env=GCOV=/opt/llvm/bin/llvm-profdata -build:mobile-ci-linux-coverage --test_env=GCOV=/opt/llvm/bin/llvm-profdata -build:mobile-ci-linux-coverage --repo_env=GCOV=/opt/llvm/bin/llvm-profdata -build:mobile-ci-linux-coverage --action_env=BAZEL_LLVM_COV=/opt/llvm/bin/llvm-cov -build:mobile-ci-linux-coverage --test_env=BAZEL_LLVM_COV=/opt/llvm/bin/llvm-cov -build:mobile-ci-linux-coverage --repo_env=BAZEL_LLVM_COV=/opt/llvm/bin/llvm-cov -build:mobile-ci-linux-coverage --action_env=BAZEL_USE_LLVM_NATIVE_COVERAGE=1 -build:mobile-ci-linux-coverage --test_env=BAZEL_USE_LLVM_NATIVE_COVERAGE=1 -build:mobile-ci-linux-coverage --repo_env=BAZEL_USE_LLVM_NATIVE_COVERAGE=1 build:mobile-ci-linux-coverage --build_tests_only build:mobile-ci-linux-coverage --@envoy//tools/coverage:config=@envoy_mobile//test:coverage_config diff --git a/mobile/third_party/rbe_configs/cc/BUILD 
b/mobile/third_party/rbe_configs/cc/BUILD index 628350f9715a7..dfabf7dcc2f4e 100644 --- a/mobile/third_party/rbe_configs/cc/BUILD +++ b/mobile/third_party/rbe_configs/cc/BUILD @@ -95,7 +95,7 @@ cc_toolchain_config( "cpp": "/usr/bin/cpp", "gcc": "/opt/llvm/bin/clang", "dwp": "/usr/bin/dwp", - "gcov": "/opt/llvm/bin/llvm-cov", + "gcov": "/opt/llvm/bin/llvm-profdata", "nm": "/usr/bin/nm", "objcopy": "/usr/bin/objcopy", "objdump": "/usr/bin/objdump", @@ -128,8 +128,11 @@ cc_toolchain_config( "-D__DATE__=\"redacted\"", "-D__TIMESTAMP__=\"redacted\"", "-D__TIME__=\"redacted\""], - coverage_compile_flags = ["--coverage"], - coverage_link_flags = ["--coverage"], + coverage_compile_flags = [ + "-fprofile-instr-generate", + "-fcoverage-mapping", + ], + coverage_link_flags = ["-fprofile-instr-generate"], supports_start_end_lib = True, ) From d6dbb49c1435e77d4322bb1f5228fb1bc63ea065 Mon Sep 17 00:00:00 2001 From: "dependency-envoy[bot]" <148525496+dependency-envoy[bot]@users.noreply.github.com> Date: Wed, 24 Sep 2025 19:15:02 +0000 Subject: [PATCH 05/10] deps: Bump `rules_python` -> 1.6.3 (#41205) Fix #41162 Signed-off-by: dependency-envoy[bot] <148525496+dependency-envoy[bot]@users.noreply.github.com> Signed-off-by: Ryan Northey --- bazel/foreign_cc/vpp_vcl.patch | 61 +++++++++++++++++----------------- bazel/repositories.bzl | 1 + bazel/repository_locations.bzl | 6 ++-- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/bazel/foreign_cc/vpp_vcl.patch b/bazel/foreign_cc/vpp_vcl.patch index 74085b777cef1..f75138e7180e7 100644 --- a/bazel/foreign_cc/vpp_vcl.patch +++ b/bazel/foreign_cc/vpp_vcl.patch @@ -1,7 +1,7 @@ -diff --git src/CMakeLists.txt src/CMakeLists.txt -index 68d0a4f..9bf7ade 100644 ---- src/CMakeLists.txt -+++ src/CMakeLists.txt +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 68d0a4fe6..9bf7adede 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt @@ -50,13 +50,8 @@ include(cmake/ccache.cmake) 
############################################################################## # VPP Version @@ -27,10 +27,10 @@ index 68d0a4f..9bf7ade 100644 ) elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") set(SUBDIRS vppinfra) -diff --git src/cmake/ccache.cmake src/cmake/ccache.cmake -index a7b395b..d6a4c5b 100644 ---- src/cmake/ccache.cmake -+++ src/cmake/ccache.cmake +diff --git a/src/cmake/ccache.cmake b/src/cmake/ccache.cmake +index a7b395bc6..d6a4c5b30 100644 +--- a/src/cmake/ccache.cmake ++++ b/src/cmake/ccache.cmake @@ -14,7 +14,7 @@ ############################################################################## # ccache @@ -40,10 +40,10 @@ index a7b395b..d6a4c5b 100644 if(VPP_USE_CCACHE) find_program(CCACHE_FOUND ccache) message(STATUS "Looking for ccache") -diff --git src/cmake/library.cmake src/cmake/library.cmake -index 45b3944..b1dcc56 100644 ---- src/cmake/library.cmake -+++ src/cmake/library.cmake +diff --git a/src/cmake/library.cmake b/src/cmake/library.cmake +index 45b3944eb..b1dcc56e1 100644 +--- a/src/cmake/library.cmake ++++ b/src/cmake/library.cmake @@ -24,7 +24,7 @@ macro(add_vpp_library lib) set_target_properties(${lo} PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_options(${lo} PUBLIC ${VPP_DEFAULT_MARCH_FLAGS}) @@ -53,10 +53,10 @@ index 45b3944..b1dcc56 100644 target_sources(${lib} PRIVATE $) if(VPP_LIB_VERSION) -diff --git src/tools/vppapigen/CMakeLists.txt src/tools/vppapigen/CMakeLists.txt -index 04ebed5..bfabc3a 100644 ---- src/tools/vppapigen/CMakeLists.txt -+++ src/tools/vppapigen/CMakeLists.txt +diff --git a/src/tools/vppapigen/CMakeLists.txt b/src/tools/vppapigen/CMakeLists.txt +index 04ebed548..bfabc3a67 100644 +--- a/src/tools/vppapigen/CMakeLists.txt ++++ b/src/tools/vppapigen/CMakeLists.txt @@ -11,22 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
@@ -80,28 +80,29 @@ index 04ebed5..bfabc3a 100644 install( FILES vppapigen.py RENAME vppapigen -diff --git src/tools/vppapigen/vppapigen.py src/tools/vppapigen/vppapigen.py -index 2b0ce99..f28a173 100755 ---- src/tools/vppapigen/vppapigen.py -+++ src/tools/vppapigen/vppapigen.py -@@ -7,6 +7,13 @@ import logging +diff --git a/src/tools/vppapigen/vppapigen.py b/src/tools/vppapigen/vppapigen.py +index 2b0ce9999..f8a7586ea 100755 +--- a/src/tools/vppapigen/vppapigen.py ++++ b/src/tools/vppapigen/vppapigen.py +@@ -7,6 +7,14 @@ import logging import binascii import os from subprocess import Popen, PIPE + ++ +# Put ply on the path ... +plypath = os.path.join( + os.environ["EXT_BUILD_ROOT"], -+ os.path.dirname(os.environ["PLYPATHS"].split()[0])) ++ os.path.dirname(os.path.dirname(os.environ["PLYPATHS"].split()[0]))) +sys.path += [plypath] + import ply.lex as lex import ply.yacc as yacc -diff --git src/vcl/CMakeLists.txt src/vcl/CMakeLists.txt -index 610b422..c5e6f8c 100644 ---- src/vcl/CMakeLists.txt -+++ src/vcl/CMakeLists.txt +diff --git a/src/vcl/CMakeLists.txt b/src/vcl/CMakeLists.txt +index 610b422d1..c5e6f8ca8 100644 +--- a/src/vcl/CMakeLists.txt ++++ b/src/vcl/CMakeLists.txt @@ -35,6 +35,8 @@ if (LDP_HAS_GNU_SOURCE) add_compile_definitions(HAVE_GNU_SOURCE) endif(LDP_HAS_GNU_SOURCE) @@ -111,10 +112,10 @@ index 610b422..c5e6f8c 100644 add_vpp_library(vcl_ldpreload SOURCES ldp_socket_wrapper.c -diff --git src/vppinfra/CMakeLists.txt src/vppinfra/CMakeLists.txt -index f34ceed..51fd2be 100644 ---- src/vppinfra/CMakeLists.txt -+++ src/vppinfra/CMakeLists.txt +diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt +index f34ceed9d..51fd2becf 100644 +--- a/src/vppinfra/CMakeLists.txt ++++ b/src/vppinfra/CMakeLists.txt @@ -233,13 +233,28 @@ option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-gl if(VPP_USE_EXTERNAL_LIBEXECINFO) set(EXECINFO_LIB execinfo) diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index 
d7d1b716041be..b142dba79cc02 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -1021,6 +1021,7 @@ def _com_github_fdio_vpp_vcl(): name = "com_github_fdio_vpp_vcl", build_file_content = _build_all_content(exclude = ["**/*doc*/**", "**/examples/**", "**/plugins/**"]), patches = ["@envoy//bazel/foreign_cc:vpp_vcl.patch"], + patch_args = ["-p1"], ) def _rules_ruby(): diff --git a/bazel/repository_locations.bzl b/bazel/repository_locations.bzl index e7808719bbdc1..9d90805be8f57 100644 --- a/bazel/repository_locations.bzl +++ b/bazel/repository_locations.bzl @@ -1095,9 +1095,9 @@ REPOSITORY_LOCATIONS_SPEC = dict( project_name = "Python rules for Bazel", project_desc = "Bazel rules for the Python language", project_url = "https://github.com/bazelbuild/rules_python", - version = "1.4.1", - sha256 = "9f9f3b300a9264e4c77999312ce663be5dee9a56e361a1f6fe7ec60e1beef9a3", - release_date = "2025-05-08", + version = "1.6.3", + sha256 = "2f5c284fbb4e86045c2632d3573fc006facbca5d1fa02976e89dc0cd5488b590", + release_date = "2025-09-21", strip_prefix = "rules_python-{version}", urls = ["https://github.com/bazelbuild/rules_python/archive/{version}.tar.gz"], use_category = ["build", "controlplane", "dataplane_core"], From 572ec5b20fb2f2056f9abbf8dfdc0749d1719d56 Mon Sep 17 00:00:00 2001 From: yanavlasov Date: Wed, 24 Sep 2025 15:24:34 -0400 Subject: [PATCH 06/10] Use mutex reference in lock constructors p3 (#41208) Replace deprecated absl::MutexLock::MutexLock(Mutex*) constructor with absl::MutexLock::MutexLock(Mutex&) Risk Level: none Testing: unit tests Docs Changes: n/a Release Notes: n/a Platform Specific Features: n/a Signed-off-by: Yan Avlasov --- source/common/common/assert.cc | 4 +-- source/common/common/fine_grain_logger.cc | 28 ++++++++++----------- source/common/common/logger.cc | 10 ++++---- source/common/common/thread.cc | 8 +++--- source/common/common/thread_synchronizer.cc | 10 ++++---- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git 
a/source/common/common/assert.cc b/source/common/common/assert.cc index 94bda22aa3c3f..d624b1d07cff6 100644 --- a/source/common/common/assert.cc +++ b/source/common/common/assert.cc @@ -45,12 +45,12 @@ class EnvoyBugState { static EnvoyBugState& get() { MUTABLE_CONSTRUCT_ON_FIRST_USE(EnvoyBugState); } void clear() { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); counters_.clear(); } uint64_t inc(absl::string_view bug_name) { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); return ++counters_[bug_name]; } diff --git a/source/common/common/fine_grain_logger.cc b/source/common/common/fine_grain_logger.cc index 734e1328bf78e..7ef40f07ec31e 100644 --- a/source/common/common/fine_grain_logger.cc +++ b/source/common/common/fine_grain_logger.cc @@ -18,9 +18,9 @@ namespace Envoy { class FineGrainLogBasicLockable : public Thread::BasicLockable { public: // BasicLockable - void lock() ABSL_EXCLUSIVE_LOCK_FUNCTION() override { mutex_.Lock(); } - bool tryLock() ABSL_EXCLUSIVE_TRYLOCK_FUNCTION(true) override { return mutex_.TryLock(); } - void unlock() ABSL_UNLOCK_FUNCTION() override { mutex_.Unlock(); } + void lock() ABSL_EXCLUSIVE_LOCK_FUNCTION() override { mutex_.lock(); } + bool tryLock() ABSL_EXCLUSIVE_TRYLOCK_FUNCTION(true) override { return mutex_.try_lock(); } + void unlock() ABSL_UNLOCK_FUNCTION() override { mutex_.unlock(); } private: absl::Mutex mutex_; @@ -28,7 +28,7 @@ class FineGrainLogBasicLockable : public Thread::BasicLockable { SpdLoggerSharedPtr FineGrainLogContext::getFineGrainLogEntry(absl::string_view key) ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); auto it = fine_grain_log_map_->find(key); if (it != fine_grain_log_map_->end()) { return it->second; @@ -37,14 +37,14 @@ SpdLoggerSharedPtr FineGrainLogContext::getFineGrainLogEntry(absl::string_view k } spdlog::level::level_enum FineGrainLogContext::getVerbosityDefaultLevel() 
const { - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); return verbosity_default_level_; } void FineGrainLogContext::initFineGrainLogger(const std::string& key, std::atomic& logger) ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { - absl::WriterMutexLock l(&fine_grain_log_lock_); + absl::WriterMutexLock l(fine_grain_log_lock_); auto it = fine_grain_log_map_->find(key); spdlog::logger* target; if (it == fine_grain_log_map_->end()) { @@ -57,7 +57,7 @@ void FineGrainLogContext::initFineGrainLogger(const std::string& key, bool FineGrainLogContext::setFineGrainLogger(absl::string_view key, level_enum log_level) ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); auto it = fine_grain_log_map_->find(key); if (it != fine_grain_log_map_->end()) { it->second->set_level(log_level); @@ -70,11 +70,11 @@ void FineGrainLogContext::setDefaultFineGrainLogLevelFormat(spdlog::level::level const std::string& format) ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { { - absl::WriterMutexLock wl(&fine_grain_log_lock_); + absl::WriterMutexLock wl(fine_grain_log_lock_); verbosity_default_level_ = level; } - absl::ReaderMutexLock rl(&fine_grain_log_lock_); + absl::ReaderMutexLock rl(fine_grain_log_lock_); for (const auto& [key, logger] : *fine_grain_log_map_) { logger->set_level(getLogLevel(key)); Logger::Utility::setLogFormatForLogger(*logger, format); @@ -82,7 +82,7 @@ void FineGrainLogContext::setDefaultFineGrainLogLevelFormat(spdlog::level::level } std::string FineGrainLogContext::listFineGrainLoggers() ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); std::string info = absl::StrJoin(*fine_grain_log_map_, "\n", [](std::string* out, const auto& log_pair) { auto level_str_view = spdlog::level::to_string_view(log_pair.second->level()); @@ -94,7 +94,7 @@ std::string 
FineGrainLogContext::listFineGrainLoggers() ABSL_LOCKS_EXCLUDED(fine void FineGrainLogContext::setAllFineGrainLoggers(spdlog::level::level_enum level) ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); verbosity_update_info_.clear(); for (const auto& it : *fine_grain_log_map_) { it.second->set_level(level); @@ -104,7 +104,7 @@ void FineGrainLogContext::setAllFineGrainLoggers(spdlog::level::level_enum level FineGrainLogLevelMap FineGrainLogContext::getAllFineGrainLogLevelsForTest() ABSL_LOCKS_EXCLUDED(fine_grain_log_lock_) { FineGrainLogLevelMap log_levels; - absl::ReaderMutexLock l(&fine_grain_log_lock_); + absl::ReaderMutexLock l(fine_grain_log_lock_); for (const auto& it : *fine_grain_log_map_) { log_levels[it.first] = it.second->level(); } @@ -170,7 +170,7 @@ spdlog::logger* FineGrainLogContext::createLogger(const std::string& key) void FineGrainLogContext::updateVerbosityDefaultLevel(level_enum level) { { - absl::WriterMutexLock wl(&fine_grain_log_lock_); + absl::WriterMutexLock wl(fine_grain_log_lock_); verbosity_default_level_ = level; } @@ -179,7 +179,7 @@ void FineGrainLogContext::updateVerbosityDefaultLevel(level_enum level) { void FineGrainLogContext::updateVerbositySetting( const std::vector>& updates) { - absl::WriterMutexLock ul(&fine_grain_log_lock_); + absl::WriterMutexLock ul(fine_grain_log_lock_); verbosity_update_info_.clear(); for (const auto& [glob, level] : updates) { if (level < kLogLevelMin || level > kLogLevelMax) { diff --git a/source/common/common/logger.cc b/source/common/common/logger.cc index 693f1517dde1c..756ba260a04dc 100644 --- a/source/common/common/logger.cc +++ b/source/common/common/logger.cc @@ -64,12 +64,12 @@ void StderrSinkDelegate::flush() { } void DelegatingLogSink::set_formatter(std::unique_ptr formatter) { - absl::MutexLock lock(&format_mutex_); + absl::MutexLock lock(format_mutex_); formatter_ = std::move(formatter); } void 
DelegatingLogSink::log(const spdlog::details::log_msg& msg) { - absl::ReleasableMutexLock lock(&format_mutex_); + absl::ReleasableMutexLock lock(format_mutex_); absl::string_view msg_view = absl::string_view(msg.payload.data(), msg.payload.size()); // This memory buffer must exist in the scope of the entire function, @@ -95,7 +95,7 @@ void DelegatingLogSink::log(const spdlog::details::log_msg& msg) { // protection is really only needed in tests. It would be nice to figure out a test-only // mechanism for this that does not require extra locking that we don't explicitly need in the // prod code. - absl::ReaderMutexLock sink_lock(&sink_mutex_); + absl::ReaderMutexLock sink_lock(sink_mutex_); log_to_sink(*sink_); } @@ -120,13 +120,13 @@ DelegatingLogSinkSharedPtr DelegatingLogSink::init() { } void DelegatingLogSink::flush() { - absl::ReaderMutexLock lock(&sink_mutex_); + absl::ReaderMutexLock lock(sink_mutex_); sink_->flush(); } void DelegatingLogSink::logWithStableName(absl::string_view stable_name, absl::string_view level, absl::string_view component, absl::string_view message) { - absl::ReaderMutexLock sink_lock(&sink_mutex_); + absl::ReaderMutexLock sink_lock(sink_mutex_); sink_->logWithStableName(stable_name, level, component, message); } diff --git a/source/common/common/thread.cc b/source/common/common/thread.cc index cf661693acdf0..a22c1754439f8 100644 --- a/source/common/common/thread.cc +++ b/source/common/common/thread.cc @@ -21,13 +21,13 @@ namespace { // tests more hermetic. struct ThreadIds { bool inMainThread() const { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); return main_threads_to_usage_count_.find(std::this_thread::get_id()) != main_threads_to_usage_count_.end(); } bool isMainThreadActive() const { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); return !main_threads_to_usage_count_.empty(); } @@ -36,7 +36,7 @@ struct ThreadIds { // Call this from the context of MainThread when it exits. 
void releaseMainThread() { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); auto it = main_threads_to_usage_count_.find(std::this_thread::get_id()); if (!skipAsserts()) { ASSERT(it != main_threads_to_usage_count_.end()); @@ -52,7 +52,7 @@ struct ThreadIds { // Declares current thread as the main one, or verifies that the current // thread matches any previous declarations. void registerMainThread() { - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); auto it = main_threads_to_usage_count_.find(std::this_thread::get_id()); if (it == main_threads_to_usage_count_.end()) { it = main_threads_to_usage_count_.insert({std::this_thread::get_id(), 0}).first; diff --git a/source/common/common/thread_synchronizer.cc b/source/common/common/thread_synchronizer.cc index 492509ab1fb1f..7e230e6a5d65a 100644 --- a/source/common/common/thread_synchronizer.cc +++ b/source/common/common/thread_synchronizer.cc @@ -10,7 +10,7 @@ void ThreadSynchronizer::enable() { ThreadSynchronizer::SynchronizerEntry& ThreadSynchronizer::getOrCreateEntry(absl::string_view event_name) { - absl::MutexLock lock(&data_->mutex_); + absl::MutexLock lock(data_->mutex_); auto& existing_entry = data_->entries_[event_name]; if (existing_entry == nullptr) { ENVOY_LOG(debug, "thread synchronizer: creating entry: {}", event_name); @@ -21,7 +21,7 @@ ThreadSynchronizer::getOrCreateEntry(absl::string_view event_name) { void ThreadSynchronizer::waitOnWorker(absl::string_view event_name) { SynchronizerEntry& entry = getOrCreateEntry(event_name); - absl::MutexLock lock(&entry.mutex_); + absl::MutexLock lock(entry.mutex_); ENVOY_LOG(debug, "thread synchronizer: waiting on next {}", event_name); ASSERT(!entry.wait_on_); entry.wait_on_ = true; @@ -29,7 +29,7 @@ void ThreadSynchronizer::waitOnWorker(absl::string_view event_name) { void ThreadSynchronizer::syncPointWorker(absl::string_view event_name) { SynchronizerEntry& entry = getOrCreateEntry(event_name); - absl::MutexLock lock(&entry.mutex_); 
+ absl::MutexLock lock(entry.mutex_); // See if we are ignoring waits. If so, just return. if (!entry.wait_on_) { @@ -62,7 +62,7 @@ void ThreadSynchronizer::syncPointWorker(absl::string_view event_name) { void ThreadSynchronizer::barrierOnWorker(absl::string_view event_name) { SynchronizerEntry& entry = getOrCreateEntry(event_name); - absl::MutexLock lock(&entry.mutex_); + absl::MutexLock lock(entry.mutex_); ENVOY_LOG(debug, "thread synchronizer: barrier on {}", event_name); while (!entry.mutex_.AwaitWithTimeout(absl::Condition(&entry.at_barrier_), absl::Seconds(10))) { ENVOY_LOG(warn, "thread synchronizer: barrier on {} stuck for 10 seconds", event_name); @@ -72,7 +72,7 @@ void ThreadSynchronizer::barrierOnWorker(absl::string_view event_name) { void ThreadSynchronizer::signalWorker(absl::string_view event_name) { SynchronizerEntry& entry = getOrCreateEntry(event_name); - absl::MutexLock lock(&entry.mutex_); + absl::MutexLock lock(entry.mutex_); ASSERT(!entry.signaled_); ENVOY_LOG(debug, "thread synchronizer: signaling {}", event_name); entry.signaled_ = true; From d2237957032e11f7466ad2307167cdbe5d4826e8 Mon Sep 17 00:00:00 2001 From: Luca Schimweg Date: Wed, 24 Sep 2025 22:41:47 +0200 Subject: [PATCH 07/10] tcp_proxy: Add max_downstream_connection_duration_jitter_percentage (#40999) **Commit Message**: tcp_proxy: Add max_downstream_connection_duration_jitter_percentage **Additional Description**: Adds proto for a new field that can be used to add a jitter to the max_downstream_connection_duration. This allows to prevent thundering herd problems. 
**Risk Level**: Low (adds new feature not enabled by default) **Testing**: Unit tests & Integration tests added, Functionality manually verified locally **Docs Changes**: Added API docs **Release Notes**: Added **Platform Specific Features**: n/a Fixes #40686 **Optional [API Considerations](https://github.com/envoyproxy/envoy/blob/main/api/review_checklist.md)**: Considered --------- Signed-off-by: lucaschimweg Signed-off-by: Luca Schimweg Co-authored-by: Adi (Suissa) Peleg --- .../network/tcp_proxy/v3/tcp_proxy.proto | 9 +- changelogs/current.yaml | 4 + source/common/tcp_proxy/tcp_proxy.cc | 35 ++++++- source/common/tcp_proxy/tcp_proxy.h | 9 ++ test/common/tcp_proxy/config_test.cc | 94 +++++++++++++++++++ test/common/tcp_proxy/tcp_proxy_test.cc | 27 ++++++ .../integration/tcp_proxy_integration_test.cc | 25 +++++ 7 files changed, 200 insertions(+), 3 deletions(-) diff --git a/api/envoy/extensions/filters/network/tcp_proxy/v3/tcp_proxy.proto b/api/envoy/extensions/filters/network/tcp_proxy/v3/tcp_proxy.proto index 872c736f5a019..95ecdb0313a22 100644 --- a/api/envoy/extensions/filters/network/tcp_proxy/v3/tcp_proxy.proto +++ b/api/envoy/extensions/filters/network/tcp_proxy/v3/tcp_proxy.proto @@ -9,6 +9,7 @@ import "envoy/config/core/v3/config_source.proto"; import "envoy/config/core/v3/proxy_protocol.proto"; import "envoy/extensions/filters/network/http_connection_manager/v3/http_connection_manager.proto"; import "envoy/type/v3/hash_policy.proto"; +import "envoy/type/v3/percent.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/wrappers.proto"; @@ -28,7 +29,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // TCP Proxy :ref:`configuration overview `. 
// [#extension: envoy.filters.network.tcp_proxy] -// [#next-free-field: 20] +// [#next-free-field: 21] message TcpProxy { option (udpa.annotations.versioning).previous_message_type = "envoy.config.filter.network.tcp_proxy.v2.TcpProxy"; @@ -247,6 +248,12 @@ message TcpProxy { google.protobuf.Duration max_downstream_connection_duration = 13 [(validate.rules).duration = {gte {nanos: 1000000}}]; + // Percentage-based jitter for max_downstream_connection_duration. The jitter will increase + // the max_downstream_connection_duration by some random duration up to the provided percentage. + // This field is ignored if max_downstream_connection_duration is not set. + // If not set, no jitter will be added. + type.v3.Percent max_downstream_connection_duration_jitter_percentage = 20; + // Note that if both this field and :ref:`access_log_flush_interval // ` // are specified, the former (deprecated field) is ignored. diff --git a/changelogs/current.yaml b/changelogs/current.yaml index d89ec4b7c0169..01606a2cad772 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -499,5 +499,9 @@ new_features: ` configuration field to the c-ares DNS resolver. This allows periodic refresh of the UDP channel to help avoid stale socket states and provide better load distribution across UDP ports. +- area: tcp_proxy + change: | + Added ``max_downstream_connection_duration_jitter_percentage`` to allow adding a jitter to the max downstream connection duration. + This can be used to avoid thundering herd problems with many clients being disconnected and possibly reconnecting at the same time. 
deprecated: diff --git a/source/common/tcp_proxy/tcp_proxy.cc b/source/common/tcp_proxy/tcp_proxy.cc index 42e574e5c4d05..984e22ba9f945 100644 --- a/source/common/tcp_proxy/tcp_proxy.cc +++ b/source/common/tcp_proxy/tcp_proxy.cc @@ -134,6 +134,10 @@ Config::SharedConfig::SharedConfig( DurationUtil::durationToMilliseconds(config.max_downstream_connection_duration()); max_downstream_connection_duration_ = std::chrono::milliseconds(connection_duration); } + if (config.has_max_downstream_connection_duration_jitter_percentage()) { + max_downstream_connection_duration_jitter_percentage_ = + config.max_downstream_connection_duration_jitter_percentage().value(); + } if (config.has_access_log_options()) { if (config.flush_access_log_on_connected() /* deprecated */) { @@ -262,6 +266,31 @@ RouteConstSharedPtr Config::getRouteFromEntries(Network::Connection& connection) random_generator_.random(), false); } +const absl::optional +Config::calculateMaxDownstreamConnectionDurationWithJitter() { + const auto& max_downstream_connection_duration = maxDownstreamConnectionDuration(); + if (!max_downstream_connection_duration) { + return max_downstream_connection_duration; + } + + const auto& jitter_percentage = maxDownstreamConnectionDurationJitterPercentage(); + if (!jitter_percentage) { + return max_downstream_connection_duration; + } + + // Apply jitter: base_duration * (1 + random(0, jitter_factor)); + const uint64_t max_jitter_ms = std::ceil(max_downstream_connection_duration.value().count() * + (jitter_percentage.value() / 100.0)); + + if (max_jitter_ms == 0) { + return max_downstream_connection_duration; + } + + const uint64_t jitter_ms = randomGenerator().random() % max_jitter_ms; + return std::chrono::milliseconds( + static_cast(max_downstream_connection_duration.value().count() + jitter_ms)); +} + UpstreamDrainManager& Config::drainManager() { return upstream_drain_manager_slot_->getTyped(); } @@ -875,10 +904,12 @@ Network::FilterStatus Filter::onData(Buffer::Instance& data, 
bool end_stream) { } Network::FilterStatus Filter::onNewConnection() { - if (config_->maxDownstreamConnectionDuration()) { + const auto& max_downstream_connection_duration = + config_->calculateMaxDownstreamConnectionDurationWithJitter(); + if (max_downstream_connection_duration) { connection_duration_timer_ = read_callbacks_->connection().dispatcher().createTimer( [this]() -> void { onMaxDownstreamConnectionDuration(); }); - connection_duration_timer_->enableTimer(config_->maxDownstreamConnectionDuration().value()); + connection_duration_timer_->enableTimer(max_downstream_connection_duration.value()); } if (config_->accessLogFlushInterval().has_value()) { diff --git a/source/common/tcp_proxy/tcp_proxy.h b/source/common/tcp_proxy/tcp_proxy.h index cfbe7c8a457ee..3d75abc07b243 100644 --- a/source/common/tcp_proxy/tcp_proxy.h +++ b/source/common/tcp_proxy/tcp_proxy.h @@ -242,6 +242,9 @@ class Config { const absl::optional& maxDownstreamConnectionDuration() const { return max_downstream_connection_duration_; } + const absl::optional& maxDownstreamConnectionDurationJitterPercentage() const { + return max_downstream_connection_duration_jitter_percentage_; + } const absl::optional& accessLogFlushInterval() const { return access_log_flush_interval_; } @@ -270,6 +273,7 @@ class Config { bool flush_access_log_on_connected_; absl::optional idle_timeout_; absl::optional max_downstream_connection_duration_; + absl::optional max_downstream_connection_duration_jitter_percentage_; absl::optional access_log_flush_interval_; std::unique_ptr tunneling_config_helper_; std::unique_ptr on_demand_config_; @@ -302,6 +306,11 @@ class Config { const absl::optional& maxDownstreamConnectionDuration() const { return shared_config_->maxDownstreamConnectionDuration(); } + const absl::optional& maxDownstreamConnectionDurationJitterPercentage() const { + return shared_config_->maxDownstreamConnectionDurationJitterPercentage(); + } + const absl::optional + 
calculateMaxDownstreamConnectionDurationWithJitter(); const absl::optional& accessLogFlushInterval() const { return shared_config_->accessLogFlushInterval(); } diff --git a/test/common/tcp_proxy/config_test.cc b/test/common/tcp_proxy/config_test.cc index 1928cf8ad3b57..5c2007b0b11e9 100644 --- a/test/common/tcp_proxy/config_test.cc +++ b/test/common/tcp_proxy/config_test.cc @@ -1,3 +1,5 @@ +#include + #include "envoy/common/hashable.h" #include "test/common/tcp_proxy/tcp_proxy_test_base.h" @@ -223,6 +225,98 @@ max_downstream_connection_duration: 10s EXPECT_EQ(std::chrono::seconds(10), config_obj.maxDownstreamConnectionDuration().value()); } +TEST(ConfigTest, MaxDownstreamConnectionDurationJitterPercentage) { + const std::string yaml = R"EOF( +stat_prefix: name +cluster: foo +max_downstream_connection_duration: 10s +max_downstream_connection_duration_jitter_percentage: + value: 50.0 +)EOF"; + + NiceMock factory_context; + Config config_obj(constructConfigFromYaml(yaml, factory_context)); + EXPECT_EQ(std::chrono::seconds(10), config_obj.maxDownstreamConnectionDuration().value()); + EXPECT_EQ(50.0, config_obj.maxDownstreamConnectionDurationJitterPercentage().value()); +} + +TEST(ConfigTest, CalculateActualMaxDownstreamConnectionDuration) { + struct TestCase { + std::string name; + Protobuf::Duration* max_downstream_connection_duration; + envoy::type::v3::Percent* max_downstream_connection_duration_jitter_percentage; + uint64_t random_value; + absl::optional expected_actual_max_downstream_connection_duration; + }; + + const auto seconds = [](uint64_t seconds) { + auto* d = new Protobuf::Duration(); + d->set_seconds(seconds); + return d; + }; + + const auto percent = [](double value) { + auto* p = new envoy::type::v3::Percent(); + p->set_value(value); + return p; + }; + + std::vector test_cases = { + {/* name */ "0% random value", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ percent(50.0), + /* 
random_value */ 0, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(10000)}, + {/* name */ "50% random value", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ percent(50.0), + /* random_value */ 2500, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(12500)}, + {/* name */ "99.99% random value", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ percent(50.0), + /* random_value */ 9999, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(14999)}, + {/* name */ "0% jitter", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ percent(0), + /* random_value */ 5000, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(10000)}, + {/* name */ "100% jitter", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ percent(100), + /* random_value */ 5000, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(15000)}, + {/* name */ "no jitter", + /* max_downstream_connection_duration */ seconds(10), + /* max_downstream_connection_duration_jitter_percentage */ nullptr, + /* random_value */ 5000, + /* expected_actual_max_downstream_connection_duration */ std::chrono::milliseconds(10000)}, + {/* name */ "no max duration", + /* max_downstream_connection_duration */ nullptr, + /* max_downstream_connection_duration_jitter_percentage */ percent(50), + /* random_value */ 5000, + /* expected_actual_max_downstream_connection_duration */ absl::nullopt}, + }; + + for (const auto& test_case : test_cases) { + SCOPED_TRACE(test_case.name); + NiceMock factory_context; + ON_CALL(factory_context.server_factory_context_.api_.random_, random()) + 
.WillByDefault(Return(test_case.random_value)); + + envoy::extensions::filters::network::tcp_proxy::v3::TcpProxy proto_config; + proto_config.set_allocated_max_downstream_connection_duration( + test_case.max_downstream_connection_duration); + proto_config.set_allocated_max_downstream_connection_duration_jitter_percentage( + test_case.max_downstream_connection_duration_jitter_percentage); + Config config_obj(proto_config, factory_context); + + EXPECT_EQ(test_case.expected_actual_max_downstream_connection_duration, + config_obj.calculateMaxDownstreamConnectionDurationWithJitter()); + } +} + TEST(ConfigTest, NoRouteConfig) { const std::string yaml = R"EOF( stat_prefix: name diff --git a/test/common/tcp_proxy/tcp_proxy_test.cc b/test/common/tcp_proxy/tcp_proxy_test.cc index 81247dd5a1ad9..4931ffc2538c3 100644 --- a/test/common/tcp_proxy/tcp_proxy_test.cc +++ b/test/common/tcp_proxy/tcp_proxy_test.cc @@ -2145,6 +2145,33 @@ TEST_P(TcpProxyTest, UpstreamStartSecureTransport) { filter_->startUpstreamSecureTransport(); } +// Verify the filter uses the value returned by +// Config::calculateMaxDownstreamConnectionDurationWithJitter. +TEST_P(TcpProxyTest, MaxDownstreamConnectionDurationWithJitterPercentage) { + envoy::extensions::filters::network::tcp_proxy::v3::TcpProxy config = defaultConfig(); + config.mutable_max_downstream_connection_duration()->set_seconds(10); + config.mutable_max_downstream_connection_duration_jitter_percentage()->set_value(50.0); + + // Idle timeout also uses createTimer, clear it to avoid mixing up timers in tests. + config.mutable_idle_timeout()->clear_seconds(); + + EXPECT_CALL(factory_context_.server_factory_context_.api_.random_, random()) + .WillRepeatedly(Return(2500)); + + setup(1, config); + + // Calculation of expected value is verified in config test. Here we just verify that the filter + // uses the value returned by the config. 
+ const auto expected = config_->calculateMaxDownstreamConnectionDurationWithJitter(); + ASSERT_TRUE(expected.has_value()); + + Event::MockTimer* connection_duration_timer = + new Event::MockTimer(&filter_callbacks_.connection_.dispatcher_); + + EXPECT_CALL(*connection_duration_timer, enableTimer(expected.value(), _)); + EXPECT_EQ(Network::FilterStatus::StopIteration, filter_->onNewConnection()); +} + // Test that the proxy protocol TLV is set. TEST_P(TcpProxyTest, SetTLV) { envoy::extensions::filters::network::tcp_proxy::v3::TcpProxy config = defaultConfig(); diff --git a/test/integration/tcp_proxy_integration_test.cc b/test/integration/tcp_proxy_integration_test.cc index bc8de62ae98ee..e07995aca3e0a 100644 --- a/test/integration/tcp_proxy_integration_test.cc +++ b/test/integration/tcp_proxy_integration_test.cc @@ -875,6 +875,31 @@ TEST_P(TcpProxyIntegrationTest, TestMaxDownstreamConnectionDurationWithLargeOuts ASSERT_TRUE(fake_upstream_connection->waitForDisconnect()); } +TEST_P(TcpProxyIntegrationTest, TestMaxDownstreamConnectionDurationWithJitter) { + autonomous_upstream_ = true; + + enableHalfClose(false); + config_helper_.addConfigModifier([&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { + auto* listener = bootstrap.mutable_static_resources()->mutable_listeners(0); + auto* filter_chain = listener->mutable_filter_chains(0); + auto* config_blob = filter_chain->mutable_filters(0)->mutable_typed_config(); + + ASSERT_TRUE(config_blob->Is()); + auto tcp_proxy_config = + MessageUtil::anyConvert( + *config_blob); + tcp_proxy_config.mutable_max_downstream_connection_duration()->set_nanos( + std::chrono::duration_cast(std::chrono::milliseconds(100)) + .count()); + tcp_proxy_config.mutable_max_downstream_connection_duration_jitter_percentage()->set_value(25); + config_blob->PackFrom(tcp_proxy_config); + }); + + initialize(); + IntegrationTcpClientPtr tcp_client = makeTcpConnection(lookupPort("tcp_proxy")); + tcp_client->waitForDisconnect(); +} + 
TEST_P(TcpProxyIntegrationTest, TestNoCloseOnHealthFailure) { concurrency_ = 2; From 6fc87f30bcb02515c313752761a8a32866758e38 Mon Sep 17 00:00:00 2001 From: "Adi (Suissa) Peleg" Date: Wed, 24 Sep 2025 17:16:43 -0400 Subject: [PATCH 08/10] list-matcher: replace push_back with emplace_back (#41210) Signed-off-by: Adi Suissa-Peleg --- source/common/matcher/list_matcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/matcher/list_matcher.h b/source/common/matcher/list_matcher.h index 38ea5e2d1201f..685481fd17578 100644 --- a/source/common/matcher/list_matcher.h +++ b/source/common/matcher/list_matcher.h @@ -43,7 +43,7 @@ template class ListMatcher : public MatchTree { } void addMatcher(FieldMatcherPtr&& matcher, OnMatch action) { - matchers_.push_back({std::move(matcher), std::move(action)}); + matchers_.emplace_back(std::move(matcher), std::move(action)); } private: From 6ffb4f7d6fc1b1c399ed862eba38d47321bb5814 Mon Sep 17 00:00:00 2001 From: phlax Date: Thu, 25 Sep 2025 15:08:17 +0100 Subject: [PATCH 09/10] github/ci: Disable scheduled CVE checker (#41221) temporarily pending resolution to #41168 Signed-off-by: Ryan Northey --- .github/workflows/envoy-dependency.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/envoy-dependency.yml b/.github/workflows/envoy-dependency.yml index cfdfe729416a5..646151a2095ab 100644 --- a/.github/workflows/envoy-dependency.yml +++ b/.github/workflows/envoy-dependency.yml @@ -243,7 +243,7 @@ jobs: run: | TODAY_DATE=$(date -u -I"date") export TODAY_DATE - bazel run --config=ci //tools/dependency:check --action_env=TODAY_DATE -- -c release_issues --fix - bazel run --config=ci //tools/dependency:check --action_env=TODAY_DATE -- -c cves -w error + bazel run --config=ci //tools/dependency:check -- -c release_issues --fix + # bazel run --config=ci //tools/dependency:check --action_env=TODAY_DATE -- -c cves -w error env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 
088351e28b33bfe63cee0b1cd24374929b0c637b Mon Sep 17 00:00:00 2001 From: Rohit Agrawal Date: Wed, 24 Sep 2025 00:48:58 -0700 Subject: [PATCH 10/10] reverse_tunnel: add RPING echos in the downstream socket extension at I/O Signed-off-by: Rohit Agrawal --- ..._reverse_connection_socket_interface.proto | 7 + .../common/reverse_connection_utility.cc | 10 +- ...downstream_reverse_connection_io_handle.cc | 91 ++- .../downstream_reverse_connection_io_handle.h | 9 +- .../reverse_tunnel_acceptor_extension.cc | 17 +- .../reverse_tunnel_acceptor_extension.h | 12 +- .../upstream_socket_manager.cc | 153 ++--- .../upstream_socket_manager.h | 19 + .../downstream_socket_interface/BUILD | 18 + ...tream_reverse_connection_io_handle_test.cc | 548 ++++++++++++++++++ .../reverse_connection_io_handle_test.cc | 141 ----- .../upstream_socket_interface/BUILD | 1 + .../reverse_tunnel_acceptor_extension_test.cc | 102 ++++ .../upstream_socket_manager_test.cc | 5 + 14 files changed, 904 insertions(+), 229 deletions(-) create mode 100644 test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle_test.cc diff --git a/api/envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3/upstream_reverse_connection_socket_interface.proto b/api/envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3/upstream_reverse_connection_socket_interface.proto index 92bbdbb84ab57..2f6465e0f0cb1 100644 --- a/api/envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3/upstream_reverse_connection_socket_interface.proto +++ b/api/envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3/upstream_reverse_connection_socket_interface.proto @@ -2,7 +2,10 @@ syntax = "proto3"; package envoy.extensions.bootstrap.reverse_tunnel.upstream_socket_interface.v3; +import "google/protobuf/wrappers.proto"; + import "udpa/annotations/status.proto"; +import "validate/validate.proto"; option java_package = 
"io.envoyproxy.envoy.extensions.bootstrap.reverse_tunnel.upstream_socket_interface.v3"; option java_outer_classname = "UpstreamReverseConnectionSocketInterfaceProto"; @@ -17,4 +20,8 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; message UpstreamReverseConnectionSocketInterface { // Stat prefix to be used for upstream reverse connection socket interface stats. string stat_prefix = 1; + + // Number of consecutive ping failures before an idle reverse connection socket is marked dead. + // Defaults to 3 if unset. Must be at least 1. + google.protobuf.UInt32Value ping_failure_threshold = 2 [(validate.rules).uint32 = {gte: 1}]; } diff --git a/source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.cc b/source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.cc index 2f186c0e1eb20..4167e831ef5f5 100644 --- a/source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.cc +++ b/source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.cc @@ -24,7 +24,7 @@ Buffer::InstancePtr ReverseConnectionUtility::createPingResponse() { bool ReverseConnectionUtility::sendPingResponse(Network::Connection& connection) { auto ping_buffer = createPingResponse(); connection.write(*ping_buffer, false); - ENVOY_LOG(debug, "Reverse connection utility: sent RPING response on connection {}", + ENVOY_LOG(trace, "Reverse connection utility: sent RPING response on connection {}.", connection.id()); return true; } @@ -33,10 +33,10 @@ Api::IoCallUint64Result ReverseConnectionUtility::sendPingResponse(Network::IoHa auto ping_buffer = createPingResponse(); Api::IoCallUint64Result result = io_handle.write(*ping_buffer); if (result.ok()) { - ENVOY_LOG(trace, "Reverse connection utility: sent RPING response, bytes: {}", + ENVOY_LOG(trace, "Reverse connection utility: sent RPING response. 
bytes: {}.", result.return_value_); } else { - ENVOY_LOG(trace, "Reverse connection utility: failed to send RPING response, error: {}", + ENVOY_LOG(trace, "Reverse connection utility: failed to send RPING response. error: {}.", result.err_->getErrorDetails()); } return result; @@ -47,14 +47,14 @@ bool ReverseConnectionUtility::handlePingMessage(absl::string_view data, if (!isPingMessage(data)) { return false; } - ENVOY_LOG(debug, "Reverse connection utility: received RPING on connection {}, echoing back", + ENVOY_LOG(trace, "Reverse connection utility: received RPING on connection {}; echoing back.", connection.id()); return sendPingResponse(connection); } bool ReverseConnectionUtility::extractPingFromHttpData(absl::string_view http_data) { if (http_data.find(PING_MESSAGE) != absl::string_view::npos) { - ENVOY_LOG(debug, "Reverse connection utility: found RPING in HTTP data"); + ENVOY_LOG(trace, "Reverse connection utility: found RPING in HTTP data."); return true; } return false; diff --git a/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.cc b/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.cc index f778bae8789ee..b2dfad430a8b3 100644 --- a/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.cc +++ b/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.cc @@ -1,8 +1,11 @@ #include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h" #include "source/common/common/logger.h" +#include "source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.h" #include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle.h" +#include "absl/strings/string_view.h" + namespace Envoy { namespace Extensions { namespace 
Bootstrap { @@ -28,6 +31,81 @@ DownstreamReverseConnectionIOHandle::~DownstreamReverseConnectionIOHandle() { fd_, connection_key_); } +Api::IoCallUint64Result +DownstreamReverseConnectionIOHandle::read(Buffer::Instance& buffer, + absl::optional max_length) { + // Perform the actual read first. + Api::IoCallUint64Result result = IoSocketHandleImpl::read(buffer, max_length); + ENVOY_LOG(trace, "DownstreamReverseConnectionIOHandle: read result: {}", result.return_value_); + + // If RPING keepalives are still active, check whether the incoming data is a RPING message. + if (ping_echo_active_ && result.err_ == nullptr && result.return_value_ > 0) { + const uint64_t expected = + ::Envoy::Extensions::Bootstrap::ReverseConnection::ReverseConnectionUtility::PING_MESSAGE + .size(); + + // Compare up to the expected size using a zero-copy view. + const uint64_t len = std::min(buffer.length(), expected); + const char* data = static_cast(buffer.linearize(len)); + absl::string_view peek_sv{data, static_cast(len)}; + + // Check if we have a complete RPING message. + if (len == expected && + ::Envoy::Extensions::Bootstrap::ReverseConnection::ReverseConnectionUtility::isPingMessage( + peek_sv)) { + // Found a complete RPING. Echo and drain it from the buffer. + buffer.drain(expected); + auto echo_rc = ::Envoy::Extensions::Bootstrap::ReverseConnection::ReverseConnectionUtility:: + sendPingResponse(*this); + if (!echo_rc.ok()) { + ENVOY_LOG(trace, "DownstreamReverseConnectionIOHandle: failed to send RPING echo on FD: {}", + fd_); + } else { + ENVOY_LOG(trace, "DownstreamReverseConnectionIOHandle: echoed RPING on FD: {}", fd_); + } + + // If buffer only contained RPING, return showing we processed it. + if (buffer.length() == 0) { + return Api::IoCallUint64Result{expected, Api::IoError::none()}; + } + + // RPING followed by application data. Disable echo and return the remaining data. 
+ ENVOY_LOG(trace, + "DownstreamReverseConnectionIOHandle: received application data after RPING, " + "disabling RPING echo for FD: {}", + fd_); + ping_echo_active_ = false; + // The adjusted return value is the number of bytes excluding the drained RPING. It should be + // transparent to upper layers that the RPING was processed. + const uint64_t adjusted = + (result.return_value_ >= expected) ? (result.return_value_ - expected) : 0; + return Api::IoCallUint64Result{adjusted, Api::IoError::none()}; + } + + // If partial data could be the start of RPING (only when fewer than expected bytes). + if (len < expected) { + const absl::string_view rping_prefix = + ReverseConnectionUtility::PING_MESSAGE.substr(0, static_cast(len)); + if (peek_sv == rping_prefix) { + ENVOY_LOG(trace, + "DownstreamReverseConnectionIOHandle: partial RPING received ({} bytes), waiting " + "for more.", + len); + return result; // Wait for more data. + } + } + + // Data is not RPING (complete or partial). Disable echo permanently. + ENVOY_LOG(trace, + "DownstreamReverseConnectionIOHandle: received application data ({} bytes), " + "disabling RPING echo for FD: {}", + len, fd_); + ping_echo_active_ = false; + } + + return result; +} + // DownstreamReverseConnectionIOHandle close() implementation. Api::IoCallUint64Result DownstreamReverseConnectionIOHandle::close() { ENVOY_LOG( @@ -45,7 +123,7 @@ Api::IoCallUint64Result DownstreamReverseConnectionIOHandle::close() { return Api::ioCallUint64ResultNoError(); } - // Prevent double-closing by checking if already closed + // Prevent double-closing by checking if already closed. 
if (fd_ < 0) { ENVOY_LOG(debug, "DownstreamReverseConnectionIOHandle: handle already closed for connection key: {}", @@ -53,8 +131,8 @@ Api::IoCallUint64Result DownstreamReverseConnectionIOHandle::close() { return Api::ioCallUint64ResultNoError(); } - // Notify parent that this downstream connection has been closed - // This will trigger re-initiation of the reverse connection if needed. + // Notify the parent that this downstream connection has been closed. + // This can trigger re-initiation of the reverse connection if needed. if (parent_) { parent_->onDownstreamConnectionClosed(connection_key_); ENVOY_LOG( @@ -70,19 +148,18 @@ Api::IoCallUint64Result DownstreamReverseConnectionIOHandle::close() { return IoSocketHandleImpl::close(); } -// DownstreamReverseConnectionIOHandle shutdown() implementation. Api::SysCallIntResult DownstreamReverseConnectionIOHandle::shutdown(int how) { ENVOY_LOG(trace, "DownstreamReverseConnectionIOHandle: shutdown({}) called for FD: {} with connection " "key: {}", how, fd_, connection_key_); - // If we're ignoring shutdown calls during socket hand-off, just return success. + // If shutdown is ignored during socket hand-off, return success. 
if (ignore_close_and_shutdown_) { ENVOY_LOG( debug, - "DownstreamReverseConnectionIOHandle: ignoring shutdown() call during socket hand-off " - "for connection key: {}", + "DownstreamReverseConnectionIOHandle: ignoring shutdown() call during socket hand-off for " + "connection key: {}", connection_key_); return Api::SysCallIntResult{0, 0}; } diff --git a/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h b/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h index 27d04f1d2a5cd..cab9ae201e62f 100644 --- a/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h +++ b/source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h @@ -32,7 +32,10 @@ class DownstreamReverseConnectionIOHandle : public Network::IoSocketHandleImpl { ~DownstreamReverseConnectionIOHandle() override; - // Network::IoHandle overrides + // Network::IoHandle overrides. + // Intercept reads to handle reverse connection keep-alive pings. + Api::IoCallUint64Result read(Buffer::Instance& buffer, + absl::optional max_length) override; Api::IoCallUint64Result close() override; Api::SysCallIntResult shutdown(int how) override; @@ -57,6 +60,10 @@ class DownstreamReverseConnectionIOHandle : public Network::IoSocketHandleImpl { std::string connection_key_; // Flag to ignore close and shutdown calls during socket hand-off. bool ignore_close_and_shutdown_{false}; + + // Whether to actively echo RPING messages while the connection is idle. + // Disabled permanently after the first non-RPING application byte is observed. 
+ bool ping_echo_active_{true}; }; } // namespace ReverseConnection diff --git a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.cc b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.cc index e253439cd5083..80b53e3a80b62 100644 --- a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.cc +++ b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.cc @@ -16,26 +16,31 @@ UpstreamSocketThreadLocal::UpstreamSocketThreadLocal(Event::Dispatcher& dispatch // ReverseTunnelAcceptorExtension implementation void ReverseTunnelAcceptorExtension::onServerInitialized() { ENVOY_LOG(debug, - "ReverseTunnelAcceptorExtension::onServerInitialized - creating thread local slot"); + "ReverseTunnelAcceptorExtension::onServerInitialized: creating thread-local slot."); // Set the extension reference in the socket interface. if (socket_interface_) { socket_interface_->extension_ = this; } - // Create thread local slot for dispatcher and socket manager. + // Create thread-local slot for the dispatcher and socket manager. tls_slot_ = ThreadLocal::TypedSlot::makeUnique(context_.threadLocal()); - // Set up the thread local dispatcher and socket manager. + // Set up the thread-local dispatcher and socket manager. tls_slot_->set([this](Event::Dispatcher& dispatcher) { - return std::make_shared(dispatcher, this); + auto tls = std::make_shared(dispatcher, this); + // Propagate configured miss threshold into the socket manager. + if (tls->socketManager()) { + tls->socketManager()->setMissThreshold(ping_failure_threshold_); + } + return tls; }); } -// Get thread local registry for the current thread +// Get thread-local registry for the current thread. 
UpstreamSocketThreadLocal* ReverseTunnelAcceptorExtension::getLocalRegistry() const { if (!tls_slot_) { - ENVOY_LOG(error, "ReverseTunnelAcceptorExtension::getLocalRegistry() - no thread local slot"); + ENVOY_LOG(error, "ReverseTunnelAcceptorExtension::getLocalRegistry(): no thread-local slot."); return nullptr; } diff --git a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h index 0b3bb4632873a..73950c60bc4c7 100644 --- a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h +++ b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h @@ -66,7 +66,7 @@ class UpstreamSocketThreadLocal : public ThreadLocal::ThreadLocalObject { class ReverseTunnelAcceptorExtension : public Envoy::Network::SocketInterfaceExtension, public Envoy::Logger::Loggable { - // Friend class for testing + // Friend class for testing. friend class ReverseTunnelAcceptorExtensionTest; public: @@ -87,6 +87,10 @@ class ReverseTunnelAcceptorExtension stat_prefix_); stat_prefix_ = PROTOBUF_GET_STRING_OR_DEFAULT(config, stat_prefix, "upstream_reverse_connection"); + // Configure ping miss threshold (minimum 1). + const uint32_t cfg_threshold = + PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, ping_failure_threshold, 3); + ping_failure_threshold_ = std::max(1, cfg_threshold); // Ensure the socket interface has a reference to this extension early, so stats can be // recorded even before onServerInitialized(). if (socket_interface_ != nullptr) { @@ -114,6 +118,11 @@ class ReverseTunnelAcceptorExtension */ const std::string& statPrefix() const { return stat_prefix_; } + /** + * @return the configured miss threshold for ping health-checks. 
+ */ + uint32_t pingFailureThreshold() const { return ping_failure_threshold_; } + /** * Synchronous version for admin API endpoints that require immediate response on reverse * connection stats. @@ -174,6 +183,7 @@ class ReverseTunnelAcceptorExtension std::unique_ptr> tls_slot_; ReverseTunnelAcceptor* socket_interface_; std::string stat_prefix_; + uint32_t ping_failure_threshold_{3}; }; } // namespace ReverseConnection diff --git a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.cc b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.cc index 66393355bdaf0..1943a9fc506dc 100644 --- a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.cc +++ b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.cc @@ -8,6 +8,8 @@ #include "source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.h" #include "source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h" +#include "absl/strings/string_view.h" + namespace Envoy { namespace Extensions { namespace Bootstrap { @@ -18,7 +20,7 @@ UpstreamSocketManager::UpstreamSocketManager(Event::Dispatcher& dispatcher, ReverseTunnelAcceptorExtension* extension) : dispatcher_(dispatcher), random_generator_(std::make_unique()), extension_(extension) { - ENVOY_LOG(debug, "reverse_tunnel: creating socket manager with stats integration"); + ENVOY_LOG(debug, "reverse_tunnel: creating socket manager with stats integration."); ping_timer_ = dispatcher_.createTimer([this]() { pingConnections(); }); } @@ -26,13 +28,13 @@ void UpstreamSocketManager::addConnectionSocket(const std::string& node_id, const std::string& cluster_id, Network::ConnectionSocketPtr socket, const std::chrono::seconds& ping_interval, bool) { - ENVOY_LOG(debug, "reverse_tunnel: adding connection for node: {}, cluster: {}", node_id, + 
ENVOY_LOG(debug, "reverse_tunnel: adding connection for node: {}, cluster: {}.", node_id, cluster_id); // Both node_id and cluster_id are mandatory for consistent state management and stats tracking. if (node_id.empty() || cluster_id.empty()) { ENVOY_LOG(error, - "reverse_tunnel: node_id or cluster_id cannot be empty. node: '{}', cluster: '{}'", + "reverse_tunnel: node_id or cluster_id cannot be empty. node: '{}', cluster: '{}'.", node_id, cluster_id); return; } @@ -40,36 +42,31 @@ void UpstreamSocketManager::addConnectionSocket(const std::string& node_id, const int fd = socket->ioHandle().fdDoNotUse(); const std::string& connectionKey = socket->connectionInfoProvider().localAddress()->asString(); - ENVOY_LOG(debug, "reverse_tunnel: adding socket for node: {}, cluster: {}", node_id, cluster_id); + ENVOY_LOG(debug, "reverse_tunnel: adding socket with FD: {} for node: {}, cluster: {}.", fd, + node_id, cluster_id); // Store node -> cluster mapping. - ENVOY_LOG(trace, "reverse_tunnel: adding mapping node: {} -> cluster: {}", node_id, cluster_id); + ENVOY_LOG(trace, "reverse_tunnel: adding mapping node {} -> cluster {}.", node_id, cluster_id); if (node_to_cluster_map_.find(node_id) == node_to_cluster_map_.end()) { node_to_cluster_map_[node_id] = cluster_id; cluster_to_node_map_[cluster_id].push_back(node_id); } - ENVOY_LOG(trace, - "UpstreamSocketManager: node_to_cluster_map_ has {} entries, cluster_to_node_map_ has " - "{} entries", - node_to_cluster_map_.size(), cluster_to_node_map_.size()); - - ENVOY_LOG(trace, - "UpstreamSocketManager: added socket to accepted_reverse_connections_ for node: {} " - "cluster: {}", - node_id, cluster_id); - // If local envoy is responding to reverse connections, add the socket to + fd_to_node_map_[fd] = node_id; + // Initialize the ping timer before adding the socket to accepted_reverse_connections_. 
+ // This is to prevent a race condition between pingConnections() and addConnectionSocket() + // where the timer is not initialized when pingConnections() tries to enable it. + fd_to_timer_map_[fd] = dispatcher_.createTimer([this, fd]() { onPingTimeout(fd); }); + + // If local Envoy is responding to reverse connections, add the socket to // accepted_reverse_connections_. Thereafter, initiate ping keepalives on the socket. accepted_reverse_connections_[node_id].push_back(std::move(socket)); Network::ConnectionSocketPtr& socket_ref = accepted_reverse_connections_[node_id].back(); - ENVOY_LOG(debug, "reverse_tunnel: mapping fd {} to node: {}", fd, node_id); - fd_to_node_map_[fd] = node_id; - - // Update stats registry + // Update stats registry. if (auto extension = getUpstreamExtension()) { extension->updateConnectionStats(node_id, cluster_id, true /* increment */); - ENVOY_LOG(debug, "UpstreamSocketManager: updated stats registry for node '{}' cluster '{}'", + ENVOY_LOG(debug, "UpstreamSocketManager: updated stats registry for node '{}' cluster '{}'.", node_id, cluster_id); } @@ -83,42 +80,40 @@ void UpstreamSocketManager::addConnectionSocket(const std::string& node_id, }, Event::FileTriggerType::Edge, Event::FileReadyType::Read); - fd_to_timer_map_[fd] = dispatcher_.createTimer([this, fd]() { markSocketDead(fd); }); - // Initiate ping keepalives on the socket. tryEnablePingTimer(std::chrono::seconds(ping_interval.count())); - ENVOY_LOG( - info, - "UpstreamSocketManager: done adding socket to maps with node: {} connection key: {} fd: {}", - node_id, connectionKey, fd); + ENVOY_LOG(debug, + "UpstreamSocketManager: added socket to maps. 
node: {} connection key: {} fd: {}.", + node_id, connectionKey, fd); } Network::ConnectionSocketPtr UpstreamSocketManager::getConnectionSocket(const std::string& node_id) { - ENVOY_LOG(debug, "UpstreamSocketManager: getConnectionSocket() called with node_id: {}", node_id); + ENVOY_LOG(debug, "UpstreamSocketManager: getConnectionSocket() called with node_id: {}.", + node_id); if (node_to_cluster_map_.find(node_id) == node_to_cluster_map_.end()) { - ENVOY_LOG(error, "UpstreamSocketManager: cluster -> node mapping changed for node: {}", + ENVOY_LOG(error, "UpstreamSocketManager: cluster to node mapping changed for node: {}.", node_id); return nullptr; } const std::string& cluster_id = node_to_cluster_map_[node_id]; - ENVOY_LOG(debug, "UpstreamSocketManager: Looking for socket with node: {} cluster: {}", node_id, + ENVOY_LOG(debug, "UpstreamSocketManager: looking for socket. node: {} cluster: {}.", node_id, cluster_id); // Find first available socket for the node. auto node_sockets_it = accepted_reverse_connections_.find(node_id); if (node_sockets_it == accepted_reverse_connections_.end() || node_sockets_it->second.empty()) { - ENVOY_LOG(debug, "UpstreamSocketManager: No available sockets for node: {}", node_id); + ENVOY_LOG(debug, "UpstreamSocketManager: no available sockets for node: {}.", node_id); return nullptr; } // Debugging: Print the number of free sockets on this worker thread - ENVOY_LOG(debug, "UpstreamSocketManager: Found {} sockets for node: {}", + ENVOY_LOG(trace, "UpstreamSocketManager: found {} sockets for node: {}.", node_sockets_it->second.size(), node_id); // Fetch the socket from the accepted_reverse_connections_ and remove it from the list @@ -130,8 +125,8 @@ UpstreamSocketManager::getConnectionSocket(const std::string& node_id) { socket->connectionInfoProvider().remoteAddress()->asString(); ENVOY_LOG(debug, - "UpstreamSocketManager: Reverse conn socket with FD:{} connection key:{} found for " - "node: {} cluster: {}", + "UpstreamSocketManager: 
reverse connection socket found. fd: {} connection key: {} " + "node: {} cluster: {}.", fd, remoteConnectionKey, node_id, cluster_id); fd_to_event_map_.erase(fd); @@ -143,7 +138,7 @@ UpstreamSocketManager::getConnectionSocket(const std::string& node_id) { } std::string UpstreamSocketManager::getNodeID(const std::string& key) { - ENVOY_LOG(debug, "UpstreamSocketManager: getNodeID() called with key: {}", key); + ENVOY_LOG(trace, "UpstreamSocketManager: getNodeID() called with key: {}.", key); // First check if the key exists as a cluster ID by checking global stats // This ensures we check across all threads, not just the current thread @@ -164,8 +159,8 @@ std::string UpstreamSocketManager::getNodeID(const std::string& key) { auto node_idx = random_generator_->random() % cluster_nodes_it->second.size(); std::string node_id = cluster_nodes_it->second[node_idx]; ENVOY_LOG(debug, - "UpstreamSocketManager: key '{}' is cluster ID with {} connections, returning " - "random node: {}", + "UpstreamSocketManager: key '{}' is a cluster ID with {} connections; returning " + "random node {}.", key, cluster_gauge.value(), node_id); return node_id; } @@ -175,21 +170,21 @@ std::string UpstreamSocketManager::getNodeID(const std::string& key) { // Key is not a cluster ID, has no connections, or has no local mapping // Treat it as a node ID and return it directly - ENVOY_LOG(debug, "UpstreamSocketManager: key '{}' is node ID, returning as-is", key); + ENVOY_LOG(trace, "UpstreamSocketManager: key '{}' is a node ID; returning as-is.", key); return key; } void UpstreamSocketManager::markSocketDead(const int fd) { - ENVOY_LOG(trace, "UpstreamSocketManager: markSocketDead called for fd {}", fd); + ENVOY_LOG(trace, "UpstreamSocketManager: markSocketDead called for fd {}.", fd); auto node_it = fd_to_node_map_.find(fd); if (node_it == fd_to_node_map_.end()) { - ENVOY_LOG(debug, "UpstreamSocketManager: FD {} not found in fd_to_node_map_", fd); + ENVOY_LOG(debug, "UpstreamSocketManager: fd {} 
not found in fd_to_node_map_.", fd); return; } const std::string node_id = node_it->second; // Make a COPY, not a reference - ENVOY_LOG(debug, "UpstreamSocketManager: found node '{}' for fd {}", node_id, fd); + ENVOY_LOG(trace, "UpstreamSocketManager: found node '{}' for fd {}.", node_id, fd); std::string cluster_id = (node_to_cluster_map_.find(node_id) != node_to_cluster_map_.end()) ? node_to_cluster_map_[node_id] @@ -201,25 +196,26 @@ void UpstreamSocketManager::markSocketDead(const int fd) { if (sockets.empty()) { // This is a used connection. Mark the stats and return. The socket will be closed by the // owning UpstreamReverseConnectionIOHandle. - ENVOY_LOG(debug, "UpstreamSocketManager: Marking used socket dead. node: {} cluster: {} FD: {}", + ENVOY_LOG(debug, + "UpstreamSocketManager: marking used socket dead. node: {} cluster: {} fd: {}.", node_id, cluster_id, fd); // Update Envoy's stats system for production multi-tenant tracking // This ensures stats are decremented when connections are removed if (auto extension = getUpstreamExtension()) { extension->updateConnectionStats(node_id, cluster_id, false /* decrement */); - ENVOY_LOG(debug, - "UpstreamSocketManager: decremented stats registry for node '{}' cluster '{}'", + ENVOY_LOG(trace, + "UpstreamSocketManager: decremented stats registry for node '{}' cluster '{}'.", node_id, cluster_id); } return; } - // This is an idle connection, find and remove it from the pool + // This is an idle connection, find and remove it from the pool. bool socket_found = false; for (auto itr = sockets.begin(); itr != sockets.end(); itr++) { if (fd == itr->get()->ioHandle().fdDoNotUse()) { - ENVOY_LOG(debug, "UpstreamSocketManager: Marking socket dead; node: {}, cluster: {} FD: {}", + ENVOY_LOG(debug, "UpstreamSocketManager: marking socket dead. 
node: {} cluster: {} fd: {}.", node_id, cluster_id, fd); ::shutdown(fd, SHUT_RDWR); itr = sockets.erase(itr); @@ -228,12 +224,12 @@ void UpstreamSocketManager::markSocketDead(const int fd) { fd_to_event_map_.erase(fd); fd_to_timer_map_.erase(fd); - // Update Envoy's stats system for production multi-tenant tracking - // This ensures stats are decremented when connections are removed + // Update Envoy's stats system for production multi-tenant tracking. + // This ensures stats are decremented when connections are removed. if (auto extension = getUpstreamExtension()) { extension->updateConnectionStats(node_id, cluster_id, false /* decrement */); - ENVOY_LOG(debug, - "UpstreamSocketManager: decremented stats registry for node '{}' cluster '{}'", + ENVOY_LOG(trace, + "UpstreamSocketManager: decremented stats registry for node '{}' cluster '{}'.", node_id, cluster_id); } break; @@ -241,8 +237,10 @@ void UpstreamSocketManager::markSocketDead(const int fd) { } if (!socket_found) { - ENVOY_LOG(error, "UpstreamSocketManager: Marking an invalid socket dead. node: {} FD: {}", - node_id, fd); + ENVOY_LOG( + error, + "UpstreamSocketManager: attempted to mark a non-existent socket dead. node: {} fd: {}.", + node_id, fd); } if (sockets.size() == 0) { @@ -263,14 +261,14 @@ void UpstreamSocketManager::tryEnablePingTimer(const std::chrono::seconds& ping_ } void UpstreamSocketManager::cleanStaleNodeEntry(const std::string& node_id) { - // Clean the given node-id, if there are no active sockets. + // Clean the given node ID if there are no active sockets. 
if (accepted_reverse_connections_.find(node_id) != accepted_reverse_connections_.end() && accepted_reverse_connections_[node_id].size() > 0) { - ENVOY_LOG(debug, "Found {} active sockets for node: {}", + ENVOY_LOG(trace, "UpstreamSocketManager: found {} active sockets for node {}.", accepted_reverse_connections_[node_id].size(), node_id); return; } - ENVOY_LOG(debug, "UpstreamSocketManager: Cleaning stale node entry for node: {}", node_id); + ENVOY_LOG(debug, "UpstreamSocketManager: cleaning stale node entry for node {}.", node_id); // Check if given node-id, is present in node_to_cluster_map_. If present, // fetch the corresponding cluster-id. Use cluster-id and node-id to delete entry @@ -283,7 +281,7 @@ void UpstreamSocketManager::cleanStaleNodeEntry(const std::string& node_id) { find(cluster_itr->second.begin(), cluster_itr->second.end(), node_id); if (node_entry_itr != cluster_itr->second.end()) { - ENVOY_LOG(debug, "UpstreamSocketManager:Removing stale node {} from cluster {}", node_id, + ENVOY_LOG(trace, "UpstreamSocketManager: removing stale node {} from cluster {}.", node_id, cluster_itr->first); cluster_itr->second.erase(node_entry_itr); @@ -329,20 +327,26 @@ void UpstreamSocketManager::onPingResponse(Network::IoHandle& io_handle) { return; } + const char* data = static_cast(buffer.linearize(ping_size)); + absl::string_view view{data, static_cast(ping_size)}; if (!::Envoy::Extensions::Bootstrap::ReverseConnection::ReverseConnectionUtility::isPingMessage( - buffer.toString())) { - ENVOY_LOG(debug, "UpstreamSocketManager: FD: {}: response is not RPING", fd); - markSocketDead(fd); + view)) { + ENVOY_LOG(debug, "UpstreamSocketManager: response is not RPING. fd: {}.", fd); + // Treat as a miss; do not immediately kill unless threshold crossed. + onPingTimeout(fd); return; } - ENVOY_LOG(trace, "UpstreamSocketManager: FD: {}: received ping response", fd); + ENVOY_LOG(trace, "UpstreamSocketManager: received ping response. 
fd: {}.", fd); fd_to_timer_map_[fd]->disableTimer(); + // Reset miss counter on success. + fd_to_miss_count_.erase(fd); } void UpstreamSocketManager::pingConnections(const std::string& node_id) { - ENVOY_LOG(debug, "UpstreamSocketManager: Pinging connections for node: {}", node_id); + ENVOY_LOG(trace, "UpstreamSocketManager: pinging connections for node {}.", node_id); auto& sockets = accepted_reverse_connections_[node_id]; - ENVOY_LOG(debug, "UpstreamSocketManager: node:{} Number of sockets:{}", node_id, sockets.size()); + ENVOY_LOG(trace, "UpstreamSocketManager: number of sockets for node {} is {}.", node_id, + sockets.size()); auto itr = sockets.begin(); while (itr != sockets.end()) { @@ -376,13 +380,13 @@ void UpstreamSocketManager::pingConnections(const std::string& node_id) { } if (buffer->length() > 0) { - // Move to next socket if current one couldn't be fully written + // Move to next socket if current one could not be fully written. ++itr; continue; } if (socket_dead) { - // Socket was marked dead, iterator is now invalid, break out of the loop + // Socket was marked dead; the iterator is now invalid. Break out of the loop. break; } @@ -392,25 +396,38 @@ void UpstreamSocketManager::pingConnections(const std::string& node_id) { } void UpstreamSocketManager::pingConnections() { - ENVOY_LOG(error, "UpstreamSocketManager: Pinging connections"); + ENVOY_LOG(trace, "UpstreamSocketManager: pinging connections."); for (auto& itr : accepted_reverse_connections_) { pingConnections(itr.first); } ping_timer_->enableTimer(ping_interval_); } +void UpstreamSocketManager::onPingTimeout(const int fd) { + ENVOY_LOG(debug, "UpstreamSocketManager: ping timeout or invalid ping. fd: {}.", fd); + // Increment miss count and evaluate threshold. + const uint32_t misses = ++fd_to_miss_count_[fd]; + ENVOY_LOG(trace, "UpstreamSocketManager: miss count {}. 
fd: {}.", misses, fd); + if (misses >= miss_threshold_) { + ENVOY_LOG(debug, "UpstreamSocketManager: fd {} exceeded miss threshold {}; marking dead.", fd, + miss_threshold_); + fd_to_miss_count_.erase(fd); + markSocketDead(fd); + } +} + UpstreamSocketManager::~UpstreamSocketManager() { - ENVOY_LOG(debug, "UpstreamSocketManager: destructor called"); + ENVOY_LOG(debug, "UpstreamSocketManager: destructor called."); // Clean up all active file events and timers first for (auto& [fd, event] : fd_to_event_map_) { - ENVOY_LOG(debug, "UpstreamSocketManager: cleaning up file event for FD: {}", fd); + ENVOY_LOG(trace, "UpstreamSocketManager: cleaning up file event. fd: {}.", fd); event.reset(); // This will cancel the file event. } fd_to_event_map_.clear(); for (auto& [fd, timer] : fd_to_timer_map_) { - ENVOY_LOG(debug, "UpstreamSocketManager: cleaning up timer for FD: {}", fd); + ENVOY_LOG(trace, "UpstreamSocketManager: cleaning up timer. fd: {}.", fd); timer.reset(); // This will cancel the timer. } fd_to_timer_map_.clear(); @@ -422,7 +439,7 @@ UpstreamSocketManager::~UpstreamSocketManager() { } for (int fd : fds_to_cleanup) { - ENVOY_LOG(trace, "UpstreamSocketManager: marking socket dead in destructor for FD: {}", fd); + ENVOY_LOG(trace, "UpstreamSocketManager: marking socket dead in destructor. 
fd: {}.", fd); markSocketDead(fd); // false = not used, just cleanup } diff --git a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.h b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.h index 43d435f8abb96..05e34f79331c7 100644 --- a/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.h +++ b/source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.h @@ -91,6 +91,19 @@ class UpstreamSocketManager : public ThreadLocal::ThreadLocalObject, */ void onPingResponse(Network::IoHandle& io_handle); + /** + * Handle ping response timeout for a specific socket. + * Increments miss count and marks socket dead if threshold reached. + * @param fd the file descriptor whose ping timed out. + */ + void onPingTimeout(int fd); + + /** + * Set the miss threshold (consecutive misses before marking a socket dead). + * @param threshold minimum value 1. + */ + void setMissThreshold(uint32_t threshold) { miss_threshold_ = std::max(1, threshold); } + /** * Get the upstream extension for stats integration. * @return pointer to the upstream extension or nullptr if not available. @@ -126,6 +139,12 @@ class UpstreamSocketManager : public ThreadLocal::ThreadLocalObject, absl::flat_hash_map fd_to_event_map_; absl::flat_hash_map fd_to_timer_map_; + // Track consecutive ping misses per file descriptor. + absl::flat_hash_map fd_to_miss_count_; + // Miss threshold before declaring a socket dead. 
+ static constexpr uint32_t kDefaultMissThreshold = 3; + uint32_t miss_threshold_{kDefaultMissThreshold}; + Event::TimerPtr ping_timer_; std::chrono::seconds ping_interval_{0}; diff --git a/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/BUILD b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/BUILD index 30d472a1d73c7..e0ab471193d60 100644 --- a/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/BUILD +++ b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/BUILD @@ -67,6 +67,24 @@ envoy_cc_test( ], ) +envoy_cc_test( + name = "downstream_reverse_connection_io_handle_test", + size = "medium", + srcs = ["downstream_reverse_connection_io_handle_test.cc"], + deps = [ + "//source/common/buffer:buffer_lib", + "//source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface:reverse_connection_io_handle_lib", + "//source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface:reverse_tunnel_extension_lib", + "//source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface:reverse_tunnel_initiator_lib", + "//test/mocks/event:event_mocks", + "//test/mocks/server:factory_context_mocks", + "//test/mocks/stats:stats_mocks", + "//test/mocks/thread_local:thread_local_mocks", + "//test/mocks/upstream:upstream_mocks", + "@envoy_api//envoy/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/v3:pkg_cc_proto", + ], +) + envoy_cc_test( name = "reverse_connection_address_test", size = "medium", diff --git a/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle_test.cc b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle_test.cc new file mode 100644 index 0000000000000..5525b0a2a9c74 --- /dev/null +++ b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle_test.cc @@ -0,0 +1,548 @@ +#include +#include 
+#include + +#include + +#include "envoy/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/v3/downstream_reverse_connection_socket_interface.pb.h" +#include "envoy/server/factory_context.h" +#include "envoy/thread_local/thread_local.h" + +#include "source/common/buffer/buffer_impl.h" +#include "source/common/network/address_impl.h" +#include "source/common/network/io_socket_error_impl.h" +#include "source/common/network/socket_impl.h" +#include "source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.h" +#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/downstream_reverse_connection_io_handle.h" +#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle.h" +#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_tunnel_initiator.h" +#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_tunnel_initiator_extension.h" + +#include "test/mocks/event/mocks.h" +#include "test/mocks/network/mocks.h" +#include "test/mocks/server/factory_context.h" +#include "test/mocks/stats/mocks.h" +#include "test/mocks/thread_local/mocks.h" +#include "test/mocks/upstream/mocks.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::_; +using testing::Invoke; +using testing::NiceMock; +using testing::Return; +using testing::ReturnRef; + +namespace Envoy { +namespace Extensions { +namespace Bootstrap { +namespace ReverseConnection { + +// Base test class for ReverseConnectionIOHandle (minimal version for +// DownstreamReverseConnectionIOHandleTest) +class ReverseConnectionIOHandleTestBase : public testing::Test { +protected: + ReverseConnectionIOHandleTestBase() { + // Set up the stats scope. + stats_scope_ = Stats::ScopeSharedPtr(stats_store_.createScope("test_scope.")); + + // Set up the mock context. 
+ EXPECT_CALL(context_, threadLocal()).WillRepeatedly(ReturnRef(thread_local_)); + EXPECT_CALL(context_, scope()).WillRepeatedly(ReturnRef(*stats_scope_)); + EXPECT_CALL(context_, clusterManager()).WillRepeatedly(ReturnRef(cluster_manager_)); + + // Create the socket interface. + socket_interface_ = std::make_unique(context_); + + // Create the extension. + extension_ = std::make_unique(context_, config_); + + // Set up mock dispatcher with default expectations. + EXPECT_CALL(dispatcher_, createTimer_(_)) + .WillRepeatedly(testing::ReturnNew>()); + EXPECT_CALL(dispatcher_, createFileEvent_(_, _, _, _)) + .WillRepeatedly(testing::ReturnNew>()); + } + + void TearDown() override { + io_handle_.reset(); + extension_.reset(); + socket_interface_.reset(); + } + + // Helper to create a ReverseConnectionIOHandle with specified configuration. + std::unique_ptr + createTestIOHandle(const ReverseConnectionSocketConfig& config) { + // Create a test socket file descriptor. + int test_fd = ::socket(AF_INET, SOCK_STREAM, 0); + EXPECT_GE(test_fd, 0); + + // Create the IO handle. + return std::make_unique(test_fd, config, cluster_manager_, + extension_.get(), *stats_scope_); + } + + // Helper to create a default test configuration. + ReverseConnectionSocketConfig createDefaultTestConfig() { + ReverseConnectionSocketConfig config; + config.src_cluster_id = "test-cluster"; + config.src_node_id = "test-node"; + config.enable_circuit_breaker = true; + config.remote_clusters.push_back(RemoteClusterConnectionConfig("remote-cluster", 2)); + return config; + } + + NiceMock context_; + NiceMock thread_local_; + Stats::IsolatedStoreImpl stats_store_; + Stats::ScopeSharedPtr stats_scope_; + NiceMock dispatcher_{"worker_0"}; + + envoy::extensions::bootstrap::reverse_tunnel::downstream_socket_interface::v3:: + DownstreamReverseConnectionSocketInterface config_; + + std::unique_ptr socket_interface_; + std::unique_ptr extension_; + std::unique_ptr io_handle_; + + // Mock cluster manager. 
+ NiceMock cluster_manager_; + + // Set log level to debug for this test class. + LogLevelSetter log_level_setter_ = LogLevelSetter(spdlog::level::debug); + + // Mock socket for testing. + std::unique_ptr mock_socket_; +}; + +/** + * Test class for DownstreamReverseConnectionIOHandle. + */ +class DownstreamReverseConnectionIOHandleTest : public ReverseConnectionIOHandleTestBase { +protected: + void SetUp() override { + ReverseConnectionIOHandleTestBase::SetUp(); + + // Initialize io_handle_ for testing. + auto config = createDefaultTestConfig(); + io_handle_ = createTestIOHandle(config); + EXPECT_NE(io_handle_, nullptr); + + // Create a mock socket for testing. + mock_socket_ = std::make_unique>(); + auto mock_io_handle_unique = std::make_unique>(); + mock_io_handle_ = mock_io_handle_unique.get(); + + // Set up basic mock expectations. + EXPECT_CALL(*mock_io_handle_, fdDoNotUse()).WillRepeatedly(Return(42)); // Arbitrary FD + EXPECT_CALL(*mock_socket_, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle_)); + } + + void TearDown() override { + mock_socket_.reset(); + ReverseConnectionIOHandleTestBase::TearDown(); + } + + // Helper to create a DownstreamReverseConnectionIOHandle. + std::unique_ptr + createHandle(ReverseConnectionIOHandle* parent = nullptr, + const std::string& connection_key = "test_connection_key") { + // Create a new mock socket for each handle to avoid releasing the shared one. + auto new_mock_socket = std::make_unique>(); + auto new_mock_io_handle = std::make_unique>(); + + // Store the raw pointer before moving + mock_io_handle_ = new_mock_io_handle.get(); + + // Set up basic mock expectations. + EXPECT_CALL(*mock_io_handle_, fdDoNotUse()).WillRepeatedly(Return(42)); // Arbitrary FD + EXPECT_CALL(*new_mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle_)); + + auto socket_ptr = std::unique_ptr(new_mock_socket.release()); + return std::make_unique(std::move(socket_ptr), parent, + connection_key); + } + + // Test fixtures. 
+ std::unique_ptr> mock_socket_; + NiceMock* mock_io_handle_; // Raw pointer, managed by socket +}; + +// Test constructor and destructor. +TEST_F(DownstreamReverseConnectionIOHandleTest, Setup) { + // Test constructor with parent. + { + auto handle = createHandle(io_handle_.get(), "test_key_1"); + EXPECT_NE(handle, nullptr); + // Test fdDoNotUse() before any other operations. + EXPECT_EQ(handle->fdDoNotUse(), 42); + } // Destructor called here + + // Test constructor without parent. + { + auto handle = createHandle(nullptr, "test_key_2"); + EXPECT_NE(handle, nullptr); + // Test fdDoNotUse() before any other operations. + EXPECT_EQ(handle->fdDoNotUse(), 42); + } // Destructor called here +} + +// Test close() method and all edge cases. +TEST_F(DownstreamReverseConnectionIOHandleTest, CloseMethod) { + // Test with parent - should notify parent and reset socket. + { + auto handle = createHandle(io_handle_.get(), "test_key"); + + // Verify that parent is set correctly. + EXPECT_NE(io_handle_.get(), nullptr); + + // First close - should notify parent and reset owned_socket. + auto result1 = handle->close(); + EXPECT_EQ(result1.err_, nullptr); + + // Second close - should return immediately without notifying parent (fd < 0). + auto result2 = handle->close(); + EXPECT_EQ(result2.err_, nullptr); + } +} + +// Test getSocket() method. +TEST_F(DownstreamReverseConnectionIOHandleTest, GetSocket) { + auto handle = createHandle(io_handle_.get(), "test_key"); + + // Test getSocket() returns the owned socket. + const auto& socket = handle->getSocket(); + EXPECT_NE(&socket, nullptr); + + // Test getSocket() works on const object. + const auto const_handle = createHandle(io_handle_.get(), "test_key"); + const auto& const_socket = const_handle->getSocket(); + EXPECT_NE(&const_socket, nullptr); + + // Test that getSocket() works before close() is called. + EXPECT_EQ(handle->fdDoNotUse(), 42); +} + +// Test ignoreCloseAndShutdown() functionality. 
+TEST_F(DownstreamReverseConnectionIOHandleTest, IgnoreCloseAndShutdown) { + auto handle = createHandle(io_handle_.get(), "test_key"); + + // Initially, close and shutdown should work normally + // Test shutdown before ignoring - we don't check the result since it depends on base + // implementation + handle->shutdown(SHUT_RDWR); + + // Now enable ignore mode + handle->ignoreCloseAndShutdown(); + + // Test that close() is ignored when flag is set + auto close_result = handle->close(); + EXPECT_EQ(close_result.err_, nullptr); // Should return success but do nothing + + // Test that shutdown() is ignored when flag is set + auto shutdown_result2 = handle->shutdown(SHUT_RDWR); + EXPECT_EQ(shutdown_result2.return_value_, 0); + EXPECT_EQ(shutdown_result2.errno_, 0); + + // Test different shutdown modes are all ignored + auto shutdown_rd = handle->shutdown(SHUT_RD); + EXPECT_EQ(shutdown_rd.return_value_, 0); + EXPECT_EQ(shutdown_rd.errno_, 0); + + auto shutdown_wr = handle->shutdown(SHUT_WR); + EXPECT_EQ(shutdown_wr.return_value_, 0); + EXPECT_EQ(shutdown_wr.errno_, 0); +} + +// Test read() method with real socket pairs to validate RPING handling. +TEST_F(DownstreamReverseConnectionIOHandleTest, ReadRpingEchoScenarios) { + const std::string rping_msg = std::string(ReverseConnectionUtility::PING_MESSAGE); + + // A complete RPING message should be echoed and drained. + { + // Create a socket pair. + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + // Create a mock socket with real file descriptor + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + // Store the io handle in the socket. + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + // Create handle with the socket. 
+ auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key"); + + // Write RPING to the other end of the socket pair. + ssize_t written = write(fds[1], rping_msg.data(), rping_msg.size()); + ASSERT_EQ(written, static_cast(rping_msg.size())); + + // Read should process RPING and return the size (indicating RPING was handled). + Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, rping_msg.size()); + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer.length(), 0); // RPING should be drained. + + // Verify RPING echo was sent back. + char echo_buffer[10]; + ssize_t echo_read = read(fds[1], echo_buffer, sizeof(echo_buffer)); + EXPECT_EQ(echo_read, static_cast(rping_msg.size())); + EXPECT_EQ(std::string(echo_buffer, echo_read), rping_msg); + + close(fds[1]); + } + + // When RPING is followed by application data, echo RPING, keep application data, + // and disable echo. + { + // Create another socket pair. + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key2"); + + const std::string app_data = "GET /path HTTP/1.1\r\n"; + const std::string combined = rping_msg + app_data; + + // Write combined data to socket. + ssize_t written = write(fds[1], combined.data(), combined.size()); + ASSERT_EQ(written, static_cast(combined.size())); + + // Read should process RPING and return only app data size. 
+ Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, app_data.size()); // Only app data size + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer.toString(), app_data); // Only app data remains + + // Verify RPING echo was sent back. + char echo_buffer[10]; + ssize_t echo_read = read(fds[1], echo_buffer, sizeof(echo_buffer)); + EXPECT_EQ(echo_read, static_cast(rping_msg.size())); + EXPECT_EQ(std::string(echo_buffer, echo_read), rping_msg); + + close(fds[1]); + } + + // Non-RPING data should disable echo and pass through. + { + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key3"); + + const std::string http_data = "GET /path HTTP/1.1\r\n"; + + // Write HTTP data to socket. + ssize_t written = write(fds[1], http_data.data(), http_data.size()); + ASSERT_EQ(written, static_cast(http_data.size())); + + // Read should return all HTTP data without processing. + Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, http_data.size()); + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer.toString(), http_data); + + // Verify no echo was sent back. + char echo_buffer[10]; + // Set socket to non-blocking to avoid hanging. 
+ int flags = fcntl(fds[1], F_GETFL, 0); + fcntl(fds[1], F_SETFL, flags | O_NONBLOCK); + ssize_t echo_read = read(fds[1], echo_buffer, sizeof(echo_buffer)); + EXPECT_EQ(echo_read, -1); + EXPECT_EQ(errno, EAGAIN); // No data available + + close(fds[1]); + } +} + +// Test read() method with partial data handling using real sockets. +TEST_F(DownstreamReverseConnectionIOHandleTest, ReadPartialDataAndStateTransitions) { + const std::string rping_msg = std::string(ReverseConnectionUtility::PING_MESSAGE); + + // A partial RPING should pass through and wait for more data. + { + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key"); + + // Write partial RPING (first 3 bytes). + const std::string partial_rping = rping_msg.substr(0, 3); + ssize_t written = write(fds[1], partial_rping.data(), partial_rping.size()); + ASSERT_EQ(written, static_cast(partial_rping.size())); + + // Read should return the partial data as-is. + Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, 3); + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer.toString(), partial_rping); + + close(fds[1]); + } + + // Non-RPING data should disable echo permanently. 
+ { + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key2"); + + const std::string http_data = "GET /path"; + + // Write HTTP data. + ssize_t written = write(fds[1], http_data.data(), http_data.size()); + ASSERT_EQ(written, static_cast(http_data.size())); + + // Read should return HTTP data and disable echo. + Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, http_data.size()); + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer.toString(), http_data); + + // Verify no echo was sent. + char echo_buffer[10]; + int flags = fcntl(fds[1], F_GETFL, 0); + fcntl(fds[1], F_SETFL, flags | O_NONBLOCK); + ssize_t echo_read = read(fds[1], echo_buffer, sizeof(echo_buffer)); + EXPECT_EQ(echo_read, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fds[1]); + } +} + +// Test read() method in scenarios where echo is disabled. +TEST_F(DownstreamReverseConnectionIOHandleTest, ReadEchoDisabledAndErrorHandling) { + const std::string rping_msg = std::string(ReverseConnectionUtility::PING_MESSAGE); + + // After echo is disabled, RPING should pass through without processing. 
+ { + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key"); + + // First, disable echo by sending HTTP data. + const std::string http_data = "HTTP/1.1"; + ssize_t written = write(fds[1], http_data.data(), http_data.size()); + ASSERT_EQ(written, static_cast(http_data.size())); + + Buffer::OwnedImpl buffer1; + handle->read(buffer1, absl::nullopt); + EXPECT_EQ(buffer1.toString(), http_data); + + // Now send RPING - it should pass through without echo. + written = write(fds[1], rping_msg.data(), rping_msg.size()); + ASSERT_EQ(written, static_cast(rping_msg.size())); + + Buffer::OwnedImpl buffer2; + auto result = handle->read(buffer2, absl::nullopt); + + EXPECT_EQ(result.return_value_, rping_msg.size()); + EXPECT_EQ(result.err_, nullptr); + EXPECT_EQ(buffer2.toString(), rping_msg); // RPING data preserved + + // Verify no echo was sent. + char echo_buffer[10]; + int flags = fcntl(fds[1], F_GETFL, 0); + fcntl(fds[1], F_SETFL, flags | O_NONBLOCK); + ssize_t echo_read = read(fds[1], echo_buffer, sizeof(echo_buffer)); + EXPECT_EQ(echo_read, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fds[1]); + } + + // Test EOF scenario by closing the write end. 
+ { + int fds[2]; + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fds), 0); + + auto mock_socket = std::make_unique>(); + auto mock_io_handle = std::make_unique(fds[0]); + EXPECT_CALL(*mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle)); + + auto* io_handle_ptr = mock_io_handle.release(); + mock_socket->io_handle_.reset(io_handle_ptr); + + auto socket_ptr = Network::ConnectionSocketPtr(mock_socket.release()); + auto handle = std::make_unique( + std::move(socket_ptr), io_handle_.get(), "test_key2"); + + // Close write end to simulate EOF. + close(fds[1]); + + Buffer::OwnedImpl buffer; + auto result = handle->read(buffer, absl::nullopt); + + EXPECT_EQ(result.return_value_, 0); // EOF + EXPECT_EQ(result.err_, nullptr); // No error, just EOF + EXPECT_EQ(buffer.length(), 0); + } +} + +} // namespace ReverseConnection +} // namespace Bootstrap +} // namespace Extensions +} // namespace Envoy diff --git a/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle_test.cc b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle_test.cc index ac9dc2aacd62a..94419077a73bd 100644 --- a/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle_test.cc +++ b/test/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle_test.cc @@ -2876,147 +2876,6 @@ TEST_F(ReverseConnectionIOHandleTest, AcceptMethodSocketAndFdFailures) { } } -/** - * Test class for DownstreamReverseConnectionIOHandle. - */ -class DownstreamReverseConnectionIOHandleTest : public ReverseConnectionIOHandleTest { -protected: - void SetUp() override { - ReverseConnectionIOHandleTest::SetUp(); - - // Initialize io_handle_ for testing. - auto config = createDefaultTestConfig(); - io_handle_ = createTestIOHandle(config); - EXPECT_NE(io_handle_, nullptr); - - // Create a mock socket for testing. 
- mock_socket_ = std::make_unique>(); - mock_io_handle_ = std::make_unique>(); - - // Set up basic mock expectations. - EXPECT_CALL(*mock_io_handle_, fdDoNotUse()).WillRepeatedly(Return(42)); // Arbitrary FD - EXPECT_CALL(*mock_socket_, ioHandle()).WillRepeatedly(ReturnRef(*mock_io_handle_)); - - // Store the mock_io_handle in the socket. - mock_socket_->io_handle_ = std::move(mock_io_handle_); - } - - void TearDown() override { - mock_socket_.reset(); - ReverseConnectionIOHandleTest::TearDown(); - } - - // Helper to create a DownstreamReverseConnectionIOHandle. - std::unique_ptr - createHandle(ReverseConnectionIOHandle* parent = nullptr, - const std::string& connection_key = "test_connection_key") { - // Create a new mock socket for each handle to avoid releasing the shared one. - auto new_mock_socket = std::make_unique>(); - auto new_mock_io_handle = std::make_unique>(); - - // Set up basic mock expectations. - EXPECT_CALL(*new_mock_io_handle, fdDoNotUse()).WillRepeatedly(Return(42)); // Arbitrary FD - EXPECT_CALL(*new_mock_socket, ioHandle()).WillRepeatedly(ReturnRef(*new_mock_io_handle)); - - // Store the mock_io_handle in the socket. - new_mock_socket->io_handle_ = std::move(new_mock_io_handle); - - auto socket_ptr = std::unique_ptr(new_mock_socket.release()); - return std::make_unique(std::move(socket_ptr), parent, - connection_key); - } - - // Test fixtures. - std::unique_ptr> mock_socket_; - std::unique_ptr> mock_io_handle_; -}; - -// Test constructor and destructor. -TEST_F(DownstreamReverseConnectionIOHandleTest, Setup) { - // Test constructor with parent. - { - auto handle = createHandle(io_handle_.get(), "test_key_1"); - EXPECT_NE(handle, nullptr); - // Test fdDoNotUse() before any other operations. - EXPECT_EQ(handle->fdDoNotUse(), 42); - } // Destructor called here - - // Test constructor without parent. - { - auto handle = createHandle(nullptr, "test_key_2"); - EXPECT_NE(handle, nullptr); - // Test fdDoNotUse() before any other operations. 
- EXPECT_EQ(handle->fdDoNotUse(), 42); - } // Destructor called here -} - -// Test close() method and all edge cases. -TEST_F(DownstreamReverseConnectionIOHandleTest, CloseMethod) { - // Test with parent - should notify parent and reset socket. - { - auto handle = createHandle(io_handle_.get(), "test_key"); - - // Verify that parent is set correctly. - EXPECT_NE(io_handle_.get(), nullptr); - - // First close - should notify parent and reset owned_socket. - auto result1 = handle->close(); - EXPECT_EQ(result1.err_, nullptr); - - // Second close - should return immediately without notifying parent (fd < 0). - auto result2 = handle->close(); - EXPECT_EQ(result2.err_, nullptr); - } -} - -// Test getSocket() method. -TEST_F(DownstreamReverseConnectionIOHandleTest, GetSocket) { - auto handle = createHandle(io_handle_.get(), "test_key"); - - // Test getSocket() returns the owned socket. - const auto& socket = handle->getSocket(); - EXPECT_NE(&socket, nullptr); - - // Test getSocket() works on const object. - const auto const_handle = createHandle(io_handle_.get(), "test_key"); - const auto& const_socket = const_handle->getSocket(); - EXPECT_NE(&const_socket, nullptr); - - // Test that getSocket() works before close() is called. - EXPECT_EQ(handle->fdDoNotUse(), 42); -} - -// Test ignoreCloseAndShutdown() functionality. 
-TEST_F(DownstreamReverseConnectionIOHandleTest, IgnoreCloseAndShutdown) { - auto handle = createHandle(io_handle_.get(), "test_key"); - - // Initially, close and shutdown should work normally - // Test shutdown before ignoring - we don't check the result since it depends on base - // implementation - handle->shutdown(SHUT_RDWR); - - // Now enable ignore mode - handle->ignoreCloseAndShutdown(); - - // Test that close() is ignored when flag is set - auto close_result = handle->close(); - EXPECT_EQ(close_result.err_, nullptr); // Should return success but do nothing - - // Test that shutdown() is ignored when flag is set - auto shutdown_result2 = handle->shutdown(SHUT_RDWR); - EXPECT_EQ(shutdown_result2.return_value_, 0); - EXPECT_EQ(shutdown_result2.errno_, 0); - - // Test different shutdown modes are all ignored - auto shutdown_rd = handle->shutdown(SHUT_RD); - EXPECT_EQ(shutdown_rd.return_value_, 0); - EXPECT_EQ(shutdown_rd.errno_, 0); - - auto shutdown_wr = handle->shutdown(SHUT_WR); - EXPECT_EQ(shutdown_wr.return_value_, 0); - EXPECT_EQ(shutdown_wr.errno_, 0); -} - } // namespace ReverseConnection } // namespace Bootstrap } // namespace Extensions diff --git a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/BUILD b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/BUILD index 607f4e8222b0f..c2db86a5312b1 100644 --- a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/BUILD +++ b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/BUILD @@ -38,6 +38,7 @@ envoy_cc_test( "//test/mocks/server:factory_context_mocks", "//test/mocks/stats:stats_mocks", "//test/mocks/thread_local:thread_local_mocks", + "//test/test_common:logging_lib", "@envoy_api//envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3:pkg_cc_proto", ], ) diff --git a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension_test.cc 
b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension_test.cc index ee8ade1e45a77..41bba716429b5 100644 --- a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension_test.cc +++ b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension_test.cc @@ -1,13 +1,16 @@ #include "envoy/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/v3/upstream_reverse_connection_socket_interface.pb.h" +#include "source/common/network/utility.h" #include "source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor.h" #include "source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/reverse_tunnel_acceptor_extension.h" #include "source/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager.h" #include "test/mocks/event/mocks.h" +#include "test/mocks/network/mocks.h" #include "test/mocks/server/factory_context.h" #include "test/mocks/stats/mocks.h" #include "test/mocks/thread_local/mocks.h" +#include "test/test_common/logging.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -73,6 +76,9 @@ class ReverseTunnelAcceptorExtensionTest : public testing::Test { NiceMock another_dispatcher_{"worker_1"}; std::shared_ptr another_thread_local_registry_; + + // Set log level to debug for this test class. + LogLevelSetter log_level_setter_ = LogLevelSetter(spdlog::level::debug); }; TEST_F(ReverseTunnelAcceptorExtensionTest, InitializeWithDefaultStatPrefix) { @@ -328,6 +334,102 @@ TEST_F(ReverseTunnelAcceptorExtensionTest, CreateEmptyConfigProto) { EXPECT_NE(typed_proto, nullptr); } +TEST_F(ReverseTunnelAcceptorExtensionTest, MissThresholdOneMarksDeadOnFirstInvalidPing) { + // Recreate extension_ with threshold = 1. 
+ envoy::extensions::bootstrap::reverse_tunnel::upstream_socket_interface::v3:: + UpstreamReverseConnectionSocketInterface cfg; + cfg.set_stat_prefix("test_prefix"); + cfg.mutable_ping_failure_threshold()->set_value(1); + extension_.reset(new ReverseTunnelAcceptorExtension(*socket_interface_, context_, cfg)); + + // Provide dispatcher to thread local and set expectations for timers/file events. + thread_local_.setDispatcher(&dispatcher_); + EXPECT_CALL(dispatcher_, createTimer_(_)) + .WillRepeatedly(testing::ReturnNew>()); + EXPECT_CALL(dispatcher_, createFileEvent_(_, _, _, _)) + .WillRepeatedly(testing::ReturnNew>()); + + // Use helper to install TLS registry for the (recreated) extension_. + setupThreadLocalSlot(); + + // Get the registry and socket manager back through the API and apply threshold. + auto* registry = extension_->getLocalRegistry(); + ASSERT_NE(registry, nullptr); + auto* socket_manager = registry->socketManager(); + ASSERT_NE(socket_manager, nullptr); + socket_manager->setMissThreshold(extension_->pingFailureThreshold()); + + // Create a mock socket with FD and addresses. + auto socket = std::make_unique>(); + auto io_handle = std::make_unique>(); + EXPECT_CALL(*io_handle, fdDoNotUse()).WillRepeatedly(testing::Return(123)); + EXPECT_CALL(*socket, ioHandle()).WillRepeatedly(testing::ReturnRef(*io_handle)); + socket->io_handle_ = std::move(io_handle); + + auto local_address = Network::Utility::parseInternetAddressNoThrow("127.0.0.1", 10000); + auto remote_address = Network::Utility::parseInternetAddressNoThrow("127.0.0.1", 10001); + socket->connection_info_provider_->setLocalAddress(local_address); + socket->connection_info_provider_->setRemoteAddress(remote_address); + + const std::string node_id = "n1"; + const std::string cluster_id = "c1"; + socket_manager->addConnectionSocket(node_id, cluster_id, std::move(socket), + std::chrono::seconds(30), false); + + // Simulate an invalid ping response (not RPING). 
With threshold=1, one miss should kill it. + NiceMock mock_read_handle; + EXPECT_CALL(mock_read_handle, fdDoNotUse()).WillRepeatedly(testing::Return(123)); + EXPECT_CALL(mock_read_handle, read(testing::_, testing::_)) + .WillOnce(testing::Invoke([](Buffer::Instance& buffer, absl::optional) { + buffer.add("XXXXX"); // 5 bytes, not RPING + return Api::IoCallUint64Result{5, Api::IoError::none()}; + })); + + socket_manager->onPingResponse(mock_read_handle); + + // With threshold=1, the socket should be marked dead immediately. + auto retrieved = socket_manager->getConnectionSocket(node_id); + EXPECT_EQ(retrieved, nullptr); +} + +TEST_F(ReverseTunnelAcceptorExtensionTest, PingFailureThresholdConfiguration) { + // Test default threshold value + EXPECT_EQ(extension_->pingFailureThreshold(), 3); // Default threshold should be 3. + + // Create extension with custom threshold = 5 + envoy::extensions::bootstrap::reverse_tunnel::upstream_socket_interface::v3:: + UpstreamReverseConnectionSocketInterface custom_config; + custom_config.set_stat_prefix("test_custom"); + custom_config.mutable_ping_failure_threshold()->set_value(5); + + auto custom_extension = + std::make_unique(*socket_interface_, context_, custom_config); + + EXPECT_EQ(custom_extension->pingFailureThreshold(), 5); + + // Test threshold = 1 (minimum value) + envoy::extensions::bootstrap::reverse_tunnel::upstream_socket_interface::v3:: + UpstreamReverseConnectionSocketInterface min_config; + min_config.set_stat_prefix("test_min"); + min_config.mutable_ping_failure_threshold()->set_value(1); + + auto min_extension = + std::make_unique(*socket_interface_, context_, min_config); + + EXPECT_EQ(min_extension->pingFailureThreshold(), 1); + + // Test very high threshold + envoy::extensions::bootstrap::reverse_tunnel::upstream_socket_interface::v3:: + UpstreamReverseConnectionSocketInterface max_config; + max_config.set_stat_prefix("test_max"); + max_config.mutable_ping_failure_threshold()->set_value(100); + + auto 
max_extension = + std::make_unique(*socket_interface_, context_, max_config); + + EXPECT_EQ(max_extension->pingFailureThreshold(), 100); +} + TEST_F(ReverseTunnelAcceptorExtensionTest, FactoryName) { EXPECT_EQ(socket_interface_->name(), "envoy.bootstrap.reverse_tunnel.upstream_socket_interface"); } diff --git a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager_test.cc b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager_test.cc index 83dd07f61012a..8f37d69e50e04 100644 --- a/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager_test.cc +++ b/test/extensions/bootstrap/reverse_tunnel/upstream_socket_interface/upstream_socket_manager_test.cc @@ -715,8 +715,13 @@ TEST_F(TestUpstreamSocketManager, OnPingResponseInvalidData) { return Api::IoCallUint64Result{invalid_response.size(), Api::IoError::none()}; }); + // First invalid response should increment miss count but not immediately remove the fd. socket_manager_->onPingResponse(*mock_io_handle); + EXPECT_TRUE(verifyFDToNodeMap(123)); + // Simulate two more timeouts to cross the default threshold (3). + socket_manager_->onPingTimeout(123); + socket_manager_->onPingTimeout(123); EXPECT_FALSE(verifyFDToNodeMap(123)); }