From f3cf7c3c75a2dd80b5e852b42efb6dc41e0d073a Mon Sep 17 00:00:00 2001 From: Vindhya Ningegowda Date: Thu, 12 Sep 2024 15:40:20 -0700 Subject: [PATCH] xds: Add xDS node ID in few control plane errors (#11519) --- .../java/io/grpc/xds/CdsLoadBalancer2.java | 18 +++++--- .../java/io/grpc/xds/XdsNameResolver.java | 4 +- .../java/io/grpc/xds/XdsServerWrapper.java | 15 +++++-- .../io/grpc/xds/CdsLoadBalancer2Test.java | 44 ++++++++++++++----- 4 files changed, 59 insertions(+), 22 deletions(-) diff --git a/xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java b/xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java index 773fdf20563..3f1eb3e7e4f 100644 --- a/xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java +++ b/xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java @@ -206,8 +206,9 @@ private void handleClusterDiscovered() { } loopStatus = Status.UNAVAILABLE.withDescription(String.format( "CDS error: circular aggregate clusters directly under %s for " - + "root cluster %s, named %s", - clusterState.name, root.name, namesCausingLoops)); + + "root cluster %s, named %s, xDS node ID: %s", + clusterState.name, root.name, namesCausingLoops, + xdsClient.getBootstrapInfo().node().getId())); } } } @@ -224,9 +225,9 @@ private void handleClusterDiscovered() { childLb.shutdown(); childLb = null; } - Status unavailable = - Status.UNAVAILABLE.withDescription("CDS error: found 0 leaf (logical DNS or EDS) " - + "clusters for root cluster " + root.name); + Status unavailable = Status.UNAVAILABLE.withDescription(String.format( + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster %s" + + " xDS node ID: %s", root.name, xdsClient.getBootstrapInfo().node().getId())); helper.updateBalancingState( TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(unavailable))); return; @@ -288,11 +289,14 @@ private void addAncestors(Set ancestors, ClusterState clusterState, } private void handleClusterDiscoveryError(Status error) { + String description = error.getDescription() == null ? "" : error.getDescription() + " "; + Status errorWithNodeId = error.withDescription( + description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId()); if (childLb != null) { - childLb.handleNameResolutionError(error); + childLb.handleNameResolutionError(errorWithNodeId); } else { helper.updateBalancingState( - TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(error))); + TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(errorWithNodeId))); } } diff --git a/xds/src/main/java/io/grpc/xds/XdsNameResolver.java b/xds/src/main/java/io/grpc/xds/XdsNameResolver.java index f0329387fc9..ca73b7d8451 100644 --- a/xds/src/main/java/io/grpc/xds/XdsNameResolver.java +++ b/xds/src/main/java/io/grpc/xds/XdsNameResolver.java @@ -815,10 +815,12 @@ private void cleanUpRoutes(String error) { // the config selector handles the error message itself. Once the LB API allows providing // failure information for addresses yet still providing a service config, the config seector // could be avoided. + String errorWithNodeId = + error + ", xDS node ID: " + xdsClient.getBootstrapInfo().node().getId(); listener.onResult(ResolutionResult.newBuilder() .setAttributes(Attributes.newBuilder() .set(InternalConfigSelector.KEY, - new FailingConfigSelector(Status.UNAVAILABLE.withDescription(error))) + new FailingConfigSelector(Status.UNAVAILABLE.withDescription(errorWithNodeId))) .build()) .setServiceConfig(emptyServiceConfig) .build()); diff --git a/xds/src/main/java/io/grpc/xds/XdsServerWrapper.java b/xds/src/main/java/io/grpc/xds/XdsServerWrapper.java index dfb7c4fb7db..bd622a71124 100644 --- a/xds/src/main/java/io/grpc/xds/XdsServerWrapper.java +++ b/xds/src/main/java/io/grpc/xds/XdsServerWrapper.java @@ -425,7 +425,8 @@ public void onResourceDoesNotExist(final String resourceName) { return; } StatusException statusException = Status.UNAVAILABLE.withDescription( - "Listener " + resourceName + " unavailable").asException(); + String.format("Listener %s unavailable, xDS node ID: %s", resourceName, + xdsClient.getBootstrapInfo().node().getId())).asException(); handleConfigNotFound(statusException); } @@ -434,9 +435,12 @@ public void onError(final Status error) { if (stopped) { return; } - logger.log(Level.FINE, "Error from XdsClient", error); + String description = error.getDescription() == null ? "" : error.getDescription() + " "; + Status errorWithNodeId = error.withDescription( + description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId()); + logger.log(Level.FINE, "Error from XdsClient", errorWithNodeId); if (!isServing) { - listener.onNotServing(error.asException()); + listener.onNotServing(errorWithNodeId.asException()); } } @@ -664,8 +668,11 @@ public void run() { if (!routeDiscoveryStates.containsKey(resourceName)) { return; } + String description = error.getDescription() == null ? "" : error.getDescription() + " "; + Status errorWithNodeId = error.withDescription( + description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId()); logger.log(Level.WARNING, "Error loading RDS resource {0} from XdsClient: {1}.", - new Object[]{resourceName, error}); + new Object[]{resourceName, errorWithNodeId}); maybeUpdateSelector(); } }); diff --git a/xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java b/xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java index 0884587cd95..da32332a2a5 100644 --- a/xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java +++ b/xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java @@ -58,7 +58,9 @@ import io.grpc.xds.LeastRequestLoadBalancer.LeastRequestConfig; import io.grpc.xds.RingHashLoadBalancer.RingHashConfig; import io.grpc.xds.XdsClusterResource.CdsUpdate; +import io.grpc.xds.client.Bootstrapper.BootstrapInfo; import io.grpc.xds.client.Bootstrapper.ServerInfo; +import io.grpc.xds.client.EnvoyProtoData; import io.grpc.xds.client.XdsClient; import io.grpc.xds.client.XdsResourceType; import io.grpc.xds.internal.security.CommonTlsContextTestsUtil; @@ -94,6 +96,16 @@ public class CdsLoadBalancer2Test { private static final String DNS_HOST_NAME = "backend-service-dns.googleapis.com:443"; private static final ServerInfo LRS_SERVER_INFO = ServerInfo.create("lrs.googleapis.com", InsecureChannelCredentials.create()); + private static final String SERVER_URI = "trafficdirector.googleapis.com"; + private static final String NODE_ID = + "projects/42/networks/default/nodes/5c85b298-6f5b-4722-b74a-f7d1f0ccf5ad"; + private static final EnvoyProtoData.Node BOOTSTRAP_NODE = + EnvoyProtoData.Node.newBuilder().setId(NODE_ID).build(); + private static final BootstrapInfo BOOTSTRAP_INFO = BootstrapInfo.builder() + .servers(ImmutableList.of( + ServerInfo.create(SERVER_URI, InsecureChannelCredentials.create()))) + .node(BOOTSTRAP_NODE) + .build(); private final UpstreamTlsContext upstreamTlsContext = CommonTlsContextTestsUtil.buildUpstreamTlsContext("google_cloud_private_spiffe", true); private final OutlierDetection outlierDetection = OutlierDetection.create( @@ -211,7 +223,8 @@ public void nonAggregateCluster_resourceNotExist_returnErrorPicker() { verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); assertThat(childBalancers).isEmpty(); } @@ -254,7 +267,8 @@ public void nonAggregateCluster_resourceRevoked() { xdsClient.deliverResourceNotExist(CLUSTER); assertThat(childBalancer.shutdown).isTrue(); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); assertPicker(pickerCaptor.getValue(), unavailable, null); @@ -331,7 +345,8 @@ public void aggregateCluster_noNonAggregateClusterExits_returnErrorPicker() { verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); assertThat(childBalancers).isEmpty(); } @@ -379,7 +394,8 @@ public void aggregateCluster_descendantClustersRevoked() { verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); assertThat(childBalancer.shutdown).isTrue(); assertThat(childBalancers).isEmpty(); @@ -418,7 +434,8 @@ public void aggregateCluster_rootClusterRevoked() { verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); assertThat(childBalancer.shutdown).isTrue(); assertThat(childBalancers).isEmpty(); @@ -466,7 +483,8 @@ public void aggregateCluster_intermediateClusterChanges() { verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status unavailable = Status.UNAVAILABLE.withDescription( - "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER); + "CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER + + " xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); assertThat(childBalancer.shutdown).isTrue(); assertThat(childBalancers).isEmpty(); @@ -507,7 +525,7 @@ public void aggregateCluster_withLoops() { Status unavailable = Status.UNAVAILABLE.withDescription( "CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root" + " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com," - + " cluster-02.googleapis.com]"); + + " cluster-02.googleapis.com], xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); } @@ -549,7 +567,7 @@ public void aggregateCluster_withLoops_afterEds() { Status unavailable = Status.UNAVAILABLE.withDescription( "CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root" + " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com," - + " cluster-02.googleapis.com]"); + + " cluster-02.googleapis.com], xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), unavailable, null); } @@ -617,7 +635,7 @@ public void aggregateCluster_discoveryErrorBeforeChildLbCreated_returnErrorPicke eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); Status expectedError = Status.UNAVAILABLE.withDescription( "Unable to load CDS cluster-foo.googleapis.com. xDS server returned: " - + "RESOURCE_EXHAUSTED: OOM"); + + "RESOURCE_EXHAUSTED: OOM xDS node ID: " + NODE_ID); assertPicker(pickerCaptor.getValue(), expectedError, null); assertThat(childBalancers).isEmpty(); } @@ -647,7 +665,8 @@ public void aggregateCluster_discoveryErrorAfterChildLbCreated_propagateToChildL @Test public void handleNameResolutionErrorFromUpstream_beforeChildLbCreated_returnErrorPicker() { - Status upstreamError = Status.UNAVAILABLE.withDescription("unreachable"); + Status upstreamError = Status.UNAVAILABLE.withDescription( + "unreachable xDS node ID: " + NODE_ID); loadBalancer.handleNameResolutionError(upstreamError); verify(helper).updateBalancingState( eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture()); @@ -821,6 +840,11 @@ public void cancelXdsResourceWatch(XdsResourceType } } + @Override + public BootstrapInfo getBootstrapInfo() { + return BOOTSTRAP_INFO; + } + private void deliverCdsUpdate(String clusterName, CdsUpdate update) { if (watchers.containsKey(clusterName)) { List> resourceWatchers =