Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xds: Add xDS node ID in few control plane errors #11519

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ private void handleClusterDiscovered() {
}
loopStatus = Status.UNAVAILABLE.withDescription(String.format(
"CDS error: circular aggregate clusters directly under %s for "
+ "root cluster %s, named %s",
clusterState.name, root.name, namesCausingLoops));
+ "root cluster %s, named %s, xDS node ID: %s",
clusterState.name, root.name, namesCausingLoops,
xdsClient.getBootstrapInfo().node().getId()));
}
}
}
Expand All @@ -224,9 +225,9 @@ private void handleClusterDiscovered() {
childLb.shutdown();
childLb = null;
}
Status unavailable =
Status.UNAVAILABLE.withDescription("CDS error: found 0 leaf (logical DNS or EDS) "
+ "clusters for root cluster " + root.name);
Status unavailable = Status.UNAVAILABLE.withDescription(String.format(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster %s"
+ " xDS node ID: %s", root.name, xdsClient.getBootstrapInfo().node().getId()));
helper.updateBalancingState(
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(unavailable)));
return;
Expand Down Expand Up @@ -288,11 +289,14 @@ private void addAncestors(Set<String> ancestors, ClusterState clusterState,
}

private void handleClusterDiscoveryError(Status error) {
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will have two spaces before " xDS node ID" if getDescription() != null.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When description == null, then there will be a one space indent before "xDS node ID", so I'd leave the extra space here and remove it from the string building.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As Larry suggested, removed extra space from string building in L294.

Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
if (childLb != null) {
childLb.handleNameResolutionError(error);
childLb.handleNameResolutionError(errorWithNodeId);
} else {
helper.updateBalancingState(
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(error)));
TRANSIENT_FAILURE, new FixedResultPicker(PickResult.withError(errorWithNodeId)));
}
}

Expand Down
4 changes: 3 additions & 1 deletion xds/src/main/java/io/grpc/xds/XdsNameResolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,12 @@ private void cleanUpRoutes(String error) {
// the config selector handles the error message itself. Once the LB API allows providing
// failure information for addresses yet still providing a service config, the config seector
// could be avoided.
String errorWithNodeId =
error + ", xDS node ID: " + xdsClient.getBootstrapInfo().node().getId();
listener.onResult(ResolutionResult.newBuilder()
.setAttributes(Attributes.newBuilder()
.set(InternalConfigSelector.KEY,
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(error)))
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(errorWithNodeId)))
.build())
.setServiceConfig(emptyServiceConfig)
.build());
Expand Down
15 changes: 11 additions & 4 deletions xds/src/main/java/io/grpc/xds/XdsServerWrapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ public void onResourceDoesNotExist(final String resourceName) {
return;
}
StatusException statusException = Status.UNAVAILABLE.withDescription(
"Listener " + resourceName + " unavailable").asException();
String.format("Listener %s unavailable, xDS node ID: %s", resourceName,
xdsClient.getBootstrapInfo().node().getId())).asException();
handleConfigNotFound(statusException);
}

Expand All @@ -434,9 +435,12 @@ public void onError(final Status error) {
if (stopped) {
return;
}
logger.log(Level.FINE, "Error from XdsClient", error);
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
logger.log(Level.FINE, "Error from XdsClient", errorWithNodeId);
if (!isServing) {
listener.onNotServing(error.asException());
listener.onNotServing(errorWithNodeId.asException());
}
}

Expand Down Expand Up @@ -664,8 +668,11 @@ public void run() {
if (!routeDiscoveryStates.containsKey(resourceName)) {
return;
}
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
Status errorWithNodeId = error.withDescription(
description + "xDS node ID: " + xdsClient.getBootstrapInfo().node().getId());
logger.log(Level.WARNING, "Error loading RDS resource {0} from XdsClient: {1}.",
new Object[]{resourceName, error});
new Object[]{resourceName, errorWithNodeId});
maybeUpdateSelector();
}
});
Expand Down
44 changes: 34 additions & 10 deletions xds/src/test/java/io/grpc/xds/CdsLoadBalancer2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@
import io.grpc.xds.LeastRequestLoadBalancer.LeastRequestConfig;
import io.grpc.xds.RingHashLoadBalancer.RingHashConfig;
import io.grpc.xds.XdsClusterResource.CdsUpdate;
import io.grpc.xds.client.Bootstrapper.BootstrapInfo;
import io.grpc.xds.client.Bootstrapper.ServerInfo;
import io.grpc.xds.client.EnvoyProtoData;
import io.grpc.xds.client.XdsClient;
import io.grpc.xds.client.XdsResourceType;
import io.grpc.xds.internal.security.CommonTlsContextTestsUtil;
Expand Down Expand Up @@ -94,6 +96,16 @@ public class CdsLoadBalancer2Test {
private static final String DNS_HOST_NAME = "backend-service-dns.googleapis.com:443";
private static final ServerInfo LRS_SERVER_INFO =
ServerInfo.create("lrs.googleapis.com", InsecureChannelCredentials.create());
private static final String SERVER_URI = "trafficdirector.googleapis.com";
private static final String NODE_ID =
"projects/42/networks/default/nodes/5c85b298-6f5b-4722-b74a-f7d1f0ccf5ad";
private static final EnvoyProtoData.Node BOOTSTRAP_NODE =
EnvoyProtoData.Node.newBuilder().setId(NODE_ID).build();
private static final BootstrapInfo BOOTSTRAP_INFO = BootstrapInfo.builder()
.servers(ImmutableList.of(
ServerInfo.create(SERVER_URI, InsecureChannelCredentials.create())))
.node(BOOTSTRAP_NODE)
.build();
private final UpstreamTlsContext upstreamTlsContext =
CommonTlsContextTestsUtil.buildUpstreamTlsContext("google_cloud_private_spiffe", true);
private final OutlierDetection outlierDetection = OutlierDetection.create(
Expand Down Expand Up @@ -211,7 +223,8 @@ public void nonAggregateCluster_resourceNotExist_returnErrorPicker() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -254,7 +267,8 @@ public void nonAggregateCluster_resourceRevoked() {
xdsClient.deliverResourceNotExist(CLUSTER);
assertThat(childBalancer.shutdown).isTrue();
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
assertPicker(pickerCaptor.getValue(), unavailable, null);
Expand Down Expand Up @@ -331,7 +345,8 @@ public void aggregateCluster_noNonAggregateClusterExits_returnErrorPicker() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -379,7 +394,8 @@ public void aggregateCluster_descendantClustersRevoked() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -418,7 +434,8 @@ public void aggregateCluster_rootClusterRevoked() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -466,7 +483,8 @@ public void aggregateCluster_intermediateClusterChanges() {
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER);
"CDS error: found 0 leaf (logical DNS or EDS) clusters for root cluster " + CLUSTER
+ " xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
assertThat(childBalancer.shutdown).isTrue();
assertThat(childBalancers).isEmpty();
Expand Down Expand Up @@ -507,7 +525,7 @@ public void aggregateCluster_withLoops() {
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
+ " cluster-02.googleapis.com]");
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
}

Expand Down Expand Up @@ -549,7 +567,7 @@ public void aggregateCluster_withLoops_afterEds() {
Status unavailable = Status.UNAVAILABLE.withDescription(
"CDS error: circular aggregate clusters directly under cluster-02.googleapis.com for root"
+ " cluster cluster-foo.googleapis.com, named [cluster-01.googleapis.com,"
+ " cluster-02.googleapis.com]");
+ " cluster-02.googleapis.com], xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), unavailable, null);
}

Expand Down Expand Up @@ -617,7 +635,7 @@ public void aggregateCluster_discoveryErrorBeforeChildLbCreated_returnErrorPicke
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Status expectedError = Status.UNAVAILABLE.withDescription(
"Unable to load CDS cluster-foo.googleapis.com. xDS server returned: "
+ "RESOURCE_EXHAUSTED: OOM");
+ "RESOURCE_EXHAUSTED: OOM xDS node ID: " + NODE_ID);
assertPicker(pickerCaptor.getValue(), expectedError, null);
assertThat(childBalancers).isEmpty();
}
Expand Down Expand Up @@ -647,7 +665,8 @@ public void aggregateCluster_discoveryErrorAfterChildLbCreated_propagateToChildL

@Test
public void handleNameResolutionErrorFromUpstream_beforeChildLbCreated_returnErrorPicker() {
Status upstreamError = Status.UNAVAILABLE.withDescription("unreachable");
Status upstreamError = Status.UNAVAILABLE.withDescription(
"unreachable xDS node ID: " + NODE_ID);
loadBalancer.handleNameResolutionError(upstreamError);
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
Expand Down Expand Up @@ -821,6 +840,11 @@ public <T extends ResourceUpdate> void cancelXdsResourceWatch(XdsResourceType<T>
}
}

@Override
public BootstrapInfo getBootstrapInfo() {
return BOOTSTRAP_INFO;
}

private void deliverCdsUpdate(String clusterName, CdsUpdate update) {
if (watchers.containsKey(clusterName)) {
List<ResourceWatcher<CdsUpdate>> resourceWatchers =
Expand Down