Skip to content

Commit

Permalink
Give more control to cancel build in case of a backoff event. (#1584)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vlatombe authored Jul 2, 2024
1 parent 70018c4 commit 7700d91
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.csanchez.jenkins.plugins.kubernetes.pod.retention;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import edu.umd.cs.findbugs.annotations.CheckForNull;
Expand Down Expand Up @@ -60,6 +61,7 @@
import java.util.logging.Logger;
import jenkins.model.Jenkins;
import jenkins.util.Listeners;
import jenkins.util.SystemProperties;
import jenkins.util.Timer;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesClientProvider;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
Expand Down Expand Up @@ -574,6 +576,18 @@ private static void disconnectComputer(KubernetesSlave node, OfflineCause cause)
@Extension
public static class TerminateAgentOnImagePullBackOff implements Listener {

/**
 * Number of image pull backoff events counted for a pod at which its agent is terminated.
 * Read once from the system property {@code <Reaper class name>.backoffEventsLimit}, defaulting to 3.
 * While the per-pod count is below this value the listener only logs that it is waiting.
 * Intentionally non-final so that tests or the Groovy console can change it at runtime.
 */
@SuppressFBWarnings(
value = "MS_SHOULD_BE_FINAL",
justification = "Allow tests or groovy console to change the value")
public static long BACKOFF_EVENTS_LIMIT =
SystemProperties.getInteger(Reaper.class.getName() + ".backoffEventsLimit", 3);

public static final String IMAGE_PULL_BACK_OFF = "ImagePullBackOff";

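// For each pod with at least 1 backoff, keep a count of its backoff events, keyed by pod UID.
// An entry expires 15 minutes after it was last written, i.e. after the pod's most recent backoff event.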
private Cache<String, Integer> ttlCache =
Caffeine.newBuilder().expireAfterWrite(15, TimeUnit.MINUTES).build();

@Override
public void onEvent(
@NonNull Watcher.Action action,
Expand All @@ -594,19 +608,28 @@ public void onEvent(

if (!backOffContainers.isEmpty()) {
List<String> images = new ArrayList<>();
backOffContainers.forEach(cs -> {
images.add(cs.getImage());
backOffContainers.forEach(cs -> images.add(cs.getImage()));
var podUid = pod.getMetadata().getUid();
var backOffNumber = ttlCache.get(podUid, k -> 0);
ttlCache.put(podUid, ++backOffNumber);
if (backOffNumber >= BACKOFF_EVENTS_LIMIT) {
var imagesString = String.join(",", images);
node.getRunListener()
.error("Unable to pull Docker image \"" + cs.getImage()
.error("Unable to pull container image \"" + imagesString
+ "\". Check if image tag name is spelled correctly.");
});

terminationReasons.add("ImagePullBackOff");
PodUtils.cancelQueueItemFor(pod, "ImagePullBackOff");
node.terminate();
disconnectComputer(
node,
new PodOfflineCause(Messages._PodOfflineCause_ImagePullBackoff("ImagePullBackOff", images)));
terminationReasons.add(IMAGE_PULL_BACK_OFF);
PodUtils.cancelQueueItemFor(pod, IMAGE_PULL_BACK_OFF);
node.terminate();
disconnectComputer(
node,
new PodOfflineCause(
Messages._PodOfflineCause_ImagePullBackoff(IMAGE_PULL_BACK_OFF, images)));
} else {
node.getRunListener()
.error("Image pull backoff detected, waiting for image to be available. Will wait for "
+ (BACKOFF_EVENTS_LIMIT - backOffNumber)
+ " more events before terminating the node.");
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public void declarativeWithNestedExplicitInheritance() throws Exception {
public void declarativeWithNonexistentDockerImage() throws Exception {
assertNotNull(createJobThenScheduleRun());
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
r.assertLogContains("ERROR: Unable to pull Docker image", b);
r.assertLogContains("ERROR: Unable to pull container image", b);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils;
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecorator;
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper;
import org.hamcrest.MatcherAssert;
import org.htmlunit.html.DomNodeUtil;
import org.htmlunit.html.HtmlElement;
Expand Down Expand Up @@ -847,7 +848,9 @@ public void invalidPodGetsCancelled() throws Exception {

/**
 * Verifies that a build whose agent pod references an image that cannot be pulled is aborted,
 * and that the "waiting" message is logged before the backoff limit is reached.
 */
@Test
public void invalidImageGetsCancelled() throws Exception {
    // BACKOFF_EVENTS_LIMIT is a mutable static shared by the whole JVM: remember the current value
    // and restore it afterwards so the lowered limit does not leak into other tests.
    long previousLimit = Reaper.TerminateAgentOnImagePullBackOff.BACKOFF_EVENTS_LIMIT;
    Reaper.TerminateAgentOnImagePullBackOff.BACKOFF_EVENTS_LIMIT = 2;
    try {
        r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
        // With a limit of 2, the first backoff event only logs the waiting message;
        // the second one terminates the agent and cancels the queue item.
        r.assertLogContains("Image pull backoff detected, waiting for image to be available.", b);
        r.assertLogContains("Queue task was cancelled", b);
    } finally {
        Reaper.TerminateAgentOnImagePullBackOff.BACKOFF_EVENTS_LIMIT = previousLimit;
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import jenkins.model.Jenkins;
Expand Down Expand Up @@ -605,6 +606,7 @@ public void testTerminateAgentOnImagePullBackoff() throws IOException, Interrupt
KubernetesCloud cloud = addCloud("k8s", "foo");
KubernetesSlave node = addNode(cloud, "node-123", "node");
Pod node123 = withContainerImagePullBackoff(createPod(node));
Reaper.TerminateAgentOnImagePullBackOff.BACKOFF_EVENTS_LIMIT = 2;

String watchPodsPath = "/api/v1/namespaces/foo/pods?allowWatchBookmarks=true&watch=true";
server.expect()
Expand All @@ -615,6 +617,10 @@ public void testTerminateAgentOnImagePullBackoff() throws IOException, Interrupt
.andEmit(new WatchEvent(node123, "MODIFIED"))
.waitFor(EVENT_WAIT_PERIOD_MS)
.andEmit(new WatchEvent(node123, "BOOKMARK"))
.waitFor(EVENT_WAIT_PERIOD_MS)
.andEmit(new WatchEvent(node123, "MODIFIED"))
.waitFor(EVENT_WAIT_PERIOD_MS)
.andEmit(new WatchEvent(node123, "BOOKMARK"))
.done()
.always();
// don't remove pod on activate
Expand Down Expand Up @@ -680,6 +686,7 @@ private Pod createPod(KubernetesSlave node) {
.withNewMetadata()
.withName(node.getPodName())
.withNamespace(node.getNamespace())
.withUid(UUID.randomUUID().toString())
.endMetadata()
.withNewSpec()
.endSpec()
Expand Down

0 comments on commit 7700d91

Please sign in to comment.