diff --git a/.github/workflows/job-test-in-container.yml b/.github/workflows/job-test-in-container.yml index 902f8de5a5b..a74ba80137e 100644 --- a/.github/workflows/job-test-in-container.yml +++ b/.github/workflows/job-test-in-container.yml @@ -153,7 +153,17 @@ jobs: sudo sysctl -w net.ipv4.ip_forward=1 # Enable IPv6 for Docker, and configure docker to use containerd for gha sudo mkdir -p /etc/docker - echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json + echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "ip6tables": true}' | sudo tee /etc/docker/daemon.json + - name: "Init: enable Docker experimental features" + run: | + sudo mkdir -p /etc/docker + if [ -f /etc/docker/daemon.json ]; then + tmpfile="$(sudo mktemp)" + sudo jq '.experimental = true' /etc/docker/daemon.json | sudo tee "$tmpfile" >/dev/null + sudo mv "$tmpfile" /etc/docker/daemon.json + else + echo '{"experimental": true}' | sudo tee /etc/docker/daemon.json >/dev/null + fi sudo systemctl restart docker - name: "Run: integration tests" run: | diff --git a/.github/workflows/job-test-in-host.yml b/.github/workflows/job-test-in-host.yml index 8e3b11bdf13..40e2dc02a66 100644 --- a/.github/workflows/job-test-in-host.yml +++ b/.github/workflows/job-test-in-host.yml @@ -107,9 +107,9 @@ jobs: name: "Init (linux): prepare host" run: | if [ "${{ contains(inputs.binary, 'docker') }}" == true ]; then - echo "::group:: configure cdi for docker" + echo "::group:: configure cdi and experimental for docker" sudo mkdir -p /etc/docker - sudo jq '.features.cdi = true' /etc/docker/daemon.json | sudo tee /etc/docker/daemon.json.tmp && sudo mv /etc/docker/daemon.json.tmp /etc/docker/daemon.json + sudo jq -n '.features.cdi = true | .experimental = true' | sudo tee /etc/docker/daemon.json echo "::endgroup::" echo "::group:: downgrade docker to the specific version we want to test (${{ inputs.docker-version }})" sudo apt-get update -qq @@ 
-122,6 +122,7 @@ jobs: | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null sudo apt-get update -qq sudo apt-get install -qq --allow-downgrades docker-ce=${{ inputs.docker-version }} docker-ce-cli=${{ inputs.docker-version }} + sudo systemctl restart docker echo "::endgroup::" else # FIXME: this is missing runc (see top level workflow note about the state of this) @@ -153,7 +154,8 @@ jobs: # FIXME: remove expect when we are done removing unbuffer from tests echo "::group:: installing test dependencies" - sudo apt-get install -qq expect + sudo add-apt-repository ppa:criu/ppa -y + sudo apt-get install -qq expect criu echo "::endgroup::" # This ensures that bridged traffic goes through netfilter diff --git a/.github/workflows/job-test-unit.yml b/.github/workflows/job-test-unit.yml index 1c7aa9a0069..a7723d88898 100644 --- a/.github/workflows/job-test-unit.yml +++ b/.github/workflows/job-test-unit.yml @@ -68,14 +68,17 @@ jobs: go-version: ${{ env.GO_VERSION }} check-latest: true - # Install CNI + # Install CNI and CRIU - if: ${{ env.GO_VERSION != '' }} - name: "Init: set up CNI" + name: "Init: set up CNI and CRIU" run: | if [ "$RUNNER_OS" == "Windows" ]; then GOPATH=$(go env GOPATH) WINCNI_VERSION=${{ inputs.windows-cni-version }} ./hack/provisioning/windows/cni.sh elif [ "$RUNNER_OS" == "Linux" ]; then ./hack/provisioning/linux/cni.sh install "${{ inputs.linux-cni-version }}" "amd64" "${{ inputs.linux-cni-sha }}" + sudo apt-get update -qq + sudo add-apt-repository ppa:criu/ppa -y + sudo apt-get install -qq criu fi - if: ${{ env.GO_VERSION != '' }} diff --git a/Dockerfile b/Dockerfile index 15868503c24..31c44584821 100644 --- a/Dockerfile +++ b/Dockerfile @@ -309,10 +309,17 @@ ARG DEBIAN_FRONTEND=noninteractive # `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing # `jq` is required to generate test summaries RUN apt-get update -qq && apt-get install -qq --no-install-recommends \ - expect \ - jq \ - git \ - make + 
software-properties-common \ + gnupg \ + gpg-agent \ + ca-certificates && \ + add-apt-repository ppa:criu/ppa && \ + apt-get update -qq && apt-get install -qq --no-install-recommends \ + expect \ + jq \ + git \ + make \ + criu # We wouldn't need this if Docker Hub could have "golang:${GO_VERSION}-ubuntu" COPY --from=build-base /usr/local/go /usr/local/go ARG TARGETARCH diff --git a/cmd/nerdctl/checkpoint/checkpoint.go b/cmd/nerdctl/checkpoint/checkpoint.go new file mode 100644 index 00000000000..10a8c00108f --- /dev/null +++ b/cmd/nerdctl/checkpoint/checkpoint.go @@ -0,0 +1,40 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package checkpoint + +import ( + "github.com/spf13/cobra" + + "github.com/containerd/nerdctl/v2/cmd/nerdctl/helpers" +) + +func Command() *cobra.Command { + cmd := &cobra.Command{ + Annotations: map[string]string{helpers.Category: helpers.Management}, + Use: "checkpoint", + Short: "Manage checkpoints.", + RunE: helpers.UnknownSubcommandAction, + SilenceUsage: true, + SilenceErrors: true, + } + + cmd.AddCommand( + CreateCommand(), + ) + + return cmd +} diff --git a/cmd/nerdctl/checkpoint/checkpoint_create.go b/cmd/nerdctl/checkpoint/checkpoint_create.go new file mode 100644 index 00000000000..540acd44f26 --- /dev/null +++ b/cmd/nerdctl/checkpoint/checkpoint_create.go @@ -0,0 +1,93 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package checkpoint + +import ( + "path/filepath" + + "github.com/spf13/cobra" + + "github.com/containerd/nerdctl/v2/cmd/nerdctl/completion" + "github.com/containerd/nerdctl/v2/cmd/nerdctl/helpers" + "github.com/containerd/nerdctl/v2/pkg/api/types" + "github.com/containerd/nerdctl/v2/pkg/clientutil" + "github.com/containerd/nerdctl/v2/pkg/cmd/checkpoint" +) + +func CreateCommand() *cobra.Command { + var cmd = &cobra.Command{ + Use: "create [OPTIONS] CONTAINER CHECKPOINT", + Short: "Create a checkpoint from a running container", + Args: cobra.ExactArgs(2), + RunE: createAction, + ValidArgsFunction: createShellComplete, + SilenceUsage: true, + SilenceErrors: true, + } + cmd.Flags().Bool("leave-running", false, "Leave the container running after checkpointing") + cmd.Flags().String("checkpoint-dir", "", "Checkpoint directory") + return cmd +} + +func processCreateFlags(cmd *cobra.Command) (types.CheckpointCreateOptions, error) { + globalOptions, err := helpers.ProcessRootCmdFlags(cmd) + if err != nil { + return types.CheckpointCreateOptions{}, err + } + + leaveRunning, err := cmd.Flags().GetBool("leave-running") + if err != nil { + return types.CheckpointCreateOptions{}, err + } + checkpointDir, err := cmd.Flags().GetString("checkpoint-dir") + if err != nil { + return types.CheckpointCreateOptions{}, err + } + if checkpointDir == "" { + checkpointDir = filepath.Join(globalOptions.DataRoot, "checkpoints") + } + + return 
types.CheckpointCreateOptions{ + Stdout: cmd.OutOrStdout(), + GOptions: globalOptions, + LeaveRunning: leaveRunning, + CheckpointDir: checkpointDir, + }, nil +} + +func createAction(cmd *cobra.Command, args []string) error { + createOptions, err := processCreateFlags(cmd) + if err != nil { + return err + } + client, ctx, cancel, err := clientutil.NewClient(cmd.Context(), createOptions.GOptions.Namespace, createOptions.GOptions.Address) + if err != nil { + return err + } + defer cancel() + + err = checkpoint.Create(ctx, client, args[0], args[1], createOptions) + if err != nil { + return err + } + + return nil +} + +func createShellComplete(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + return completion.ImageNames(cmd) +} diff --git a/cmd/nerdctl/checkpoint/checkpoint_create_linux_test.go b/cmd/nerdctl/checkpoint/checkpoint_create_linux_test.go new file mode 100644 index 00000000000..a2dec881b81 --- /dev/null +++ b/cmd/nerdctl/checkpoint/checkpoint_create_linux_test.go @@ -0,0 +1,126 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package checkpoint + +import ( + "errors" + "testing" + + "github.com/containerd/nerdctl/mod/tigron/expect" + "github.com/containerd/nerdctl/mod/tigron/require" + "github.com/containerd/nerdctl/mod/tigron/test" + + "github.com/containerd/nerdctl/v2/pkg/testutil" + "github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest" +) + +func TestCheckpointCreateErrors(t *testing.T) { + testCase := nerdtest.Setup() + + testCase.Require = require.All( + require.Not(nerdtest.Rootless), + // Docker version 28.x has a known regression that breaks Checkpoint/Restore functionality. + // The issue is tracked in the moby/moby project as https://github.com/moby/moby/issues/50750. + require.Not(nerdtest.Docker), + ) + testCase.SubTests = []*test.Case{ + { + Description: "too-few-arguments", + Command: test.Command("checkpoint", "create", "too-few-arguments"), + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 1, + } + }, + }, + { + Description: "too-many-arguments", + Command: test.Command("checkpoint", "create", "too", "many", "arguments"), + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 1, + } + }, + }, + { + Description: "invalid-container-id", + Command: test.Command("checkpoint", "create", "foo", "bar"), + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 1, + Errors: []error{errors.New("error creating checkpoint for container: foo")}, + } + }, + }, + } + + testCase.Run(t) +} + +func TestCheckpointCreate(t *testing.T) { + const ( + checkpointName = "checkpoint-bar" + checkpointDir = "/dir/foo" + ) + testCase := nerdtest.Setup() + testCase.Require = require.All( + require.Not(nerdtest.Rootless), + // Docker version 28.x has a known regression that breaks Checkpoint/Restore functionality. + // The issue is tracked in the moby/moby project as https://github.com/moby/moby/issues/50750. 
+ require.Not(nerdtest.Docker), + ) + testCase.SubTests = []*test.Case{ + { + Description: "leave-running=true", + Setup: func(data test.Data, helpers test.Helpers) { + helpers.Ensure("run", "-d", "--name", data.Identifier("container-running"), testutil.CommonImage, "sleep", "infinity") + }, + Cleanup: func(data test.Data, helpers test.Helpers) { + helpers.Anyhow("rm", "-f", data.Identifier("container-running")) + }, + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("checkpoint", "create", "--leave-running", "--checkpoint-dir", checkpointDir, data.Identifier("container-running"), checkpointName+"running") + }, + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 0, + Output: expect.Equals(checkpointName + "running\n"), + } + }, + }, + { + Description: "leave-running=false", + Setup: func(data test.Data, helpers test.Helpers) { + helpers.Ensure("run", "-d", "--name", data.Identifier("container-exit"), testutil.CommonImage, "sleep", "infinity") + }, + Cleanup: func(data test.Data, helpers test.Helpers) { + helpers.Anyhow("rm", "-f", data.Identifier("container-exit")) + }, + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("checkpoint", "create", "--checkpoint-dir", checkpointDir, data.Identifier("container-exit"), checkpointName+"exit") + }, + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 0, + Output: expect.Equals(checkpointName + "exit\n"), + } + }, + }, + } + + testCase.Run(t) +} diff --git a/cmd/nerdctl/checkpoint/checkpoint_test.go b/cmd/nerdctl/checkpoint/checkpoint_test.go new file mode 100644 index 00000000000..e32a997e219 --- /dev/null +++ b/cmd/nerdctl/checkpoint/checkpoint_test.go @@ -0,0 +1,27 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package checkpoint + +import ( + "testing" + + "github.com/containerd/nerdctl/v2/pkg/testutil" +) + +func TestMain(m *testing.M) { + testutil.M(m) +} diff --git a/cmd/nerdctl/compose/compose_start.go b/cmd/nerdctl/compose/compose_start.go index 88e4cc905de..0d34f04919f 100644 --- a/cmd/nerdctl/compose/compose_start.go +++ b/cmd/nerdctl/compose/compose_start.go @@ -114,7 +114,7 @@ func startContainers(ctx context.Context, client *containerd.Client, containers } // in compose, always disable attach - if err := containerutil.Start(ctx, c, false, false, client, "", (*config.Config)(globalOptions)); err != nil { + if err := containerutil.Start(ctx, c, false, false, client, "", "", (*config.Config)(globalOptions)); err != nil { return err } info, err := c.Info(ctx, containerd.WithoutRefreshedMetadata) diff --git a/cmd/nerdctl/container/container_run.go b/cmd/nerdctl/container/container_run.go index 9b44feb19c8..cd35c735969 100644 --- a/cmd/nerdctl/container/container_run.go +++ b/cmd/nerdctl/container/container_run.go @@ -431,8 +431,18 @@ func runAction(cmd *cobra.Command, args []string) error { } logURI := lab[labels.LogURI] detachC := make(chan struct{}) - task, err := taskutil.NewTask(ctx, client, c, createOpt.Attach, createOpt.Interactive, createOpt.TTY, createOpt.Detach, - con, logURI, createOpt.DetachKeys, createOpt.GOptions.Namespace, detachC) + task, err := taskutil.NewTask(ctx, client, c, taskutil.TaskOptions{ + 
AttachStreamOpt: createOpt.Attach, + IsInteractive: createOpt.Interactive, + IsTerminal: createOpt.TTY, + IsDetach: createOpt.Detach, + Con: con, + LogURI: logURI, + DetachKeys: createOpt.DetachKeys, + Namespace: createOpt.GOptions.Namespace, + DetachC: detachC, + CheckpointDir: "", + }) if err != nil { return err } diff --git a/cmd/nerdctl/container/container_run_soci_linux_test.go b/cmd/nerdctl/container/container_run_soci_linux_test.go index 07ad11a0f50..c154db65c15 100644 --- a/cmd/nerdctl/container/container_run_soci_linux_test.go +++ b/cmd/nerdctl/container/container_run_soci_linux_test.go @@ -56,7 +56,7 @@ func TestRunSoci(t *testing.T) { } testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand { - return helpers.Command("--snapshotter=soci", "run", "--rm", testutil.FfmpegSociImage) + return helpers.Command("--snapshotter=soci", "run", "--quiet", "--rm", testutil.FfmpegSociImage, "/bin/bash", "-c", "ls") } testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected { diff --git a/cmd/nerdctl/container/container_start.go b/cmd/nerdctl/container/container_start.go index 7b770d9b1e6..a1fddadb09c 100644 --- a/cmd/nerdctl/container/container_start.go +++ b/cmd/nerdctl/container/container_start.go @@ -44,6 +44,8 @@ func StartCommand() *cobra.Command { cmd.Flags().BoolP("attach", "a", false, "Attach STDOUT/STDERR and forward signals") cmd.Flags().String("detach-keys", consoleutil.DefaultDetachKeys, "Override the default detach keys") cmd.Flags().BoolP("interactive", "i", false, "Attach container's STDIN") + cmd.Flags().String("checkpoint", "", "checkpoint name") + cmd.Flags().String("checkpoint-dir", "", "checkpoint directory") return cmd } @@ -64,12 +66,22 @@ func startOptions(cmd *cobra.Command) (types.ContainerStartOptions, error) { if err != nil { return types.ContainerStartOptions{}, err } + checkpoint, err := cmd.Flags().GetString("checkpoint") + if err != nil { + return types.ContainerStartOptions{}, err + } + 
checkpointDir, err := cmd.Flags().GetString("checkpoint-dir") + if err != nil { + return types.ContainerStartOptions{}, err + } return types.ContainerStartOptions{ - Stdout: cmd.OutOrStdout(), - GOptions: globalOptions, - Attach: attach, - DetachKeys: detachKeys, - Interactive: interactive, + Stdout: cmd.OutOrStdout(), + GOptions: globalOptions, + Attach: attach, + DetachKeys: detachKeys, + Interactive: interactive, + Checkpoint: checkpoint, + CheckpointDir: checkpointDir, }, nil } diff --git a/cmd/nerdctl/container/container_start_linux_test.go b/cmd/nerdctl/container/container_start_linux_test.go index b8b82c2d83d..1a86c3026b2 100644 --- a/cmd/nerdctl/container/container_start_linux_test.go +++ b/cmd/nerdctl/container/container_start_linux_test.go @@ -20,12 +20,15 @@ import ( "bytes" "errors" "io" + "strconv" "strings" "testing" + "time" "gotest.tools/v3/assert" "github.com/containerd/nerdctl/mod/tigron/expect" + "github.com/containerd/nerdctl/mod/tigron/require" "github.com/containerd/nerdctl/mod/tigron/test" "github.com/containerd/nerdctl/mod/tigron/tig" @@ -77,3 +80,54 @@ func TestStartDetachKeys(t *testing.T) { testCase.Run(t) } + +func TestStartWithCheckpoint(t *testing.T) { + + testCase := nerdtest.Setup() + testCase.Require = require.All( + require.Not(nerdtest.Rootless), + // Docker version 28.x has a known regression that breaks Checkpoint/Restore functionality. + // The issue is tracked in the moby/moby project as https://github.com/moby/moby/issues/50750. 
+ require.Not(nerdtest.Docker), + ) + testCase.Setup = func(data test.Data, helpers test.Helpers) { + // Use an in-memory tmpfs to model in-memory state without introducing extra processes + // Single PID 1 shell: continuously increment a counter and write to /state/counter (tmpfs) + helpers.Ensure("run", "-d", "--name", data.Identifier(), "--tmpfs", "/state", testutil.CommonImage, + "sh", "-c", `i=0; while true; do i=$((i+1)); printf "%d\n" "$i" >/state/counter; sleep 0.2; done`) + // Give some time for the counter to increase before checkpoint to validate continuity after restore + time.Sleep(1 * time.Second) + helpers.Ensure("checkpoint", "create", data.Identifier(), data.Identifier()+"-checkpoint") + } + + testCase.Cleanup = func(data test.Data, helpers test.Helpers) { + helpers.Anyhow("rm", "-f", data.Identifier()) + } + + testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("start", "--checkpoint", data.Identifier()+"-checkpoint", data.Identifier()) + } + + testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: 0, + Output: expect.All( + func(_ string, t tig.T) { + // Validate in-memory state continuity via tmpfs: counter should not reset and must keep increasing + // Short delay to allow the container to resume; if the counter had reset to 0, it could not reach >5 this fast + time.Sleep(200 * time.Millisecond) + c1Str := strings.TrimSpace(helpers.Capture("exec", data.Identifier(), "cat", "/state/counter")) + var parseErrs []error + c1, err1 := strconv.Atoi(c1Str) + if err1 != nil { + parseErrs = append(parseErrs, err1) + } + assert.Assert(t, len(parseErrs) == 0, "failed to parse counter values: %v", parseErrs) + assert.Assert(t, c1 > 5, "tmpfs in-memory counter seems reset or too small: %d", c1) + }, + ), + } + } + + testCase.Run(t) +} diff --git a/cmd/nerdctl/image/image_convert_linux_test.go b/cmd/nerdctl/image/image_convert_linux_test.go 
index 07cd2a7003d..a13fc08d595 100644 --- a/cmd/nerdctl/image/image_convert_linux_test.go +++ b/cmd/nerdctl/image/image_convert_linux_test.go @@ -38,6 +38,7 @@ func TestImageConvert(t *testing.T) { require.Not(require.Windows), require.Not(nerdtest.Docker), ), + NoParallel: true, Setup: func(data test.Data, helpers test.Helpers) { helpers.Ensure("pull", "--quiet", "--all-platforms", testutil.CommonImage) }, diff --git a/cmd/nerdctl/main.go b/cmd/nerdctl/main.go index c5abcc60a6c..51dfb26736e 100644 --- a/cmd/nerdctl/main.go +++ b/cmd/nerdctl/main.go @@ -31,6 +31,7 @@ import ( "github.com/containerd/log" "github.com/containerd/nerdctl/v2/cmd/nerdctl/builder" + "github.com/containerd/nerdctl/v2/cmd/nerdctl/checkpoint" "github.com/containerd/nerdctl/v2/cmd/nerdctl/completion" "github.com/containerd/nerdctl/v2/cmd/nerdctl/compose" "github.com/containerd/nerdctl/v2/cmd/nerdctl/container" @@ -350,6 +351,9 @@ Config file ($NERDCTL_TOML): %s // Manifest manifest.Command(), + + // Checkpoint + checkpoint.Command(), ) addApparmorCommand(rootCmd) container.AddCpCommand(rootCmd) diff --git a/docs/command-reference.md b/docs/command-reference.md index 21af3a569f4..7fba7931258 100644 --- a/docs/command-reference.md +++ b/docs/command-reference.md @@ -53,6 +53,8 @@ - [:nerd_face: nerdctl image convert](#nerd_face-nerdctl-image-convert) - [:nerd_face: nerdctl image encrypt](#nerd_face-nerdctl-image-encrypt) - [:nerd_face: nerdctl image decrypt](#nerd_face-nerdctl-image-decrypt) +- [Checkpoint management](#checkpoint-management) + - [:whale: nerdctl checkpoint create](#whale-nerdctl-checkpoint-create) - [Manifest management](#manifest-management) - [:whale: nerdctl manifest annotate](#whale-nerdctl-manifest-annotate) - [:whale: nerdctl manifest create](#whale-nerdctl-manifest-create) @@ -613,8 +615,10 @@ Flags: - :whale: `-a, --attach`: Attach STDOUT/STDERR and forward signals - :whale: `--detach-keys`: Override the default detach keys +- :whale: `--checkpoint`: checkpoint name +- 
:whale: `--checkpoint-dir`: checkpoint directory -Unimplemented `docker start` flags: `--checkpoint`, `--checkpoint-dir`, `--interactive` +Unimplemented `docker start` flags: `--interactive` ### :whale: nerdctl restart @@ -1060,6 +1064,18 @@ Flags: - `--platform=` : Convert content for a specific platform - `--all-platforms` : Convert content for all platforms (default: false) +## Checkpoint management + +### :whale: nerdctl checkpoint create + +Create a checkpoint from a running container. + +Usage: `nerdctl checkpoint create [OPTIONS] CONTAINER CHECKPOINT` + +Flags: +- :whale: `--leave-running`: Leave the container running after checkpoint +- :whale: `--checkpoint-dir`: Use a custom checkpoint storage directory + ## Manifest management ### :whale: nerdctl manifest annotate diff --git a/pkg/api/types/checkpoint_types.go b/pkg/api/types/checkpoint_types.go new file mode 100644 index 00000000000..46b055105c4 --- /dev/null +++ b/pkg/api/types/checkpoint_types.go @@ -0,0 +1,29 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package types + +import "io" + +// CheckpointCreateOptions specifies options for `nerdctl checkpoint create`. 
+type CheckpointCreateOptions struct { + Stdout io.Writer + GOptions GlobalCommandOptions + // Leave the container running after checkpointing + LeaveRunning bool + // Checkpoint directory + CheckpointDir string +} diff --git a/pkg/api/types/container_types.go b/pkg/api/types/container_types.go index a19fb5aea1f..20462661085 100644 --- a/pkg/api/types/container_types.go +++ b/pkg/api/types/container_types.go @@ -32,6 +32,10 @@ type ContainerStartOptions struct { DetachKeys string // Attach stdin Interactive bool + // Checkpoint is the name of the checkpoint to restore + Checkpoint string + // CheckpointDir is the directory to store checkpoints + CheckpointDir string } // ContainerKillOptions specifies options for `nerdctl (container) kill`. diff --git a/pkg/checkpointutil/checkpointutil.go b/pkg/checkpointutil/checkpointutil.go new file mode 100644 index 00000000000..c3f789af737 --- /dev/null +++ b/pkg/checkpointutil/checkpointutil.go @@ -0,0 +1,48 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// GetCheckpointDir resolves and validates the directory of checkpoint
// checkpointID, located at checkpointDir/checkpointID.
//
// When create is true the directory must not exist yet: it is created with
// mode 0700 (including missing parents). An error is returned if a checkpoint
// directory of that name already exists, if the path exists but is not a
// directory, or if the path cannot be inspected or created.
//
// When create is false the directory must already exist: an error is returned
// if it is missing, is not a directory, or cannot be inspected.
//
// containerID is only used in error messages. The resolved path is returned
// in all cases, alongside the error if any.
func GetCheckpointDir(checkpointDir, checkpointID, containerID string, create bool) (string, error) {
	checkpointAbsDir := filepath.Join(checkpointDir, checkpointID)
	stat, statErr := os.Stat(checkpointAbsDir)

	if create {
		switch {
		case statErr == nil && stat.IsDir():
			return checkpointAbsDir, fmt.Errorf("checkpoint with name %s already exists for container %s", checkpointID, containerID)
		case statErr == nil:
			// Something other than a directory occupies the path; refuse to use it
			// rather than reporting success.
			return checkpointAbsDir, fmt.Errorf("%s exists and is not a directory", checkpointAbsDir)
		case os.IsNotExist(statErr):
			return checkpointAbsDir, os.MkdirAll(checkpointAbsDir, 0o700)
		default:
			// Permission problems and the like: surface the real cause.
			return checkpointAbsDir, fmt.Errorf("failed to stat %s: %w", checkpointAbsDir, statErr)
		}
	}

	switch {
	case statErr == nil && stat.IsDir():
		return checkpointAbsDir, nil
	case statErr == nil:
		return checkpointAbsDir, fmt.Errorf("%s exists and is not a directory", checkpointAbsDir)
	case os.IsNotExist(statErr):
		return checkpointAbsDir, fmt.Errorf("checkpoint %s does not exist for container %s", checkpointID, containerID)
	default:
		return checkpointAbsDir, fmt.Errorf("failed to stat %s: %w", checkpointAbsDir, statErr)
	}
}
+*/ + +package checkpoint + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "path/filepath" + + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + + "github.com/containerd/containerd/api/types/runc/options" + containerd "github.com/containerd/containerd/v2/client" + "github.com/containerd/containerd/v2/core/content" + "github.com/containerd/containerd/v2/core/images" + "github.com/containerd/containerd/v2/pkg/archive" + "github.com/containerd/containerd/v2/plugins" + + "github.com/containerd/nerdctl/v2/pkg/api/types" + "github.com/containerd/nerdctl/v2/pkg/checkpointutil" + "github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker" +) + +func Create(ctx context.Context, client *containerd.Client, containerID string, checkpointName string, options types.CheckpointCreateOptions) error { + var container containerd.Container + + walker := &containerwalker.ContainerWalker{ + Client: client, + OnFound: func(ctx context.Context, found containerwalker.Found) error { + if found.MatchCount > 1 { + return fmt.Errorf("multiple containers found with provided prefix: %s", found.Req) + } + container = found.Container + return nil + }, + } + + n, err := walker.Walk(ctx, containerID) + if err != nil { + return err + } else if n == 0 { + return fmt.Errorf("error creating checkpoint for container: %s, no such container", containerID) + } + + info, err := container.Info(ctx) + if err != nil { + return fmt.Errorf("failed to get info for container %q: %w", containerID, err) + } + + task, err := container.Task(ctx, nil) + if err != nil { + return fmt.Errorf("failed to get task for container %q: %w", containerID, err) + } + + img, err := task.Checkpoint(ctx, withCheckpointOpts(info.Runtime.Name, !options.LeaveRunning)) + if err != nil { + return err + } + + defer client.ImageService().Delete(ctx, img.Name()) + + cs := client.ContentStore() + + rawIndex, err := content.ReadBlob(ctx, cs, img.Target()) + if err != nil { + return fmt.Errorf("failed to retrieve 
checkpoint data: %w", err) + } + + var index ocispec.Index + if err := json.Unmarshal(rawIndex, &index); err != nil { + return fmt.Errorf("failed to decode checkpoint data: %w", err) + } + + var cpDesc *ocispec.Descriptor + for _, m := range index.Manifests { + if m.MediaType == images.MediaTypeContainerd1Checkpoint { + cpDesc = &m //nolint:gosec + break + } + } + if cpDesc == nil { + return errors.New("invalid checkpoint") + } + + if options.CheckpointDir == "" { + options.CheckpointDir = filepath.Join(options.GOptions.DataRoot, "checkpoints") + } + targetPath, err := checkpointutil.GetCheckpointDir(options.CheckpointDir, checkpointName, container.ID(), true) + if err != nil { + return err + } + + rat, err := cs.ReaderAt(ctx, *cpDesc) + if err != nil { + return fmt.Errorf("failed to get checkpoint reader: %w", err) + } + defer rat.Close() + + _, err = archive.Apply(ctx, targetPath, content.NewReader(rat)) + if err != nil { + return fmt.Errorf("failed to read checkpoint reader: %w", err) + } + + fmt.Fprintf(options.Stdout, "%s\n", checkpointName) + + return nil +} + +func withCheckpointOpts(rt string, exit bool) containerd.CheckpointTaskOpts { + return func(r *containerd.CheckpointTaskInfo) error { + + switch rt { + case plugins.RuntimeRuncV2: + if r.Options == nil { + r.Options = &options.CheckpointOptions{} + } + opts, _ := r.Options.(*options.CheckpointOptions) + + opts.Exit = exit + } + return nil + } +} diff --git a/pkg/cmd/container/restart.go b/pkg/cmd/container/restart.go index bf2b335d390..98c543bc67e 100644 --- a/pkg/cmd/container/restart.go +++ b/pkg/cmd/container/restart.go @@ -48,7 +48,7 @@ func Restart(ctx context.Context, client *containerd.Client, containers []string if err := containerutil.Stop(ctx, found.Container, options.Timeout, options.Signal); err != nil { return err } - if err := containerutil.Start(ctx, found.Container, false, false, client, "", (*config.Config)(&options.GOption)); err != nil { + if err := containerutil.Start(ctx, 
found.Container, false, false, client, "", "", (*config.Config)(&options.GOption)); err != nil { return err } _, err = fmt.Fprintln(options.Stdout, found.Req) diff --git a/pkg/cmd/container/start.go b/pkg/cmd/container/start.go index 14663b81b9d..604ce9465f5 100644 --- a/pkg/cmd/container/start.go +++ b/pkg/cmd/container/start.go @@ -19,10 +19,12 @@ package container import ( "context" "fmt" + "path/filepath" containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/nerdctl/v2/pkg/api/types" + "github.com/containerd/nerdctl/v2/pkg/checkpointutil" "github.com/containerd/nerdctl/v2/pkg/config" "github.com/containerd/nerdctl/v2/pkg/containerutil" "github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker" @@ -33,15 +35,28 @@ func Start(ctx context.Context, client *containerd.Client, reqs []string, option if options.Attach && len(reqs) > 1 { return fmt.Errorf("you cannot start and attach multiple containers at once") } + if options.Checkpoint != "" && len(reqs) > 1 { + return fmt.Errorf("you cannot start multiple containers with checkpoint at once") + } walker := &containerwalker.ContainerWalker{ Client: client, OnFound: func(ctx context.Context, found containerwalker.Found) error { var err error + var checkpointDir string if found.MatchCount > 1 { return fmt.Errorf("multiple IDs found with provided prefix: %s", found.Req) } - if err := containerutil.Start(ctx, found.Container, options.Attach, options.Interactive, client, options.DetachKeys, (*config.Config)(&options.GOptions)); err != nil { + if options.Checkpoint != "" { + if options.CheckpointDir == "" { + options.CheckpointDir = filepath.Join(options.GOptions.DataRoot, "checkpoints") + } + checkpointDir, err = checkpointutil.GetCheckpointDir(options.CheckpointDir, options.Checkpoint, found.Container.ID(), false) + if err != nil { + return err + } + } + if err := containerutil.Start(ctx, found.Container, options.Attach, options.Interactive, client, options.DetachKeys, checkpointDir, 
(*config.Config)(&options.GOptions)); err != nil { return err } if !options.Attach { diff --git a/pkg/containerutil/containerutil.go b/pkg/containerutil/containerutil.go index 7805ad10b92..32f99b5229e 100644 --- a/pkg/containerutil/containerutil.go +++ b/pkg/containerutil/containerutil.go @@ -206,7 +206,7 @@ func GenerateSharingPIDOpts(ctx context.Context, targetCon containerd.Container) } // Start starts `container` with `attach` flag. If `attach` is true, it will attach to the container's stdio. -func Start(ctx context.Context, container containerd.Container, isAttach bool, isInteractive bool, client *containerd.Client, detachKeys string, cfg *config.Config) (err error) { +func Start(ctx context.Context, container containerd.Container, isAttach bool, isInteractive bool, client *containerd.Client, detachKeys string, checkpointDir string, cfg *config.Config) (err error) { // defer the storage of start error in the dedicated label defer func() { if err != nil { @@ -280,7 +280,18 @@ func Start(ctx context.Context, container containerd.Container, isAttach bool, i // source: https://github.com/containerd/nerdctl/blob/main/docs/command-reference.md#whale-nerdctl-start attachStreamOpt = []string{"STDOUT", "STDERR"} } - task, err := taskutil.NewTask(ctx, client, container, attachStreamOpt, isInteractive, isTerminal, true, con, logURI, detachKeys, namespace, detachC) + task, err := taskutil.NewTask(ctx, client, container, taskutil.TaskOptions{ + AttachStreamOpt: attachStreamOpt, + IsInteractive: isInteractive, + IsTerminal: isTerminal, + IsDetach: true, + Con: con, + LogURI: logURI, + DetachKeys: detachKeys, + Namespace: namespace, + DetachC: detachC, + CheckpointDir: checkpointDir, + }) if err != nil { return err } diff --git a/pkg/taskutil/taskutil.go b/pkg/taskutil/taskutil.go index 67962ac9065..ec5f96585d6 100644 --- a/pkg/taskutil/taskutil.go +++ b/pkg/taskutil/taskutil.go @@ -19,6 +19,7 @@ package taskutil import ( "context" "errors" + "fmt" "io" "net/url" "os" @@ 
-27,13 +28,20 @@ import ( "strings" "sync" "syscall" + "time" "github.com/Masterminds/semver/v3" + "github.com/opencontainers/go-digest" "golang.org/x/term" "github.com/containerd/console" + "github.com/containerd/containerd/api/types" containerd "github.com/containerd/containerd/v2/client" + "github.com/containerd/containerd/v2/core/content" + "github.com/containerd/containerd/v2/core/images" + "github.com/containerd/containerd/v2/pkg/archive" "github.com/containerd/containerd/v2/pkg/cio" + "github.com/containerd/errdefs" "github.com/containerd/log" "github.com/containerd/nerdctl/v2/pkg/cioutil" @@ -41,14 +49,68 @@ import ( "github.com/containerd/nerdctl/v2/pkg/infoutil" ) +// TaskOptions contains options for creating a new task +type TaskOptions struct { + AttachStreamOpt []string + IsInteractive bool + IsTerminal bool + IsDetach bool + Con console.Console + LogURI string + DetachKeys string + Namespace string + DetachC chan<- struct{} + CheckpointDir string +} + // NewTask is from https://github.com/containerd/containerd/blob/v1.4.3/cmd/ctr/commands/tasks/tasks_unix.go#L70-L108 -func NewTask(ctx context.Context, client *containerd.Client, container containerd.Container, - attachStreamOpt []string, isInteractive, isTerminal, isDetach bool, con console.Console, logURI, detachKeys, namespace string, detachC chan<- struct{}) (containerd.Task, error) { +func NewTask(ctx context.Context, client *containerd.Client, container containerd.Container, opts TaskOptions) (containerd.Task, error) { + var ( + checkpoint *types.Descriptor + t containerd.Task + err error + ) - var t containerd.Task +
+	if opts.CheckpointDir != "" {
+		cs := client.ContentStore()
+		writer, err := cs.Writer(ctx, content.WithRef(opts.CheckpointDir))
+		if err != nil {
+			return nil, fmt.Errorf("failed to upload checkpoint to containerd: %w", err)
+		}
+		defer writer.Close()
+		// Open the tar stream after the writer and close it right after the copy so no return path leaks it.
+		tar := archive.Diff(ctx, "", opts.CheckpointDir)
+		size, copyErr := io.Copy(writer, tar)
+		closeErr := tar.Close()
+		if copyErr != nil {
+			return nil, fmt.Errorf("failed to upload checkpoint to containerd: %w", copyErr)
+		}
+		if closeErr != nil {
+			return nil, fmt.Errorf("failed to close checkpoint tar stream: %w", closeErr)
+		}
+		labels := map[string]string{
+			"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
+		}
+		if err = writer.Commit(ctx, size, "", content.WithLabels(labels)); err != nil {
+			if !errors.Is(err, errdefs.ErrAlreadyExists) {
+				return nil, fmt.Errorf("failed to upload checkpoint to containerd: %w", err)
+			}
+		}
+		checkpoint = &types.Descriptor{
+			MediaType: images.MediaTypeContainerd1Checkpoint,
+			Digest:    writer.Digest().String(),
+			Size:      size,
+		}
+		// Best-effort cleanup: drop the uploaded checkpoint blob when NewTask returns.
+		defer func() {
+			if checkpoint != nil {
+				_ = cs.Delete(ctx, digest.Digest(checkpoint.Digest))
+			}
+		}()
+	}
 closer := func() { - if detachC != nil { - detachC <- struct{}{} + if opts.DetachC != nil { + opts.DetachC <- struct{}{} } // t will be set by container.NewTask at the end of this function. // @@ -64,30 +126,30 @@ func NewTask(ctx context.Context, client *containerd.Client, container container io.Cancel() } var ioCreator cio.Creator - if len(attachStreamOpt) != 0 { + if len(opts.AttachStreamOpt) != 0 { log.G(ctx).Debug("attaching output instead of using the log-uri") // when attaching a TTY we use writee for stdio and binary for log persistence - if isTerminal { + if opts.IsTerminal { var in io.Reader - if isInteractive { + if opts.IsInteractive { // FIXME: check IsTerminal on Windows too if runtime.GOOS != "windows" && !term.IsTerminal(0) { return nil, errors.New("the input device is not a TTY") } var err error - in, err = consoleutil.NewDetachableStdin(con, detachKeys, closer) + in, err = consoleutil.NewDetachableStdin(opts.Con, opts.DetachKeys, closer) if err != nil { return nil, err } } - ioCreator = cioutil.NewContainerIO(namespace, logURI, true, in, con, nil) + ioCreator = cioutil.NewContainerIO(opts.Namespace, opts.LogURI, true, in, opts.Con, nil) } else { - streams := processAttachStreamsOpt(attachStreamOpt) - ioCreator = cioutil.NewContainerIO(namespace, logURI, false, streams.stdIn, streams.stdOut,
streams.stdErr) + streams := processAttachStreamsOpt(opts.AttachStreamOpt) + ioCreator = cioutil.NewContainerIO(opts.Namespace, opts.LogURI, false, streams.stdIn, streams.stdOut, streams.stdErr) } - } else if isTerminal && isDetach { - u, err := url.Parse(logURI) + } else if opts.IsTerminal && opts.IsDetach { + u, err := url.Parse(opts.LogURI) if err != nil { return nil, err } @@ -113,32 +175,32 @@ func NewTask(ctx context.Context, client *containerd.Client, container container ioCreator = cio.TerminalBinaryIO(parsedPath, map[string]string{ args[0]: args[1], }) - } else if isTerminal && !isDetach { - if con == nil { + } else if opts.IsTerminal && !opts.IsDetach { + if opts.Con == nil { return nil, errors.New("got nil con with isTerminal=true") } var in io.Reader - if isInteractive { + if opts.IsInteractive { // FIXME: check IsTerminal on Windows too if runtime.GOOS != "windows" && !term.IsTerminal(0) { return nil, errors.New("the input device is not a TTY") } var err error - in, err = consoleutil.NewDetachableStdin(con, detachKeys, closer) + in, err = consoleutil.NewDetachableStdin(opts.Con, opts.DetachKeys, closer) if err != nil { return nil, err } } - ioCreator = cioutil.NewContainerIO(namespace, logURI, true, in, os.Stdout, os.Stderr) - } else if isDetach && logURI != "" && logURI != "none" { - u, err := url.Parse(logURI) + ioCreator = cioutil.NewContainerIO(opts.Namespace, opts.LogURI, true, in, os.Stdout, os.Stderr) + } else if opts.IsDetach && opts.LogURI != "" && opts.LogURI != "none" { + u, err := url.Parse(opts.LogURI) if err != nil { return nil, err } ioCreator = cio.LogURI(u) } else { var in io.Reader - if isInteractive { + if opts.IsInteractive { if sv, err := infoutil.ServerSemVer(ctx, client); err != nil { log.G(ctx).Warn(err) } else if sv.LessThan(semver.MustParse("1.6.0-0")) { @@ -156,9 +218,17 @@ func NewTask(ctx context.Context, client *containerd.Client, container container } in = stdinC } - ioCreator = cioutil.NewContainerIO(namespace, logURI, 
false, in, os.Stdout, os.Stderr) + ioCreator = cioutil.NewContainerIO(opts.Namespace, opts.LogURI, false, in, os.Stdout, os.Stderr) } - t, err := container.NewTask(ctx, ioCreator) + + taskOpts := []containerd.NewTaskOpts{ + func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error { + info.Checkpoint = checkpoint + return nil + }, + } + + t, err = container.NewTask(ctx, ioCreator, taskOpts...) if err != nil { return nil, err }