From 01f4bdf6c3853be21e6c04d5a18e56ca019f319c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Rod=C3=A1k?= Date: Tue, 10 Sep 2024 15:53:30 +0200 Subject: [PATCH] Add --health-max-log-count, --health-max-log-size, --health-log-destination flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These flags can affect the output of the HealthCheck log. Currently, when a container is configured with HealthCheck, the output from the HealthCheck command is only logged to the container status file, which is accessible via `podman inspect`. It is also limited to the last five executions and the first 500 characters per execution. This makes debugging past problems very difficult, since the only information available about the failure of the HealthCheck command is the generic `healthcheck service failed` record. - The `--health-log-destination` flag sets the destination of the HealthCheck log. - `local`: (default behavior) `HealthCheckResults` are stored in overlay containers. (For example: `$runroot/healthcheck.log`) - `directory`: creates a log file named `<container-ID>-healthcheck.log` with JSON `HealthCheckResults` in the specified directory. - `events_logger`: The log will be written with logging mechanism set by events_logger. It also saves the log to a default directory, for performance on a system with a large number of logs. - The `--health-max-log-count` flag sets the maximum number of attempts in the HealthCheck log file. - A value of `0` indicates an infinite number of attempts in the log file. - The default value is `5` attempts in the log file. - The `--health-max-log-size` flag sets the maximum length of the log stored. - A value of `0` indicates an infinite log length. - The default value is `500` log characters. 
Add --health-max-log-count flag Signed-off-by: Jan Rodák Add --health-max-log-size flag Signed-off-by: Jan Rodák Add --health-log-destination flag Signed-off-by: Jan Rodák --- cmd/podman/common/create.go | 24 ++ .../options/health-log-destination.md | 11 + .../markdown/options/health-max-log-count.md | 7 + .../markdown/options/health-max-log-size.md | 7 + docs/source/markdown/podman-create.1.md.in | 6 + docs/source/markdown/podman-run.1.md.in | 6 + docs/source/markdown/podman-systemd.unit.5.md | 25 ++ libpod/container_config.go | 8 + libpod/container_inspect.go | 8 +- libpod/container_internal.go | 7 +- libpod/define/container_inspect.go | 8 + libpod/define/healthchecks.go | 10 + libpod/events.go | 21 +- libpod/events/config.go | 4 + libpod/events/events.go | 4 + libpod/events/journal_linux.go | 13 + libpod/healthcheck.go | 85 +++--- libpod/options.go | 53 ++++ libpod/runtime_ctr.go | 2 +- pkg/api/handlers/compat/containers_create.go | 111 ++++---- pkg/domain/entities/pods.go | 261 +++++++++--------- pkg/specgen/generate/container_create.go | 6 + pkg/specgen/specgen.go | 8 + pkg/specgenutil/specgen.go | 6 + pkg/systemd/quadlet/quadlet.go | 9 + test/e2e/healthcheck_run_test.go | 195 +++++++++++++ test/system/220-healthcheck.bats | 211 ++++++++++++++ 27 files changed, 884 insertions(+), 232 deletions(-) create mode 100644 docs/source/markdown/options/health-log-destination.md create mode 100644 docs/source/markdown/options/health-max-log-count.md create mode 100644 docs/source/markdown/options/health-max-log-size.md diff --git a/cmd/podman/common/create.go b/cmd/podman/common/create.go index 6de40987eeef..54a2e781f566 100644 --- a/cmd/podman/common/create.go +++ b/cmd/podman/common/create.go @@ -184,6 +184,30 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(healthIntervalFlagName, completion.AutocompleteNone) + healthLogDestinationFlagName := "health-log-destination" + createFlags.StringVar( + 
&cf.HealthLogDestination, + healthLogDestinationFlagName, define.DefaultHealthCheckLocalDestination, + "set the destination of the HealthCheck log. Directory path, local or events_logger (local use container state file)", + ) + _ = cmd.RegisterFlagCompletionFunc(healthLogDestinationFlagName, completion.AutocompleteNone) + + healthMaxLogCountFlagName := "health-max-log-count" + createFlags.UintVar( + &cf.HealthMaxLogCount, + healthMaxLogCountFlagName, define.DefaultHealthMaxLogCount, + "set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file)", + ) + _ = cmd.RegisterFlagCompletionFunc(healthMaxLogCountFlagName, completion.AutocompleteNone) + + healthMaxLogSizeFlagName := "health-max-log-size" + createFlags.UintVar( + &cf.HealthMaxLogSize, + healthMaxLogSizeFlagName, define.DefaultHealthMaxLogSize, + "set maximum length in characters of stored HealthCheck log. ('0' value means an infinite log length)", + ) + _ = cmd.RegisterFlagCompletionFunc(healthMaxLogSizeFlagName, completion.AutocompleteNone) + healthRetriesFlagName := "health-retries" createFlags.UintVar( &cf.HealthRetries, diff --git a/docs/source/markdown/options/health-log-destination.md b/docs/source/markdown/options/health-log-destination.md new file mode 100644 index 000000000000..16b99ecc4c60 --- /dev/null +++ b/docs/source/markdown/options/health-log-destination.md @@ -0,0 +1,11 @@ +####> This option file is used in: +####> podman create, run +####> If file is edited, make sure the changes +####> are applicable to all of those. +#### **--health-log-destination**=*directory_path* + +Set the destination of the HealthCheck log. Directory path, local or events_logger (local use container state file) (Default: local) + +* `local`: (default) HealthCheck logs are stored in overlay containers. (For example: `$runroot/healthcheck.log`) +* `directory`: creates a log file named `<container-ID>-healthcheck.log` with HealthCheck logs in the specified directory. 
+* `events_logger`: The log will be written with logging mechanism set by events_logger. It also saves the log to a default directory, for performance on a system with a large number of logs. diff --git a/docs/source/markdown/options/health-max-log-count.md b/docs/source/markdown/options/health-max-log-count.md new file mode 100644 index 000000000000..96a7d608614c --- /dev/null +++ b/docs/source/markdown/options/health-max-log-count.md @@ -0,0 +1,7 @@ +####> This option file is used in: +####> podman create, run +####> If file is edited, make sure the changes +####> are applicable to all of those. +#### **--health-max-log-count**=*number of stored logs* + +Set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file) (Default: 5 attempts) diff --git a/docs/source/markdown/options/health-max-log-size.md b/docs/source/markdown/options/health-max-log-size.md new file mode 100644 index 000000000000..96cc399e4a62 --- /dev/null +++ b/docs/source/markdown/options/health-max-log-size.md @@ -0,0 +1,7 @@ +####> This option file is used in: +####> podman create, run +####> If file is edited, make sure the changes +####> are applicable to all of those. +#### **--health-max-log-size**=*size of stored logs* + +Set maximum length in characters of stored HealthCheck log. ("0" value means an infinite log length) (Default: 500 characters) diff --git a/docs/source/markdown/podman-create.1.md.in b/docs/source/markdown/podman-create.1.md.in index d0a939e8733d..ed5852f8f841 100644 --- a/docs/source/markdown/podman-create.1.md.in +++ b/docs/source/markdown/podman-create.1.md.in @@ -169,6 +169,12 @@ See [**Environment**](#environment) note below for precedence and examples. 
@@option health-interval +@@option health-log-destination + +@@option health-max-log-count + +@@option health-max-log-size + @@option health-on-failure @@option health-retries diff --git a/docs/source/markdown/podman-run.1.md.in b/docs/source/markdown/podman-run.1.md.in index da546a3354e0..6c131b3684a5 100644 --- a/docs/source/markdown/podman-run.1.md.in +++ b/docs/source/markdown/podman-run.1.md.in @@ -203,6 +203,12 @@ See [**Environment**](#environment) note below for precedence and examples. @@option health-interval +@@option health-log-destination + +@@option health-max-log-count + +@@option health-max-log-size + @@option health-on-failure @@option health-retries diff --git a/docs/source/markdown/podman-systemd.unit.5.md b/docs/source/markdown/podman-systemd.unit.5.md index 846fdb97ba4d..976f062dcacc 100644 --- a/docs/source/markdown/podman-systemd.unit.5.md +++ b/docs/source/markdown/podman-systemd.unit.5.md @@ -279,6 +279,9 @@ Valid options for `[Container]` are listed below: | GroupAdd=keep-groups | --group-add=keep-groups | | HealthCmd=/usr/bin/command | --health-cmd=/usr/bin/command | | HealthInterval=2m | --health-interval=2m | +| HealthLogDestination=/foo/log | --health-log-destination=/foo/log | +| HealthMaxLogCount=5 | --health-max-log-count=5 | +| HealthMaxLogSize=500 | --health-max-log-size=500 | | HealthOnFailure=kill | --health-on-failure=kill | | HealthRetries=5 | --health-retries=5 | | HealthStartPeriod=1m | --health-start-period=period=1m | @@ -515,6 +518,28 @@ Equivalent to the Podman `--health-cmd` option. Set an interval for the healthchecks. An interval of disable results in no automatic timer setup. Equivalent to the Podman `--health-interval` option. +### `HealthLogDestination=` + +Set the destination of the HealthCheck log. Directory path, local or events_logger (local use container state file) +(Default: local) +Equivalent to the Podman `--health-log-destination` option. 
+ +* `local`: (default) HealthCheck logs are stored in overlay containers. (For example: `$runroot/healthcheck.log`) +* `directory`: creates a log file named `<container-ID>-healthcheck.log` with HealthCheck logs in the specified directory. +* `events_logger`: The log will be written with logging mechanism set by events_logger. It also saves the log to a default directory, for performance on a system with a large number of logs. + +### `HealthMaxLogCount=` + +Set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file) +(Default: 5 attempts) +Equivalent to the Podman `--health-max-log-count` option. + +### `HealthMaxLogSize=` + +Set maximum length in characters of stored HealthCheck log. ("0" value means an infinite log length) +(Default: 500 characters) +Equivalent to the Podman `--health-max-log-size` option. + ### `HealthOnFailure=` Action to take once the container transitions to an unhealthy state. diff --git a/libpod/container_config.go b/libpod/container_config.go index 8c4e0176c5b3..5ed80382a09b 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -413,6 +413,14 @@ type ContainerMiscConfig struct { HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"` // HealthCheckOnFailureAction defines an action to take once the container turns unhealthy. HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"` + // HealthLogDestination defines the destination where the log is stored + HealthLogDestination string `json:"healthLogDestination,omitempty"` + // HealthMaxLogCount is maximum number of attempts in the HealthCheck log file. 
+ // ('0' value means an infinite number of attempts in the log file) + HealthMaxLogCount uint `json:"healthMaxLogCount,omitempty"` + // HealthMaxLogSize is the maximum length in characters of stored HealthCheck log + // ("0" value means an infinite log length) + HealthMaxLogSize uint `json:"healthMaxLogSize,omitempty"` // StartupHealthCheckConfig is the configuration of the startup // healthcheck for the container. This will run before the regular HC // runs, and when it passes the regular HC will be activated. diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 33bd465da000..55abe118402c 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -195,7 +195,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver // inspect status should be set to nil. if c.config.HealthCheckConfig != nil && !(len(c.config.HealthCheckConfig.Test) == 1 && c.config.HealthCheckConfig.Test[0] == "NONE") { // This container has a healthcheck defined in it; we need to add its state - healthCheckState, err := c.getHealthCheckLog() + healthCheckState, err := c.readHealthCheckLog() if err != nil { // An error here is not considered fatal; no health state will be displayed logrus.Error(err) @@ -426,6 +426,12 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String() + ctrConfig.HealthLogDestination = c.config.HealthLogDestination + + ctrConfig.HealthMaxLogCount = c.config.HealthMaxLogCount + + ctrConfig.HealthMaxLogSize = c.config.HealthMaxLogSize + ctrConfig.CreateCommand = c.config.CreateCommand ctrConfig.Timezone = c.config.Timezone diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 2839e02433be..c7efd18e4b04 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1123,10 +1123,9 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { 
// bugzilla.redhat.com/show_bug.cgi?id=2144754: // In case of a restart, make sure to remove the healthcheck log to // have a clean state. - if path := c.healthCheckLogPath(); path != "" { - if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) { - logrus.Error(err) - } + err = c.writeHealthCheckLog(define.HealthCheckResults{Status: define.HealthCheckReset}) + if err != nil { + return err } if err := c.save(); err != nil { diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 89b70e59e659..4be6fd913c4d 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -61,6 +61,14 @@ type InspectContainerConfig struct { Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"` // HealthcheckOnFailureAction defines an action to take once the container turns unhealthy. HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"` + // HealthLogDestination defines the destination where the log is stored + HealthLogDestination string `json:"HealthLogDestination,omitempty"` + // HealthMaxLogCount is maximum number of attempts in the HealthCheck log file. + // ('0' value means an infinite number of attempts in the log file) + HealthMaxLogCount uint `json:"HealthcheckMaxLogCount,omitempty"` + // HealthMaxLogSize is the maximum length in characters of stored HealthCheck log + // ("0" value means an infinite log length) + HealthMaxLogSize uint `json:"HealthcheckMaxLogSize,omitempty"` // CreateCommand is the full command plus arguments of the process the // container has been created with. CreateCommand []string `json:"CreateCommand,omitempty"` diff --git a/libpod/define/healthchecks.go b/libpod/define/healthchecks.go index 15ea79fc20c0..4fec27755572 100644 --- a/libpod/define/healthchecks.go +++ b/libpod/define/healthchecks.go @@ -16,6 +16,8 @@ const ( // and the start-period (time allowed for the container to start and application // to be running) expires. 
HealthCheckStarting string = "starting" + // HealthCheckReset describes reset of HealthCheck logs + HealthCheckReset string = "reset" ) // HealthCheckStatus represents the current state of a container @@ -56,8 +58,16 @@ const ( DefaultHealthCheckStartPeriod = "0s" // DefaultHealthCheckTimeout default value DefaultHealthCheckTimeout = "30s" + // DefaultHealthMaxLogCount default value + DefaultHealthMaxLogCount uint = 5 + // DefaultHealthMaxLogSize default value + DefaultHealthMaxLogSize uint = 500 + // DefaultHealthCheckLocalDestination default value + DefaultHealthCheckLocalDestination string = "local" ) +const HealthCheckEventsLoggerDestination string = "events_logger" + // HealthConfig.Test options const ( // HealthConfigTestNone disables healthcheck diff --git a/libpod/events.go b/libpod/events.go index 92af63632cd7..6189ee895a96 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -8,6 +8,7 @@ import ( "path/filepath" "sync" + "github.com/containers/podman/v5/libpod/define" "github.com/containers/podman/v5/libpod/events" "github.com/sirupsen/logrus" ) @@ -28,27 +29,37 @@ func (r *Runtime) newEventer() (events.Eventer, error) { // newContainerEvent creates a new event based on a container func (c *Container) newContainerEvent(status events.Status) { - if err := c.newContainerEventWithInspectData(status, "", false); err != nil { + if err := c.newContainerEventWithInspectData(status, define.HealthCheckResults{}, false); err != nil { logrus.Errorf("Unable to write container event: %v", err) } } // newContainerHealthCheckEvent creates a new healthcheck event with the given status -func (c *Container) newContainerHealthCheckEvent(healthStatus string) { - if err := c.newContainerEventWithInspectData(events.HealthStatus, healthStatus, false); err != nil { +func (c *Container) newContainerHealthCheckEvent(healthCheckResult define.HealthCheckResults) { + if err := c.newContainerEventWithInspectData(events.HealthStatus, healthCheckResult, false); err != nil { 
logrus.Errorf("Unable to write container event: %v", err) } } // newContainerEventWithInspectData creates a new event and sets the // ContainerInspectData field if inspectData is set. -func (c *Container) newContainerEventWithInspectData(status events.Status, healthStatus string, inspectData bool) error { +func (c *Container) newContainerEventWithInspectData(status events.Status, healthCheckResult define.HealthCheckResults, inspectData bool) error { e := events.NewEvent(status) e.ID = c.ID() e.Name = c.Name() e.Image = c.config.RootfsImageName e.Type = events.Container - e.HealthStatus = healthStatus + e.HealthStatus = healthCheckResult.Status + if c.config.HealthLogDestination == define.HealthCheckEventsLoggerDestination { + if len(healthCheckResult.Log) > 0 { + logData, err := json.Marshal(healthCheckResult.Log[len(healthCheckResult.Log)-1]) + if err != nil { + return fmt.Errorf("unable to marshall healthcheck log for writing: %w", err) + } + e.HealthLog = string(logData) + } + } + e.HealthFailingStreak = healthCheckResult.FailingStreak e.Details = events.Details{ PodID: c.PodID(), diff --git a/libpod/events/config.go b/libpod/events/config.go index d0ab5d45f061..1927c5a6d344 100644 --- a/libpod/events/config.go +++ b/libpod/events/config.go @@ -41,6 +41,10 @@ type Event struct { Type Type // Health status of the current container HealthStatus string `json:"health_status,omitempty"` + // Healthcheck log of the current container + HealthLog string `json:"health_log,omitempty"` + // HealthFailingStreak log of the current container + HealthFailingStreak int `json:"health_failing_streak,omitempty"` // Error code for certain events involving errors. 
Error string `json:"error,omitempty"` diff --git a/libpod/events/events.go b/libpod/events/events.go index 5eda0033cc8d..7b6d3afdd75f 100644 --- a/libpod/events/events.go +++ b/libpod/events/events.go @@ -79,6 +79,10 @@ func (e *Event) ToHumanReadable(truncate bool) string { if e.HealthStatus != "" { humanFormat += fmt.Sprintf(", health_status=%s", e.HealthStatus) } + if e.HealthLog != "" { + humanFormat += fmt.Sprintf(", health_failing_streak=%d", e.HealthFailingStreak) + humanFormat += fmt.Sprintf(", health_log=%s", e.HealthLog) + } // check if the container has labels and add it to the output if len(e.Attributes) > 0 { for k, v := range e.Attributes { diff --git a/libpod/events/journal_linux.go b/libpod/events/journal_linux.go index 2ee94090f822..aa5ff350d7e4 100644 --- a/libpod/events/journal_linux.go +++ b/libpod/events/journal_linux.go @@ -66,6 +66,10 @@ func (e EventJournalD) Write(ee Event) error { m["PODMAN_LABELS"] = string(b) } m["PODMAN_HEALTH_STATUS"] = ee.HealthStatus + if ee.HealthLog != "" { + m["PODMAN_HEALTH_LOG"] = ee.HealthLog + } + m["PODMAN_HEALTH_FAILING_STREAK"] = strconv.Itoa(ee.HealthFailingStreak) if len(ee.Details.ContainerInspectData) > 0 { m["PODMAN_CONTAINER_INSPECT_DATA"] = ee.Details.ContainerInspectData @@ -225,6 +229,15 @@ func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) { } } newEvent.HealthStatus = entry.Fields["PODMAN_HEALTH_STATUS"] + if log, ok := entry.Fields["PODMAN_HEALTH_LOG"]; ok { + newEvent.HealthLog = log + } + if FailingStreak, ok := entry.Fields["PODMAN_HEALTH_FAILING_STREAK"]; ok { + FailingStreakInt, err := strconv.Atoi(FailingStreak) + if err == nil { + newEvent.HealthFailingStreak = FailingStreakInt + } + } newEvent.Details.ContainerInspectData = entry.Fields["PODMAN_CONTAINER_INSPECT_DATA"] case Network: newEvent.ID = entry.Fields["PODMAN_ID"] diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 95f20f2fd1bd..96740d2c132b 100644 --- a/libpod/healthcheck.go +++ 
b/libpod/healthcheck.go @@ -19,14 +19,6 @@ import ( "golang.org/x/sys/unix" ) -const ( - // MaxHealthCheckNumberLogs is the maximum number of attempts we keep - // in the healthcheck history file - MaxHealthCheckNumberLogs int = 5 - // MaxHealthCheckLogLength in characters - MaxHealthCheckLogLength = 500 -) - // HealthCheck verifies the state and validity of the healthcheck configuration // on the container and then executes the healthcheck func (r *Runtime) HealthCheck(ctx context.Context, name string) (define.HealthCheckStatus, error) { @@ -143,8 +135,8 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define. } eventLog := output.String() - if len(eventLog) > MaxHealthCheckLogLength { - eventLog = eventLog[:MaxHealthCheckLogLength] + if c.config.HealthMaxLogSize != 0 && len(eventLog) > int(c.config.HealthMaxLogSize) { + eventLog = eventLog[:c.config.HealthMaxLogSize] } if timeEnd.Sub(timeStart) > c.HealthCheckConfig().Timeout { @@ -154,21 +146,22 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define. } hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog) - logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup) + + healthCheckResult, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup) if err != nil { - return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err) + return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.config.HealthLogDestination, c.ID(), err) } // Write HC event with appropriate status as the last thing before we // return. 
if hcResult == define.HealthCheckNotDefined || hcResult == define.HealthCheckInternalError { - return hcResult, logStatus, hcErr + return hcResult, healthCheckResult.Status, hcErr } if c.runtime.config.Engine.HealthcheckEvents { - c.newContainerHealthCheckEvent(logStatus) + c.newContainerHealthCheckEvent(healthCheckResult) } - return hcResult, logStatus, hcErr + return hcResult, healthCheckResult.Status, hcErr } func (c *Container) processHealthCheckStatus(status string) error { @@ -340,16 +333,12 @@ func newHealthCheckLog(start, end time.Time, exitCode int, log string) define.He // updateHealthStatus updates the health status of the container // in the healthcheck log func (c *Container) updateHealthStatus(status string) error { - healthCheck, err := c.getHealthCheckLog() + healthCheck, err := c.readHealthCheckLog() if err != nil { return err } healthCheck.Status = status - newResults, err := json.Marshal(healthCheck) - if err != nil { - return fmt.Errorf("unable to marshall healthchecks for writing status: %w", err) - } - return os.WriteFile(c.healthCheckLogPath(), newResults, 0700) + return c.writeHealthCheckLog(healthCheck) } // isUnhealthy returns true if the current health check status is unhealthy. 
@@ -357,7 +346,7 @@ func (c *Container) isUnhealthy() (bool, error) { if !c.HasHealthCheck() { return false, nil } - healthCheck, err := c.getHealthCheckLog() + healthCheck, err := c.readHealthCheckLog() if err != nil { return false, err } @@ -365,7 +354,7 @@ func (c *Container) isUnhealthy() (bool, error) { } // UpdateHealthCheckLog parses the health check results and writes the log -func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (string, error) { +func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) { c.lock.Lock() defer c.lock.Unlock() @@ -373,12 +362,12 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio // both failing and succeeding cases to match kube behavior. // So don't update the health check log till the start period is over if _, ok := c.config.Spec.Annotations[define.KubeHealthCheckAnnotation]; ok && inStartPeriod && !isStartup { - return "", nil + return define.HealthCheckResults{}, nil } - healthCheck, err := c.getHealthCheckLog() + healthCheck, err := c.readHealthCheckLog() if err != nil { - return "", err + return define.HealthCheckResults{}, err } if hcl.ExitCode == 0 { // set status to healthy, reset failing state to 0 @@ -398,28 +387,48 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio } } healthCheck.Log = append(healthCheck.Log, hcl) - if len(healthCheck.Log) > MaxHealthCheckNumberLogs { + if c.config.HealthMaxLogCount != 0 && len(healthCheck.Log) > int(c.config.HealthMaxLogCount) { healthCheck.Log = healthCheck.Log[1:] } - newResults, err := json.Marshal(healthCheck) + return healthCheck, c.writeHealthCheckLog(healthCheck) +} + +func (c *Container) witeToFileHealthCheckResults(path string, result define.HealthCheckResults) error { + newResults, err := json.Marshal(result) if err != nil { - return "", fmt.Errorf("unable to marshall healthchecks 
for writing: %w", err) + return fmt.Errorf("unable to marshall healthchecks for writing: %w", err) + } + return os.WriteFile(path, newResults, 0700) +} + +func (c *Container) getHealthCheckLogDestination() string { + var destination string + switch c.config.HealthLogDestination { + case define.DefaultHealthCheckLocalDestination, define.HealthCheckEventsLoggerDestination, "": + destination = filepath.Join(filepath.Dir(c.state.RunDir), "healthcheck.log") + default: + destination = filepath.Join(c.config.HealthLogDestination, c.ID()+"-healthcheck.log") } - return healthCheck.Status, os.WriteFile(c.healthCheckLogPath(), newResults, 0700) + return destination +} + +func (c *Container) writeHealthCheckLog(result define.HealthCheckResults) error { + return c.witeToFileHealthCheckResults(c.getHealthCheckLogDestination(), result) } -// HealthCheckLogPath returns the path for where the health check log is -func (c *Container) healthCheckLogPath() string { - return filepath.Join(filepath.Dir(c.state.RunDir), "healthcheck.log") +// readHealthCheckLog read HealthCheck logs from the path or events_logger +// The caller should lock the container before this function is called. +func (c *Container) readHealthCheckLog() (define.HealthCheckResults, error) { + return c.readFromFileHealthCheckLog(c.getHealthCheckLogDestination()) } -// getHealthCheckLog returns HealthCheck results by reading the container's +// readFromFileHealthCheckLog returns HealthCheck results by reading the container's // health check log file. If the health check log file does not exist, then // an empty healthcheck struct is returned // The caller should lock the container before this function is called. 
-func (c *Container) getHealthCheckLog() (define.HealthCheckResults, error) { +func (c *Container) readFromFileHealthCheckLog(path string) (define.HealthCheckResults, error) { var healthCheck define.HealthCheckResults - b, err := os.ReadFile(c.healthCheckLogPath()) + b, err := os.ReadFile(path) if err != nil { if errors.Is(err, fs.ErrNotExist) { // If the file does not exists just return empty healthcheck and no error. @@ -428,7 +437,7 @@ func (c *Container) getHealthCheckLog() (define.HealthCheckResults, error) { return healthCheck, fmt.Errorf("failed to read health check log file: %w", err) } if err := json.Unmarshal(b, &healthCheck); err != nil { - return healthCheck, fmt.Errorf("failed to unmarshal existing healthcheck results in %s: %w", c.healthCheckLogPath(), err) + return healthCheck, fmt.Errorf("failed to unmarshal existing healthcheck results in %s: %w", path, err) } return healthCheck, nil } @@ -454,7 +463,7 @@ func (c *Container) healthCheckStatus() (string, error) { return "", err } - results, err := c.getHealthCheckLog() + results, err := c.readHealthCheckLog() if err != nil { return "", fmt.Errorf("unable to get healthcheck log for %s: %w", c.ID(), err) } diff --git a/libpod/options.go b/libpod/options.go index 9f30f2f3250a..ca9f99981003 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -6,6 +6,8 @@ import ( "errors" "fmt" "net" + "os" + "path/filepath" "strings" "syscall" "time" @@ -1500,6 +1502,57 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption } } +// WithHealthCheckLogDestination adds the healthLogDestination to the container config +func WithHealthCheckLogDestination(destination string) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + switch destination { + case define.HealthCheckEventsLoggerDestination, define.DefaultHealthCheckLocalDestination: + ctr.config.HealthLogDestination = destination + default: + fileInfo, err := 
os.Stat(destination) + if err != nil { + return fmt.Errorf("HealthCheck Log '%s' destination error: %w", destination, err) + } + mode := fileInfo.Mode() + if !mode.IsDir() { + return fmt.Errorf("HealthCheck Log '%s' destination must be directory", destination) + } + + absPath, err := filepath.Abs(destination) + if err != nil { + return err + } + ctr.config.HealthLogDestination = absPath + } + return nil + } +} + +// WithHealthCheckMaxLogCount adds the healthMaxLogCount to the container config +func WithHealthCheckMaxLogCount(maxLogCount uint) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + ctr.config.HealthMaxLogCount = maxLogCount + return nil + } +} + +// WithHealthCheckMaxLogSize adds the healthMaxLogSize to the container config +func WithHealthCheckMaxLogSize(maxLogSize uint) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + ctr.config.HealthMaxLogSize = maxLogSize + return nil + } +} + // WithHealthCheckOnFailureAction adds an on-failure action to health-check config func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption { return func(ctr *Container) error { diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index a3f26cc527f8..3551613a1f0e 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -576,7 +576,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai } if ctr.runtime.config.Engine.EventsContainerCreateInspectData { - if err := ctr.newContainerEventWithInspectData(events.Create, "", true); err != nil { + if err := ctr.newContainerEventWithInspectData(events.Create, define.HealthCheckResults{}, true); err != nil { return nil, err } } else { diff --git a/pkg/api/handlers/compat/containers_create.go b/pkg/api/handlers/compat/containers_create.go index 404c117bb66a..14b37804f31e 100644 --- a/pkg/api/handlers/compat/containers_create.go +++ 
b/pkg/api/handlers/compat/containers_create.go @@ -423,60 +423,63 @@ func cliOpts(cc handlers.CreateContainerConfig, rtc *config.Config) (*entities.C CPUSetMems: cc.HostConfig.CpusetMems, // Detach: false, // don't need // DetachKeys: "", // don't need - Devices: devices, - DeviceCgroupRule: cc.HostConfig.DeviceCgroupRules, - DeviceReadBPs: readBps, - DeviceReadIOPs: readIops, - DeviceWriteBPs: writeBps, - DeviceWriteIOPs: writeIops, - Entrypoint: entrypoint, - Env: cc.Config.Env, - Expose: expose, - GroupAdd: cc.HostConfig.GroupAdd, - Hostname: cc.Config.Hostname, - ImageVolume: "anonymous", - Init: init, - Interactive: cc.Config.OpenStdin, - IPC: string(cc.HostConfig.IpcMode), - Label: stringMaptoArray(cc.Config.Labels), - LogDriver: cc.HostConfig.LogConfig.Type, - LogOptions: stringMaptoArray(cc.HostConfig.LogConfig.Config), - Name: cc.Name, - OOMScoreAdj: &cc.HostConfig.OomScoreAdj, - Arch: "", - OS: "", - Variant: "", - PID: string(cc.HostConfig.PidMode), - PIDsLimit: cc.HostConfig.PidsLimit, - Privileged: cc.HostConfig.Privileged, - PublishAll: cc.HostConfig.PublishAllPorts, - Quiet: false, - ReadOnly: cc.HostConfig.ReadonlyRootfs, - ReadWriteTmpFS: true, // podman default - Rm: cc.HostConfig.AutoRemove, - Annotation: stringMaptoArray(cc.HostConfig.Annotations), - SecurityOpt: cc.HostConfig.SecurityOpt, - StopSignal: cc.Config.StopSignal, - StopTimeout: rtc.Engine.StopTimeout, // podman default - StorageOpts: stringMaptoArray(cc.HostConfig.StorageOpt), - Sysctl: stringMaptoArray(cc.HostConfig.Sysctls), - Systemd: "true", // podman default - TmpFS: parsedTmp, - TTY: cc.Config.Tty, - EnvMerge: cc.EnvMerge, - UnsetEnv: cc.UnsetEnv, - UnsetEnvAll: cc.UnsetEnvAll, - User: cc.Config.User, - UserNS: string(cc.HostConfig.UsernsMode), - UTS: string(cc.HostConfig.UTSMode), - Mount: mounts, - VolumesFrom: cc.HostConfig.VolumesFrom, - Workdir: cc.Config.WorkingDir, - Net: &netInfo, - HealthInterval: define.DefaultHealthCheckInterval, - HealthRetries: 
define.DefaultHealthCheckRetries, - HealthTimeout: define.DefaultHealthCheckTimeout, - HealthStartPeriod: define.DefaultHealthCheckStartPeriod, + Devices: devices, + DeviceCgroupRule: cc.HostConfig.DeviceCgroupRules, + DeviceReadBPs: readBps, + DeviceReadIOPs: readIops, + DeviceWriteBPs: writeBps, + DeviceWriteIOPs: writeIops, + Entrypoint: entrypoint, + Env: cc.Config.Env, + Expose: expose, + GroupAdd: cc.HostConfig.GroupAdd, + Hostname: cc.Config.Hostname, + ImageVolume: "anonymous", + Init: init, + Interactive: cc.Config.OpenStdin, + IPC: string(cc.HostConfig.IpcMode), + Label: stringMaptoArray(cc.Config.Labels), + LogDriver: cc.HostConfig.LogConfig.Type, + LogOptions: stringMaptoArray(cc.HostConfig.LogConfig.Config), + Name: cc.Name, + OOMScoreAdj: &cc.HostConfig.OomScoreAdj, + Arch: "", + OS: "", + Variant: "", + PID: string(cc.HostConfig.PidMode), + PIDsLimit: cc.HostConfig.PidsLimit, + Privileged: cc.HostConfig.Privileged, + PublishAll: cc.HostConfig.PublishAllPorts, + Quiet: false, + ReadOnly: cc.HostConfig.ReadonlyRootfs, + ReadWriteTmpFS: true, // podman default + Rm: cc.HostConfig.AutoRemove, + Annotation: stringMaptoArray(cc.HostConfig.Annotations), + SecurityOpt: cc.HostConfig.SecurityOpt, + StopSignal: cc.Config.StopSignal, + StopTimeout: rtc.Engine.StopTimeout, // podman default + StorageOpts: stringMaptoArray(cc.HostConfig.StorageOpt), + Sysctl: stringMaptoArray(cc.HostConfig.Sysctls), + Systemd: "true", // podman default + TmpFS: parsedTmp, + TTY: cc.Config.Tty, + EnvMerge: cc.EnvMerge, + UnsetEnv: cc.UnsetEnv, + UnsetEnvAll: cc.UnsetEnvAll, + User: cc.Config.User, + UserNS: string(cc.HostConfig.UsernsMode), + UTS: string(cc.HostConfig.UTSMode), + Mount: mounts, + VolumesFrom: cc.HostConfig.VolumesFrom, + Workdir: cc.Config.WorkingDir, + Net: &netInfo, + HealthInterval: define.DefaultHealthCheckInterval, + HealthRetries: define.DefaultHealthCheckRetries, + HealthTimeout: define.DefaultHealthCheckTimeout, + HealthStartPeriod: 
define.DefaultHealthCheckStartPeriod, + HealthLogDestination: define.DefaultHealthCheckLocalDestination, + HealthMaxLogCount: define.DefaultHealthMaxLogCount, + HealthMaxLogSize: define.DefaultHealthMaxLogSize, } if !rootless.IsRootless() { var ulimits []string diff --git a/pkg/domain/entities/pods.go b/pkg/domain/entities/pods.go index 63b88e3355ad..96c4a3bf665b 100644 --- a/pkg/domain/entities/pods.go +++ b/pkg/domain/entities/pods.go @@ -134,135 +134,138 @@ const ( ) type ContainerCreateOptions struct { - Annotation []string - Attach []string - Authfile string - BlkIOWeight string - BlkIOWeightDevice []string - CapAdd []string - CapDrop []string - CgroupNS string - CgroupsMode string - CgroupParent string `json:"cgroup_parent,omitempty"` - CIDFile string - ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"` - CPUPeriod uint64 - CPUQuota int64 - CPURTPeriod uint64 - CPURTRuntime int64 - CPUShares uint64 - CPUS float64 `json:"cpus,omitempty"` - CPUSetCPUs string `json:"cpuset_cpus,omitempty"` - CPUSetMems string - Devices []string `json:"devices,omitempty"` - DeviceCgroupRule []string - DeviceReadBPs []string `json:"device_read_bps,omitempty"` - DeviceReadIOPs []string - DeviceWriteBPs []string - DeviceWriteIOPs []string - Entrypoint *string `json:"container_command,omitempty"` - Env []string - EnvHost bool - EnvFile []string - Expose []string - GIDMap []string - GPUs []string - GroupAdd []string - HealthCmd string - HealthInterval string - HealthRetries uint - HealthStartPeriod string - HealthTimeout string - HealthOnFailure string - Hostname string `json:"hostname,omitempty"` - HTTPProxy bool - HostUsers []string - ImageVolume string - Init bool - InitContainerType string - InitPath string - IntelRdtClosID string - Interactive bool - IPC string - Label []string - LabelFile []string - LogDriver string - LogOptions []string - Memory string - MemoryReservation string - MemorySwap string - MemorySwappiness int64 - Name string `json:"container_name"` - 
NoHealthCheck bool - OOMKillDisable bool - OOMScoreAdj *int - Arch string - OS string - Variant string - PID string `json:"pid,omitempty"` - PIDsLimit *int64 - Platform string - Pod string - PodIDFile string - Personality string - PreserveFDs uint - PreserveFD []uint - Privileged bool - PublishAll bool - Pull string - Quiet bool - ReadOnly bool - ReadWriteTmpFS bool - Restart string - Replace bool - Requires []string - Retry *uint `json:"retry,omitempty"` - RetryDelay string `json:"retry_delay,omitempty"` - Rm bool - RootFS bool - Secrets []string - SecurityOpt []string `json:"security_opt,omitempty"` - SdNotifyMode string - ShmSize string - ShmSizeSystemd string - SignaturePolicy string - StartupHCCmd string - StartupHCInterval string - StartupHCRetries uint - StartupHCSuccesses uint - StartupHCTimeout string - StopSignal string - StopTimeout uint - StorageOpts []string - SubGIDName string - SubUIDName string - Sysctl []string `json:"sysctl,omitempty"` - Systemd string - Timeout uint - TLSVerify commonFlag.OptionalBool - TmpFS []string - TTY bool - Timezone string - Umask string - EnvMerge []string - UnsetEnv []string - UnsetEnvAll bool - UIDMap []string - Ulimit []string - User string - UserNS string `json:"-"` - UTS string - Mount []string - Volume []string `json:"volume,omitempty"` - VolumesFrom []string `json:"volumes_from,omitempty"` - Workdir string - SeccompPolicy string - PidFile string - ChrootDirs []string - IsInfra bool - IsClone bool - DecryptionKeys []string - Net *NetOptions `json:"net,omitempty"` + Annotation []string + Attach []string + Authfile string + BlkIOWeight string + BlkIOWeightDevice []string + CapAdd []string + CapDrop []string + CgroupNS string + CgroupsMode string + CgroupParent string `json:"cgroup_parent,omitempty"` + CIDFile string + ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"` + CPUPeriod uint64 + CPUQuota int64 + CPURTPeriod uint64 + CPURTRuntime int64 + CPUShares uint64 + CPUS float64 `json:"cpus,omitempty"` + 
CPUSetCPUs string `json:"cpuset_cpus,omitempty"` + CPUSetMems string + Devices []string `json:"devices,omitempty"` + DeviceCgroupRule []string + DeviceReadBPs []string `json:"device_read_bps,omitempty"` + DeviceReadIOPs []string + DeviceWriteBPs []string + DeviceWriteIOPs []string + Entrypoint *string `json:"container_command,omitempty"` + Env []string + EnvHost bool + EnvFile []string + Expose []string + GIDMap []string + GPUs []string + GroupAdd []string + HealthCmd string + HealthInterval string + HealthRetries uint + HealthLogDestination string + HealthMaxLogCount uint + HealthMaxLogSize uint + HealthStartPeriod string + HealthTimeout string + HealthOnFailure string + Hostname string `json:"hostname,omitempty"` + HTTPProxy bool + HostUsers []string + ImageVolume string + Init bool + InitContainerType string + InitPath string + IntelRdtClosID string + Interactive bool + IPC string + Label []string + LabelFile []string + LogDriver string + LogOptions []string + Memory string + MemoryReservation string + MemorySwap string + MemorySwappiness int64 + Name string `json:"container_name"` + NoHealthCheck bool + OOMKillDisable bool + OOMScoreAdj *int + Arch string + OS string + Variant string + PID string `json:"pid,omitempty"` + PIDsLimit *int64 + Platform string + Pod string + PodIDFile string + Personality string + PreserveFDs uint + PreserveFD []uint + Privileged bool + PublishAll bool + Pull string + Quiet bool + ReadOnly bool + ReadWriteTmpFS bool + Restart string + Replace bool + Requires []string + Retry *uint `json:"retry,omitempty"` + RetryDelay string `json:"retry_delay,omitempty"` + Rm bool + RootFS bool + Secrets []string + SecurityOpt []string `json:"security_opt,omitempty"` + SdNotifyMode string + ShmSize string + ShmSizeSystemd string + SignaturePolicy string + StartupHCCmd string + StartupHCInterval string + StartupHCRetries uint + StartupHCSuccesses uint + StartupHCTimeout string + StopSignal string + StopTimeout uint + StorageOpts []string + 
SubGIDName string + SubUIDName string + Sysctl []string `json:"sysctl,omitempty"` + Systemd string + Timeout uint + TLSVerify commonFlag.OptionalBool + TmpFS []string + TTY bool + Timezone string + Umask string + EnvMerge []string + UnsetEnv []string + UnsetEnvAll bool + UIDMap []string + Ulimit []string + User string + UserNS string `json:"-"` + UTS string + Mount []string + Volume []string `json:"volume,omitempty"` + VolumesFrom []string `json:"volumes_from,omitempty"` + Workdir string + SeccompPolicy string + PidFile string + ChrootDirs []string + IsInfra bool + IsClone bool + DecryptionKeys []string + Net *NetOptions `json:"net,omitempty"` CgroupConf []string diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 8d4029114bc1..6d0db35ea611 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -642,6 +642,12 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction)) } + if healthCheckSet { + options = append(options, libpod.WithHealthCheckLogDestination(s.ContainerHealthCheckConfig.HealthLogDestination)) + options = append(options, libpod.WithHealthCheckMaxLogCount(s.ContainerHealthCheckConfig.HealthMaxLogCount)) + options = append(options, libpod.WithHealthCheckMaxLogSize(s.ContainerHealthCheckConfig.HealthMaxLogSize)) + } + if s.SdNotifyMode == define.SdNotifyModeHealthy && !healthCheckSet { return nil, fmt.Errorf("%w: sdnotify policy %q requires a healthcheck to be set", define.ErrInvalidArg, s.SdNotifyMode) } diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go index ab4aeb7aec99..6b9108ee2460 100644 --- a/pkg/specgen/specgen.go +++ b/pkg/specgen/specgen.go @@ -599,6 +599,14 @@ type ContainerHealthCheckConfig struct { // Requires that HealthConfig be set. // Optional. 
StartupHealthConfig *define.StartupHealthCheck `json:"startupHealthConfig,omitempty"` + // HealthLogDestination defines the destination where the log is stored + HealthLogDestination string `json:"HealthLogDestination"` + // HealthMaxLogCount is the maximum number of attempts in the HealthCheck log file. + // ('0' value means an infinite number of attempts in the log file) + HealthMaxLogCount uint `json:"healthMaxLogCount,omitempty"` + // HealthMaxLogSize is the maximum length in characters of stored HealthCheck log + // ("0" value means an infinite log length) + HealthMaxLogSize uint `json:"healthMaxLogSize,omitempty"` } // SpecGenerator creates an OCI spec and Libpod configuration options to create diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go index 6cb1f154d5a9..0bc9b419a719 100644 --- a/pkg/specgenutil/specgen.go +++ b/pkg/specgenutil/specgen.go @@ -370,6 +370,12 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions } s.HealthCheckOnFailureAction = onFailureAction + s.HealthLogDestination = c.HealthLogDestination + + s.HealthMaxLogCount = c.HealthMaxLogCount + + s.HealthMaxLogSize = c.HealthMaxLogSize + if c.StartupHCCmd != "" { if c.NoHealthCheck { return errors.New("cannot specify both --no-healthcheck and --health-startup-cmd") diff --git a/pkg/systemd/quadlet/quadlet.go b/pkg/systemd/quadlet/quadlet.go index 092256ec3579..ca3773da7086 100644 --- a/pkg/systemd/quadlet/quadlet.go +++ b/pkg/systemd/quadlet/quadlet.go @@ -93,6 +93,9 @@ const ( KeyGroupAdd = "GroupAdd" KeyHealthCmd = "HealthCmd" KeyHealthInterval = "HealthInterval" + KeyHealthLogDestination = "HealthLogDestination" + KeyHealthMaxLogCount = "HealthMaxLogCount" + KeyHealthMaxLogSize = "HealthMaxLogSize" KeyHealthOnFailure = "HealthOnFailure" KeyHealthRetries = "HealthRetries" KeyHealthStartPeriod = "HealthStartPeriod" @@ -214,6 +217,9 @@ var ( KeyHealthCmd: true, KeyHealthInterval: true, KeyHealthOnFailure: true, + KeyHealthLogDestination: true, +
KeyHealthMaxLogCount: true, + KeyHealthMaxLogSize: true, KeyHealthRetries: true, KeyHealthStartPeriod: true, KeyHealthStartupCmd: true, @@ -2065,6 +2071,9 @@ func handleHealth(unitFile *parser.UnitFile, groupName string, podman *PodmanCmd {KeyHealthCmd, "cmd"}, {KeyHealthInterval, "interval"}, {KeyHealthOnFailure, "on-failure"}, + {KeyHealthLogDestination, "log-destination"}, + {KeyHealthMaxLogCount, "max-log-count"}, + {KeyHealthMaxLogSize, "max-log-size"}, {KeyHealthRetries, "retries"}, {KeyHealthStartPeriod, "start-period"}, {KeyHealthTimeout, "timeout"}, diff --git a/test/e2e/healthcheck_run_test.go b/test/e2e/healthcheck_run_test.go index b65419569fcf..e5eae2c9801f 100644 --- a/test/e2e/healthcheck_run_test.go +++ b/test/e2e/healthcheck_run_test.go @@ -7,10 +7,12 @@ import ( "os" "path/filepath" "strconv" + "strings" "time" "github.com/containers/podman/v5/libpod/define" . "github.com/containers/podman/v5/test/utils" + jsoniter "github.com/json-iterator/go" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) @@ -390,4 +392,197 @@ HEALTHCHECK CMD ls -l / 2>&1`, ALPINE) Expect(ps.OutputToStringArray()).To(HaveLen(2)) Expect(ps.OutputToString()).To(ContainSubstring("hc")) }) + + It("Healthcheck with max default value (5) of last execution log", func() { + countOfExecutions := 10 + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo hello", ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + for i := 0; i < countOfExecutions; i++ { + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + } + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(5)) + }) + + It("Healthcheck with max infinite value (0) of last execution log", func() { + countOfExecutions := 12 + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo hello", "--health-max-log-count", "0", ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + for i := 0; i < countOfExecutions; i++ { + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + } + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(countOfExecutions)) + }) + + It("Healthcheck with max 10 last execution log", func() { + countOfExecutions := 10 + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo hello", "--health-max-log-count", strconv.Itoa(countOfExecutions), ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + for i := 0; i < countOfExecutions; i++ { + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + } + + 
inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(countOfExecutions)) + }) + + It("Healthcheck with max 10 char len of log", func() { + longMsg := strings.Repeat("Hello word from healthcheck. Loooooong msg.", 3) + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + longMsg + "'", "--health-max-log-size", "10", ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + log := inspect[0].State.Health.Log[0] + Expect(log.Output).To(HaveLen(10)) + }) + + It("Healthcheck with max infinite value (0) char len of log", func() { + longMsg := strings.Repeat("Hello word from healthcheck. Loooooong msg.", 500) + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + longMsg + "'", "--health-max-log-size", "0", ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + log := inspect[0].State.Health.Log[0] + Expect(log.Output).To(HaveLen(len(longMsg + "\n"))) + }) + + It("Healthcheck with max default value (500) char len of log", func() { + longMsg := strings.Repeat("Hello word from healthcheck. 
Loooooong msg.", 50) + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + longMsg + "'", ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + log := inspect[0].State.Health.Log[0] + Expect(log.Output).To(HaveLen(500)) + }) + + It("Healthcheck with specific log directory destination", func() { + logPath, err := os.MkdirTemp(os.TempDir(), "healthcheckLog") + Expect(err).ShouldNot(HaveOccurred()) + msg := "Hello world" + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + msg + "'", "--health-log-destination", logPath, ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy)) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + Expect(inspect[0].State.Health.Log[0]).To(HaveField("Output", msg+"\n")) + + b, err := os.ReadFile(filepath.Join(logPath, inspect[0].ID+"-healthcheck.log")) + Expect(err).ShouldNot(HaveOccurred()) + + var healthCheckResults define.HealthCheckResults + err = jsoniter.Unmarshal(b, &healthCheckResults) + Expect(err).ShouldNot(HaveOccurred()) + Expect(healthCheckResults).To(HaveField("Status", define.HealthCheckHealthy)) + Expect(healthCheckResults.Log).To(HaveLen(1)) + Expect(healthCheckResults.Log[0]).To(HaveField("Output", msg+"\n")) + }) + + It("Healthcheck with specific log directory destination but log file has been removed", func() { + logPath, 
err := os.MkdirTemp(os.TempDir(), "healthcheckLog") + Expect(err).ShouldNot(HaveOccurred()) + msg := "Remove log file lol." + ctrName := "hc" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + msg + "'", "--health-log-destination", logPath, ALPINE, "top"}) + ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy)) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + Expect(inspect[0].State.Health.Log[0]).To(HaveField("Output", msg+"\n")) + + logFilePath := filepath.Join(logPath, inspect[0].ID+"-healthcheck.log") + + err = os.Remove(logFilePath) + Expect(err).ShouldNot(HaveOccurred()) + + hc = podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + b, err := os.ReadFile(logFilePath) + Expect(err).ShouldNot(HaveOccurred()) + + var healthCheckResults define.HealthCheckResults + err = jsoniter.Unmarshal(b, &healthCheckResults) + Expect(err).ShouldNot(HaveOccurred()) + Expect(healthCheckResults).To(HaveField("Status", define.HealthCheckHealthy)) + Expect(healthCheckResults.Log).To(HaveLen(1)) + Expect(healthCheckResults.Log[0]).To(HaveField("Output", msg+"\n")) + + err = os.Remove(logFilePath) + Expect(err).ShouldNot(HaveOccurred()) + + inspect = podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health).To(HaveField("Status", "")) + }) + + It("Healthcheck with log event_logger destination", func() { + SkipIfJournaldUnavailable() + ctrName := "hcCtr" + msg := "Bye World" + ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo '" + msg + "'", "--health-log-destination", "events_logger", ALPINE, "top"}) + 
ctrRun.WaitWithDefaultTimeout() + Expect(ctrRun).Should(ExitCleanly()) + + hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName}) + hc.WaitWithDefaultTimeout() + Expect(hc).Should(ExitCleanly()) + + inspect := podmanTest.InspectContainer(ctrName) + Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy)) + Expect(inspect[0].State.Health.Log).To(HaveLen(1)) + Expect(inspect[0].State.Health.Log[0]).To(HaveField("Output", msg+"\n")) + }) }) diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 1cf1f9c0b325..32327f76294a 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -273,4 +273,215 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\" done } +@test "podman healthcheck --health-max-log-count default value (5)" { + local repeat_count=10 + local msg="Hello, How are you?" + local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $msg" \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogCount}}" + is "$output" "5" "HealthMaxLogCount is set to 5" + + for _ in $(seq 1 $repeat_count); + do + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + done + + + run_podman inspect $ctrname --format "{{.State.Health.Log}}" + count=$(grep -o "$msg" <<< "$output" | wc -l) + assert "$count" -eq 5 + + run_podman rm -t 0 -f $ctrname +} + +@test "podman healthcheck --health-max-log-count infinite value (0)" { + local repeat_count=10 + local msg="Hello, How are you?" + local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $msg" \ + --health-max-log-count 0 \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogCount}}" + is "$output" "0" "HealthMaxLogCount is set to 0" + + # This is run 11 times to check that no cap is applied (0 means unlimited).
+ for _ in $(seq 0 $repeat_count); + do + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + done + + # sys podman tests executes healthcheck twice the first time healthcheck is run + run_podman inspect $ctrname --format "{{.State.Health.Log}}" + count=$(grep -o "$msg" <<< "$output" | wc -l) + assert "$count" -ge 11 + + run_podman rm -t 0 -f $ctrname +} + + +@test "podman healthcheck --health-max-log-count 10" { + local repeat_count=10 + local msg="Hello, How are you?" + local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $msg" \ + --health-max-log-count $repeat_count\ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogCount}}" + is "$output" "10" "HealthMaxLogCount is set to 10" + + # This is run 11 times to check that the cap is working. + for _ in $(seq 0 $repeat_count); + do + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + done + + + run_podman inspect $ctrname --format "{{.State.Health.Log}}" + count=$(grep -o "$msg" <<< "$output" | wc -l) + assert "$count" -eq $repeat_count + + run_podman rm -t 0 -f $ctrname +} + +@test "podman healthcheck --health-max-log-size 10" { + local msg="Hello, How are you?" 
+ local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $msg" \ + --health-max-log-size 10 \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogSize}}" + is "$output" "10" "HealthMaxLogSize is set to 10" + + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + + run_podman inspect $ctrname --format "{{.State.Health.Log}}" + count=$(grep -o "Hello, How}]$" <<< "$output" | wc -l) + assert "$count" -eq 1 + + run_podman rm -t 0 -f $ctrname +} + +@test "podman healthcheck --health-max-log-size infinite value (0)" { + local s=$(printf "%1000s") + local long_msg=${s// /Hello, How are you?} + local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $long_msg" \ + --health-max-log-size 0 \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogSize}}" + is "$output" "0" "HealthMaxLogSize is set to 0" + + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + + run_podman inspect $ctrname --format "{{.State.Health.Log}}" + # sys podman tests executes healthcheck twice the first time healthcheck is run + count=$(grep -o "$long_msg" <<< "$output" | wc -l) + assert "$count" -ge 1 + + run_podman rm -t 0 -f $ctrname +} + +@test "podman healthcheck --health-max-log-size default value (500)" { + local s=$(printf "%1000s") + local long_msg=${s// /Hello, How are you?} + local ctrname="c-h-$(safename)" + run_podman run -d --name $ctrname \ + --health-cmd "echo $long_msg" \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthMaxLogSize}}" + is "$output" "500" "HealthMaxLogSize is set to 500" + + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + + local expect_msg="${long_msg:0:500}" + run_podman inspect $ctrname --format 
"{{.State.Health.Log}}" + # sys podman tests executes healthcheck twice the first time healthcheck is run + count=$(grep -o "$expect_msg" <<< "$output" | wc -l) + assert "$count" -ge 1 + + run_podman rm -t 0 -f $ctrname +} + + +@test "podman healthcheck --health-log-destination file" { + local TMP_DIR_HEALTHCHECK="$BATS_FILE_TMPDIR/healthcheck" + mkdir $TMP_DIR_HEALTHCHECK + local ctrname="c-h-$(safename)" + local msg="Hello, How are you?" + run_podman run -d --name $ctrname \ + --health-cmd "echo $msg" \ + --health-log-destination $TMP_DIR_HEALTHCHECK \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthLogDestination}}" + is "$output" "$TMP_DIR_HEALTHCHECK" "HealthLogDestination is set to destination" + + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + + healthcheck_log_path="${TMP_DIR_HEALTHCHECK}/${cid}-healthcheck.log" + run cat $healthcheck_log_path + # sys podman tests executes healthcheck twice the first time healthcheck is run + count=$(grep -o "$msg" <<< "$output" | wc -l) + assert "$count" -ge 1 + + run_podman rm -t 0 -f $ctrname +} + + +@test "podman healthcheck --health-log-destination journal" { + skip_if_remote "We cannot read journalctl over remote." + + # We can't use journald on RHEL as rootless, either: rhbz#1895105 + skip_if_journald_unavailable + + local ctrname="c-h-$(safename)" + local pidfile="${PODMAN_TMPDIR}/$(random_string 20)" + local msg="Hello $(random_string 20), How are you?" 
+ run_podman run -d --name $ctrname \ + --conmon-pidfile $pidfile \ + --health-cmd "echo $msg" \ + --health-log-destination events_logger \ + $IMAGE /home/podman/pause + cid="$output" + + run_podman inspect $ctrname --format "{{.Config.HealthLogDestination}}" + is "$output" "events_logger" "HealthLogDestination is set to journal" + + run_podman healthcheck run $ctrname + is "$output" "" "output from 'podman healthcheck run'" + + run journalctl --output cat --output-fields=PODMAN_HEALTH_LOG PODMAN_ID=$cid + # sys podman tests executes healthcheck twice the first time healthcheck is run + count=$(grep -o "$msg" <<< "$output" | wc -l) + assert "$count" -ge 1 + + run_podman rm -t 0 -f $ctrname +} + # vim: filetype=sh