diff --git a/managed/cmd/pmm-managed/main.go b/managed/cmd/pmm-managed/main.go
index 0dbd6d78755..714ed0d0a8a 100644
--- a/managed/cmd/pmm-managed/main.go
+++ b/managed/cmd/pmm-managed/main.go
@@ -701,6 +701,32 @@ func main() { //nolint:maintidx,cyclop
 
 	watchtowerHostF := kingpin.Flag("watchtower-host", "Watchtower host").Default("http://watchtower:8080").Envar("PMM_WATCHTOWER_HOST").URL()
 
+	// Nomad garbage collection flags
+	nomadGCIntervalF := kingpin.Flag("nomad-gc-interval", "Interval at which Nomad attempts to garbage collect terminal allocation directories.").
+		Default("1m").
+		Envar("PMM_NOMAD_GC_INTERVAL").
+		Duration()
+	nomadGCDiskUsageThresholdF := kingpin.Flag("nomad-gc-disk-usage-threshold",
+		"Disk usage percent which Nomad tries to maintain by garbage collecting terminal allocations.").
+		Default("80").
+		Envar("PMM_NOMAD_GC_DISK_USAGE_THRESHOLD").
+		Int()
+	nomadGCInodeUsageThresholdF := kingpin.Flag("nomad-gc-inode-usage-threshold",
+		"Inode usage percent which Nomad tries to maintain by garbage collecting terminal allocations.").
+		Default("70").
+		Envar("PMM_NOMAD_GC_INODE_USAGE_THRESHOLD").
+		Int()
+	nomadGCMaxAllocsF := kingpin.Flag("nomad-gc-max-allocs",
+		"Maximum number of allocations which a client will track before triggering a garbage collection of terminal allocations.").
+		Default("50").
+		Envar("PMM_NOMAD_GC_MAX_ALLOCS").
+		Int()
+	nomadGCParallelDestroysF := kingpin.Flag("nomad-gc-parallel-destroys",
+		"Maximum number of parallel destroys allowed by the garbage collector.").
+		Default("2").
+		Envar("PMM_NOMAD_GC_PARALLEL_DESTROYS").
+		Int()
+
 	kingpin.Parse()
 
 	logger.SetupGlobalLogger()
@@ -915,7 +941,15 @@ func main() { //nolint:maintidx,cyclop
 
 	grafanaClient := grafana.NewClient(*grafanaAddrF)
 	prom.MustRegister(grafanaClient)
-	nomad, err := nomad.New(db)
+
+	nomadClientConfig := models.NomadClient{
+		GCInterval:            *nomadGCIntervalF,
+		GCDiskUsageThreshold:  *nomadGCDiskUsageThresholdF,
+		GCInodeUsageThreshold: *nomadGCInodeUsageThresholdF,
+		GCMaxAllocs:           *nomadGCMaxAllocsF,
+		GCParallelDestroys:    *nomadGCParallelDestroysF,
+	}
+	nomad, err := nomad.New(db, nomadClientConfig)
 	if err != nil {
 		l.Fatalf("Could not create Nomad client: %s", err)
 	}
diff --git a/managed/models/nomad.go b/managed/models/nomad.go
new file mode 100644
index 00000000000..3863e262b7e
--- /dev/null
+++ b/managed/models/nomad.go
@@ -0,0 +1,33 @@
+// Copyright (C) 2023 Percona LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+// Package models contains nomad structs and helpers.
+package models
+
+import "time"
+
+// NomadClient represents configuration options for Nomad clients.
+type NomadClient struct {
+	// GCInterval is the interval at which Nomad attempts to garbage collect terminal allocation directories.
+	GCInterval time.Duration
+	// GCDiskUsageThreshold is the disk usage percent which Nomad tries to maintain by garbage collecting terminal allocations.
+	GCDiskUsageThreshold int
+	// GCInodeUsageThreshold is the inode usage percent which Nomad tries to maintain by garbage collecting terminal allocations.
+	GCInodeUsageThreshold int
+	// GCMaxAllocs is the maximum number of allocations which a client will track before triggering a garbage collection of terminal allocations.
+	GCMaxAllocs int
+	// GCParallelDestroys is the maximum number of parallel destroys allowed by the garbage collector.
+	GCParallelDestroys int
+}
diff --git a/managed/services/agents/deps.go b/managed/services/agents/deps.go
index becee5c1286..15d9f63ba83 100644
--- a/managed/services/agents/deps.go
+++ b/managed/services/agents/deps.go
@@ -23,6 +23,7 @@ import (
 
 	agentv1 "github.com/percona/pmm/api/agent/v1"
 	qanv1 "github.com/percona/pmm/api/qan/v1"
+	"github.com/percona/pmm/managed/models"
 )
 
 // prometheusService is a subset of methods of victoriametrics.Service used by this package.
@@ -69,4 +70,5 @@ type nomad interface {
 	GetCACert() (string, error)
 	GetClientCert() (string, error)
 	GetClientKey() (string, error)
+	GetClientConfig() models.NomadClient
 }
diff --git a/managed/services/agents/nomad.go b/managed/services/agents/nomad.go
index c1c59682aef..94fe603712b 100644
--- a/managed/services/agents/nomad.go
+++ b/managed/services/agents/nomad.go
@@ -69,6 +69,13 @@ client {
     "driver.allowlist" = "raw_exec"
   }
 
+  # Garbage collection settings
+  gc_interval = "{{ .GCInterval }}"
+  gc_disk_usage_threshold = {{ .GCDiskUsageThreshold }}
+  gc_inode_usage_threshold = {{ .GCInodeUsageThreshold }}
+  gc_max_allocs = {{ .GCMaxAllocs }}
+  gc_parallel_destroys = {{ .GCParallelDestroys }}
+
   # optional labels assigned to Nomad Client, can be the same as PMM Agent's.
   meta {
     pmm-agent = "1"
@@ -110,7 +117,7 @@ func nomadClientConfig(n nomad, node *models.Node, exporter *models.Agent) (*age
 
 	tdp := models.TemplateDelimsPair()
 
-	config, err := generateNomadAgentConfig(node, exporter, tdp)
+	config, err := generateNomadAgentConfig(node, exporter, tdp, n.GetClientConfig())
 	if err != nil {
 		return nil, err
 	}
@@ -143,7 +150,7 @@ func nomadClientConfig(n nomad, node *models.Node, exporter *models.Agent) (*age
 	return params, nil
 }
 
-func generateNomadAgentConfig(node *models.Node, exporter *models.Agent, tdp models.DelimiterPair) (string, error) {
+func generateNomadAgentConfig(node *models.Node, exporter *models.Agent, tdp models.DelimiterPair, clientConfig models.NomadClient) (string, error) {
 	logLevel := "info"
 	if exporter.LogLevel != nil {
 		logLevel = *exporter.LogLevel
@@ -154,17 +161,22 @@ func generateNomadAgentConfig(node *models.Node, exporter *models.Agent, tdp mod
 	}
 
 	nomadConfigParams := map[string]interface{}{
-		"NodeName":         node.NodeName,
-		"NodeID":           node.NodeID,
-		"Labels":           labels,
-		"PMMServerAddress": tdp.Left + " .server_host " + tdp.Right + ":4647",
-		"NodeAddress":      node.Address,
-		"CaFile":           tdp.Left + " .TextFiles.caCert " + tdp.Right,
-		"CertFile":         tdp.Left + " .TextFiles.certFile " + tdp.Right,
-		"KeyFile":          tdp.Left + " .TextFiles.keyFile " + tdp.Right,
-		"DataDir":          tdp.Left + " .nomad_data_dir " + tdp.Right,
-		"listen_port":      tdp.Left + " .listen_port " + tdp.Right,
-		"LogLevel":         strings.ToUpper(logLevel),
+		"NodeName":              node.NodeName,
+		"NodeID":                node.NodeID,
+		"Labels":                labels,
+		"PMMServerAddress":      tdp.Left + " .server_host " + tdp.Right + ":4647",
+		"NodeAddress":           node.Address,
+		"CaFile":                tdp.Left + " .TextFiles.caCert " + tdp.Right,
+		"CertFile":              tdp.Left + " .TextFiles.certFile " + tdp.Right,
+		"KeyFile":               tdp.Left + " .TextFiles.keyFile " + tdp.Right,
+		"DataDir":               tdp.Left + " .nomad_data_dir " + tdp.Right,
+		"listen_port":           tdp.Left + " .listen_port " + tdp.Right,
+		"LogLevel":              strings.ToUpper(logLevel),
+		"GCDiskUsageThreshold":  clientConfig.GCDiskUsageThreshold,
+		"GCInodeUsageThreshold": clientConfig.GCInodeUsageThreshold,
+		"GCInterval":            clientConfig.GCInterval,
+		"GCMaxAllocs":           clientConfig.GCMaxAllocs,
+		"GCParallelDestroys":    clientConfig.GCParallelDestroys,
 	}
 
 	var configBuffer bytes.Buffer
diff --git a/managed/services/agents/nomad_test.go b/managed/services/agents/nomad_test.go
index fcab65371f3..83909e7a2b7 100644
--- a/managed/services/agents/nomad_test.go
+++ b/managed/services/agents/nomad_test.go
@@ -17,6 +17,7 @@ package agents
 
 import (
 	"testing"
+	"time"
 
 	"github.com/AlekSi/pointer"
 	"github.com/stretchr/testify/assert"
@@ -38,7 +39,13 @@ func TestGenerateNomadAgentConfig(t *testing.T) {
 		LogLevel: pointer.To("debug"),
 	}
 	tdp := models.TemplateDelimsPair()
-	config, err := generateNomadAgentConfig(node, agent, tdp)
+	config, err := generateNomadAgentConfig(node, agent, tdp, models.NomadClient{
+		GCInterval:            time.Minute,
+		GCDiskUsageThreshold:  20,
+		GCInodeUsageThreshold: 30,
+		GCMaxAllocs:           40,
+		GCParallelDestroys:    50,
+	})
 	require.NoError(t, err)
 	expected := `log_level = "DEBUG"
 
@@ -82,6 +89,13 @@ client {
     "driver.allowlist" = "raw_exec"
   }
 
+  # Garbage collection settings
+  gc_interval = "1m0s"
+  gc_disk_usage_threshold = 20
+  gc_inode_usage_threshold = 30
+  gc_max_allocs = 40
+  gc_parallel_destroys = 50
+
   # optional labels assigned to Nomad Client, can be the same as PMM Agent's.
   meta {
     pmm-agent = "1"
diff --git a/managed/services/nomad/nomad.go b/managed/services/nomad/nomad.go
index c1c83a1a4d6..1c61765c85f 100644
--- a/managed/services/nomad/nomad.go
+++ b/managed/services/nomad/nomad.go
@@ -56,18 +56,21 @@ type Nomad struct {
 
 	prefix           string
 	cachedPMMAddress string
+	clientConfig     models.NomadClient
 }
 
 // New creates a new Nomad client.
-func New(db *reform.DB) (*Nomad, error) {
+// clientConfig is taken and stored by value, so later changes made by the caller do not affect the client.
+func New(db *reform.DB, clientConfig models.NomadClient) (*Nomad, error) {
 	err := os.MkdirAll(pathToCerts, 0o750) //nolint:mnd
 	if err != nil {
 		return nil, err
 	}
 	return &Nomad{
-		db:     db,
-		l:      logrus.WithField("component", "nomad"),
-		prefix: "nomad",
+		db:           db,
+		l:            logrus.WithField("component", "nomad"),
+		prefix:       "nomad",
+		clientConfig: clientConfig,
 	}, nil
 }
 
@@ -264,3 +267,8 @@ func (c *Nomad) GetClientKey() (string, error) {
 	}
 	return string(file), nil
 }
+
+// GetClientConfig returns the Nomad client configuration.
+func (c *Nomad) GetClientConfig() models.NomadClient {
+	return c.clientConfig
+}