Add support for sending down service Running and Desired task counts

dperny · dperny · commit f36042d259fb · 2019-10-14T10:43:00.000-05:00
Adds a new ServiceStatus field to the Service object, which includes the
running and desired task counts. This new field is gated behind a
"status" query parameter.

Signed-off-by: Drew Erny &lt;drew.erny@docker.com&gt;
diff --git a/api/server/router/swarm/cluster_routes.go b/api/server/router/swarm/cluster_routes.go
@@ -167,7 +167,19 @@ func (sr *swarmRouter) getServices(ctx context.Context, w http.ResponseWriter, r
 		return errdefs.InvalidParameter(err)
 	}
 
-	services, err := sr.backend.GetServices(basictypes.ServiceListOptions{Filters: filter})
+	// the status query parameter is only support in API versions >= 1.41. If
+	// the client is using a lesser version, ignore the parameter.
+	cliVersion := httputils.VersionFromContext(ctx)
+	var status bool
+	if value := r.URL.Query().Get("status"); value != "" && !versions.LessThan(cliVersion, "1.41") {
+		var err error
+		status, err = strconv.ParseBool(value)
+		if err != nil {
+			return errors.Wrapf(errdefs.InvalidParameter(err), "invalid value for status: %s", value)
+		}
+	}
+
+	services, err := sr.backend.GetServices(basictypes.ServiceListOptions{Filters: filter, Status: status})
 	if err != nil {
 		logrus.Errorf("Error getting services: %v", err)
 		return err
@@ -178,15 +190,21 @@ func (sr *swarmRouter) getServices(ctx context.Context, w http.ResponseWriter, r
 
 func (sr *swarmRouter) getService(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
 	var insertDefaults bool
+
 	if value := r.URL.Query().Get("insertDefaults"); value != "" {
 		var err error
 		insertDefaults, err = strconv.ParseBool(value)
 		if err != nil {
-			err := fmt.Errorf("invalid value for insertDefaults: %s", value)
 			return errors.Wrapf(errdefs.InvalidParameter(err), "invalid value for insertDefaults: %s", value)
 		}
 	}
 
+	// you may note that there is no code here to handle the "status" query
+	// parameter, as in getServices. the Status field is not supported when
+	// retrieving an individual service because the Backend API changes
+	// required to accommodate it would be too disruptive, and because that
+	// field is so rarely needed as part of an individual service inspection.
+
 	service, err := sr.backend.GetService(vars["id"], insertDefaults)
 	if err != nil {
 		logrus.Errorf("Error getting service %s: %v", vars["id"], err)
diff --git a/api/swagger.yaml b/api/swagger.yaml
@@ -3369,6 +3369,27 @@ definitions:
             format: "dateTime"
           Message:
             type: "string"
+      ServiceStatus:
+        description: |
+          The status of the service's tasks. Provided only when requested as
+          part of a ServiceList operation.
+        type: "object"
+        properties:
+          RunningTasks:
+            description: "The number of tasks for the service currently in the Running state"
+            type: "integer"
+            format: "uint64"
+            example: 7
+          DesiredTasks:
+            description: |
+              The number of tasks for the service desired to be running.
+              For replicated services, this is the replica count from the
+              service spec. For global services, this is computed by taking
+              count of all tasks for the service with a Desired State other
+              than Shutdown.
+            type: "integer"
+            format: "uint64"
+            example: 10
     example:
       ID: "9mnpnzenvg8p8tdbtq4wvbkcz"
       Version:
@@ -9316,6 +9337,10 @@ paths:
             - `label=<service label>`
             - `mode=["replicated"|"global"]`
             - `name=<service name>`
+        - name: "status"
+          in: "query"
+          type: "boolean"
+          description: "Include service status, with count of running and desired tasks"
       tags: ["Service"]
   /services/create:
     post:
diff --git a/api/types/client.go b/api/types/client.go
@@ -363,6 +363,10 @@ type ServiceUpdateOptions struct {
 // ServiceListOptions holds parameters to list services with.
 type ServiceListOptions struct {
 	Filters filters.Args
+
+	// Status indicates whether the server should include the service task
+	// count of running and desired tasks.
+	Status bool
 }
 
 // ServiceInspectOptions holds parameters related to the "service inspect"
diff --git a/api/types/swarm/service.go b/api/types/swarm/service.go
@@ -10,6 +10,13 @@ type Service struct {
 	PreviousSpec *ServiceSpec  `json:",omitempty"`
 	Endpoint     Endpoint      `json:",omitempty"`
 	UpdateStatus *UpdateStatus `json:",omitempty"`
+
+	// ServiceStatus is an optional, extra field indicating the number of
+	// desired and running tasks. It is provided primarily as a shortcut to
+	// calculating these values client-side, which otherwise would require
+	// listing all tasks for a service, an operation that could be
+	// computation and network expensive.
+	ServiceStatus *ServiceStatus `json:",omitempty"`
 }
 
 // ServiceSpec represents the spec of a service.
@@ -122,3 +129,17 @@ type UpdateConfig struct {
 	// started, or the new task is started before the old task is shut down.
 	Order string
 }
+
+// ServiceStatus represents the number of running tasks in a service and the
+// number of tasks desired to be running.
+type ServiceStatus struct {
+	// RunningTasks is the number of tasks for the service actually in the
+	// Running state
+	RunningTasks uint64
+
+	// DesiredTasks is the number of tasks desired to be running by the
+	// service. For replicated services, this is the replica count. For global
+	// services, this is computed by taking the number of tasks with desired
+	// state of not-Shutdown.
+	DesiredTasks uint64
+}
diff --git a/client/service_list.go b/client/service_list.go
@@ -23,6 +23,10 @@ func (cli *Client) ServiceList(ctx context.Context, options types.ServiceListOpt
 		query.Set("filters", filterJSON)
 	}
 
+	if options.Status {
+		query.Set("status", "true")
+	}
+
 	resp, err := cli.get(ctx, "/services", query, nil)
 	defer ensureReaderClosed(resp)
 	if err != nil {
diff --git a/daemon/cluster/services.go b/daemon/cluster/services.go
@@ -77,6 +77,12 @@ func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Serv
 
 	services := make([]types.Service, 0, len(r.Services))
 
+	// if the  user requests the service statuses, we'll store the IDs needed
+	// in this slice
+	var serviceIDs []string
+	if options.Status {
+		serviceIDs = make([]string, 0, len(r.Services))
+	}
 	for _, service := range r.Services {
 		if options.Filters.Contains("mode") {
 			var mode string
@@ -91,13 +97,59 @@ func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Serv
 				continue
 			}
 		}
+		if options.Status {
+			serviceIDs = append(serviceIDs, service.ID)
+		}
 		svcs, err := convert.ServiceFromGRPC(*service)
 		if err != nil {
 			return nil, err
 		}
 		services = append(services, svcs)
 	}
 
+	if options.Status {
+		// Listing service statuses is a separate call because, while it is the
+		// most common UI operation, it is still just a UI operation, and it
+		// would be improper to include this data in swarm's Service object.
+		// We pay the cost with some complexity here, but this is still way
+		// more efficient than marshalling and unmarshalling all the JSON
+		// needed to list tasks and get this data otherwise client-side
+		resp, err := state.controlClient.ListServiceStatuses(
+			ctx,
+			&swarmapi.ListServiceStatusesRequest{Services: serviceIDs},
+			grpc.MaxCallRecvMsgSize(defaultRecvSizeForListResponse),
+		)
+		if err != nil {
+			return nil, err
+		}
+
+		// we'll need to match up statuses in the response with the services in
+		// the list operation. if we did this by operating on two lists, the
+		// result would be quadratic. instead, make a mapping of service IDs to
+		// service statuses so that this is roughly linear. additionally,
+		// convert the status response to an engine api service status here.
+		serviceMap := map[string]*types.ServiceStatus{}
+		for _, status := range resp.Statuses {
+			serviceMap[status.ServiceID] = &types.ServiceStatus{
+				RunningTasks: status.RunningTasks,
+				DesiredTasks: status.DesiredTasks,
+			}
+		}
+
+		// because this is a list of values and not pointers, make sure we
+		// actually alter the value when iterating.
+		for i, service := range services {
+			// the return value of the ListServiceStatuses operation is
+			// guaranteed to contain a value in the response for every argument
+			// in the request, so we can safely do this assignment. and even if
+			// it wasn't, and the service ID was for some reason absent from
+			// this map, the resulting value of service.Status would just be
+			// nil -- the same thing it was before
+			service.ServiceStatus = serviceMap[service.ID]
+			services[i] = service
+		}
+	}
+
 	return services, nil
 }
 
diff --git a/docs/api/version-history.md b/docs/api/version-history.md
@@ -31,6 +31,9 @@ keywords: "API, Docker, rcli, REST, documentation"
 * `GET /info` now  returns an `OSVersion` field, containing the operating system's
   version. This change is not versioned, and affects all API versions if the daemon
   has this patch.
+* `GET /services` now accepts query parameter `status`. When set `true`,
+  services returned will include `ServiceStatus`, which provides Desired and
+  Running task counts for the service.
 
 ## v1.40 API changes
 
diff --git a/integration/service/list_test.go b/integration/service/list_test.go
@@ -0,0 +1,108 @@
+package service // import "github.com/docker/docker/integration/service"
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/filters"
+	swarmtypes "github.com/docker/docker/api/types/swarm"
+	"github.com/docker/docker/api/types/versions"
+	"github.com/docker/docker/integration/internal/swarm"
+	"gotest.tools/assert"
+	is "gotest.tools/assert/cmp"
+	"gotest.tools/poll"
+	"gotest.tools/skip"
+)
+
+// TestServiceListWithStatuses tests that performing a ServiceList operation
+// correctly uses the Status parameter, and that the resulting response
+// contains correct service statuses.
+//
+// NOTE(dperny): because it's a pain to elicit the behavior of an unconverged
+// service reliably, I'm not testing that an unconverged service returns X
+// running and Y desired tasks. Instead, I'm just going to trust that I can
+// successfully assign a value to another value without screwing it up. The
+// logic for computing service statuses is in swarmkit anyway, not in the
+// engine, and is well-tested there, so this test just needs to make sure that
+// statuses get correctly associated with the right services.
+func TestServiceListWithStatuses(t *testing.T) {
+	skip.If(t, testEnv.IsRemoteDaemon)
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+	// statuses were added in API version 1.41
+	skip.If(t, versions.LessThan(testEnv.DaemonInfo.ServerVersion, "1.41"))
+	defer setupTest(t)()
+	d := swarm.NewSwarm(t, testEnv)
+	defer d.Stop(t)
+	client := d.NewClientT(t)
+	defer client.Close()
+
+	ctx := context.Background()
+
+	serviceCount := 3
+	// create some services.
+	for i := 0; i < serviceCount; i++ {
+		spec := fullSwarmServiceSpec(fmt.Sprintf("test-list-%d", i), uint64(i+1))
+		// for whatever reason, the args "-u root", when included, cause these
+		// tasks to fail and exit. instead, we'll just pass no args, which
+		// works.
+		spec.TaskTemplate.ContainerSpec.Args = []string{}
+		resp, err := client.ServiceCreate(ctx, spec, types.ServiceCreateOptions{
+			QueryRegistry: false,
+		})
+		assert.NilError(t, err)
+		id := resp.ID
+		// we need to wait specifically for the tasks to be running, which the
+		// serviceContainerCount function does not do. instead, we'll use a
+		// bespoke closure right here.
+		poll.WaitOn(t, func(log poll.LogT) poll.Result {
+			filter := filters.NewArgs()
+			filter.Add("service", id)
+			tasks, err := client.TaskList(context.Background(), types.TaskListOptions{
+				Filters: filter,
+			})
+
+			running := 0
+			for _, task := range tasks {
+				if task.Status.State == swarmtypes.TaskStateRunning {
+					running++
+				}
+			}
+
+			switch {
+			case err != nil:
+				return poll.Error(err)
+			case running == i+1:
+				return poll.Success()
+			default:
+				return poll.Continue(
+					"running task count %d (%d total), waiting for %d",
+					running, len(tasks), i+1,
+				)
+			}
+		})
+	}
+
+	// now, let's do the list operation with no status arg set.
+	resp, err := client.ServiceList(ctx, types.ServiceListOptions{})
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(resp, serviceCount))
+	for _, service := range resp {
+		assert.Check(t, is.Nil(service.ServiceStatus))
+	}
+
+	// now try again, but with Status: true. This time, we should have statuses
+	resp, err = client.ServiceList(ctx, types.ServiceListOptions{Status: true})
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(resp, serviceCount))
+	for _, service := range resp {
+		replicas := *service.Spec.Mode.Replicated.Replicas
+
+		assert.Assert(t, service.ServiceStatus != nil)
+		// Use assert.Check to not fail out of the test if this fails
+		assert.Check(t, is.Equal(service.ServiceStatus.DesiredTasks, replicas))
+		assert.Check(t, is.Equal(service.ServiceStatus.RunningTasks, replicas))
+	}
+
+}