diff --git a/Makefile b/Makefile index 6f6fb95e..81126930 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ help: ## Display this help. .PHONY: manifests manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. - $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + $(CONTROLLER_GEN) rbac:roleName=manager-role crd:generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases .PHONY: generate generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. diff --git a/api/v1/pathwaysjob_types.go b/api/v1/pathwaysjob_types.go index e5097fde..c7dbcb1a 100644 --- a/api/v1/pathwaysjob_types.go +++ b/api/v1/pathwaysjob_types.go @@ -172,6 +172,8 @@ type ControllerSpec struct { // https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec // +optional // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="userPodTemplate is immutable" + // +kubebuilder:pruning:PreserveUnknownFields + // +crd:generateEmbeddedObjectMeta=true UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"` // Enables elasticity and sets the maximum number of slices diff --git a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml index 7a8ac1f5..106f73bf 100644 --- a/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml +++ b/config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml @@ -102,6 +102,23 @@ spec: description: |- Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string type: object spec: description: |- @@ -6852,6 +6869,23 @@ spec: May contain labels and annotations that will be copied into the PVC when creating it. No other fields are allowed and will be rejected during validation. + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string type: object spec: description: |- @@ -8181,6 +8215,7 @@ spec: - containers type: object type: object + x-kubernetes-preserve-unknown-fields: true x-kubernetes-validations: - message: userPodTemplate is immutable rule: self == oldSelf diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 9b986dc0..53db6260 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,5 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: us-docker.pkg.dev/cloud-tpu-v2-images/pathways-job/pathwaysjob-controller - newTag: v0.1.2 + newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/controller + newTag: akshu-ssi-test-4 diff --git a/internal/controller/pathwaysjob_controller.go b/internal/controller/pathwaysjob_controller.go index 301564c2..fa3ed4b1 100644 --- a/internal/controller/pathwaysjob_controller.go +++ b/internal/controller/pathwaysjob_controller.go @@ -218,11 +218,14 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo workerJob, _ := MakeWorkerJob(ctx, pw) successPolicy := MakeSuccessPolicy(pw) + log.Info("findme1", "pw", pw) + log.Info("findme2", "meta", pw.GetObjectMeta()) mainJobSetConfig := jobsetv1alpha2.JobSet{ ObjectMeta: metav1.ObjectMeta{ Name: pw.GetName(), Namespace: pw.GetNamespace(), Labels: pw.GetObjectMeta().GetLabels(), + Annotations: pw.GetObjectMeta().GetAnnotations(), }, Spec: jobsetv1alpha2.JobSetSpec{ StartupPolicy: &jobsetv1alpha2.StartupPolicy{ @@ -848,6 +851,13 @@ func MakePathwaysHeadPodSpec(pw *pathwaysjob.PathwaysJob) *corev1.PodSpec { Containers: containerList, } // end PodSpec } + // The user pod template can have its own annotations. + // We should merge them with the annotations from the PathwaysJob. + if isUserPodProvided(pw) && pw.Spec.Controller.UserPodTemplate.Annotations != nil { + for k, v := range pw.GetObjectMeta().GetAnnotations() { + pw.Spec.Controller.UserPodTemplate.Annotations[k] = v + } + } return pathwaysHeadPodSpec } @@ -872,13 +882,20 @@ func injectJAXBackendTargetIntoMainContainer(pw *pathwaysjob.PathwaysJob, pathwa } -func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob { +func MakePathwaysHeadReplicatedJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob { var annotations map[string]string - annotations = nil + log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw)) + ctx = ctrl.LoggerInto(ctx, log) + log.Info("findme3", "anno", pw.GetObjectMeta().GetAnnotations()) + // Start with annotations from the PathwaysJob. + annotations = make(map[string]string) + for k, v := range pw.GetObjectMeta().GetAnnotations() { + annotations[k] = v + } + if pw.Spec.Controller.DeploymentMode == pathwaysjob.Default { - annotations = map[string]string{ - "alpha.jobset.sigs.k8s.io/exclusive-topology": "kubernetes.io/hostname", - } // needed so that head pods are placed exclusively on CPU nodes. + // needed so that head pods are placed exclusively on CPU nodes. + annotations["alpha.jobset.sigs.k8s.io/exclusive-topology"] = "kubernetes.io/hostname" } pathwaysHeadJob := jobsetv1alpha2.ReplicatedJob{ Name: PathwaysHeadJobName, @@ -893,6 +910,9 @@ func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodS Parallelism: ptr.To(int32(1)), Template: corev1.PodTemplateSpec{ Spec: pathwaysHeadPodSpec, + ObjectMeta: metav1.ObjectMeta{ + Annotations: annotations, + }, }, }, }, @@ -910,12 +930,13 @@ func MakePathwaysHeadJobForColocateHeadWithWorkersDeployment(ctx context.Context podSpec.Affinity = affinitySpec podSpec.Tolerations = tolerations - return MakePathwaysHeadReplicatedJob(pw, podSpec), nil + return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil } // Construct pathways-head replicated job containing Pathways RM, Pathways Proxy and the user job containers for the 'default' deployment mode. // In the default mode, the Pathways head pod is placed on CPU nodes. func MakePathwaysHeadJobForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob) (jobsetv1alpha2.ReplicatedJob, error) { podSpec := *MakePathwaysHeadPodSpec(pw) - return MakePathwaysHeadReplicatedJob(pw, podSpec), nil + return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil } +