Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ help: ## Display this help.

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
$(CONTROLLER_GEN) rbac:roleName=manager-role crd:generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down
2 changes: 2 additions & 0 deletions api/v1/pathwaysjob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ type ControllerSpec struct {
// https://pkg.go.dev/k8s.io/api/core/v1#PodTemplateSpec
// +optional
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="userPodTemplate is immutable"
// +kubebuilder:pruning:PreserveUnknownFields
// +crd:generateEmbeddedObjectMeta=true
UserPodTemplate *corev1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,6,opt,name=template"`

// Enables elasticity and sets the maximum number of slices
Expand Down
35 changes: 35 additions & 0 deletions config/crd/bases/pathways-job.pathways.domain_pathwaysjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,23 @@ spec:
description: |-
Standard object's metadata.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
properties:
annotations:
additionalProperties:
type: string
type: object
finalizers:
items:
type: string
type: array
labels:
additionalProperties:
type: string
type: object
name:
type: string
namespace:
type: string
type: object
spec:
description: |-
Expand Down Expand Up @@ -6852,6 +6869,23 @@ spec:
May contain labels and annotations that will be copied into the PVC
when creating it. No other fields are allowed and will be rejected during
validation.
properties:
annotations:
additionalProperties:
type: string
type: object
finalizers:
items:
type: string
type: array
labels:
additionalProperties:
type: string
type: object
name:
type: string
namespace:
type: string
type: object
spec:
description: |-
Expand Down Expand Up @@ -8181,6 +8215,7 @@ spec:
- containers
type: object
type: object
x-kubernetes-preserve-unknown-fields: true
x-kubernetes-validations:
- message: userPodTemplate is immutable
rule: self == oldSelf
Expand Down
4 changes: 2 additions & 2 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
images:
- name: controller
newName: us-docker.pkg.dev/cloud-tpu-v2-images/pathways-job/pathwaysjob-controller
newTag: v0.1.2
newName: us-docker.pkg.dev/cloud-tpu-multipod-dev/pathways/controller
newTag: akshu-ssi-test-4
35 changes: 28 additions & 7 deletions internal/controller/pathwaysjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,14 @@ func (r *PathwaysJobReconciler) createJobSet(ctx context.Context, pw *pathwaysjo
workerJob, _ := MakeWorkerJob(ctx, pw)
successPolicy := MakeSuccessPolicy(pw)

log.Info("findme1", "pw", pw)
log.Info("findme2", "meta", pw.GetObjectMeta())
mainJobSetConfig := jobsetv1alpha2.JobSet{
ObjectMeta: metav1.ObjectMeta{
Name: pw.GetName(),
Namespace: pw.GetNamespace(),
Labels: pw.GetObjectMeta().GetLabels(),
Annotations: pw.GetObjectMeta().GetAnnotations(),
},
Spec: jobsetv1alpha2.JobSetSpec{
StartupPolicy: &jobsetv1alpha2.StartupPolicy{
Expand Down Expand Up @@ -848,6 +851,13 @@ func MakePathwaysHeadPodSpec(pw *pathwaysjob.PathwaysJob) *corev1.PodSpec {
Containers: containerList,
} // end PodSpec
}
// The user pod template can have its own annotations.
// We should merge them with the annotations from the PathwaysJob.
if isUserPodProvided(pw) && pw.Spec.Controller.UserPodTemplate.Annotations != nil {
for k, v := range pw.GetObjectMeta().GetAnnotations() {
pw.Spec.Controller.UserPodTemplate.Annotations[k] = v
}
}
return pathwaysHeadPodSpec
}

Expand All @@ -872,13 +882,20 @@ func injectJAXBackendTargetIntoMainContainer(pw *pathwaysjob.PathwaysJob, pathwa

}

func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob {
func MakePathwaysHeadReplicatedJob(ctx context.Context, pw *pathwaysjob.PathwaysJob, pathwaysHeadPodSpec corev1.PodSpec) jobsetv1alpha2.ReplicatedJob {
var annotations map[string]string
annotations = nil
log := ctrl.LoggerFrom(ctx).WithValues("pathwaysjob", klog.KObj(pw))
ctx = ctrl.LoggerInto(ctx, log)
log.Info("findme3", "anno", pw.GetObjectMeta().GetAnnotations())
// Start with annotations from the PathwaysJob.
annotations = make(map[string]string)
for k, v := range pw.GetObjectMeta().GetAnnotations() {
annotations[k] = v
}

if pw.Spec.Controller.DeploymentMode == pathwaysjob.Default {
annotations = map[string]string{
"alpha.jobset.sigs.k8s.io/exclusive-topology": "kubernetes.io/hostname",
} // needed so that head pods are placed exclusively on CPU nodes.
// needed so that head pods are placed exclusively on CPU nodes.
annotations["alpha.jobset.sigs.k8s.io/exclusive-topology"] = "kubernetes.io/hostname"
}
pathwaysHeadJob := jobsetv1alpha2.ReplicatedJob{
Name: PathwaysHeadJobName,
Expand All @@ -893,6 +910,9 @@ func MakePathwaysHeadReplicatedJob(pw *pathwaysjob.PathwaysJob, pathwaysHeadPodS
Parallelism: ptr.To(int32(1)),
Template: corev1.PodTemplateSpec{
Spec: pathwaysHeadPodSpec,
ObjectMeta: metav1.ObjectMeta{
Annotations: annotations,
},
},
},
},
Expand All @@ -910,12 +930,13 @@ func MakePathwaysHeadJobForColocateHeadWithWorkersDeployment(ctx context.Context
podSpec.Affinity = affinitySpec
podSpec.Tolerations = tolerations

return MakePathwaysHeadReplicatedJob(pw, podSpec), nil
return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil
}

// MakePathwaysHeadJobForDefaultDeployment constructs the pathways-head replicated job containing the Pathways RM, the Pathways Proxy and the user job containers for the 'default' deployment mode.
// In the default mode, the Pathways head pod is placed on CPU nodes.
func MakePathwaysHeadJobForDefaultDeployment(ctx context.Context, pw *pathwaysjob.PathwaysJob) (jobsetv1alpha2.ReplicatedJob, error) {
podSpec := *MakePathwaysHeadPodSpec(pw)
return MakePathwaysHeadReplicatedJob(pw, podSpec), nil
return MakePathwaysHeadReplicatedJob(ctx, pw, podSpec), nil
}