Skip to content

Commit

Permalink
Create charm tolerations for ceph-rbd and cephfs deployments and daem…
Browse files Browse the repository at this point in the history
…onsets (#34)
  • Loading branch information
addyess authored Jan 31, 2025
1 parent 6edfaad commit cd37e2c
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 6 deletions.
26 changes: 26 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,32 @@ options:
https://github.com/ceph/ceph-csi/blob/devel/examples/rbd/storageclass.yaml
type: string

cephfs-tolerations:
default: "$csi-cephfsplugin-legacy$"
description: |
Tolerations to be used when creating the cephfs pods for Daemonsets and Deployments.
Declare each toleration in key=value,operator[,effect] format (the effect is optional), separating tolerations by spaces.
Optional tolerations can be found in the kubernetes documentation:
https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
Note: The default value "$csi-cephfsplugin-legacy$" is a special token
which applies a "=,Exists" toleration only to pods associated with
csi-cephfsplugin.
type: string

ceph-rbd-tolerations:
default: ""
description: |
Tolerations to be used when creating the rbd pods for Daemonsets and Deployments.
Declare each toleration in key=value,operator[,effect] format (the effect is optional), separating tolerations by spaces.
Optional tolerations can be found in the kubernetes documentation:
https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
type: string

ceph-xfs-storage-class-parameters:
default: "imageFeatures=layering"
description: |
Expand Down
44 changes: 43 additions & 1 deletion src/manifests_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import pickle
from abc import ABCMeta, abstractmethod
from hashlib import md5
from typing import Any, Dict, Generator, Optional
from typing import Any, Dict, Generator, List, Optional

from lightkube.codecs import AnyResource
from lightkube.core.resource import NamespacedResource
from lightkube.models.core_v1 import Toleration
from ops.manifests import Addition, Manifests, Patch

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -118,3 +119,44 @@ def update_parameters(self, parameters: Dict[str, str]) -> None:
parameters.pop(adjustment[:-1], None)
else:
log.warning("Invalid parameter: %s in %s", adjustment, config)


class CephToleration(Toleration):
    """Toleration that can be built from the charm's config string format.

    A single toleration is written as ``key=value,operator[,effect]``;
    several tolerations are separated by whitespace.
    """

    @classmethod
    def _from_string(cls, toleration_str: str) -> "CephToleration":
        """Parse one ``key=value,operator[,effect]`` string into a toleration.

        Empty ``key``, ``value`` and ``effect`` parts are stored as ``None``.

        Raises:
            ValueError: If the string is not a valid toleration.
        """
        # Split explicitly so that malformed input yields a readable message
        # rather than a tuple-unpacking error.
        key_value, comma, remainder = toleration_str.partition(",")
        if not comma:
            raise ValueError(
                f"Missing ',' in '{toleration_str}'; "
                "expected key=value,operator[,effect]"
            )
        operator, *effects = remainder.split(",")

        key, equals, value = key_value.partition("=")
        if not equals:
            raise ValueError(
                f"Missing '=' in '{toleration_str}'; "
                "expected key=value,operator[,effect]"
            )

        if operator not in ("Exists", "Equal"):
            raise ValueError(f"Invalid {operator=}")
        if len(effects) > 1:
            raise ValueError(f"Too many effects='{','.join(effects)}'")
        effect = effects[0] if effects else ""
        if effect not in ("NoSchedule", "PreferNoSchedule", "NoExecute", ""):
            raise ValueError(f"Invalid {effect=}")

        # The Kubernetes API rejects the two combinations below, so report
        # them here instead of letting them fail when the manifest is applied.
        if operator == "Exists" and value:
            raise ValueError(f"Invalid {value=}, must be empty when operator='Exists'")
        if operator == "Equal" and not key:
            raise ValueError("Invalid empty key, operator must be 'Exists' when key is empty")

        return cls(
            key=key or None,  # Convert empty string to None
            value=value or None,
            operator=operator,
            effect=effect or None,
        )

    @classmethod
    def from_space_separated(cls, tolerations: str) -> List["CephToleration"]:
        """Parse a whitespace separated string of tolerations.

        An empty (or whitespace-only) string yields an empty list.

        Raises:
            ValueError: If any of the tolerations are invalid; the message is
                prefixed with ``Invalid tolerations:``.
        """
        try:
            return [cls._from_string(toleration) for toleration in tolerations.split()]
        except ValueError as e:
            raise ValueError(f"Invalid tolerations: {e}") from e
29 changes: 25 additions & 4 deletions src/manifests_cephfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
"""Implementation of cephfs specific details of the kubernetes manifests."""

import logging
from typing import TYPE_CHECKING, Any, Dict, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

from lightkube.codecs import AnyResource
from lightkube.models.core_v1 import Toleration
from lightkube.resources.core_v1 import Secret
from lightkube.resources.storage_v1 import StorageClass
from ops.manifests import Addition, ConfigRegistry, ManifestLabel, Patch
from ops.manifests.manipulations import Subtraction

from manifests_base import (
AdjustNamespace,
CephToleration,
ConfigureLivenessPrometheus,
SafeManifest,
StorageClassAddition,
Expand Down Expand Up @@ -121,8 +121,15 @@ def __call__(self) -> Optional[AnyResource]:
class ProvisionerAdjustments(Patch):
"""Update Cephfs provisioner."""

def tolerations(self) -> Tuple[List[CephToleration], bool]:
    """Read the ``cephfs-tolerations`` config option.

    Returns:
        A ``(tolerations, legacy)`` pair. When the option holds the special
        ``$csi-cephfsplugin-legacy$`` token the list is empty and ``legacy``
        is True; otherwise the option is parsed as space separated
        tolerations and ``legacy`` is False.

    Raises:
        ValueError: If the configured tolerations cannot be parsed.
    """
    raw = self.manifests.config.get("cephfs-tolerations") or ""
    legacy = raw == "$csi-cephfsplugin-legacy$"
    # The legacy token is never handed to the parser.
    parsed = [] if legacy else CephToleration.from_space_separated(raw)
    return parsed, legacy

def __call__(self, obj: AnyResource) -> None:
"""Use provisioner-replicas, enable-host-networking and cephfs-tolerations to update obj."""
tolerations, legacy = self.tolerations()
if (
obj.kind == "Deployment"
and obj.metadata
Expand All @@ -131,13 +138,18 @@ def __call__(self, obj: AnyResource) -> None:
obj.spec.replicas = replica = self.manifests.config.get("provisioner-replicas")
log.info(f"Updating deployment replicas to {replica}")

obj.spec.template.spec.tolerations = tolerations
log.info("Updating deployment tolerations")

obj.spec.template.spec.hostNetwork = host_network = self.manifests.config.get(
"enable-host-networking"
)
log.info(f"Updating deployment hostNetwork to {host_network}")
if obj.kind == "DaemonSet" and obj.metadata and obj.metadata.name == "csi-cephfsplugin":
obj.spec.template.spec.tolerations = [Toleration(operator="Exists")]
log.info("Updating daemonset tolerations to operator=Exists")
obj.spec.template.spec.tolerations = (
tolerations if not legacy else [CephToleration(operator="Exists")]
)
log.info("Updating daemonset tolerations")

kubelet_dir = self.manifests.config.get("kubelet_dir", "/var/lib/kubelet")

Expand Down Expand Up @@ -231,4 +243,13 @@ def evaluate(self) -> Optional[str]:
value = self.config.get(prop)
if not value:
return f"CephFS manifests require the definition of '{prop}'"

pa_manipulator = next(
m for m in self.manipulations if isinstance(m, ProvisionerAdjustments)
)
try:
pa_manipulator.tolerations()
except ValueError as err:
return f"Cannot adjust CephFS Pods: {err}"

return None
23 changes: 22 additions & 1 deletion src/manifests_rbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Implementation of rbd specific details of the kubernetes manifests."""

import logging
from typing import TYPE_CHECKING, Any, Dict, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from lightkube.codecs import AnyResource
from lightkube.resources.core_v1 import Secret
Expand All @@ -12,6 +12,7 @@

from manifests_base import (
AdjustNamespace,
CephToleration,
ConfigureLivenessPrometheus,
SafeManifest,
StorageClassAddition,
Expand Down Expand Up @@ -105,8 +106,13 @@ def __call__(self) -> Optional[AnyResource]:
class ProvisionerAdjustments(Patch):
"""Update RBD provisioner."""

def tolerations(self) -> List[CephToleration]:
    """Parse the ``ceph-rbd-tolerations`` config option into tolerations.

    A missing or empty option yields an empty list.

    Raises:
        ValueError: If the configured tolerations cannot be parsed.
    """
    configured = self.manifests.config.get("ceph-rbd-tolerations")
    return CephToleration.from_space_separated(configured or "")

def __call__(self, obj: AnyResource) -> None:
"""Mutates CSI RBD Provisioner Deployment replicas/tolerations/hostNetwork and DaemonSet tolerations/kubelet_dir paths."""
tolerations = self.tolerations()
if (
obj.kind == "Deployment"
and obj.metadata
Expand All @@ -115,12 +121,18 @@ def __call__(self, obj: AnyResource) -> None:
obj.spec.replicas = replica = self.manifests.config.get("provisioner-replicas")
log.info(f"Updating deployment replicas to {replica}")

obj.spec.template.spec.tolerations = tolerations
log.info("Updating deployment tolerations")

obj.spec.template.spec.hostNetwork = host_network = self.manifests.config.get(
"enable-host-networking"
)
log.info(f"Updating deployment hostNetwork to {host_network}")

if obj.kind == "DaemonSet" and obj.metadata and obj.metadata.name == "csi-rbdplugin":
obj.spec.template.spec.tolerations = tolerations
log.info("Updating daemonset tolerations to operator=Exists")

kubelet_dir = self.manifests.config.get("kubelet_dir", "/var/lib/kubelet")

for c in obj.spec.template.spec.containers:
Expand Down Expand Up @@ -197,4 +209,13 @@ def evaluate(self) -> Optional[str]:
value = self.config.get(prop)
if not value:
return f"RBD manifests require the definition of '{prop}'"

pa_manipulator = next(
m for m in self.manipulations if isinstance(m, ProvisionerAdjustments)
)
try:
pa_manipulator.tolerations()
except ValueError as err:
return f"Cannot adjust CephRBD Pods: {err}"

return None
18 changes: 18 additions & 0 deletions tests/unit/test_manifests_cephfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,21 @@ def test_manifest_evaluation(caplog):
"kubernetes_key": "123",
}
assert manifests.evaluate() is None

charm.config["cephfs-tolerations"] = "key=value,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephFS Pods: Invalid tolerations: Invalid operator='Foo'"
)

charm.config["cephfs-tolerations"] = "key=value,Exists,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephFS Pods: Invalid tolerations: Invalid effect='Foo'"
)

charm.config["cephfs-tolerations"] = "key=value,Exists,NoSchedule,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephFS Pods: Invalid tolerations: Too many effects='NoSchedule,Foo'"
)
18 changes: 18 additions & 0 deletions tests/unit/test_manifests_rbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,21 @@ def test_manifest_evaluation(caplog):

charm.config = {"user": "cephx", "fsid": "cluster", "kubernetes_key": "123"}
assert manifests.evaluate() is None

charm.config["ceph-rbd-tolerations"] = "key=value,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephRBD Pods: Invalid tolerations: Invalid operator='Foo'"
)

charm.config["ceph-rbd-tolerations"] = "key=value,Exists,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephRBD Pods: Invalid tolerations: Invalid effect='Foo'"
)

charm.config["ceph-rbd-tolerations"] = "key=value,Exists,NoSchedule,Foo"
assert (
manifests.evaluate()
== "Cannot adjust CephRBD Pods: Invalid tolerations: Too many effects='NoSchedule,Foo'"
)

0 comments on commit cd37e2c

Please sign in to comment.