diff --git a/api/scaler/v1alpha1/budaiscaler_types.go b/api/scaler/v1alpha1/budaiscaler_types.go
index cabba86..a26e437 100644
--- a/api/scaler/v1alpha1/budaiscaler_types.go
+++ b/api/scaler/v1alpha1/budaiscaler_types.go
@@ -102,6 +102,10 @@ type BudAIScalerSpec struct {
 	// Behavior configures the scaling behavior for scale up and scale down.
 	// +optional
 	Behavior *ScalingBehavior `json:"behavior,omitempty"`
+
+	// ScaleToZeroConfig configures scale-to-zero behavior.
+	// +optional
+	ScaleToZeroConfig *ScaleToZeroConfig `json:"scaleToZeroConfig,omitempty"`
 }
 
 // ScalingStrategyType defines the type of scaling algorithm.
@@ -488,6 +492,25 @@ type StartingPodsConfig struct {
 	BypassGateOnPanic *bool `json:"bypassGateOnPanic,omitempty"`
 }
 
+// ScaleToZeroConfig configures scale-to-zero behavior: whether it is enabled, the replica count used when waking from zero, and the zero-demand grace period.
+type ScaleToZeroConfig struct {
+	// Enabled turns on scale-to-zero behavior.
+	// +optional
+	// +kubebuilder:default=false
+	Enabled bool `json:"enabled,omitempty"`
+
+	// ActivationScale is the number of replicas to scale to when waking from zero.
+	// +optional
+	// +kubebuilder:default=1
+	// +kubebuilder:validation:Minimum=1
+	ActivationScale *int32 `json:"activationScale,omitempty"`
+
+	// GracePeriod is the duration to wait with zero demand before scaling to zero.
+	// +optional
+	// +kubebuilder:default="5m"
+	GracePeriod *metav1.Duration `json:"gracePeriod,omitempty"`
+}
+
 // ScalingRules defines rules for scaling in a particular direction.
 type ScalingRules struct {
 	// StabilizationWindowSeconds is the number of seconds to look back
@@ -564,6 +587,11 @@ type BudAIScalerStatus struct {
 	// MultiClusterStatus contains federation status.
 	// +optional
 	MultiClusterStatus *MultiClusterStatus `json:"multiClusterStatus,omitempty"`
+
+	// ZeroDemandSince tracks when zero demand was first detected.
+	// Used for scale-to-zero grace period calculation.
+	// +optional
+	ZeroDemandSince *metav1.Time `json:"zeroDemandSince,omitempty"`
 }
 
 // ScalingDecision records a single scaling decision.
diff --git a/api/scaler/v1alpha1/zz_generated.deepcopy.go b/api/scaler/v1alpha1/zz_generated.deepcopy.go
index 6928154..4d89d91 100644
--- a/api/scaler/v1alpha1/zz_generated.deepcopy.go
+++ b/api/scaler/v1alpha1/zz_generated.deepcopy.go
@@ -135,6 +135,11 @@ func (in *BudAIScalerSpec) DeepCopyInto(out *BudAIScalerSpec) {
 		*out = new(ScalingBehavior)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.ScaleToZeroConfig != nil {
+		in, out := &in.ScaleToZeroConfig, &out.ScaleToZeroConfig
+		*out = new(ScaleToZeroConfig)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BudAIScalerSpec.
@@ -193,6 +198,10 @@ func (in *BudAIScalerStatus) DeepCopyInto(out *BudAIScalerStatus) {
 		*out = new(MultiClusterStatus)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.ZeroDemandSince != nil {
+		in, out := &in.ZeroDemandSince, &out.ZeroDemandSince
+		*out = (*in).DeepCopy()
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BudAIScalerStatus.
@@ -463,6 +472,31 @@ func (in *PredictionStatus) DeepCopy() *PredictionStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ScaleToZeroConfig) DeepCopyInto(out *ScaleToZeroConfig) {
+	*out = *in
+	if in.ActivationScale != nil {
+		in, out := &in.ActivationScale, &out.ActivationScale
+		*out = new(int32)
+		**out = **in
+	}
+	if in.GracePeriod != nil {
+		in, out := &in.GracePeriod, &out.GracePeriod
+		*out = new(v1.Duration)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScaleToZeroConfig.
+func (in *ScaleToZeroConfig) DeepCopy() *ScaleToZeroConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(ScaleToZeroConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ScalingBehavior) DeepCopyInto(out *ScalingBehavior) {
 	*out = *in
diff --git a/config/crd/scaler.bud.studio_budaiscalers.yaml b/config/crd/scaler.bud.studio_budaiscalers.yaml
index f8d42d8..d78c40f 100644
--- a/config/crd/scaler.bud.studio_budaiscalers.yaml
+++ b/config/crd/scaler.bud.studio_budaiscalers.yaml
@@ -140,8 +140,8 @@ spec:
                           maxStartingPodPercent:
                             description: |-
                               MaxStartingPodPercent is the maximum percentage of total pods that can
-                              be in starting state before gating scale-up.
-                              Set to 0 to disable. Default: 0 (disabled)
+                              be in starting state before gating scale-up. Set to 0 to disable.
+                              Default: 50 (gate scale-up if more than 50% of pods are starting)
                             format: int32
                             maximum: 100
                             minimum: 0
@@ -149,8 +149,8 @@ spec:
                           maxStartingPods:
                             description: |-
                               MaxStartingPods is the maximum number of starting pods allowed before
-                              gating further scale-up operations. Set to 0 to disable the gate.
-                              Default: 0 (disabled)
+                              gating further scale-up operations. Set to 0 to disable this gate.
+                              Default: 0 (disabled, uses MaxStartingPodPercent instead)
                             format: int32
                             minimum: 0
                             type: integer
@@ -242,8 +242,8 @@ spec:
                           maxStartingPodPercent:
                             description: |-
                               MaxStartingPodPercent is the maximum percentage of total pods that can
-                              be in starting state before gating scale-up.
-                              Set to 0 to disable. Default: 0 (disabled)
+                              be in starting state before gating scale-up. Set to 0 to disable.
+                              Default: 50 (gate scale-up if more than 50% of pods are starting)
                             format: int32
                             maximum: 100
                             minimum: 0
@@ -251,8 +251,8 @@ spec:
                           maxStartingPods:
                             description: |-
                               MaxStartingPods is the maximum number of starting pods allowed before
-                              gating further scale-up operations. Set to 0 to disable the gate.
-                              Default: 0 (disabled)
+                              gating further scale-up operations. Set to 0 to disable this gate.
+                              Default: 0 (disabled, uses MaxStartingPodPercent instead)
                             format: int32
                             minimum: 0
                             type: integer
@@ -583,6 +583,26 @@ spec:
                     type: string
                 type: object
                 x-kubernetes-map-type: atomic
+              scaleToZeroConfig:
+                description: ScaleToZeroConfig configures scale-to-zero behavior.
+                properties:
+                  activationScale:
+                    default: 1
+                    description: ActivationScale is the number of replicas to scale
+                      to when waking from zero.
+                    format: int32
+                    minimum: 1
+                    type: integer
+                  enabled:
+                    default: false
+                    description: Enabled turns on scale-to-zero behavior.
+                    type: boolean
+                  gracePeriod:
+                    default: 5m
+                    description: GracePeriod is the duration to wait with zero demand
+                      before scaling to zero.
+                    type: string
+                type: object
               scalingStrategy:
                 default: BudScaler
                 description: ScalingStrategy defines the algorithm to use for scaling
@@ -910,6 +930,12 @@ spec:
                   type: object
                 maxItems: 10
                 type: array
+              zeroDemandSince:
+                description: |-
+                  ZeroDemandSince tracks when zero demand was first detected.
+                  Used for scale-to-zero grace period calculation.
+                format: date-time
+                type: string
             type: object
         type: object
     served: true
diff --git a/pkg/context/context.go b/pkg/context/context.go
index b653ca5..d9a0bcc 100644
--- a/pkg/context/context.go
+++ b/pkg/context/context.go
@@ -38,6 +38,7 @@ const (
 	DefaultPanicWindow              = 60 * time.Second
 	DefaultStableWindow             = 180 * time.Second
 	DefaultActivationScale          = int32(1)
+	DefaultScaleToZeroGrace         = 5 * time.Minute
 
 	// Starting pods defaults - enabled by default for LLM workloads with long cold starts
 	DefaultStartingPodWeight     = 0.5 // Count starting pods as 50% capacity
@@ -79,6 +80,7 @@ type ScalingContext interface {
 	// Scale-to-zero configuration
 	GetScaleToZero() bool
 	GetActivationScale() int32
+	GetScaleToZeroGrace() time.Duration
 
 	// Per-metric target values
 	GetTargetValueForMetric(metricName string) (float64, bool)
@@ -147,8 +149,9 @@ type baseScalingContext struct {
 	inPanicMode    bool
 
 	// Scale-to-zero
-	scaleToZero     bool
-	activationScale int32
+	scaleToZero      bool
+	activationScale  int32
+	scaleToZeroGrace time.Duration
 
 	// Per-metric targets
 	metricTargets map[string]float64
@@ -214,7 +217,7 @@ func NewBaseScalingContext() ScalingContext {
 		startingPodWeight:     DefaultStartingPodWeight,
 		maxStartingPods:       DefaultMaxStartingPods,
 		maxStartingPodPercent: DefaultMaxStartingPodPercent,
-		bypassGateOnPanic:     DefaultBypassGateOnPanic
+		bypassGateOnPanic:     DefaultBypassGateOnPanic,
 	}
 }
 
@@ -322,6 +325,19 @@ func NewScalingContextFromScaler(scaler *scalerv1alpha1.BudAIScaler) ScalingCont
 		}
 	}
 
+	// Apply CRD scale-to-zero settings (intended to take precedence over annotations); note Enabled=false does not switch off a value enabled earlier.
+	if scaler.Spec.ScaleToZeroConfig != nil {
+		if scaler.Spec.ScaleToZeroConfig.Enabled {
+			ctx.scaleToZero = true
+		}
+		if scaler.Spec.ScaleToZeroConfig.ActivationScale != nil {
+			ctx.activationScale = *scaler.Spec.ScaleToZeroConfig.ActivationScale
+		}
+		if scaler.Spec.ScaleToZeroConfig.GracePeriod != nil {
+			ctx.scaleToZeroGrace = scaler.Spec.ScaleToZeroConfig.GracePeriod.Duration
+		}
+	}
+
 	// Parse per-metric target values
 	for _, ms := range scaler.Spec.MetricsSources {
 		if targetValue, err := strconv.ParseFloat(ms.TargetValue, 64); err == nil {
@@ -403,6 +419,11 @@ func (c *baseScalingContext) parseAnnotations(annotations map[string]string) {
 			c.activationScale = int32(scale)
 		}
 	}
+	if v, ok := annotations[types.ScaleToZeroGraceAnnotation]; ok {
+		if dur, err := time.ParseDuration(v); err == nil {
+			c.scaleToZeroGrace = dur
+		}
+	}
 }
 
 // Replica bounds implementation
@@ -437,6 +458,17 @@ func (c *baseScalingContext) SetInPanicMode(inPanic bool) { c.inPanicMode = inPa
 // Scale-to-zero implementation
 func (c *baseScalingContext) GetScaleToZero() bool      { return c.scaleToZero }
 func (c *baseScalingContext) GetActivationScale() int32 { return c.activationScale }
+
+// GetScaleToZeroGrace returns how long demand must stay at zero before the
+// workload may be scaled to zero. A non-positive configured value (unset, or a
+// negative duration from an annotation or the CRD) yields DefaultScaleToZeroGrace,
+// so an explicit zero grace period cannot be expressed.
+func (c *baseScalingContext) GetScaleToZeroGrace() time.Duration {
+	if c.scaleToZeroGrace <= 0 {
+		return DefaultScaleToZeroGrace
+	}
+	return c.scaleToZeroGrace
+}
 
 // Per-metric targets implementation
 func (c *baseScalingContext) GetTargetValueForMetric(metricName string) (float64, bool) {
diff --git a/pkg/controller/budaiscaler/algorithm/algorithm.go b/pkg/controller/budaiscaler/algorithm/algorithm.go
index 63b27db..8c882f3 100644
--- a/pkg/controller/budaiscaler/algorithm/algorithm.go
+++ b/pkg/controller/budaiscaler/algorithm/algorithm.go
@@ -70,6 +70,10 @@ type ScalingRequest struct {
 	// LastScaleTime is when scaling last occurred.
 	LastScaleTime *time.Time
 
+	// ZeroDemandSince tracks when zero demand was first detected.
+	// Used for scale-to-zero grace period calculation.
+	ZeroDemandSince *time.Time
+
 	// ScalingContext provides scaling configuration.
 	ScalingContext ScalingContextProvider
 }
@@ -95,6 +99,10 @@ type ScalingContextProvider interface {
 	GetMaxStartingPods() int32
 	GetMaxStartingPodPercent() int32
 	GetBypassGateOnPanic() bool
+	// Scale-to-zero configuration
+	GetScaleToZero() bool
+	GetActivationScale() int32
+	GetScaleToZeroGrace() time.Duration
 }
 
 // ScalingRecommendation contains the result of a scaling decision.
@@ -253,8 +261,8 @@ func ApplyScaleDownPolicies(currentReplicas, desiredReplicas int32, sctx Scaling
 	}
 
 	minReplicas := currentReplicas - maxAllowed
-	if minReplicas < 1 {
-		minReplicas = 1
+	if minReplicas < 0 {
+		minReplicas = 0
 	}
 	if desiredReplicas < minReplicas {
 		return minReplicas
diff --git a/pkg/controller/budaiscaler/algorithm/algorithm_test.go b/pkg/controller/budaiscaler/algorithm/algorithm_test.go
index 011e4fe..c1af515 100644
--- a/pkg/controller/budaiscaler/algorithm/algorithm_test.go
+++ b/pkg/controller/budaiscaler/algorithm/algorithm_test.go
@@ -18,6 +18,7 @@ package algorithm
 
 import (
 	"testing"
+	"time"
 
 	scalerv1alpha1 "github.com/BudEcosystem/scaler/api/scaler/v1alpha1"
 )
@@ -43,6 +44,10 @@ type mockScalingContext struct {
 	maxStartingPods       int32
 	maxStartingPodPercent int32
 	bypassGateOnPanic     bool
+	// Scale-to-zero config
+	scaleToZero      bool
+	activationScale  int32
+	scaleToZeroGrace time.Duration
 }
 
 func (m *mockScalingContext) GetMinReplicas() int32                 { return m.minReplicas }
@@ -73,6 +78,14 @@ func (m *mockScalingContext) GetStartingPodWeight() float64   { return m.startin
 func (m *mockScalingContext) GetMaxStartingPods() int32       { return m.maxStartingPods }
 func (m *mockScalingContext) GetMaxStartingPodPercent() int32 { return m.maxStartingPodPercent }
 func (m *mockScalingContext) GetBypassGateOnPanic() bool      { return m.bypassGateOnPanic }
+func (m *mockScalingContext) GetScaleToZero() bool            { return m.scaleToZero }
+func (m *mockScalingContext) GetActivationScale() int32       { return m.activationScale }
+func (m *mockScalingContext) GetScaleToZeroGrace() time.Duration {
+	if grace := m.scaleToZeroGrace; grace != 0 { // explicitly configured by the test
+		return grace
+	}
+	return 5 * time.Minute // fallback used when the test leaves the field unset
+}
 
 func TestApplyScaleUpPolicies(t *testing.T) {
 	tests := []struct {
@@ -249,12 +262,12 @@ func TestApplyScaleDownPolicies(t *testing.T) {
 			expected:        8, // desired is within limit
 		},
 		{
-			name:            "ensure minimum 1 replica",
+			name:            "allow scale to 0 when policy permits",
 			currentReplicas: 2,
 			desiredReplicas: 0,
 			policies:        []scalerv1alpha1.ScalingPolicy{{Type: scalerv1alpha1.PodsScalingPolicy, Value: 10, PeriodSeconds: 60}},
 			selectPolicy:    scalerv1alpha1.MaxChangePolicySelect,
-			expected:        1, // minimum 1 replica
+			expected:        0, // scale-to-zero now allowed by policy
 		},
 	}
 
diff --git a/pkg/controller/budaiscaler/algorithm/budscaler.go b/pkg/controller/budaiscaler/algorithm/budscaler.go
index 31503db..92ca690 100644
--- a/pkg/controller/budaiscaler/algorithm/budscaler.go
+++ b/pkg/controller/budaiscaler/algorithm/budscaler.go
@@ -148,6 +148,32 @@ func (a *BudScalerAlgorithm) ComputeRecommendation(ctx context.Context, request
 	// Step 6: Apply min/max constraints
 	rec.DesiredReplicas = a.applyConstraints(costRec, sctx.GetMinReplicas(), sctx.GetMaxReplicas())
 
+	// Step 6.5: Handle scale-to-zero with grace period
+	if sctx.GetScaleToZero() && rec.DesiredReplicas == 0 && request.CurrentReplicas > 0 {
+		gracePeriod := sctx.GetScaleToZeroGrace()
+		if request.ZeroDemandSince != nil {
+			elapsed := time.Since(*request.ZeroDemandSince)
+			if elapsed < gracePeriod {
+				// Not enough time at zero demand, keep at 1
+				rec.DesiredReplicas = 1
+				rec.Reason = fmt.Sprintf("Scale-to-zero grace period: %v remaining", gracePeriod-elapsed)
+			}
+		} else {
+			// First detection of zero demand, keep at 1 and start tracking
+			rec.DesiredReplicas = 1
+			rec.Reason = "Zero demand detected, starting grace period"
+		}
+	}
+
+	// Step 6.6: Apply activation scale when scaling from zero
+	if request.CurrentReplicas == 0 && rec.DesiredReplicas > 0 {
+		activationScale := sctx.GetActivationScale()
+		if activationScale > rec.DesiredReplicas {
+			rec.DesiredReplicas = activationScale
+			rec.Reason = fmt.Sprintf("Scaling from zero to activation scale %d", activationScale)
+		}
+	}
+
 	// Step 7: Apply scaling policies (limit rate of change)
 	if rec.DesiredReplicas > request.CurrentReplicas {
 		rec.DesiredReplicas = ApplyScaleUpPolicies(request.CurrentReplicas, rec.DesiredReplicas, sctx)
@@ -328,8 +354,13 @@ func (a *BudScalerAlgorithm) calculateDesiredForMetric(currentValue, targetValue
 		}
 	}
 
-	if desired < 1 {
-		desired = 1
+	// Apply minimum based on scale-to-zero configuration
+	minAllowed := int32(1)
+	if sctx.GetScaleToZero() && sctx.GetMinReplicas() == 0 {
+		minAllowed = 0
+	}
+	if desired < minAllowed {
+		desired = minAllowed
 	}
 
 	return desired
@@ -375,9 +406,13 @@ func (a *BudScalerAlgorithm) calculateGPUBasedRecommendation(request ScalingRequ
 	desiredFloat := float64(request.CurrentReplicas) * ratio
 	desired := int32(math.Ceil(desiredFloat))
 
-	// Apply constraints
-	if desired < 1 {
-		desired = 1
+	// Apply minimum based on scale-to-zero configuration
+	minAllowed := int32(1)
+	if sctx.GetScaleToZero() && sctx.GetMinReplicas() == 0 {
+		minAllowed = 0
+	}
+	if desired < minAllowed {
+		desired = minAllowed
 	}
 
 	return desired
@@ -461,8 +496,8 @@ func (a *BudScalerAlgorithm) applyPredictionAdjustment(desired int32, request Sc
 	adjustedFloat := float64(desired)*(1-weight) + float64(predictedReplicas)*weight
 	adjusted := int32(math.Round(adjustedFloat))
 
-	if adjusted < 1 {
-		adjusted = 1
+	if adjusted < 0 {
+		adjusted = 0
 	}
 
 	klog.V(5).InfoS("Applied prediction adjustment",
diff --git a/pkg/controller/budaiscaler/algorithm/kpa.go b/pkg/controller/budaiscaler/algorithm/kpa.go
index a3ecc74..cbca708 100644
--- a/pkg/controller/budaiscaler/algorithm/kpa.go
+++ b/pkg/controller/budaiscaler/algorithm/kpa.go
@@ -130,6 +130,30 @@ func (a *KPAAlgorithm) ComputeRecommendation(ctx context.Context, request Scalin
 	// Apply min/max constraints
 	rec.DesiredReplicas = a.applyConstraints(maxDesired, sctx.GetMinReplicas(), sctx.GetMaxReplicas())
 
+	// Handle scale-to-zero with grace period
+	if sctx.GetScaleToZero() && rec.DesiredReplicas == 0 && request.CurrentReplicas > 0 {
+		gracePeriod := sctx.GetScaleToZeroGrace()
+		if request.ZeroDemandSince != nil {
+			elapsed := time.Since(*request.ZeroDemandSince)
+			if elapsed < gracePeriod {
+				// Not enough time at zero demand, keep at 1
+				rec.DesiredReplicas = 1
+			}
+		} else {
+			// First detection of zero demand, keep at 1 and start tracking
+			rec.DesiredReplicas = 1
+		}
+	}
+
+	// Apply activation scale when scaling from zero
+	if request.CurrentReplicas == 0 && rec.DesiredReplicas > 0 {
+		activationScale := sctx.GetActivationScale()
+		if activationScale > rec.DesiredReplicas {
+			rec.DesiredReplicas = activationScale
+			rec.Reason = fmt.Sprintf("Scaling from zero to activation scale %d", activationScale)
+		}
+	}
+
 	// Apply scaling policies (limit rate of change)
 	if rec.DesiredReplicas > request.CurrentReplicas && !inPanicMode {
 		rec.DesiredReplicas = ApplyScaleUpPolicies(request.CurrentReplicas, rec.DesiredReplicas, sctx)
@@ -265,9 +289,13 @@ func (a *KPAAlgorithm) calculateDesiredReplicas(
 		}
 	}
 
-	// Ensure at least 1 replica
-	if desired < 1 {
-		desired = 1
+	// Apply minimum based on scale-to-zero configuration
+	minAllowed := int32(1)
+	if sctx.GetScaleToZero() && sctx.GetMinReplicas() == 0 {
+		minAllowed = 0
+	}
+	if desired < minAllowed {
+		desired = minAllowed
 	}
 
 	return desired, inPanicMode
diff --git a/pkg/controller/budaiscaler/autoscaler.go b/pkg/controller/budaiscaler/autoscaler.go
index 7cfecdd..3773f0c 100644
--- a/pkg/controller/budaiscaler/autoscaler.go
+++ b/pkg/controller/budaiscaler/autoscaler.go
@@ -153,6 +153,7 @@ func (a *AutoScaler) Scale(ctx context.Context, scaler *scalerv1alpha1.BudAIScal
 		StartingPodCount: startingPodCount,
 		MetricSnapshots:  metricSnapshots,
 		ScalingContext:   scalingCtx,
+		ZeroDemandSince:  a.getZeroDemandSince(scaler),
 	}
 
 	klog.V(4).InfoS("Pod counts", "scaler", scaler.Name,
@@ -747,3 +748,12 @@ func (a *AutoScaler) CleanupLearningSystem(scaler *scalerv1alpha1.BudAIScaler) {
 	key := scaler.Namespace + "/" + scaler.Name
 	delete(a.learningSystems, key)
 }
+
+// getZeroDemandSince converts Status.ZeroDemandSince into a *time.Time copy, or nil when no timestamp is recorded.
+func (a *AutoScaler) getZeroDemandSince(scaler *scalerv1alpha1.BudAIScaler) *time.Time {
+	if scaler.Status.ZeroDemandSince == nil {
+		return nil
+	}
+	since := scaler.Status.ZeroDemandSince.Time
+	return &since
+}
diff --git a/pkg/controller/budaiscaler/budaiscaler_controller.go b/pkg/controller/budaiscaler/budaiscaler_controller.go
index c9bf1bf..727154a 100644
--- a/pkg/controller/budaiscaler/budaiscaler_controller.go
+++ b/pkg/controller/budaiscaler/budaiscaler_controller.go
@@ -299,6 +299,20 @@ func (r *BudAIScalerReconciler) reconcileCustomScaler(ctx context.Context, scale
 	scaler.Status.ActualScale = result.DesiredReplicas
 	scaler.Status.DesiredScale = result.DesiredReplicas
 
+	// Track when zero demand began; Status.ZeroDemandSince is the reference point for the scale-to-zero grace period.
+	if result.Recommendation != nil {
+		if result.Recommendation.DesiredReplicas == 0 && result.CurrentReplicas > 0 {
+			// Entering zero demand: stamp the start time once and keep it on later passes.
+			if scaler.Status.ZeroDemandSince == nil {
+				zeroDemandTimestamp := metav1.Now()
+				scaler.Status.ZeroDemandSince = &zeroDemandTimestamp
+			}
+		} else if result.Recommendation.DesiredReplicas > 0 {
+			// Exiting zero demand: clear the stamp. NOTE(review): the algorithms hold DesiredReplicas at 1 until ZeroDemandSince is set and the grace period has elapsed; if this recommendation carries that value, the stamp above is never written — TODO confirm.
+			scaler.Status.ZeroDemandSince = nil
+		}
+	}
+
 	if result.Scaled {
 		scaler.Status.LastScaleTime = &now
 
diff --git a/pkg/metrics/collector.go b/pkg/metrics/collector.go
index 1ccc57e..a11b631 100644
--- a/pkg/metrics/collector.go
+++ b/pkg/metrics/collector.go
@@ -47,7 +47,20 @@ func NewDefaultMetricCollector(factory MetricFetcherFactory, aggregator *aggrega
 // CollectMetrics collects metrics from all pods and returns a snapshot.
 func (c *DefaultMetricCollector) CollectMetrics(ctx context.Context, pods []corev1.Pod, source scalerv1alpha1.MetricSource) (*types.MetricSnapshot, error) {
 	if len(pods) == 0 {
-		return nil, fmt.Errorf("no pods to collect metrics from")
+		// External sources can still be collected without pods
+		if IsExternalSource(source.MetricSourceType) {
+			return c.collectExternalMetrics(ctx, pods, source)
+		}
+		// For pod-based sources, return empty snapshot (zero demand indicator)
+		return &types.MetricSnapshot{
+			Values:    make(map[string]types.MetricValue),
+			Timestamp: time.Now(),
+			Average:   0,
+			Sum:       0,
+			Min:       0,
+			Max:       0,
+			Count:     0,
+		}, nil
 	}
 
 	// Check if this is an external source
diff --git a/pkg/types/annotations.go b/pkg/types/annotations.go
index 0f8d6e6..fd707cf 100644
--- a/pkg/types/annotations.go
+++ b/pkg/types/annotations.go
@@ -110,6 +110,10 @@ const (
 	// ActivationScaleAnnotation defines the minimum non-zero scale.
 	// Value format: integer (e.g., "1", "2").
 	ActivationScaleAnnotation = AnnotationPrefix + "activation-scale"
+
+	// ScaleToZeroGraceAnnotation defines the grace period before scaling to zero.
+	// Value format: duration string (e.g., "5m", "300s").
+	ScaleToZeroGraceAnnotation = AnnotationPrefix + "scale-to-zero-grace"
 )
 
 // Label constants.