fix(provider): apply reasoning_effort to ZAI providers via SetZaiThinking

tmchow · tmchow · commit 785dbea5b994 · 2026-04-29T08:24:00.000-07:00
diff --git a/crates/forge_app/src/dto/openai/transformers/zai_reasoning.rs b/crates/forge_app/src/dto/openai/transformers/zai_reasoning.rs
@@ -13,31 +13,50 @@ use crate::dto::openai::{Request, ThinkingConfig, ThinkingType};
 ///
 /// - If `reasoning.enabled == Some(true)` → `thinking = {"type": "enabled"}`
 /// - If `reasoning.enabled == Some(false)` → `thinking = {"type": "disabled"}`
-/// - If `reasoning` is None or `enabled` is None → no `thinking` field added
+/// - If `reasoning.enabled == None` and `reasoning.effort ==
+///   Some(Effort::None)` → `thinking = {"type": "disabled"}`. The orchestrator
+///   preserves this opt-out for z.ai providers so it can be mapped here.
+/// - If `reasoning.enabled == None` and `reasoning.effort` is any other value →
+///   `thinking = {"type": "enabled"}`
+/// - If `reasoning` is None or both `enabled` and `effort` are None → no
+///   `thinking` field added
 /// - Original `reasoning` field is removed after transformation
 ///
 /// # Note
 ///
-/// Z.ai only supports enabled/disabled state. Other ReasoningConfig fields
-/// (`max_tokens`, `effort`, `exclude`) are ignored as they are not supported by
-/// z.ai's API.
+/// Z.ai only supports enabled/disabled state. `effort` is mapped to that
+/// on/off state when `enabled` is unset. Other ReasoningConfig fields
+/// (`max_tokens`, `exclude`) are ignored as they are not supported by z.ai's
+/// API.
 pub struct SetZaiThinking;
 
 impl Transformer for SetZaiThinking {
     type Value = Request;
 
     fn transform(&mut self, mut request: Self::Value) -> Self::Value {
-        // Check if reasoning config exists and has enabled field set
-        if let Some(reasoning) = request.reasoning.take()
-            && let Some(enabled) = reasoning.enabled
-        {
-            request.thinking = Some(ThinkingConfig {
-                r#type: if enabled {
-                    ThinkingType::Enabled
-                } else {
-                    ThinkingType::Disabled
-                },
-            });
+        if let Some(reasoning) = request.reasoning.take() {
+            // Effort::None is a strong opt-out and wins over `enabled`, matching
+            // Context::is_reasoning_supported's resolution order in
+            // forge_domain. Without this, an explicit `:reasoning-effort none`
+            // followed later by `enabled: true` (e.g. from defaults) would
+            // re-enable thinking, contradicting the user's most recent intent.
+            let enabled = if matches!(reasoning.effort, Some(forge_domain::Effort::None)) {
+                Some(false)
+            } else if reasoning.enabled.is_some() {
+                reasoning.enabled
+            } else {
+                reasoning.effort.map(|_| true)
+            };
+
+            if let Some(enabled) = enabled {
+                request.thinking = Some(ThinkingConfig {
+                    r#type: if enabled {
+                        ThinkingType::Enabled
+                    } else {
+                        ThinkingType::Disabled
+                    },
+                });
+            }
         }
 
         request
@@ -111,6 +130,79 @@ mod tests {
         assert_eq!(actual.reasoning, None);
     }
 
+    #[test]
+    fn test_effort_none_overrides_enabled_true() {
+        // Matches Context::is_reasoning_supported precedence: Effort::None wins
+        // over enabled: true. Otherwise an opt-out at the effort level would
+        // be silently re-enabled by a stale `enabled: true` from defaults.
+        let fixture = Request::default().reasoning(forge_domain::ReasoningConfig {
+            enabled: Some(true),
+            effort: Some(forge_domain::Effort::None),
+            max_tokens: None,
+            exclude: None,
+        });
+
+        let mut transformer = SetZaiThinking;
+        let actual = transformer.transform(fixture);
+
+        let expected_thinking = Some(ThinkingConfig { r#type: ThinkingType::Disabled });
+        assert_eq!(actual.thinking, expected_thinking);
+        assert_eq!(actual.reasoning, None);
+    }
+
+    #[test]
+    fn test_reasoning_effort_none_converts_to_thinking_disabled() {
+        let fixture = Request::default().reasoning(forge_domain::ReasoningConfig {
+            enabled: None,
+            effort: Some(forge_domain::Effort::None),
+            max_tokens: None,
+            exclude: None,
+        });
+
+        let mut transformer = SetZaiThinking;
+        let actual = transformer.transform(fixture);
+
+        let expected_thinking = Some(ThinkingConfig { r#type: ThinkingType::Disabled });
+        assert_eq!(actual.thinking, expected_thinking);
+        assert_eq!(actual.reasoning, None);
+    }
+
+    #[test]
+    fn test_glm5_effort_none_passthrough_converts_to_thinking_disabled() {
+        let fixture = Request::default()
+            .model(forge_domain::ModelId::new("glm-5"))
+            .reasoning(forge_domain::ReasoningConfig {
+                enabled: None,
+                effort: Some(forge_domain::Effort::None),
+                max_tokens: None,
+                exclude: None,
+            });
+
+        let mut transformer = SetZaiThinking;
+        let actual = transformer.transform(fixture);
+
+        let expected_thinking = Some(ThinkingConfig { r#type: ThinkingType::Disabled });
+        assert_eq!(actual.thinking, expected_thinking);
+        assert_eq!(actual.reasoning, None);
+    }
+
+    #[test]
+    fn test_reasoning_effort_high_converts_to_thinking_enabled() {
+        let fixture = Request::default().reasoning(forge_domain::ReasoningConfig {
+            enabled: None,
+            effort: Some(forge_domain::Effort::High),
+            max_tokens: None,
+            exclude: None,
+        });
+
+        let mut transformer = SetZaiThinking;
+        let actual = transformer.transform(fixture);
+
+        let expected_thinking = Some(ThinkingConfig { r#type: ThinkingType::Enabled });
+        assert_eq!(actual.thinking, expected_thinking);
+        assert_eq!(actual.reasoning, None);
+    }
+
     #[test]
     fn test_reasoning_with_max_tokens_ignores_max_tokens() {
         let fixture = Request::default().reasoning(forge_domain::ReasoningConfig {
diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs
@@ -11,9 +11,15 @@ use tokio::sync::Notify;
 use tracing::warn;
 
 use crate::agent::AgentService;
-use crate::transformers::{DropReasoningOnlyMessages, ModelSpecificReasoning};
+use crate::transformers::{
+    DropMessageReasoningDetails, DropReasoningOnlyMessages, ModelSpecificReasoning,
+};
 use crate::{EnvironmentInfra, TemplateEngine};
 
+fn is_zai_provider_id(id: &ProviderId) -> bool {
+    id == &ProviderId::ZAI || id == &ProviderId::ZAI_CODING
+}
+
 #[derive(Clone, Setters)]
 #[setters(into)]
 pub struct Orchestrator<S> {
@@ -198,15 +204,26 @@ impl<S: AgentService + EnvironmentInfra<Config = forge_config::ForgeConfig>> Orc
         model_id: &ModelId,
         context: Context,
         reasoning_supported: bool,
+        provider_id: &ProviderId,
     ) -> anyhow::Result<ChatCompletionMessageFull> {
         let tool_supported = self.is_tool_supported()?;
         let mut transformers = DefaultTransformation::default()
             .pipe(SortTools::new(self.agent.tool_order()))
             .pipe(NormalizeToolCallArguments::new())
             .pipe(TransformToolCalls::new().when(|_| !tool_supported))
             .pipe(ImageHandling::new())
-            // Drop ALL reasoning (including config) when reasoning is not supported by the model
-            .pipe(DropReasoningDetails.when(|_| !reasoning_supported))
+            // Drop ALL reasoning (including config) when reasoning is not supported by the model.
+            // For z.ai providers we still want to clear stale per-message reasoning_details so they
+            // don't replay against an opted-out turn, but we keep `context.reasoning` so
+            // `SetZaiThinking` can map it to `thinking: { type: disabled }` downstream.
+            .pipe(
+                DropReasoningDetails
+                    .when(|_| !reasoning_supported && !is_zai_provider_id(provider_id)),
+            )
+            .pipe(
+                DropMessageReasoningDetails
+                    .when(|_| !reasoning_supported && is_zai_provider_id(provider_id)),
+            )
             // Strip all reasoning from messages when the model has changed (signatures are
             // model-specific and invalid across models). No-op when model is unchanged.
             .pipe(ReasoningNormalizer::new(model_id.clone()))
@@ -286,6 +303,7 @@ impl<S: AgentService + EnvironmentInfra<Config = forge_config::ForgeConfig>> Orc
                         &model_id,
                         context.clone(),
                         context.is_reasoning_supported(),
+                        &self.agent.provider,
                     )
                 },
                 self.sender.as_ref().map(|sender| {
diff --git a/crates/forge_app/src/transformers/drop_message_reasoning_details.rs b/crates/forge_app/src/transformers/drop_message_reasoning_details.rs
@@ -0,0 +1,66 @@
+use forge_domain::{Context, ContextMessage, Transformer};
+
+/// Strips assistant `reasoning_details` from every message but leaves
+/// `context.reasoning` (the per-turn reasoning config) intact.
+///
+/// Used on the z.ai provider path: when reasoning is opted out (e.g.
+/// `:reasoning-effort none`), the orchestrator must keep `context.reasoning`
+/// alive so [`SetZaiThinking`] can map it to `thinking: { type: disabled }`.
+/// At the same time, prior assistant `reasoning_details` from earlier turns
+/// must NOT replay into the request, otherwise the user's opt-out is
+/// undermined and the upstream API may reject unsupported fields.
+///
+/// Compare to `forge_domain::DropReasoningDetails`, which clears both the
+/// per-message details and `context.reasoning`.
+pub(crate) struct DropMessageReasoningDetails;
+
+impl Transformer for DropMessageReasoningDetails {
+    type Value = Context;
+
+    fn transform(&mut self, mut context: Self::Value) -> Self::Value {
+        context.messages.iter_mut().for_each(|message| {
+            if let ContextMessage::Text(text) = &mut **message {
+                text.reasoning_details = None;
+            }
+        });
+        context
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use forge_domain::{ContextMessage, ReasoningConfig, ReasoningFull, Role, TextMessage};
+
+    use super::*;
+
+    fn make_reasoning() -> Vec<ReasoningFull> {
+        vec![ReasoningFull {
+            text: Some("prior thinking".to_string()),
+            signature: None,
+            ..Default::default()
+        }]
+    }
+
+    #[test]
+    fn drops_message_reasoning_details_but_keeps_context_reasoning() {
+        let reasoning = make_reasoning();
+        let fixture = Context::default()
+            .add_message(ContextMessage::Text(
+                TextMessage::new(Role::Assistant, "hi").reasoning_details(reasoning),
+            ))
+            .reasoning(ReasoningConfig { enabled: Some(false), ..Default::default() });
+
+        let mut transformer = DropMessageReasoningDetails;
+        let actual = transformer.transform(fixture);
+
+        let cleared = actual.messages.iter().all(|entry| match &**entry {
+            ContextMessage::Text(t) => t.reasoning_details.is_none(),
+            _ => true,
+        });
+        assert!(cleared, "all message reasoning_details should be cleared");
+        assert!(
+            actual.reasoning.is_some(),
+            "context.reasoning must survive so SetZaiThinking can map it"
+        );
+    }
+}
diff --git a/crates/forge_app/src/transformers/mod.rs b/crates/forge_app/src/transformers/mod.rs
@@ -1,11 +1,13 @@
 mod compaction;
 mod dedupe_role;
+mod drop_message_reasoning_details;
 mod drop_reasoning_only_messages;
 mod drop_role;
 mod model_specific_reasoning;
 mod strip_working_dir;
 mod trim_context_summary;
 
 pub use compaction::SummaryTransformer;
+pub(crate) use drop_message_reasoning_details::DropMessageReasoningDetails;
 pub(crate) use drop_reasoning_only_messages::DropReasoningOnlyMessages;
 pub(crate) use model_specific_reasoning::ModelSpecificReasoning;