Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ members = ["crates/*"]
resolver = "2"

[workspace.package]
version = "0.1.113"
version = "0.1.114"
edition = "2024"
rust-version = "1.85"
license = "Apache-2.0"
Expand Down
15 changes: 13 additions & 2 deletions crates/cli-sub-agent/src/debate_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,19 @@ pub(crate) async fn handle_debate(
)
.await?;

// 7. Get env injection from global config
let extra_env = global_config.env_vars(executor.tool_name());
// 7. Get env injection from global config (with no-flash + api key fallback)
let extra_env_owned = {
let mut env = global_config
.env_vars(executor.tool_name())
.cloned()
.unwrap_or_default();
env.insert("_CSA_NO_FLASH_FALLBACK".to_string(), "1".to_string());
if let Some(key) = global_config.api_key_fallback(executor.tool_name()) {
env.insert("_CSA_API_KEY_FALLBACK".to_string(), key.to_string());
}
env
};
let extra_env = Some(&extra_env_owned);
let idle_timeout_seconds =
crate::pipeline::resolve_idle_timeout_seconds(config.as_ref(), args.idle_timeout);

Expand Down
13 changes: 12 additions & 1 deletion crates/cli-sub-agent/src/review_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,18 @@ async fn execute_review(
prompt
};

let extra_env = global_config.env_vars(executor.tool_name());
let extra_env_owned = {
let mut env = global_config
.env_vars(executor.tool_name())
.cloned()
.unwrap_or_default();
env.insert("_CSA_NO_FLASH_FALLBACK".to_string(), "1".to_string());
if let Some(key) = global_config.api_key_fallback(executor.tool_name()) {
env.insert("_CSA_API_KEY_FALLBACK".to_string(), key.to_string());
}
env
};
let extra_env = Some(&extra_env_owned);
let _slot_guard = crate::pipeline::acquire_slot(&executor, global_config)?;

if session.is_none() {
Expand Down
40 changes: 21 additions & 19 deletions crates/csa-executor/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ pub struct LegacyTransport {
}

/// Attempt cap that `should_retry_gemini_rate_limited` compares `attempt` against
/// when the no-flash flag is absent from `extra_env`.
const GEMINI_RATE_LIMIT_MAX_ATTEMPTS: u8 = 3;
/// Lower attempt cap used instead when `is_no_flash` reports that the flag is set.
const GEMINI_RATE_LIMIT_NO_FLASH_ATTEMPTS: u8 = 2;
/// Env-map key for the API key fallback value.
/// NOTE(review): presumably consumed by `inject_api_key_fallback` — confirm.
const API_KEY_FALLBACK_ENV_KEY: &str = "_CSA_API_KEY_FALLBACK";
/// Env-map key whose presence (regardless of value) makes `is_no_flash` return true.
const NO_FLASH_FALLBACK_ENV_KEY: &str = "_CSA_NO_FLASH_FALLBACK";
/// Name of the Gemini API key variable.
/// NOTE(review): usage is not visible here; presumably set during API key fallback — confirm.
const GEMINI_API_KEY_ENV: &str = "GEMINI_API_KEY";
#[cfg(test)]
const GEMINI_RATE_LIMIT_BASE_BACKOFF_MS: u64 = 10;
Expand All @@ -96,16 +98,26 @@ impl LegacyTransport {
&self,
execution: &ExecutionResult,
attempt: u8,
extra_env: Option<&HashMap<String, String>>,
) -> Option<Duration> {
let max = if Self::is_no_flash(extra_env) {
GEMINI_RATE_LIMIT_NO_FLASH_ATTEMPTS
} else {
GEMINI_RATE_LIMIT_MAX_ATTEMPTS
};
if !matches!(self.executor, Executor::GeminiCli { .. })
|| attempt >= GEMINI_RATE_LIMIT_MAX_ATTEMPTS
|| attempt >= max
|| !Self::is_gemini_rate_limited(execution)
{
return None;
}
Some(Self::gemini_rate_limit_backoff(attempt))
}

/// Reports whether the supplied environment map carries the no-flash marker.
///
/// Only the presence of `NO_FLASH_FALLBACK_ENV_KEY` is checked; the value stored
/// under it is ignored. A missing map (`None`) counts as "flag not set".
fn is_no_flash(extra_env: Option<&HashMap<String, String>>) -> bool {
    match extra_env {
        Some(vars) => vars.contains_key(NO_FLASH_FALLBACK_ENV_KEY),
        None => false,
    }
}

fn is_gemini_rate_limited(execution: &ExecutionResult) -> bool {
if execution.exit_code == 0 {
return false;
Expand Down Expand Up @@ -294,23 +306,18 @@ impl LegacyTransport {
idle_timeout_seconds,
)
.await?;
if let Some(backoff) = self.should_retry_gemini_rate_limited(&result.execution, attempt)
if let Some(backoff) =
self.should_retry_gemini_rate_limited(&result.execution, attempt, extra_env)
{
tracing::debug!(
attempt,
max_attempts = GEMINI_RATE_LIMIT_MAX_ATTEMPTS,
"gemini-cli rate limit; retrying with model switch"
);
tracing::debug!(attempt, "gemini-cli rate limit; retrying with model switch");
tokio::time::sleep(backoff).await;
attempt = attempt.saturating_add(1);
continue;
}
// API key fallback: all model retries exhausted, still quota error.
if Self::is_gemini_rate_limited(&result.execution) {
if let Some(env_with_key) = Self::inject_api_key_fallback(extra_env) {
tracing::info!(
"gemini-cli quota exhausted after all retries; falling back to API key auth"
);
tracing::info!("gemini-cli quota exhausted; falling back to API key auth");
return self
.execute_in_single_attempt(
&self.executor,
Expand Down Expand Up @@ -351,23 +358,18 @@ impl Transport for LegacyTransport {
options.clone(),
)
.await?;
if let Some(backoff) = self.should_retry_gemini_rate_limited(&result.execution, attempt)
if let Some(backoff) =
self.should_retry_gemini_rate_limited(&result.execution, attempt, extra_env)
{
tracing::debug!(
attempt,
max_attempts = GEMINI_RATE_LIMIT_MAX_ATTEMPTS,
"gemini-cli rate limit; retrying with model switch"
);
tracing::debug!(attempt, "gemini-cli rate limit; retrying with model switch");
tokio::time::sleep(backoff).await;
attempt = attempt.saturating_add(1);
continue;
}
// API key fallback: all model retries exhausted, still quota error.
if Self::is_gemini_rate_limited(&result.execution) {
if let Some(env_with_key) = Self::inject_api_key_fallback(extra_env) {
tracing::info!(
"gemini-cli quota exhausted after all retries; falling back to API key auth"
);
tracing::info!("gemini-cli quota exhausted; falling back to API key auth");
return self
.execute_single_attempt(
&self.executor,
Expand Down
32 changes: 27 additions & 5 deletions crates/csa-executor/src/transport_tests_tail.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,9 @@ fn test_should_retry_gemini_rate_limited_until_final_attempt() {
exit_code: 1,
};

assert!(transport.should_retry_gemini_rate_limited(&execution, 1).is_some());
assert!(transport.should_retry_gemini_rate_limited(&execution, 2).is_some());
assert!(transport.should_retry_gemini_rate_limited(&execution, 3).is_none());
assert!(transport.should_retry_gemini_rate_limited(&execution, 1, None).is_some());
assert!(transport.should_retry_gemini_rate_limited(&execution, 2, None).is_some());
assert!(transport.should_retry_gemini_rate_limited(&execution, 3, None).is_none());
}

#[test]
Expand All @@ -395,7 +395,7 @@ fn test_should_not_retry_on_success_exit_code() {
stderr_output: String::new(),
exit_code: 0,
};
assert!(transport.should_retry_gemini_rate_limited(&execution, 1).is_none());
assert!(transport.should_retry_gemini_rate_limited(&execution, 1, None).is_none());
}

#[test]
Expand All @@ -410,7 +410,29 @@ fn test_should_retry_on_quota_exhausted_marker() {
stderr_output: "reason: 'QUOTA_EXHAUSTED'".to_string(),
exit_code: 1,
};
assert!(transport.should_retry_gemini_rate_limited(&execution, 1).is_some());
assert!(transport.should_retry_gemini_rate_limited(&execution, 1, None).is_some());
}

#[test]
fn test_no_flash_fallback_stops_retry_after_attempt_2() {
    // A non-zero exit whose stderr carries an HTTP 429 message.
    let rate_limited = ExecutionResult {
        summary: "failed".to_string(),
        output: String::new(),
        stderr_output: "HTTP 429 Too Many Requests".to_string(),
        exit_code: 1,
    };
    let gemini = LegacyTransport::new(Executor::GeminiCli {
        model_override: None,
        thinking_budget: None,
    });

    // Environment map carrying only the no-flash marker.
    let no_flash_env = HashMap::from([("_CSA_NO_FLASH_FALLBACK".to_string(), "1".to_string())]);
    let flagged = Some(&no_flash_env);

    // Attempt 1 retries (switches to pro)
    let first = gemini.should_retry_gemini_rate_limited(&rate_limited, 1, flagged);
    assert!(first.is_some());

    // Attempt 2 does NOT retry (would switch to flash, which is forbidden)
    let second = gemini.should_retry_gemini_rate_limited(&rate_limited, 2, flagged);
    assert!(second.is_none());

    // Without the flag, attempt 2 would still retry
    let unflagged = gemini.should_retry_gemini_rate_limited(&rate_limited, 2, None);
    assert!(unflagged.is_some());
}

#[test]
Expand Down
Loading