Skip to content

Commit b6aee7a

Browse files
committed
feat: /evaluate endpoint using stored agent + TRUSTED_VALIDATORS whitelist
1 parent 3db10c4 commit b6aee7a

File tree

4 files changed

+198
-0
lines changed

4 files changed

+198
-0
lines changed

src/config.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ pub struct Config {
3232
pub consensus_ttl_secs: u64,
3333
pub max_pending_consensus: usize,
3434
pub sudo_password: Option<String>,
35+
pub trusted_validators: Vec<String>,
3536
}
3637

3738
impl Config {
@@ -75,6 +76,12 @@ impl Config {
7576
sudo_password: std::env::var("SUDO_PASSWORD")
7677
.ok()
7778
.filter(|s| !s.is_empty()),
79+
trusted_validators: std::env::var("TRUSTED_VALIDATORS")
80+
.unwrap_or_default()
81+
.split(',')
82+
.map(|s| s.trim().to_string())
83+
.filter(|s| !s.is_empty())
84+
.collect(),
7885
})
7986
}
8087

src/handlers.rs

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ pub fn router(state: Arc<AppState>) -> Router {
5454
.route("/instance", get(instance_info))
5555
.route("/dataset", get(fetch_dataset))
5656
.route("/submit_tasks", post(submit_tasks))
57+
.route("/evaluate", post(evaluate_with_stored_agent))
5758
.route("/ws", get(ws::ws_handler))
5859
.with_state(state)
5960
}
@@ -1095,3 +1096,179 @@ async fn submit_tasks(
10951096
})),
10961097
))
10971098
}
1099+
1100+
/// Evaluate using the stored agent archive (from /upload-agent).
1101+
/// Accepts JSON body: { "task_ids": [...], "split": "train" }
1102+
/// Auth: validator hotkey OR sudo password.
1103+
async fn evaluate_with_stored_agent(
1104+
State(state): State<Arc<AppState>>,
1105+
headers: axum::http::HeaderMap,
1106+
Json(body): Json<serde_json::Value>,
1107+
) -> Result<impl IntoResponse, (StatusCode, Json<serde_json::Value>)> {
1108+
// Auth: try validator hotkey first, then sudo password
1109+
let mut authed = false;
1110+
1111+
if let Some(auth_headers) = auth::extract_auth_headers(&headers) {
1112+
authed = state
1113+
.config
1114+
.trusted_validators
1115+
.contains(&auth_headers.hotkey)
1116+
|| (state.validator_whitelist.validator_count() > 0
1117+
&& auth::verify_request(
1118+
&auth_headers,
1119+
&state.nonce_store,
1120+
&state.validator_whitelist,
1121+
)
1122+
.is_ok());
1123+
}
1124+
1125+
if !authed {
1126+
if let Some(password) = headers
1127+
.get("X-Password")
1128+
.or_else(|| headers.get("x-password"))
1129+
.and_then(|v| v.to_str().ok())
1130+
{
1131+
if let Some(ref sudo_pw) = state.config.sudo_password {
1132+
if constant_time_eq(password.as_bytes(), sudo_pw.as_bytes()) {
1133+
authed = true;
1134+
}
1135+
}
1136+
}
1137+
}
1138+
1139+
if !authed {
1140+
return Err((
1141+
StatusCode::UNAUTHORIZED,
1142+
Json(
1143+
serde_json::json!({"error": "unauthorized", "message": "Valid validator hotkey or sudo password required"}),
1144+
),
1145+
));
1146+
}
1147+
1148+
// Parse task_ids
1149+
let task_ids: Vec<String> = body
1150+
.get("task_ids")
1151+
.and_then(|v| serde_json::from_value(v.clone()).ok())
1152+
.unwrap_or_default();
1153+
let split = body
1154+
.get("split")
1155+
.and_then(|v| v.as_str())
1156+
.unwrap_or("train")
1157+
.to_string();
1158+
1159+
if task_ids.is_empty() || task_ids.len() > 50 {
1160+
return Err((
1161+
StatusCode::BAD_REQUEST,
1162+
Json(serde_json::json!({"error": "task_ids must have 1-50 entries"})),
1163+
));
1164+
}
1165+
1166+
// Get stored agent archive
1167+
let archive_bytes = {
1168+
let guard = state.agent_archive.read().await;
1169+
guard.clone().ok_or_else(|| {
1170+
(
1171+
StatusCode::PRECONDITION_FAILED,
1172+
Json(serde_json::json!({"error": "no_agent", "message": "No agent uploaded yet. Use /upload-agent first."})),
1173+
)
1174+
})?
1175+
};
1176+
1177+
// Fetch dataset from HuggingFace
1178+
let hf_client = crate::swe_forge::client::HuggingFaceClient::new().map_err(|e| {
1179+
(
1180+
StatusCode::INTERNAL_SERVER_ERROR,
1181+
Json(serde_json::json!({"error": format!("HF client error: {}", e)})),
1182+
)
1183+
})?;
1184+
1185+
let dataset_config = crate::swe_forge::types::DatasetConfig {
1186+
dataset_id: "CortexLM/swe-forge".to_string(),
1187+
split,
1188+
limit: 100,
1189+
offset: 0,
1190+
};
1191+
1192+
let dataset = hf_client
1193+
.fetch_dataset(&dataset_config)
1194+
.await
1195+
.map_err(|e| {
1196+
(
1197+
StatusCode::BAD_GATEWAY,
1198+
Json(serde_json::json!({"error": format!("Failed to fetch HF dataset: {}", e)})),
1199+
)
1200+
})?;
1201+
1202+
let matched: Vec<&crate::swe_forge::types::DatasetEntry> = dataset
1203+
.entries
1204+
.iter()
1205+
.filter(|e| task_ids.contains(&e.instance_id))
1206+
.collect();
1207+
1208+
if matched.is_empty() {
1209+
return Err((
1210+
StatusCode::NOT_FOUND,
1211+
Json(
1212+
serde_json::json!({"error": "No matching tasks found", "available": dataset.entries.len()}),
1213+
),
1214+
));
1215+
}
1216+
1217+
// Build tasks from HF
1218+
let mut registry = crate::task::registry::TaskRegistry::new();
1219+
let hf_dataset = crate::swe_forge::types::HuggingFaceDataset {
1220+
dataset_id: dataset.dataset_id.clone(),
1221+
split: dataset.split.clone(),
1222+
entries: matched.into_iter().cloned().collect(),
1223+
total_count: dataset.total_count,
1224+
};
1225+
registry.load_from_huggingface(&hf_dataset).map_err(|e| {
1226+
(
1227+
StatusCode::INTERNAL_SERVER_ERROR,
1228+
Json(serde_json::json!({"error": format!("Failed to load tasks: {}", e)})),
1229+
)
1230+
})?;
1231+
1232+
// Extract agent code
1233+
let extract_dir = state.config.workspace_base.join("_extract_evaluate");
1234+
let _ = tokio::fs::remove_dir_all(&extract_dir).await;
1235+
let extracted = crate::task::extract_uploaded_archive(&archive_bytes, &extract_dir)
1236+
.await
1237+
.map_err(|e| {
1238+
(
1239+
StatusCode::BAD_REQUEST,
1240+
Json(serde_json::json!({"error": format!("Failed to extract agent: {}", e)})),
1241+
)
1242+
})?;
1243+
let _ = tokio::fs::remove_dir_all(&extract_dir).await;
1244+
1245+
let hf_tasks: Vec<crate::task::SweForgeTask> = registry.get_tasks().to_vec();
1246+
let final_archive = crate::task::ExtractedArchive {
1247+
tasks: hf_tasks,
1248+
agent_code: extracted.agent_code,
1249+
agent_language: extracted.agent_language,
1250+
};
1251+
1252+
if state.sessions.has_active_batch() {
1253+
return Err((
1254+
StatusCode::SERVICE_UNAVAILABLE,
1255+
Json(serde_json::json!({"error": "A batch is already running. Try again later."})),
1256+
));
1257+
}
1258+
1259+
let total_tasks = final_archive.tasks.len();
1260+
let batch = state.sessions.create_batch(total_tasks);
1261+
let batch_id = batch.id.clone();
1262+
let concurrent = state.config.max_concurrent_tasks;
1263+
1264+
state.executor.spawn_batch(batch, final_archive, concurrent);
1265+
1266+
Ok((
1267+
StatusCode::ACCEPTED,
1268+
Json(serde_json::json!({
1269+
"batch_id": batch_id,
1270+
"total_tasks": total_tasks,
1271+
"matched_task_ids": task_ids,
1272+
})),
1273+
))
1274+
}

src/main.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ async fn main() {
5151
));
5252

5353
let validator_whitelist = validator_whitelist::ValidatorWhitelist::new();
54+
if !config.trusted_validators.is_empty() {
55+
info!(
56+
"Adding {} trusted validators (bypass consensus)",
57+
config.trusted_validators.len()
58+
);
59+
validator_whitelist.add_trusted(&config.trusted_validators);
60+
}
5461
let consensus_manager = consensus::ConsensusManager::new(config.max_pending_consensus);
5562

5663
let state = Arc::new(handlers::AppState {

src/validator_whitelist.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ impl ValidatorWhitelist {
1919
})
2020
}
2121

22+
pub fn add_trusted(&self, hotkeys: &[String]) {
23+
let mut set = self.hotkeys.write();
24+
for hk in hotkeys {
25+
set.insert(hk.clone());
26+
}
27+
}
28+
2229
pub fn is_whitelisted(&self, ss58_hotkey: &str) -> bool {
2330
self.hotkeys.read().contains(ss58_hotkey)
2431
}

0 commit comments

Comments
 (0)