@@ -54,6 +54,7 @@ pub fn router(state: Arc<AppState>) -> Router {
5454 . route ( "/instance" , get ( instance_info) )
5555 . route ( "/dataset" , get ( fetch_dataset) )
5656 . route ( "/submit_tasks" , post ( submit_tasks) )
57+ . route ( "/evaluate" , post ( evaluate_with_stored_agent) )
5758 . route ( "/ws" , get ( ws:: ws_handler) )
5859 . with_state ( state)
5960}
@@ -1095,3 +1096,179 @@ async fn submit_tasks(
10951096 } ) ) ,
10961097 ) )
10971098}
1099+
1100+ /// Evaluate using the stored agent archive (from /upload-agent).
1101+ /// Accepts JSON body: { "task_ids": [...], "split": "train" }
1102+ /// Auth: validator hotkey OR sudo password.
1103+ async fn evaluate_with_stored_agent (
1104+ State ( state) : State < Arc < AppState > > ,
1105+ headers : axum:: http:: HeaderMap ,
1106+ Json ( body) : Json < serde_json:: Value > ,
1107+ ) -> Result < impl IntoResponse , ( StatusCode , Json < serde_json:: Value > ) > {
1108+ // Auth: try validator hotkey first, then sudo password
1109+ let mut authed = false ;
1110+
1111+ if let Some ( auth_headers) = auth:: extract_auth_headers ( & headers) {
1112+ authed = state
1113+ . config
1114+ . trusted_validators
1115+ . contains ( & auth_headers. hotkey )
1116+ || ( state. validator_whitelist . validator_count ( ) > 0
1117+ && auth:: verify_request (
1118+ & auth_headers,
1119+ & state. nonce_store ,
1120+ & state. validator_whitelist ,
1121+ )
1122+ . is_ok ( ) ) ;
1123+ }
1124+
1125+ if !authed {
1126+ if let Some ( password) = headers
1127+ . get ( "X-Password" )
1128+ . or_else ( || headers. get ( "x-password" ) )
1129+ . and_then ( |v| v. to_str ( ) . ok ( ) )
1130+ {
1131+ if let Some ( ref sudo_pw) = state. config . sudo_password {
1132+ if constant_time_eq ( password. as_bytes ( ) , sudo_pw. as_bytes ( ) ) {
1133+ authed = true ;
1134+ }
1135+ }
1136+ }
1137+ }
1138+
1139+ if !authed {
1140+ return Err ( (
1141+ StatusCode :: UNAUTHORIZED ,
1142+ Json (
1143+ serde_json:: json!( { "error" : "unauthorized" , "message" : "Valid validator hotkey or sudo password required" } ) ,
1144+ ) ,
1145+ ) ) ;
1146+ }
1147+
1148+ // Parse task_ids
1149+ let task_ids: Vec < String > = body
1150+ . get ( "task_ids" )
1151+ . and_then ( |v| serde_json:: from_value ( v. clone ( ) ) . ok ( ) )
1152+ . unwrap_or_default ( ) ;
1153+ let split = body
1154+ . get ( "split" )
1155+ . and_then ( |v| v. as_str ( ) )
1156+ . unwrap_or ( "train" )
1157+ . to_string ( ) ;
1158+
1159+ if task_ids. is_empty ( ) || task_ids. len ( ) > 50 {
1160+ return Err ( (
1161+ StatusCode :: BAD_REQUEST ,
1162+ Json ( serde_json:: json!( { "error" : "task_ids must have 1-50 entries" } ) ) ,
1163+ ) ) ;
1164+ }
1165+
1166+ // Get stored agent archive
1167+ let archive_bytes = {
1168+ let guard = state. agent_archive . read ( ) . await ;
1169+ guard. clone ( ) . ok_or_else ( || {
1170+ (
1171+ StatusCode :: PRECONDITION_FAILED ,
1172+ Json ( serde_json:: json!( { "error" : "no_agent" , "message" : "No agent uploaded yet. Use /upload-agent first." } ) ) ,
1173+ )
1174+ } ) ?
1175+ } ;
1176+
1177+ // Fetch dataset from HuggingFace
1178+ let hf_client = crate :: swe_forge:: client:: HuggingFaceClient :: new ( ) . map_err ( |e| {
1179+ (
1180+ StatusCode :: INTERNAL_SERVER_ERROR ,
1181+ Json ( serde_json:: json!( { "error" : format!( "HF client error: {}" , e) } ) ) ,
1182+ )
1183+ } ) ?;
1184+
1185+ let dataset_config = crate :: swe_forge:: types:: DatasetConfig {
1186+ dataset_id : "CortexLM/swe-forge" . to_string ( ) ,
1187+ split,
1188+ limit : 100 ,
1189+ offset : 0 ,
1190+ } ;
1191+
1192+ let dataset = hf_client
1193+ . fetch_dataset ( & dataset_config)
1194+ . await
1195+ . map_err ( |e| {
1196+ (
1197+ StatusCode :: BAD_GATEWAY ,
1198+ Json ( serde_json:: json!( { "error" : format!( "Failed to fetch HF dataset: {}" , e) } ) ) ,
1199+ )
1200+ } ) ?;
1201+
1202+ let matched: Vec < & crate :: swe_forge:: types:: DatasetEntry > = dataset
1203+ . entries
1204+ . iter ( )
1205+ . filter ( |e| task_ids. contains ( & e. instance_id ) )
1206+ . collect ( ) ;
1207+
1208+ if matched. is_empty ( ) {
1209+ return Err ( (
1210+ StatusCode :: NOT_FOUND ,
1211+ Json (
1212+ serde_json:: json!( { "error" : "No matching tasks found" , "available" : dataset. entries. len( ) } ) ,
1213+ ) ,
1214+ ) ) ;
1215+ }
1216+
1217+ // Build tasks from HF
1218+ let mut registry = crate :: task:: registry:: TaskRegistry :: new ( ) ;
1219+ let hf_dataset = crate :: swe_forge:: types:: HuggingFaceDataset {
1220+ dataset_id : dataset. dataset_id . clone ( ) ,
1221+ split : dataset. split . clone ( ) ,
1222+ entries : matched. into_iter ( ) . cloned ( ) . collect ( ) ,
1223+ total_count : dataset. total_count ,
1224+ } ;
1225+ registry. load_from_huggingface ( & hf_dataset) . map_err ( |e| {
1226+ (
1227+ StatusCode :: INTERNAL_SERVER_ERROR ,
1228+ Json ( serde_json:: json!( { "error" : format!( "Failed to load tasks: {}" , e) } ) ) ,
1229+ )
1230+ } ) ?;
1231+
1232+ // Extract agent code
1233+ let extract_dir = state. config . workspace_base . join ( "_extract_evaluate" ) ;
1234+ let _ = tokio:: fs:: remove_dir_all ( & extract_dir) . await ;
1235+ let extracted = crate :: task:: extract_uploaded_archive ( & archive_bytes, & extract_dir)
1236+ . await
1237+ . map_err ( |e| {
1238+ (
1239+ StatusCode :: BAD_REQUEST ,
1240+ Json ( serde_json:: json!( { "error" : format!( "Failed to extract agent: {}" , e) } ) ) ,
1241+ )
1242+ } ) ?;
1243+ let _ = tokio:: fs:: remove_dir_all ( & extract_dir) . await ;
1244+
1245+ let hf_tasks: Vec < crate :: task:: SweForgeTask > = registry. get_tasks ( ) . to_vec ( ) ;
1246+ let final_archive = crate :: task:: ExtractedArchive {
1247+ tasks : hf_tasks,
1248+ agent_code : extracted. agent_code ,
1249+ agent_language : extracted. agent_language ,
1250+ } ;
1251+
1252+ if state. sessions . has_active_batch ( ) {
1253+ return Err ( (
1254+ StatusCode :: SERVICE_UNAVAILABLE ,
1255+ Json ( serde_json:: json!( { "error" : "A batch is already running. Try again later." } ) ) ,
1256+ ) ) ;
1257+ }
1258+
1259+ let total_tasks = final_archive. tasks . len ( ) ;
1260+ let batch = state. sessions . create_batch ( total_tasks) ;
1261+ let batch_id = batch. id . clone ( ) ;
1262+ let concurrent = state. config . max_concurrent_tasks ;
1263+
1264+ state. executor . spawn_batch ( batch, final_archive, concurrent) ;
1265+
1266+ Ok ( (
1267+ StatusCode :: ACCEPTED ,
1268+ Json ( serde_json:: json!( {
1269+ "batch_id" : batch_id,
1270+ "total_tasks" : total_tasks,
1271+ "matched_task_ids" : task_ids,
1272+ } ) ) ,
1273+ ) )
1274+ }
0 commit comments