Skip to content

Commit b9ce5c9

Browse files
committed
feat(cli): default --dataset to CortexLM/swe-forge in swe harness
The swe harness command now downloads tasks from https://huggingface.co/datasets/CortexLM/swe-forge by default. Use --no-dataset to skip HF download and use local tasks only.
1 parent dcfa5d5 commit b9ce5c9

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

src/cli/commands.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,12 +339,17 @@ pub struct SweHarnessArgs {
339339
#[arg(short = 'j', long)]
340340
pub json: bool,
341341

342-
/// HuggingFace dataset repo ID to download tasks from (e.g. "CortexLM/swe-forge").
343-
/// When provided, tasks are downloaded from the HF dataset's `tasks/` directory
342+
/// HuggingFace dataset repo ID to download tasks from.
343+
/// Tasks are downloaded from the HF dataset's `tasks/` directory
344344
/// into the input directory before running the harness.
345-
#[arg(long)]
345+
/// Use --no-dataset to skip downloading.
346+
#[arg(long, default_value = "CortexLM/swe-forge")]
346347
pub dataset: Option<String>,
347348

349+
/// Skip downloading tasks from HuggingFace and use local tasks only.
350+
#[arg(long, default_value_t = false)]
351+
pub no_dataset: bool,
352+
348353
/// Filter to only run a specific task by ID (e.g. "pygments/pygments-3027").
349354
#[arg(long)]
350355
pub task_id: Option<String>,
@@ -719,8 +724,9 @@ async fn run_swe_harness_command(args: SweHarnessArgs) -> anyhow::Result<()> {
719724

720725
let mut input_path = std::path::PathBuf::from(&args.input);
721726

722-
// If --dataset is provided, download tasks from HuggingFace first
723-
if let Some(ref dataset_repo) = args.dataset {
727+
// Download tasks from HuggingFace unless --no-dataset is set
728+
let effective_dataset = if args.no_dataset { None } else { args.dataset.clone() };
729+
if let Some(ref dataset_repo) = effective_dataset {
724730
download_hf_tasks(dataset_repo, args.task_id.as_deref(), &input_path).await?;
725731

726732
// The HF dataset stores tasks under tasks/ subdirectory
@@ -757,7 +763,7 @@ async fn run_swe_harness_command(args: SweHarnessArgs) -> anyhow::Result<()> {
757763
};
758764

759765
// If --task-id is specified and no dataset is in effect (e.g. --no-dataset was passed), filter the local input directory
760-
let effective_input = if args.dataset.is_none() {
766+
let effective_input = if effective_dataset.is_none() {
761767
if let Some(ref tid) = args.task_id {
762768
let task_dir = input_dir.join(tid);
763769
if !task_dir.exists() {

0 commit comments

Comments
 (0)