diff --git a/eval/distributed/launch.py b/eval/distributed/launch.py
index 6b4ad0a8..fb108ded 100755
--- a/eval/distributed/launch.py
+++ b/eval/distributed/launch.py
@@ -189,13 +189,13 @@ def check_dataset_exists(repo_id):
     return False
 
 
-def create_evaluation_dataset(tasks, system_instruction=None):
+def create_evaluation_dataset(tasks, namespace, system_instruction=None):
     """Create or use cached evaluation dataset."""
     print_header("Preparing Evaluation Dataset")
 
     # Generate a cached dataset name based on tasks
     eval_dataset_hash = generate_evaluation_dataset_hash(tasks, system_instruction)
-    cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}"
+    cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}"
 
     # Check if the cached dataset exists
     if check_dataset_exists(cached_dataset_id):
@@ -693,6 +693,13 @@ def main():
     parser.add_argument("--tp4", action="store_true", help="Use Tensor Parallelism with 4 GPUs")
     parser.add_argument("--timestamp", action="store_true", help="Add a timestamp to the output evaluation dataset")
     parser.add_argument("--on_login", action="store_true", help="Run on login node instead of sbatch job")
+    parser.add_argument(
+        "--hf_namespace",
+        type=str,
+        default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "mlfoundations-dev"),
+        help="Hugging Face namespace (user or org) for cached datasets and uploads. "
+        "Set via CLI or EVALCHEMY_HF_NAMESPACE env var.",
+    )
 
     args = parser.parse_args()
 
@@ -721,10 +728,10 @@ def main():
         suffix = f"_eval_{evaluation_dataset_hash}"
     remaining_characters = 96 - len(suffix)
     model_name_short = args.model_name.split("/")[-1][:remaining_characters]
-    output_dataset = f"mlfoundations-dev/{model_name_short}{suffix}"
+    output_dataset = f"{args.hf_namespace}/{model_name_short}{suffix}"
 
     # Create or get cached evaluation dataset
-    input_dataset = create_evaluation_dataset(tasks, args.system_instruction)
+    input_dataset = create_evaluation_dataset(tasks, args.hf_namespace, args.system_instruction)
     if not input_dataset:
         sys.exit(1)
 
diff --git a/eval/distributed/launch_local.py b/eval/distributed/launch_local.py
index b2a7a25c..a74ed83c 100644
--- a/eval/distributed/launch_local.py
+++ b/eval/distributed/launch_local.py
@@ -210,13 +210,13 @@ def check_dataset_exists(repo_id):
     return False
 
 
-def create_evaluation_dataset(tasks, system_instruction=None):
+def create_evaluation_dataset(tasks, namespace, system_instruction=None):
     """Create or use cached evaluation dataset."""
     print_header("Preparing Evaluation Dataset")
 
     # Generate a cached dataset name based on tasks
     eval_dataset_hash = generate_evaluation_dataset_hash(tasks, system_instruction)
-    cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}"
+    cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}"
 
     # Check if the cached dataset exists
     if check_dataset_exists(cached_dataset_id):
@@ -886,6 +886,13 @@ def main():
     parser.add_argument("--system_instruction", type=str, default=None, help="System instruction for the model")
     parser.add_argument("--tp4", action="store_true", help="Use Tensor Parallelism with 4 GPUs")
     parser.add_argument("--timestamp", action="store_true", help="Add a timestamp to the output evaluation dataset")
+    parser.add_argument(
+        "--hf_namespace",
+        type=str,
+        default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "mlfoundations-dev"),
+        help="Hugging Face namespace (user or org) for cached datasets and uploads. "
" + "Set via CLI or EVALCHEMY_HF_NAMESPACE env var.", + ) args = parser.parse_args() @@ -921,10 +928,10 @@ def main(): suffix = f"_eval_{evaluation_dataset_hash}" remaining_characters = 96 - len(suffix) model_name_short = args.model_name.split("/")[-1][:remaining_characters] - output_dataset = f"mlfoundations-dev/{model_name_short}{suffix}" + output_dataset = f"{args.hf_namespace}/{model_name_short}{suffix}" # Create or get cached evaluation dataset - input_dataset = create_evaluation_dataset(tasks, args.system_instruction) + input_dataset = create_evaluation_dataset(tasks, args.hf_namespace, args.system_instruction) if not input_dataset: sys.exit(1) diff --git a/eval/distributed/launch_simple.py b/eval/distributed/launch_simple.py index 2328e13c..56e5dd7d 100755 --- a/eval/distributed/launch_simple.py +++ b/eval/distributed/launch_simple.py @@ -95,9 +95,9 @@ def execute_command(cmd): return stdout.strip() -def create_evaluation_dataset(tasks, eval_dataset_hash, system_instruction=None): +def create_evaluation_dataset(tasks, eval_dataset_hash, namespace, system_instruction=None): """Create or use cached evaluation dataset.""" - cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}" + cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}" if check_dataset_exists(cached_dataset_id): print(f"Using cached evaluation dataset: {cached_dataset_id}") else: @@ -149,6 +149,13 @@ def main(): parser.add_argument( "--dependency", type=str, default=None, help="Dependency for the sbatch job. (e.g. afterok:123456)" ) + parser.add_argument( + "--hf_namespace", + type=str, + default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "DCAgent2"), + help="Hugging Face namespace (user or org) for cached datasets and uploads. " + "Set via CLI or EVALCHEMY_HF_NAMESPACE env var.", + ) args = parser.parse_args() # Generate evaluation dataset hash @@ -156,7 +163,9 @@ def main(): evaluation_dataset_hash = generate_evaluation_dataset_hash(tasks, args.system_instruction) # Download or create input dataset - input_dataset = create_evaluation_dataset(tasks, evaluation_dataset_hash, args.system_instruction) + input_dataset = create_evaluation_dataset( + tasks, evaluation_dataset_hash, args.hf_namespace, args.system_instruction + ) # Create output dataset name if args.timestamp: @@ -165,7 +174,7 @@ def main(): else: suffix = f"_eval_{evaluation_dataset_hash}" output_dataset_name = args.model_name.split("/")[-1] + suffix - output_dataset = f"mlfoundations-dev/{output_dataset_name}" + output_dataset = f"{args.hf_namespace}/{output_dataset_name}" print(f"Output dataset: {output_dataset}") # Create output log dir diff --git a/pyproject.toml b/pyproject.toml index e39e4043..27e57f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,6 @@ dependencies = [ "reka-api", "together", "dashscope", - "fschat @ file:eval/chat_benchmarks/MTBench", # Use relative path that pip will resolve during installation # Cloud & Storage "gcsfs",