Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
15 changes: 11 additions & 4 deletions eval/distributed/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,13 @@ def check_dataset_exists(repo_id):
return False


def create_evaluation_dataset(tasks, system_instruction=None):
def create_evaluation_dataset(tasks, namespace, system_instruction=None):
"""Create or use cached evaluation dataset."""
print_header("Preparing Evaluation Dataset")

# Generate a cached dataset name based on tasks
eval_dataset_hash = generate_evaluation_dataset_hash(tasks, system_instruction)
cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}"
cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}"

# Check if the cached dataset exists
if check_dataset_exists(cached_dataset_id):
Expand Down Expand Up @@ -693,6 +693,13 @@ def main():
parser.add_argument("--tp4", action="store_true", help="Use Tensor Parallelism with 4 GPUs")
parser.add_argument("--timestamp", action="store_true", help="Add a timestamp to the output evaluation dataset")
parser.add_argument("--on_login", action="store_true", help="Run on login node instead of sbatch job")
parser.add_argument(
"--hf_namespace",
type=str,
default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "mlfoundations-dev"),
help="Hugging Face namespace (user or org) for cached datasets and uploads. "
"Set via CLI or EVALCHEMY_HF_NAMESPACE env var.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -721,10 +728,10 @@ def main():
suffix = f"_eval_{evaluation_dataset_hash}"
remaining_characters = 96 - len(suffix)
model_name_short = args.model_name.split("/")[-1][:remaining_characters]
output_dataset = f"mlfoundations-dev/{model_name_short}{suffix}"
output_dataset = f"{args.hf_namespace}/{model_name_short}{suffix}"

# Create or get cached evaluation dataset
input_dataset = create_evaluation_dataset(tasks, args.system_instruction)
input_dataset = create_evaluation_dataset(tasks, args.hf_namespace, args.system_instruction)
if not input_dataset:
sys.exit(1)

Expand Down
15 changes: 11 additions & 4 deletions eval/distributed/launch_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,13 @@ def check_dataset_exists(repo_id):
return False


def create_evaluation_dataset(tasks, system_instruction=None):
def create_evaluation_dataset(tasks, namespace, system_instruction=None):
"""Create or use cached evaluation dataset."""
print_header("Preparing Evaluation Dataset")

# Generate a cached dataset name based on tasks
eval_dataset_hash = generate_evaluation_dataset_hash(tasks, system_instruction)
cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}"
cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}"

# Check if the cached dataset exists
if check_dataset_exists(cached_dataset_id):
Expand Down Expand Up @@ -886,6 +886,13 @@ def main():
parser.add_argument("--system_instruction", type=str, default=None, help="System instruction for the model")
parser.add_argument("--tp4", action="store_true", help="Use Tensor Parallelism with 4 GPUs")
parser.add_argument("--timestamp", action="store_true", help="Add a timestamp to the output evaluation dataset")
parser.add_argument(
"--hf_namespace",
type=str,
default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "mlfoundations-dev"),
help="Hugging Face namespace (user or org) for cached datasets and uploads. "
"Set via CLI or EVALCHEMY_HF_NAMESPACE env var.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -921,10 +928,10 @@ def main():
suffix = f"_eval_{evaluation_dataset_hash}"
remaining_characters = 96 - len(suffix)
model_name_short = args.model_name.split("/")[-1][:remaining_characters]
output_dataset = f"mlfoundations-dev/{model_name_short}{suffix}"
output_dataset = f"{args.hf_namespace}/{model_name_short}{suffix}"

# Create or get cached evaluation dataset
input_dataset = create_evaluation_dataset(tasks, args.system_instruction)
input_dataset = create_evaluation_dataset(tasks, args.hf_namespace, args.system_instruction)
if not input_dataset:
sys.exit(1)

Expand Down
17 changes: 13 additions & 4 deletions eval/distributed/launch_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def execute_command(cmd):
return stdout.strip()


def create_evaluation_dataset(tasks, eval_dataset_hash, system_instruction=None):
def create_evaluation_dataset(tasks, eval_dataset_hash, namespace, system_instruction=None):
"""Create or use cached evaluation dataset."""
cached_dataset_id = f"mlfoundations-dev/evalset_{eval_dataset_hash}"
cached_dataset_id = f"{namespace}/evalset_{eval_dataset_hash}"
if check_dataset_exists(cached_dataset_id):
print(f"Using cached evaluation dataset: {cached_dataset_id}")
else:
Expand Down Expand Up @@ -149,14 +149,23 @@ def main():
parser.add_argument(
"--dependency", type=str, default=None, help="Dependency for the sbatch job. (e.g. afterok:123456)"
)
parser.add_argument(
"--hf_namespace",
type=str,
default=os.environ.get("EVALCHEMY_HF_NAMESPACE", "DCAgent2"),
help="Hugging Face namespace (user or org) for cached datasets and uploads. "
"Set via CLI or EVALCHEMY_HF_NAMESPACE env var.",
)
args = parser.parse_args()

# Generate evaluation dataset hash
tasks = [task.strip() for task in args.tasks.split(",")]
evaluation_dataset_hash = generate_evaluation_dataset_hash(tasks, args.system_instruction)

# Download or create input dataset
input_dataset = create_evaluation_dataset(tasks, evaluation_dataset_hash, args.system_instruction)
input_dataset = create_evaluation_dataset(
tasks, evaluation_dataset_hash, args.hf_namespace, args.system_instruction
)

# Create output dataset name
if args.timestamp:
Expand All @@ -165,7 +174,7 @@ def main():
else:
suffix = f"_eval_{evaluation_dataset_hash}"
output_dataset_name = args.model_name.split("/")[-1] + suffix
output_dataset = f"mlfoundations-dev/{output_dataset_name}"
output_dataset = f"{args.hf_namespace}/{output_dataset_name}"
print(f"Output dataset: {output_dataset}")

# Create output log dir
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ dependencies = [
"reka-api",
"together",
"dashscope",
"fschat @ file:eval/chat_benchmarks/MTBench", # Use relative path that pip will resolve during installation

# Cloud & Storage
"gcsfs",
Expand Down