-
| Ease-of-use Task Register for Autotune API

Principles

Requirement

@register_task(name="text-generation")
def eval_func(model, model_name=None):
    eval_dataset = init_dataset("xxx")
    accuracy = Accuracy()
    for data, label in eval_dataset:
        output = model(data)
        accuracy.update(output, label)
    return accuracy

Repo Architecture
Examples

lm_eval

@register_task(name="lm_eval")
def eval_func(model, model_name, tasks=["lambada_openai"]):
    from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
    results = evaluate(
        model="hf-causal",
        model_args="pretrained=" + model_name + ",tokenizer=" + model_name + ",dtype=float32",
        user_model=model,
        batch_size=32,
        tasks=tasks,
    )
    return results["accuracy"]

lm_code_eval

@register_task(name="lm_code_eval")
def eval_func(model, model_name, tasks=None):
    from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    results = evaluate(
        model=user_model,
        tokenizer=tokenizer,
        tasks=",".join(tasks),
        batch_size=args.batch_size,
    )
    return results["accuracy"]

Usage

model = autotune(
    model,
    conf,
    example_inputs, # example_inputs for jit.trace
    run_fn, # calibration function
    task="lm_eval", # registered evaluation task
    task_args={
        "model_name": "facebook/opt-125m",
        "tasks": ["lambada_openai", "hellaswag", "winogrande", "piqa", "wikitext"],
    }
) | 
Beta Was this translation helpful? Give feedback.
Replies: 4 comments 4 replies
-
| One general comment: given the current Pile calibration dataset, some Chinese evaluation tasks such as CEval/CMMLU will very likely perform poorly. If we want to support different types of evaluation (Chinese, math, code), we need a new, better (most likely mixed) calibration dataset. | 
Beta Was this translation helpful? Give feedback.
-
| I also consider adding task_name, like  | 
Beta Was this translation helpful? Give feedback.
-
| May I know what's the purpose of  | 
Beta Was this translation helpful? Give feedback.
-
| Decision: 
 Reasons: 
 | 
Beta Was this translation helpful? Give feedback.
Decision:
Reasons: