diff --git a/backend/examples/run_prometheus_direct_judge.py b/backend/examples/run_prometheus_direct_judge.py index 7d1c7fa7..6b6d2dcf 100644 --- a/backend/examples/run_prometheus_direct_judge.py +++ b/backend/examples/run_prometheus_direct_judge.py @@ -1,7 +1,7 @@ from evalassist.judges import Criteria, CriteriaOption, Instance, MPrometheusDirectJudge if __name__ == "__main__": - judge = MPrometheusDirectJudge(m_prometheus_b_params=3) + judge = MPrometheusDirectJudge(billions_of_params=3) instances = [ Instance( diff --git a/backend/examples/run_prometheus_pairwise_judge.py b/backend/examples/run_prometheus_pairwise_judge.py index 18e04322..0f05ae8c 100644 --- a/backend/examples/run_prometheus_pairwise_judge.py +++ b/backend/examples/run_prometheus_pairwise_judge.py @@ -1,7 +1,7 @@ from evalassist.judges import Criteria, Instance, MPrometheusPairwiseJudge if __name__ == "__main__": - judge = MPrometheusPairwiseJudge(m_prometheus_b_params=3) + judge = MPrometheusPairwiseJudge(billions_of_params=3) instances = [ Instance( diff --git a/backend/src/evalassist/api_types.py b/backend/src/evalassist/api_types.py index 038a55ac..d25c8dc4 100644 --- a/backend/src/evalassist/api_types.py +++ b/backend/src/evalassist/api_types.py @@ -211,6 +211,8 @@ class NotebookParams(BaseModel): instances: list[DirectInstanceDTO] | list[PairwiseInstanceDTO] examples: list[DirectInstanceDTO] | list[PairwiseInstanceDTO] criteria: CriteriaDTO | CriteriaWithOptionsDTO + judge: str + judge_params: dict # class DownloadTestCaseParams(BaseModel): diff --git a/backend/src/evalassist/judges/direct_judge.py b/backend/src/evalassist/judges/direct_judge.py index 339edfb0..0179739d 100644 --- a/backend/src/evalassist/judges/direct_judge.py +++ b/backend/src/evalassist/judges/direct_judge.py @@ -56,7 +56,7 @@ def __init__( "Either provide set generate_synthetic_persona to False or don't provide a judge_description_prompt." 
) - if self.self_consistency: + if self.self_consistency is True or self.self_consistency > 1: temp = getattr(self.inference_engine, "temperature", None) if temp is not None: try: diff --git a/backend/src/evalassist/judges/mprometheus_judge.py b/backend/src/evalassist/judges/mprometheus_judge.py index dd3be704..9f5fc35a 100644 --- a/backend/src/evalassist/judges/mprometheus_judge.py +++ b/backend/src/evalassist/judges/mprometheus_judge.py @@ -1,18 +1,19 @@ from typing import Literal, cast -from evalassist.judges.utils import get_to_evaluate_text +from fastapi import HTTPException from .base import BaseDirectJudge, BasePairwiseJudge from .types import Criteria, DirectInstanceResult, Instance, PairwiseInstanceResult +from .utils import get_to_evaluate_text class MPrometheusJudge: m_prometheus_model_name: str - def __init__(self, m_prometheus_b_params: Literal[3, 7, 14] = 3, **kwargs): + def __init__(self, billions_of_params: Literal[3, 7, 14] = 3, **kwargs): super().__init__(**kwargs) self.m_prometheus_model_name = ( - f"Unbabel/M-Prometheus-{str(m_prometheus_b_params)}B" + f"Unbabel/M-Prometheus-{str(billions_of_params)}B" ) @@ -42,12 +43,18 @@ def _run( instances: list[Instance], criteria: list[Criteria], ) -> list[DirectInstanceResult]: - from prometheus_eval import PrometheusEval - from prometheus_eval.prompts import ( - ABSOLUTE_PROMPT_WO_REF, - SCORE_RUBRIC_TEMPLATE, - ) - from prometheus_eval.vllm import VLLM + try: + from prometheus_eval import PrometheusEval + from prometheus_eval.prompts import ( + ABSOLUTE_PROMPT_WO_REF, + SCORE_RUBRIC_TEMPLATE, + ) + from prometheus_eval.vllm import VLLM + except ModuleNotFoundError: + raise HTTPException( + status_code=500, + detail="Failed to import 'prometheus_eval' package. 
Make sure it is installed correctly.", + ) self._validate_criteria(criteria) self._validate_instances(instances) diff --git a/backend/src/evalassist/main.py b/backend/src/evalassist/main.py index 2dccde6c..8cdc4ec3 100644 --- a/backend/src/evalassist/main.py +++ b/backend/src/evalassist/main.py @@ -7,6 +7,7 @@ import nbformat as nbf import nest_asyncio import pandas as pd +from evalassist.judges.base import UnitxtInferenceEngineMixin from fastapi import ( APIRouter, BackgroundTasks, @@ -311,16 +312,23 @@ def run(): else: temperature = 0.0 - inference_engine: InferenceEngine = get_inference_engine_from_judge_metadata( - evaluator_name=evaluator_name, - custom_model_name=custom_model_name, - provider=req.provider, - llm_provider_credentials=req.llm_provider_credentials, - custom_params={ - **DEFAULT_JUDGE_INFERENCE_PARAMS, - "temperature": temperature, - }, - ) + judge_class = JUDGE_CLASS_MAP[req.type][req.judge] + judge_requires_model = issubclass(judge_class, UnitxtInferenceEngineMixin) + + if judge_requires_model: + inference_engine: InferenceEngine = ( + get_inference_engine_from_judge_metadata( + evaluator_name=evaluator_name, + custom_model_name=custom_model_name, + provider=req.provider, + llm_provider_credentials=req.llm_provider_credentials, + custom_params={ + **DEFAULT_JUDGE_INFERENCE_PARAMS, + "temperature": temperature, + }, + ) + ) + if ( req.criteria.to_evaluate_field is None or req.criteria.context_fields is None @@ -337,14 +345,17 @@ def run(): example.to_instance_result(req.criteria.to_evaluate_field) for example in req.examples ] - judge = JUDGE_CLASS_MAP[req.type][req.judge]( - **{"inference_engine": inference_engine, **req.judge_params} - ) # type: ignore + + params = req.judge_params + if judge_requires_model: + params["inference_engine"] = inference_engine # type: ignore + + judge = judge_class(**params) # type: ignore if req.type == EvaluatorTypeEnum.DIRECT: criteria = req.criteria.to_criteria(examples=examples) if 
evaluator_name.name.startswith("GRANITE_GUARDIAN"): - judge = GraniteGuardianJudge(inference_engine=inference_engine) + judge = GraniteGuardianJudge(inference_engine=inference_engine) # type: ignore per_instance_result = judge.evaluate( instances=instances, criteria=criteria, @@ -534,7 +545,10 @@ def download_notebook(params: NotebookParams, background_tasks: BackgroundTasks) "evaluator_type": params.evaluator_type, "model_name": model_name, "plain_python_script": params.plain_python_script, + "judge": params.judge, + "judge_params": params.judge_params, } + if params.evaluator_type == EvaluatorTypeEnum.DIRECT: nb = DirectEvaluationNotebook(**p).generate_notebook() # type: ignore else: diff --git a/backend/src/evalassist/notebook_generation.py b/backend/src/evalassist/notebook_generation.py index 74ab6494..08e0a126 100644 --- a/backend/src/evalassist/notebook_generation.py +++ b/backend/src/evalassist/notebook_generation.py @@ -4,7 +4,8 @@ from typing import Literal, cast import nbformat as nbf -from evalassist.judges import Criteria, Instance +from evalassist.judges import JUDGE_CLASS_MAP, Criteria, Instance +from evalassist.judges.base import UnitxtInferenceEngineMixin from .api_types import ( EvaluatorNameEnum, @@ -16,6 +17,58 @@ from .utils import get_cross_inference_engine_params +def format_value(value, indent=0): + """ + Format values as valid Python code. + Supports nested dicts, literals, and variable references. 
+ """ + pad = " " * indent + + # Variable reference + if isinstance(value, VariableRef): + return value.name + + # Basic literals + if isinstance(value, (bool, int, float)): + return repr(value) + + # Strings + if isinstance(value, str): + return repr(value) + + # Nested dictionary + if isinstance(value, dict): + if not value: + return "{}" + + inner_indent = indent + 4 + inner_pad = " " * inner_indent + + items = [] + for k, v in value.items(): + items.append(f"{inner_pad}{repr(k)}: {format_value(v, inner_indent)},") + + return "{\n" + "\n".join(items) + f"\n{pad}}}" + + raise ValueError(f"Unsupported param type: {type(value)}") + + +def generate_constructor_code(class_name: str, params: dict) -> str: + lines = [f"{class_name}("] + for key, value in params.items(): + formatted = format_value(value, indent=4) + lines.append(f" {key}={formatted},") + lines.append(")") + return "\n".join(lines) + + +class VariableRef: + """Wrapper representing a variable name in generated code.""" + + def __init__(self, name: str): + self.name = name + + class Cell: type: Literal["code", "md"] content: str @@ -37,6 +90,8 @@ def __init__( evaluator_type: EvaluatorTypeEnum, model_name: str, plain_python_script: bool, + judge: str, + judge_params: dict, ): self.instances = instances self.criteria = criteria @@ -44,12 +99,25 @@ def __init__( self.evaluator_name = evaluator_name self.evaluator_type = evaluator_type self.plain_python_script = plain_python_script - self.inference_engine_params = get_cross_inference_engine_params( - credentials=credentials, - provider=provider, - model_name=model_name, - custom_params=DEFAULT_JUDGE_INFERENCE_PARAMS, + self.judge = judge + self.judge_params = judge_params + + self.judge_class = JUDGE_CLASS_MAP[evaluator_type][judge] + + self.judge_requires_model = issubclass( + self.judge_class, UnitxtInferenceEngineMixin ) + + if self.judge_requires_model: + self.inference_engine_params = get_cross_inference_engine_params( + credentials=credentials, + 
provider=provider, + model_name=model_name, + custom_params=DEFAULT_JUDGE_INFERENCE_PARAMS, + ) + else: + self.inference_engine_params = None + self.cells: list[Cell] = [] def generate_notebook(self): @@ -148,15 +216,24 @@ def get_import_code(self): """ def get_setup_and_run_eval_code(self): - params = re.sub( - r"\btrue\b", "True", json.dumps(self.inference_engine_params, indent=4) + if self.judge_requires_model: + inference_engine_construct_str = generate_constructor_code( + "CrossProviderInferenceEngine", params=self.inference_engine_params + ) # type: ignore + else: + inference_engine_construct_str = "" + + judge_params = self.judge_params + if self.judge_requires_model: + judge_params["inference_engine"] = VariableRef("inference_engine") + judge_construct_str = generate_constructor_code( + self.judge_class.__name__, params=judge_params ) + return f"""\ -inference_engine = CrossProviderInferenceEngine(**{params}) +inference_engine = {inference_engine_construct_str} -judge = DirectJudge( - inference_engine=inference_engine, -) +judge = {judge_construct_str} results: list[DirectInstanceResult] = judge(instances, criteria) diff --git a/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.module.scss b/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.module.scss index f705d7d8..ae35a379 100644 --- a/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.module.scss +++ b/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.module.scss @@ -3,18 +3,28 @@ .container { display: flex; flex-direction: column; - gap: 2rem; } .configOptions { display: grid; - grid-template-columns: repeat(2, 1fr); - gap: 1rem; /* optional spacing */ + grid-template-columns: repeat(3, 1fr); + gap: 1rem; align-items: center; } .section { display: flex; flex-direction: column; - gap: 0.75rem; + gap: 1rem; +} + +.subSection { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.topDivider { + 
border-top: 1px solid $border-subtle-00; + padding-top: 1rem; } diff --git a/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.tsx b/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.tsx index e0efca51..7a651ea5 100644 --- a/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.tsx +++ b/frontend/src/components/SingleExampleEvaluation/Modals/ConfigurationModal.tsx @@ -10,6 +10,7 @@ import { BASE_JUDGE_PARAMS_MAP, JUDGE_DEFAULT_PARAMS_MAP, JUDGE_PARAMS_MAP, + JUDGE_REQUIRES_MODEL_SELECTION_MAP, } from '@constants' import { useCurrentTestCase } from '@providers/CurrentTestCaseProvider' import { useEvaluatorOptionsContext } from '@providers/EvaluatorOptionsProvider' @@ -32,15 +33,25 @@ export const ConfigurationModal = ({ open, setOpen }: Props) => { const onJudgeSelect = useCallback( (e: { target: { value: string } }) => { - setCurrentTestCase({ - ...currentTestCase, - judge: { - name: e.target.value, - params: { - ...BASE_JUDGE_DEFAULT_PARAMS_MAP, - ...JUDGE_DEFAULT_PARAMS_MAP[currentTestCase.type][e.target.value], + setCurrentTestCase((prev) => { + const params = { + ...BASE_JUDGE_DEFAULT_PARAMS_MAP, + ...JUDGE_DEFAULT_PARAMS_MAP[currentTestCase.type][e.target.value], + } + + Object.entries(prev.judge.params).forEach(([prevParam, prevValue]) => { + if (prevParam in params) { + params[prevParam] = prevValue + } + }) + + return { + ...currentTestCase, + judge: { + name: e.target.value, + params, }, - }, + } }) }, [currentTestCase, setCurrentTestCase], @@ -105,124 +116,133 @@ export const ConfigurationModal = ({ open, setOpen }: Props) => { >
-
-
{'Select judge'}
- -
-
-
{'Judge configuration'}
-
- {Object.entries(currentTestCase.judge.params) - // sort the params by type alphabetically to make it look more clean - .sort(([k, v], [k2, v2]) => { - return allParamTypes[k].toString().localeCompare(allParamTypes[k2].toString()) - }) - .toReversed() - .map(([param, value], i) => - allParamTypes[param] === 'boolean' ? ( - onChangeParamValue(param, state.checked)} - checked={currentTestCase.judge.params[param]} - /> - ) : Array.isArray(allParamTypes[param]) ? ( - - ) : allParamTypes[param] === 'number' ? ( - onChangeParamValue(param, state.value)} - value={currentTestCase.judge.params[param]} - label={capitalizeFirstWord(param)} - /> - ) : ( - 'unknown type config' - ), - )} +
+

{'Judge Configuration'}

+
+
{'Select judge'}
+ +
+
+
{'Configuration'}
+
+ {Object.entries(currentTestCase.judge.params) + // sort the params by type alphabetically to make it look more clean + .sort(([k, v], [k2, v2]) => { + return allParamTypes[k].toString().localeCompare(allParamTypes[k2].toString()) + }) + .toReversed() + .map(([param, value], i) => + allParamTypes[param] === 'boolean' ? ( + onChangeParamValue(param, state.checked)} + checked={currentTestCase.judge.params[param]} + /> + ) : Array.isArray(allParamTypes[param]) ? ( + + ) : allParamTypes[param] === 'number' ? ( + onChangeParamValue(param, state.value)} + value={currentTestCase.judge.params[param]} + label={capitalizeFirstWord(param)} + /> + ) : ( + 'unknown type config' + ), + )} +
+
+
+ {JUDGE_REQUIRES_MODEL_SELECTION_MAP[currentTestCase.type][currentTestCase.judge.name] && ( + <> +
{'Judge model'}
+ { + setCurrentTestCase({ ...currentTestCase, evaluator }) + }} + helperChildren={ + <> + + {'How do evaluators work?'} + + + + } + /> + + )}
-
-
{'Judge model'}
-
+ +
+

{'Synthetic generation'}

+
+ {/*
{'Model'}
*/} { - setCurrentTestCase({ ...currentTestCase, evaluator }) - }} + selectedEvaluator={currentTestCase.syntheticGenerationConfig.evaluator} + setSelectedEvaluator={(newValue) => + setCurrentTestCase({ + ...currentTestCase, + syntheticGenerationConfig: { + ...currentTestCase.syntheticGenerationConfig, + evaluator: newValue, + }, + }) + } + evaluatorOptions={ + returnByPipelineType( + currentTestCase.type, + nonGraniteGuardianDirectEvaluators, + nonGraniteGuardianPairwiseEvaluators, + ) || [] + } + dropdownLabel={'Synthetic generation model'} + selectionComponentNameWithArticle="a model" + selectionComponentName="model" helperChildren={ <> - {'How do evaluators work?'} + {'What is synthetic generation?'} - + } /> -
-
{'Synthetic generation model'}
- - setCurrentTestCase({ - ...currentTestCase, - syntheticGenerationConfig: { - ...currentTestCase.syntheticGenerationConfig, - evaluator: newValue, - }, - }) - } - evaluatorOptions={ - returnByPipelineType( - currentTestCase.type, - nonGraniteGuardianDirectEvaluators, - nonGraniteGuardianPairwiseEvaluators, - ) || [] - } - dropdownLabel={'Synthetic generation'} - selectionComponentNameWithArticle="a model" - selectionComponentName="model" - helperChildren={ - <> - - {'What is synthetic generation?'} - - - - } - /> -
diff --git a/frontend/src/components/SingleExampleEvaluation/Modals/DownloadModal.tsx b/frontend/src/components/SingleExampleEvaluation/Modals/DownloadModal.tsx index 6e203968..9c75684e 100644 --- a/frontend/src/components/SingleExampleEvaluation/Modals/DownloadModal.tsx +++ b/frontend/src/components/SingleExampleEvaluation/Modals/DownloadModal.tsx @@ -35,9 +35,11 @@ export const DownloadModal = ({ open, setOpen }: Props) => { if (['ipynb', 'py'].includes(selected)) { downloadUnitxtCode({ downloadAsScript: selected === 'py' }) } else if (selected === 'test_case') { + downloadTestCase() } else { + downloadTestData() } - }, [setOpen, selected, downloadUnitxtCode]) + }, [setOpen, selected, downloadUnitxtCode, downloadTestCase, downloadTestData]) return ( > = { generate_feedback: 'boolean', on_generation_failure: ['raise', 'random'], }, - m_prometheus: {}, + m_prometheus: { + billions_of_params: ['3', '7', '14'], + }, criticized: {}, thesis_antithesis: {}, unitxt: {}, @@ -160,7 +162,9 @@ export const JUDGE_PARAMS_MAP: Record> = { }, pairwise: { eval_assist: {}, - m_prometheus: {}, + m_prometheus: { + billions_of_params: ['3', '7', '14'], + }, unitxt: {}, }, } @@ -172,7 +176,9 @@ export const JUDGE_DEFAULT_PARAMS_MAP: Record> = { generate_feedback: true, on_generation_failure: 'random', }, - m_prometheus: {}, + m_prometheus: { + billions_of_params: '3', + }, criticized: {}, thesis_antithesis: {}, unitxt: {}, @@ -180,7 +186,25 @@ export const JUDGE_DEFAULT_PARAMS_MAP: Record> = { }, pairwise: { eval_assist: {}, - m_prometheus: {}, + m_prometheus: { + billions_of_params: '3', + }, unitxt: {}, }, } + +export const JUDGE_REQUIRES_MODEL_SELECTION_MAP: Record> = { + direct: { + eval_assist: true, + m_prometheus: false, + criticized: true, + thesis_antithesis: true, + unitxt: true, + granite_guardian: true, + }, + pairwise: { + eval_assist: true, + m_prometheus: false, + unitxt: true, + }, +} diff --git a/frontend/src/customHooks/useNotebookGeneration.ts 
b/frontend/src/customHooks/useNotebookGeneration.ts index 46785eee..d7685518 100644 --- a/frontend/src/customHooks/useNotebookGeneration.ts +++ b/frontend/src/customHooks/useNotebookGeneration.ts @@ -42,6 +42,8 @@ export const useUnitxtCodeGeneration = () => { evaluator_type: currentTestCase.type, test_case_name: currentTestCase.name || '', plain_python_script: downloadAsScript, + judge: currentTestCase.judge.name, + judge_params: currentTestCase.judge.params, }) if (!response.ok) {