|
17 | 17 | from pydantic import BaseModel, ConfigDict |
18 | 18 |
|
19 | 19 | from openhands.sdk.logger import get_logger |
20 | | -from tests.integration.base import BaseIntegrationTest, TestResult |
| 20 | +from tests.integration.base import BaseIntegrationTest, SkipTest, TestResult |
21 | 21 | from tests.integration.schemas import ModelTestResults |
22 | 22 | from tests.integration.utils.format_costs import format_cost |
23 | 23 |
|
@@ -171,6 +171,20 @@ def process_instance(instance: TestInstance, llm_config: dict[str, Any]) -> Eval |
171 | 171 | log_file_path=log_file_path, |
172 | 172 | ) |
173 | 173 |
|
| 174 | + except SkipTest as e: |
| 175 | + # Test should be skipped (e.g., LLM doesn't support required capabilities) |
| 176 | + logger.info("Test %s skipped: %s", instance.instance_id, str(e)) |
| 177 | + return EvalOutput( |
| 178 | + instance_id=instance.instance_id, |
| 179 | + test_result=TestResult( |
| 180 | + success=False, |
| 181 | + reason=str(e), |
| 182 | + skipped=True, |
| 183 | + ), |
| 184 | + llm_model=llm_config.get("model", "unknown"), |
| 185 | + cost=0.0, |
| 186 | + ) |
| 187 | + |
174 | 188 | except Exception as e: |
175 | 189 | logger.error("Error running test %s: %s", instance.instance_id, e) |
176 | 190 | return EvalOutput( |
@@ -274,11 +288,17 @@ def generate_structured_results( |
274 | 288 | # Print summary for console output |
275 | 289 | success_rate = structured_results.success_rate |
276 | 290 | successful = structured_results.successful_tests |
| 291 | + skipped = structured_results.skipped_tests |
277 | 292 | total = structured_results.total_tests |
278 | 293 | logger.info("Success rate: %.2f%% (%d/%d)", success_rate * 100, successful, total) |
| 294 | + if skipped > 0: |
| 295 | + logger.info("Skipped tests: %d", skipped) |
279 | 296 | logger.info("Evaluation Results:") |
280 | 297 | for instance in structured_results.test_instances: |
281 | | - status = "✓" if instance.test_result.success else "✗" |
| 298 | + if instance.test_result.skipped: |
| 299 | + status = "⊘" # Skipped symbol |
| 300 | + else: |
| 301 | + status = "✓" if instance.test_result.success else "✗" |
282 | 302 | reason = instance.test_result.reason or "N/A" |
283 | 303 | logger.info("%s: %s - %s", instance.instance_id, status, reason) |
284 | 304 | logger.info("Total cost: %s", format_cost(structured_results.total_cost)) |
|
0 commit comments