From 9b972beecb53baff2121440e8c208a5ccf68dd25 Mon Sep 17 00:00:00 2001 From: akash Date: Mon, 19 Jan 2026 17:45:53 +0530 Subject: [PATCH 1/3] logging added for prompts, raw responses and parsing errors --- src/agentunit/generators/llm_generator.py | 67 +++++++++++++++-------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/src/agentunit/generators/llm_generator.py b/src/agentunit/generators/llm_generator.py index 5388296..2aa3484 100644 --- a/src/agentunit/generators/llm_generator.py +++ b/src/agentunit/generators/llm_generator.py @@ -3,12 +3,14 @@ from __future__ import annotations import asyncio +import logging import json from dataclasses import dataclass from typing import Any from agentunit.datasets.base import DatasetCase, DatasetSource +logger = logging.getLogger(__name__) try: import openai @@ -41,10 +43,10 @@ class LlamaDatasetGenerator: """Generate synthetic datasets using Llama models via HuggingFace Inference API.""" def __init__( - self, - model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct", - api_token: str | None = None, - config: GeneratorConfig | None = None, + self, + model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct", + api_token: str | None = None, + config: GeneratorConfig | None = None, ): """Initialize Llama dataset generator. @@ -97,7 +99,7 @@ def _create_generation_prompt(self, domain: str, task_description: str) -> str: Ensure diversity in query formulation and complexity.""" async def generate( - self, domain: str, task_description: str, constraints: list[str] | None = None + self, domain: str, task_description: str, constraints: list[str] | None = None ) -> DatasetSource: """Generate synthetic dataset. @@ -114,6 +116,8 @@ async def generate( if constraints: prompt += "\n\nAdditional constraints:\n" + "\n".join(f"- {c}" for c in constraints) + logger.debug("Llama generated prompt:\n%s", prompt) + # Generate with Llama response = await asyncio.to_thread( self.client.text_generation, @@ -123,6 +127,7 @@ async def generate( temperature=self.config.temperature, return_full_text=False, ) + logger.debug("Llama raw response:\n%s", response) # Parse JSON response try: @@ -155,11 +160,16 @@ async def generate( ) except json.JSONDecodeError as e: - msg = f"Failed to parse generated dataset: {e}\nResponse: {response}" - raise ValueError(msg) + # msg = f"Failed to parse generated dataset: {e}\nResponse: {response}" + logger.error( + "Failed to parse OpenAI response JSON. Raw response:\n%s", + response, + exc_info=True, + ) + raise def generate_sync( - self, domain: str, task_description: str, constraints: list[str] | None = None + self, domain: str, task_description: str, constraints: list[str] | None = None ) -> DatasetSource: """Synchronous version of generate.""" return asyncio.run(self.generate(domain, task_description, constraints)) @@ -169,10 +179,10 @@ class OpenAIDatasetGenerator: """Generate synthetic datasets using OpenAI models (GPT-4, etc.).""" def __init__( - self, - model: str = "gpt-4o", - api_key: str | None = None, - config: GeneratorConfig | None = None, + self, + model: str = "gpt-4o", + api_key: str | None = None, + config: GeneratorConfig | None = None, ): """Initialize OpenAI dataset generator. @@ -221,11 +231,11 @@ def _create_generation_prompt(self, domain: str, task_description: str) -> str: Ensure diversity in query formulation and complexity.""" async def generate( - self, - domain: str, - task_description: str, - constraints: list[str] | None = None, - seed_examples: list[dict[str, Any]] | None = None, + self, + domain: str, + task_description: str, + constraints: list[str] | None = None, + seed_examples: list[dict[str, Any]] | None = None, ) -> DatasetSource: """Generate synthetic dataset. @@ -251,6 +261,10 @@ async def generate( if seed_examples: messages[1]["content"] += f"\n\nSeed examples:\n{json.dumps(seed_examples, indent=2)}" + logger.debug( + "OpenAI generated prompt (messages):\n%s", + json.dumps(messages, indent=2) + ) # Generate with GPT response = await self.client.chat.completions.create( model=self.model, @@ -261,6 +275,7 @@ async def generate( ) response_text = response.choices[0].message.content + logger.debug("OpenAI raw response text:\n%s", response_text) # Parse JSON response try: @@ -295,15 +310,19 @@ async def generate( ) except json.JSONDecodeError as e: - msg = f"Failed to parse generated dataset: {e}\nResponse: {response_text}" - raise ValueError(msg) + logger.error( + "Failed to parse Llama response JSON. Raw response:\n%s", + response_text, + exc_info=True, + ) + raise def generate_sync( - self, - domain: str, - task_description: str, - constraints: list[str] | None = None, - seed_examples: list[dict[str, Any]] | None = None, + self, + domain: str, + task_description: str, + constraints: list[str] | None = None, + seed_examples: list[dict[str, Any]] | None = None, ) -> DatasetSource: """Synchronous version of generate.""" return asyncio.run(self.generate(domain, task_description, constraints, seed_examples)) From d51f15908f7bbbb4da80dcbba2b3872f20e33629 Mon Sep 17 00:00:00 2001 From: akash Date: Mon, 19 Jan 2026 18:12:30 +0530 Subject: [PATCH 2/3] formatting done --- src/agentunit/generators/llm_generator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/agentunit/generators/llm_generator.py b/src/agentunit/generators/llm_generator.py index 2aa3484..852b607 100644 --- a/src/agentunit/generators/llm_generator.py +++ b/src/agentunit/generators/llm_generator.py @@ -3,13 +3,14 @@ from __future__ import annotations import asyncio -import logging import json +import logging from dataclasses import dataclass from typing import Any from agentunit.datasets.base import DatasetCase, DatasetSource + logger = logging.getLogger(__name__) try: @@ -159,7 +160,7 @@ async def generate( cases, name=f"llama_generated_{domain.replace(' ', '_')}" ) - except json.JSONDecodeError as e: + except json.JSONDecodeError: # msg = f"Failed to parse generated dataset: {e}\nResponse: {response}" logger.error( "Failed to parse OpenAI response JSON. Raw response:\n%s", @@ -309,7 +310,7 @@ async def generate( cases, name=f"openai_generated_{domain.replace(' ', '_')}" ) - except json.JSONDecodeError as e: + except json.JSONDecodeError: logger.error( "Failed to parse Llama response JSON. Raw response:\n%s", response_text, From 988bf1146de8192254ed5e9ba4092345984c7858 Mon Sep 17 00:00:00 2001 From: akash Date: Tue, 20 Jan 2026 18:01:56 +0530 Subject: [PATCH 3/3] fix error logging message and formatting. --- src/agentunit/generators/llm_generator.py | 49 +++++++++++------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/src/agentunit/generators/llm_generator.py b/src/agentunit/generators/llm_generator.py index 852b607..836f011 100644 --- a/src/agentunit/generators/llm_generator.py +++ b/src/agentunit/generators/llm_generator.py @@ -44,10 +44,10 @@ class LlamaDatasetGenerator: """Generate synthetic datasets using Llama models via HuggingFace Inference API.""" def __init__( - self, - model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct", - api_token: str | None = None, - config: GeneratorConfig | None = None, + self, + model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct", + api_token: str | None = None, + config: GeneratorConfig | None = None, ): """Initialize Llama dataset generator. @@ -100,7 +100,7 @@ def _create_generation_prompt(self, domain: str, task_description: str) -> str: Ensure diversity in query formulation and complexity.""" async def generate( - self, domain: str, task_description: str, constraints: list[str] | None = None + self, domain: str, task_description: str, constraints: list[str] | None = None ) -> DatasetSource: """Generate synthetic dataset. @@ -163,14 +163,14 @@ async def generate( except json.JSONDecodeError: # msg = f"Failed to parse generated dataset: {e}\nResponse: {response}" logger.error( - "Failed to parse OpenAI response JSON. Raw response:\n%s", + "Failed to parse Llama response JSON. Raw response:\n%s", response, exc_info=True, ) raise def generate_sync( - self, domain: str, task_description: str, constraints: list[str] | None = None + self, domain: str, task_description: str, constraints: list[str] | None = None ) -> DatasetSource: """Synchronous version of generate.""" return asyncio.run(self.generate(domain, task_description, constraints)) @@ -180,10 +180,10 @@ class OpenAIDatasetGenerator: """Generate synthetic datasets using OpenAI models (GPT-4, etc.).""" def __init__( - self, - model: str = "gpt-4o", - api_key: str | None = None, - config: GeneratorConfig | None = None, + self, + model: str = "gpt-4o", + api_key: str | None = None, + config: GeneratorConfig | None = None, ): """Initialize OpenAI dataset generator. @@ -232,11 +232,11 @@ def _create_generation_prompt(self, domain: str, task_description: str) -> str: Ensure diversity in query formulation and complexity.""" async def generate( - self, - domain: str, - task_description: str, - constraints: list[str] | None = None, - seed_examples: list[dict[str, Any]] | None = None, + self, + domain: str, + task_description: str, + constraints: list[str] | None = None, + seed_examples: list[dict[str, Any]] | None = None, ) -> DatasetSource: """Generate synthetic dataset. @@ -262,10 +262,7 @@ async def generate( if seed_examples: messages[1]["content"] += f"\n\nSeed examples:\n{json.dumps(seed_examples, indent=2)}" - logger.debug( - "OpenAI generated prompt (messages):\n%s", - json.dumps(messages, indent=2) - ) + logger.debug("OpenAI generated prompt (messages):\n%s", json.dumps(messages, indent=2)) # Generate with GPT response = await self.client.chat.completions.create( model=self.model, @@ -312,18 +309,18 @@ async def generate( except json.JSONDecodeError: logger.error( - "Failed to parse Llama response JSON. Raw response:\n%s", + "Failed to parse OpenAI response JSON. Raw response:\n%s", response_text, exc_info=True, ) raise def generate_sync( - self, - domain: str, - task_description: str, - constraints: list[str] | None = None, - seed_examples: list[dict[str, Any]] | None = None, + self, + domain: str, + task_description: str, + constraints: list[str] | None = None, + seed_examples: list[dict[str, Any]] | None = None, ) -> DatasetSource: """Synchronous version of generate.""" return asyncio.run(self.generate(domain, task_description, constraints, seed_examples))