Skip to content

Add class that treats Codex as a backup #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3acb048
add class to configure a decorator that treats Codex as a backup
elisno Jan 24, 2025
74755d2
formatting
elisno Jan 24, 2025
f7f8156
Move is_bad_response helper functions to a validation.py module for c…
elisno Jan 30, 2025
d20d31c
update import
elisno Jan 30, 2025
a223b04
formatting and typing (wip)
elisno Jan 30, 2025
f15424c
Remove response_validators.py module
elisno Jan 30, 2025
23eeb58
remove is_bad_response_contains_phrase
elisno Jan 30, 2025
9e8690c
Improve helper functions for detecting bad responses
elisno Jan 31, 2025
d0ad8df
formatting and add dependencies
elisno Jan 31, 2025
b4bff54
formatting
elisno Feb 1, 2025
038a475
address type checker complaints
elisno Feb 1, 2025
5fbb48e
temporarily skip tests for codex_backup module
elisno Feb 1, 2025
3892b52
formatting
elisno Feb 1, 2025
22253e9
address comments
elisno Feb 5, 2025
e4bdf2c
Merge branch 'main' into codex-as-backup
elisno Feb 5, 2025
e5a6164
formatting & type hints
elisno Feb 6, 2025
807d7fa
comment out to_decorator
elisno Feb 6, 2025
00def49
Merge branch 'main' into codex-as-backup
elisno Feb 7, 2025
d8a6e86
enhance CodexBackup
elisno Feb 7, 2025
2630a2c
delete commented-out to_decorator method
elisno Feb 7, 2025
3286674
Merge branch 'main' into codex-as-backup
elisno Feb 7, 2025
0ebd4fe
formatting
elisno Feb 7, 2025
4eca7d3
fix tests for CodexBackup
elisno Feb 7, 2025
c59cec5
formatting and typing
elisno Feb 7, 2025
6026179
formatting
elisno Feb 8, 2025
a94ffb5
formatting
elisno Feb 8, 2025
2510255
fix unused fixture
elisno Feb 8, 2025
b439113
remove Self imported from typing, doesn't work for Python 3.8
elisno Feb 8, 2025
38666de
remove unused type ignore
elisno Feb 8, 2025
a5d655b
Add explanation to is_unhelpful_response question
elisno Feb 8, 2025
e776dfe
Remove quotes from type annotation
elisno Feb 8, 2025
7866f0c
remove _TLM protocol
elisno Feb 10, 2025
26adbf1
formatting
elisno Feb 10, 2025
36f80e9
threshold -> trustworthiness_threshold
elisno Feb 11, 2025
febbfd0
update is_bad_response docstring
elisno Feb 11, 2025
dc1d003
update docstrings for is_unhelpful_response
elisno Feb 11, 2025
739ffc6
unhelpful_trustworthiness_threshold -> unhelpfulness_confidence_thres…
elisno Feb 11, 2025
3e4864a
update module docstring for validation.py
elisno Feb 11, 2025
81cc934
rename module validation.py -> response_validation.py
elisno Feb 11, 2025
9e91e9b
move is_bad_response optional parameters to a parameter object (typed…
elisno Feb 11, 2025
c5843c9
formatting
elisno Feb 11, 2025
49f9a9d
rename test_validation.py -> test_response_validation.py
elisno Feb 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -43,6 +43,8 @@ extra-dependencies = [
"pytest",
"llama-index-core",
"smolagents",
"cleanlab-studio",
"thefuzz",
"langchain-core",
]
[tool.hatch.envs.types.scripts]
@@ -54,6 +56,8 @@ allow-direct-references = true
extra-dependencies = [
"llama-index-core",
"smolagents; python_version >= '3.10'",
"cleanlab-studio",
"thefuzz",
"langchain-core",
]

3 changes: 2 additions & 1 deletion src/cleanlab_codex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: MIT
from cleanlab_codex.client import Client
from cleanlab_codex.codex_backup import CodexBackup
from cleanlab_codex.codex_tool import CodexTool
from cleanlab_codex.project import Project

__all__ = ["Client", "CodexTool", "Project"]
__all__ = ["Client", "CodexTool", "CodexBackup", "Project"]
142 changes: 142 additions & 0 deletions src/cleanlab_codex/codex_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional, Protocol, cast

from cleanlab_codex.response_validation import BadResponseDetectionConfig, is_bad_response

if TYPE_CHECKING:
from cleanlab_studio.studio.trustworthy_language_model import TLM # type: ignore

from cleanlab_codex.project import Project


def handle_backup_default(codex_response: str, primary_system: Any) -> None:  # noqa: ARG001
    """Ignore the Codex response; this handler intentionally does nothing.

    It is the default value of ``backup_handler`` in ``CodexBackup.__init__``.
    Both arguments are accepted only so the signature matches ``BackupHandler``.
    """


class BackupHandler(Protocol):
    """Structural type for callbacks that react to a backup answer from Codex.

    A handler is invoked with the Codex response that was retrieved because the
    primary response system (e.g., a RAG system) did not give an adequate
    answer. Typical uses for an implementation include:

    - Updating the primary system's context or knowledge base
    - Logging Codex responses for analysis
    - Triggering system improvements or retraining
    - Performing any other necessary side effects

    Args:
        codex_response (str): The response received from Codex.
        primary_system (Any): The primary RAG system instance whose response
            was inadequate, passed so the handler can update or modify it.

    Returns:
        None: Handlers work purely through side effects.
    """

    def __call__(self, codex_response: str, primary_system: Any) -> None:
        """Handle ``codex_response`` for ``primary_system``; no value is returned."""
        ...


class CodexBackup:
"""A backup decorator that connects to a Codex project to answer questions that
cannot be adequately answered by the existing agent.

Args:
project: The Codex project to use for backup responses
fallback_answer: The fallback answer to use if the primary system fails to provide an adequate response
backup_handler: A callback function that processes Codex's response and updates the primary RAG system. This handler is called whenever Codex provides a backup response after the primary system fails. By default, the backup handler is a no-op.
primary_system: The existing RAG system that needs to be backed up by Codex
tlm: The client for the Trustworthy Language Model, which evaluates the quality of responses from the primary system
is_bad_response_kwargs: Additional keyword arguments to pass to the is_bad_response function, for detecting inadequate responses from the primary system
"""

DEFAULT_FALLBACK_ANSWER = "Based on the available information, I cannot provide a complete answer to this question."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll hold off on removing this until we've finalized the code in "validation.py".
The intention was to pass the fallback answer from the backup object to the relevant is_fallback_response helper function before deciding to call Codex as Backup.


def __init__(
    self,
    *,
    project: Project,
    fallback_answer: str = DEFAULT_FALLBACK_ANSWER,
    backup_handler: BackupHandler = handle_backup_default,
    primary_system: Optional[Any] = None,
    tlm: Optional[TLM] = None,
    is_bad_response_kwargs: Optional[dict[str, Any]] = None,
):
    """Store the collaborators and settings used by ``run``.

    All arguments are keyword-only and are kept on private attributes
    without validation or copying.
    """
    # Settings that feed bad-response detection.
    self._tlm = tlm
    self._fallback_answer = fallback_answer
    self._is_bad_response_kwargs = is_bad_response_kwargs

    # Codex project plus the hook that is called with its answer.
    self._project = project
    self._backup_handler = backup_handler

    # May stay None; the `primary_system` property raises when read in that state.
    self._primary_system: Optional[Any] = primary_system

@classmethod
def from_project(cls, project: Project, **kwargs: Any) -> CodexBackup:
    """Build an instance for ``project``, forwarding ``kwargs`` to the constructor."""
    init_kwargs = {"project": project, **kwargs}
    return cls(**init_kwargs)

@property
def primary_system(self) -> Any:
    """Return the primary RAG system that Codex backs up.

    Raises:
        ValueError: If no primary system has been set.
    """
    if self._primary_system is None:
        # Point users at APIs that actually exist on this class: the property
        # setter and the constructor argument (there is no `add_primary_system`).
        error_message = (
            "Primary system not set. Please set a primary system by assigning to the "
            "`primary_system` attribute or by passing `primary_system` to the constructor."
        )
        raise ValueError(error_message)
    return self._primary_system

@primary_system.setter
def primary_system(self, primary_system: Any) -> None:
    """Set the primary RAG system that will be used to generate responses."""
    self._primary_system = primary_system

@property
def is_bad_response_kwargs(self) -> dict[str, Any]:
    """Return the stored extra options for ``is_bad_response``.

    When nothing (or an empty mapping) is stored, a new empty dict is returned.
    """
    configured = self._is_bad_response_kwargs
    return configured if configured else {}

@is_bad_response_kwargs.setter
def is_bad_response_kwargs(self, is_bad_response_kwargs: dict[str, Any]) -> None:
    """Replace the stored extra options for ``is_bad_response``."""
    self._is_bad_response_kwargs = is_bad_response_kwargs

def run(
    self,
    response: str,
    query: str,
    context: Optional[str] = None,
) -> str:
    """Return ``response`` if it is adequate, otherwise try a Codex backup answer.

    Args:
        response: The response to evaluate.
        query: The original query that generated the response.
        context: Optional context used to generate the response.

    Returns:
        str: The original response when ``is_bad_response`` does not flag it,
        or when the Codex project query yields an empty first result;
        otherwise the answer obtained from Codex.
    """
    extra_options = self.is_bad_response_kwargs
    # User-supplied options are spread last so they override the defaults.
    detection_config = cast(
        BadResponseDetectionConfig,
        {
            "tlm": self._tlm,
            "fallback_answer": self._fallback_answer,
            **extra_options,
        },
    )

    response_is_bad = is_bad_response(
        response,
        query=query,
        context=context,
        config=detection_config,
    )
    if not response_is_bad:
        return response

    # Only the first element of the query result is used as the answer.
    codex_answer = self._project.query(query, fallback_answer=self._fallback_answer)[0]
    if not codex_answer:
        return response

    # The handler is only called when a primary system has been set.
    if self._primary_system is not None:
        self._backup_handler(
            codex_response=codex_answer,
            primary_system=self._primary_system,
        )
    return codex_answer
Loading