[Help Needed / Draft PR] simplifying Anthropic, OpenAI, Vertex, Cohere call #529
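Context for reviewers: litellm exposes one completion() interface across providers, which is what lets the provider-specific model classes below collapse into a single wrapper. A minimal sketch of the unified call, assuming litellm is installed and the relevant provider API keys are set in the environment (model names are illustrative):

    from litellm import completion

    messages = [{"role": "user", "content": "Classify this banking query."}]

    # The call shape is identical across providers; only the model string changes.
    anthropic_response = completion(model="claude-instant-1", messages=messages)
    openai_response = completion(model="gpt-3.5-turbo", messages=messages)

    print(anthropic_response["choices"][0]["message"]["content"])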

Open · wants to merge 1 commit into base: main
3 changes: 3 additions & 0 deletions .gitignore
@@ -55,6 +55,9 @@ coverage.xml
*.mo
*.pot

# Litellm stuff
litellm_uuid.txt

# Django stuff:
*.log
local_settings.py
9 changes: 5 additions & 4 deletions src/autolabel/models/__init__.py
@@ -6,6 +6,7 @@

logger = logging.getLogger(__name__)

from autolabel.models.litellm import LiteLLM
from autolabel.models.openai import OpenAILLM
from autolabel.models.anthropic import AnthropicLLM
from autolabel.models.cohere import CohereLLM
@@ -14,11 +15,11 @@
from autolabel.models.refuel import RefuelLLM

MODEL_REGISTRY = {
ModelProvider.OPENAI: OpenAILLM,
ModelProvider.ANTHROPIC: AnthropicLLM,
ModelProvider.COHERE: CohereLLM,
ModelProvider.OPENAI: LiteLLM,
ModelProvider.ANTHROPIC: LiteLLM,
ModelProvider.COHERE: LiteLLM,
ModelProvider.HUGGINGFACE_PIPELINE: HFPipelineLLM,
ModelProvider.GOOGLE: PaLMLLM,
ModelProvider.GOOGLE: LiteLLM,
ModelProvider.REFUEL: RefuelLLM,
}

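With this registry change, several providers resolve to the same wrapper class and the existing dispatch is untouched. A sketch of how a model is constructed from the registry (illustrative, not part of the diff; assumes AutolabelConfig exposes provider() as it does elsewhere in autolabel):

    from autolabel.configs import AutolabelConfig
    from autolabel.models import MODEL_REGISTRY
    from autolabel.schema import ModelProvider

    config = AutolabelConfig(config="path/to/config.json")
    # e.g. ModelProvider.ANTHROPIC now maps to LiteLLM instead of AnthropicLLM
    model_cls = MODEL_REGISTRY[ModelProvider(config.provider())]
    model = model_cls(config=config, cache=None)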
101 changes: 101 additions & 0 deletions src/autolabel/models/litellm.py
@@ -0,0 +1,101 @@
from typing import List, Optional

from autolabel.configs import AutolabelConfig
from autolabel.models import BaseModel
from autolabel.cache import BaseCache
from autolabel.schema import (
    GenerationCacheEntry,
    LabelingError,
    RefuelLLMResult,
    ErrorType,
)
from langchain.schema import HumanMessage, Generation
import logging

logger = logging.getLogger(__name__)


class LiteLLM(BaseModel):
SEP_REPLACEMENT_TOKEN = "@@"
DEFAULT_MODEL = "gpt-3.5-turbo"
DEFAULT_PARAMS = {
"max_tokens_to_sample": 1000,
"temperature": 0.0,
}

def __init__(self, config: AutolabelConfig, cache: BaseCache = None) -> None:
super().__init__(config, cache)

try:
import litellm
from litellm import completion, completion_cost, batch_completion
except ImportError:
raise ImportError(
"anthropic is required to use the anthropic LLM. Please install it with the following command: pip install 'refuel-autolabel[anthropic]'"
)

# populate model name
self.model_name = config.model_name() or self.DEFAULT_MODEL
# populate model params
model_params = config.model_params()
self.model_params = {**self.DEFAULT_PARAMS, **model_params}
self.completion_cost = completion_cost
self.completion = completion
self.batch_completion = batch_completion

def _label_individually(self, prompts: List[str]) -> RefuelLLMResult:
"""Label each prompt individually. Should be used only after trying as a batch first.

Args:
prompts (List[str]): List of prompts to label

        Returns:
            RefuelLLMResult: generations for each prompt, with a parallel list of
            LabelingError entries (None where generation succeeded)
        """
generations = []
errors = []
for prompt in prompts:
try:
messages = [{"role": "user", "content": prompt}]
                custom_model_name = "claude-instant-1"  # [TEST variable] used to debug an issue in testing
response = self.completion(model=custom_model_name, messages=messages)
                # RefuelLLMResult expects a list of Generations per prompt
                generations.append(
                    [Generation(text=response["choices"][0]["message"]["content"])]
                )
errors.append(None)
except Exception as e:
print(f"Error generating from LLM: {e}")
print(f"self.model_name: {self.model_name}")
generations.append([Generation(text="")])
errors.append(
LabelingError(
error_type=ErrorType.LLM_PROVIDER_ERROR, error_message=str(e)
)
)

return RefuelLLMResult(generations=generations, errors=errors)

    def _label(self, prompts: List[str]) -> RefuelLLMResult:
        messages = [[{"role": "user", "content": prompt}] for prompt in prompts]
        try:
            custom_model_name = "claude-instant-1"  # [TEST variable] used to debug an issue in testing
            results = self.batch_completion(model=custom_model_name, messages=messages)
            # translate to the RefuelLLMResult generations type: one list of
            # Generations per prompt
            result_generations = [
                [Generation(text=result["choices"][0]["message"]["content"])]
                for result in results
            ]
            return RefuelLLMResult(
                generations=result_generations, errors=[None] * len(result_generations)
            )
        except Exception as e:
            # fall back to one-at-a-time labeling with the original string prompts;
            # _label_individually builds its own message payloads
            return self._label_individually(prompts)

def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
# total_cost = self.completion_cost(model=self.model_name, prompt=prompt, completion=label)
        return 0  # [TEST variable] cost computation disabled while debugging an issue in testing

def returns_token_probs(self) -> bool:
return False
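On the get_cost stub above: litellm ships a completion_cost helper, which the commented-out line references. A hedged sketch of how it could be wired up once the debugging is done; the completion_response keyword is litellm's documented API, but treat the exact wiring as an assumption:

    from litellm import completion, completion_cost

    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello world"}],
    )
    # Cost can be computed from the full response object, or from
    # model + prompt/completion strings as in the commented-out call.
    cost = completion_cost(completion_response=response)
    print(f"estimated cost: ${cost:.6f}")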
2 changes: 1 addition & 1 deletion src/autolabel/models/openai.py
@@ -89,7 +89,7 @@ def __init__(self, config: AutolabelConfig, cache: BaseCache = None) -> None:
import tiktoken
except ImportError:
raise ImportError(
"anthropic is required to use the anthropic LLM. Please install it with the following command: pip install 'refuel-autolabel[openai]'"
"openai is required to use the openai LLM. Please install it with the following command: pip install 'refuel-autolabel[openai]'"
)

# populate model name
71 changes: 71 additions & 0 deletions tests/unit/litellm_test.py
@@ -0,0 +1,71 @@
from autolabel.configs import AutolabelConfig
from autolabel.models.litellm import LiteLLM
from langchain.schema import Generation, LLMResult
from pytest import approx


################### ANTHROPIC TESTS #######################
def test_anthropic_initialization():
model = LiteLLM(
config=AutolabelConfig(
config="tests/assets/banking/config_banking_anthropic.json"
)
)


# def test_anthropic_label(mocker):
# model = LiteLLM(
# config=AutolabelConfig(
# config="tests/assets/banking/config_banking_anthropic.json"
# )
# )
# prompts = ["test1", "test2"]
# mocker.patch(
# "langchain.chat_models.ChatAnthropic.generate",
# return_value=LLMResult(
# generations=[[Generation(text="Answers")] for _ in prompts]
# ),
# )
# x = model.label(prompts)
# print(x)
# assert [i[0].text for i in x.generations] == ["Answers", "Answers"]
# assert sum(x.costs) == approx(0.00010944, rel=1e-3)

# def test_anthropic_label():
# model = LiteLLM(
# config=AutolabelConfig(
# config="tests/assets/banking/config_banking_anthropic.json"
# )
# )
# prompts = ["test1", "test2"]
# x = model._label(prompts)
# print(x)

# test_anthropic_label()
# Manual debugging snippet; should not run at import time:
# model = LiteLLM(
#     config=AutolabelConfig(config="tests/assets/banking/config_banking_anthropic.json")
# )
# print(model.get_cost("hello world"))


def test_anthropic_get_cost():
model = LiteLLM(
config=AutolabelConfig(
config="tests/assets/banking/config_banking_anthropic.json"
)
)
example_prompt = "TestingExamplePrompt"
curr_cost = model.get_cost(example_prompt)
    # get_cost is currently stubbed to return 0 (see LiteLLM.get_cost above)
    assert curr_cost == 0


def test_anthropic_return_probs():
model = LiteLLM(
config=AutolabelConfig(
config="tests/assets/banking/config_banking_anthropic.json"
)
)
assert model.returns_token_probs() is False


################### END ANTHROPIC TESTS #######################
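One possible way to revive the commented-out label test without live API calls: LiteLLM stores the batch_completion reference on the instance, so it can be patched directly with pytest-mock. A sketch (hypothetical test, not part of this PR; assumes pytest-mock is installed and _label wraps each response in a list of Generations):

    def test_litellm_label_mocked(mocker):
        model = LiteLLM(
            config=AutolabelConfig(
                config="tests/assets/banking/config_banking_anthropic.json"
            )
        )
        fake_response = {"choices": [{"message": {"content": "Answers"}}]}
        mocker.patch.object(
            model, "batch_completion", return_value=[fake_response, fake_response]
        )
        result = model._label(["test1", "test2"])
        assert [g[0].text for g in result.generations] == ["Answers", "Answers"]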