
feat: add cohere backend #104

Open. Wants to merge 1 commit into main.

README.md (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ Note that **llm-ls** does not export any data anywhere (other than setting a use

### Multiple backends

**llm-ls** is compatible with Hugging Face's [Inference API](https://huggingface.co/docs/api-inference/en/index), Hugging Face's [text-generation-inference](https://github.com/huggingface/text-generation-inference), [ollama](https://github.com/ollama/ollama) and OpenAI compatible APIs, like the [python llama.cpp server bindings](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#openai-compatible-web-server).
**llm-ls** is compatible with Hugging Face's [Inference API](https://huggingface.co/docs/api-inference/en/index), Hugging Face's [text-generation-inference](https://github.com/huggingface/text-generation-inference), [Cohere](https://docs.cohere.com/reference/chat), [ollama](https://github.com/ollama/ollama) and OpenAI compatible APIs, like the [python llama.cpp server bindings](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#openai-compatible-web-server).

## Compatible extensions

crates/custom-types/src/llm_ls.rs (4 additions, 0 deletions)
@@ -63,6 +63,9 @@ fn hf_default_url() -> String {
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "lowercase", tag = "backend")]
pub enum Backend {
Cohere {
url: String,
},
HuggingFace {
#[serde(default = "hf_default_url", deserialize_with = "parse_url")]
url: String,
@@ -99,6 +102,7 @@ impl Backend {

pub fn url(self) -> String {
match self {
Self::Cohere { url } => url,
Self::HuggingFace { url } => url,
Self::LlamaCpp { url } => url,
Self::Ollama { url } => url,
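For context on how this variant is selected: the `#[serde(rename_all = "lowercase", tag = "backend")]` attributes on `Backend` mean the client picks Cohere by sending `"backend": "cohere"` in its configuration. Below is a minimal, self-contained sketch of that deserialization (a trimmed copy of the enum, with an example URL only). Note that, unlike `HuggingFace`, the new `Cohere` variant declares neither a default `url` nor the `parse_url` validation, so the client must always supply one.

```rust
// Minimal sketch, assuming serde + serde_json; `Backend` is a trimmed copy of the
// enum above, not the crate's actual type.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "lowercase", tag = "backend")]
enum Backend {
    Cohere { url: String },
    Ollama { url: String },
}

fn main() -> serde_json::Result<()> {
    // The "backend" field picks the variant (lowercased); "url" has no default here.
    let config = r#"{ "backend": "cohere", "url": "https://api.cohere.com" }"#;
    let backend: Backend = serde_json::from_str(config)?;
    println!("{backend:?}"); // Cohere { url: "https://api.cohere.com" }
    Ok(())
}
```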
crates/llm-ls/src/backend.rs (55 additions, 0 deletions)
@@ -67,6 +67,54 @@ fn parse_api_text(text: &str) -> Result<Vec<Generation>> {
}
}

#[derive(Debug, Serialize, Deserialize)]
struct CohereGeneration {
text: String,
}

impl From<CohereGeneration> for Generation {
fn from(value: CohereGeneration) -> Self {
Generation {
generated_text: value.text,
}
}
}

#[derive(Debug, Deserialize)]
pub struct CohereError {
message: String,
}

impl std::error::Error for CohereError {
fn description(&self) -> &str {
&self.message
}
}

impl Display for CohereError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.message)
}
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum CohereAPIResponse {
Generation(CohereGeneration),
Error(CohereError),
}

fn build_cohere_headers(api_token: Option<&String>, ide: Ide) -> Result<HeaderMap> {
build_api_headers(api_token, ide)
}

fn parse_cohere_text(text: &str) -> Result<Vec<Generation>> {
match serde_json::from_str(text)? {
CohereAPIResponse::Generation(gen) => Ok(vec![gen.into()]),
CohereAPIResponse::Error(err) => Err(Error::Cohere(err)),
}
}

#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppGeneration {
content: String,
@@ -214,6 +262,11 @@ pub(crate) fn build_body(
mut request_body: Map<String, Value>,
) -> Map<String, Value> {
match backend {
Backend::Cohere { .. } => {
request_body.insert("message".to_owned(), Value::String(prompt));
request_body.insert("model".to_owned(), Value::String(model));
request_body.insert("stream".to_owned(), Value::Bool(false));
}
Backend::HuggingFace { .. } | Backend::Tgi { .. } => {
request_body.insert("inputs".to_owned(), Value::String(prompt));
if let Some(Value::Object(params)) = request_body.get_mut("parameters") {
@@ -241,6 +294,7 @@ pub(crate) fn build_headers(
ide: Ide,
) -> Result<HeaderMap> {
match backend {
Backend::Cohere { .. } => build_cohere_headers(api_token, ide),
Backend::HuggingFace { .. } => build_api_headers(api_token, ide),
Backend::LlamaCpp { .. } => Ok(build_llamacpp_headers()),
Backend::Ollama { .. } => Ok(build_ollama_headers()),
@@ -251,6 +305,7 @@ pub(crate) fn build_headers(

pub(crate) fn parse_generations(backend: &Backend, text: &str) -> Result<Vec<Generation>> {
match backend {
Backend::Cohere { .. } => parse_cohere_text(text),
Backend::HuggingFace { .. } => parse_api_text(text),
Backend::LlamaCpp { .. } => parse_llamacpp_text(text),
Backend::Ollama { .. } => parse_ollama_text(text),
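On the request side, `build_body` maps the prompt to Cohere's `message` field, sets `model`, and forces `stream` to `false`; `build_cohere_headers` simply delegates to `build_api_headers`, so Cohere requests get the same headers as the Hugging Face API path. On the response side, the `#[serde(untagged)]` enum makes `parse_cohere_text` treat a body with a top-level `text` field as a generation and fall through to the error variant when only `message` is present. A self-contained sketch of that parsing (the sample payloads are illustrative, not captured Cohere responses):

```rust
// Self-contained sketch of the untagged response parsing, assuming serde + serde_json;
// the types are trimmed copies of the ones added above.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct CohereGeneration {
    text: String,
}

#[derive(Debug, Deserialize)]
struct CohereError {
    message: String,
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum CohereAPIResponse {
    Generation(CohereGeneration),
    Error(CohereError),
}

fn main() -> serde_json::Result<()> {
    // A successful response carries a top-level "text" field; any extra fields
    // in the real payload are ignored by serde.
    let ok: CohereAPIResponse =
        serde_json::from_str(r#"{ "text": "fn add(a: i32, b: i32) -> i32 { a + b }" }"#)?;
    // Without "text", the Generation variant fails to match and Error is used instead.
    let err: CohereAPIResponse = serde_json::from_str(r#"{ "message": "invalid api token" }"#)?;
    println!("{ok:?}");
    println!("{err:?}");
    Ok(())
}
```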
crates/llm-ls/src/error.rs (2 additions, 0 deletions)
@@ -33,6 +33,8 @@ pub enum Error {
InvalidRepositoryId,
#[error("invalid tokenizer path")]
InvalidTokenizerPath,
#[error("cohere error: {0}")]
Cohere(crate::backend::CohereError),
#[error("llama.cpp error: {0}")]
LlamaCpp(crate::backend::APIError),
#[error("ollama error: {0}")]
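The `#[error("cohere error: {0}")]` attribute (thiserror-style, assuming that is the derive used for the rest of this enum) formats the new variant by delegating `{0}` to `CohereError`'s `Display` impl, so the upstream message is surfaced verbatim. A standalone sketch of that behaviour:

```rust
// Standalone sketch, assuming the thiserror crate; CohereError is a trimmed copy
// of the type added in backend.rs.
use std::fmt;

#[derive(Debug)]
struct CohereError {
    message: String,
}

impl fmt::Display for CohereError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.message)
    }
}

#[derive(Debug, thiserror::Error)]
enum Error {
    #[error("cohere error: {0}")]
    Cohere(CohereError),
}

fn main() {
    let e = Error::Cohere(CohereError {
        message: "invalid api token".to_owned(),
    });
    // {0} delegates to CohereError's Display impl.
    assert_eq!(e.to_string(), "cohere error: invalid api token");
}
```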
crates/llm-ls/src/main.rs (17 additions, 0 deletions)
@@ -428,6 +428,23 @@ fn build_url(backend: Backend, model: &str, disable_url_path_completion: bool) -
}

match backend {
Backend::Cohere { mut url } => {
if url.ends_with("/v1/chat") {
url
} else if url.ends_with("/v1/") {
url.push_str("chat");
url
} else if url.ends_with("/v1") {
url.push_str("/chat");
url
} else if url.ends_with('/') {
url.push_str("v1/chat");
url
} else {
url.push_str("/v1/chat");
url
}
}
Backend::HuggingFace { url } => format!("{url}/models/{model}"),
Backend::LlamaCpp { mut url } => {
if url.ends_with("/completions") {
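For the Cohere arm of `build_url`, whatever shape the configured URL takes, it is normalized to end in `/v1/chat`; unlike the Hugging Face branch, the model is not embedded in the URL but sent in the request body by `build_body`. A minimal sketch of that normalization, extracted as a free function for illustration (the base URL is only an example):

```rust
// Trimmed copy of the Cohere arm of build_url, for illustration only.
fn cohere_chat_url(mut url: String) -> String {
    if url.ends_with("/v1/chat") {
        url
    } else if url.ends_with("/v1/") {
        url.push_str("chat");
        url
    } else if url.ends_with("/v1") {
        url.push_str("/chat");
        url
    } else if url.ends_with('/') {
        url.push_str("v1/chat");
        url
    } else {
        url.push_str("/v1/chat");
        url
    }
}

fn main() {
    // All of these end up pointing at the /v1/chat endpoint.
    for base in [
        "https://api.cohere.com",
        "https://api.cohere.com/",
        "https://api.cohere.com/v1",
        "https://api.cohere.com/v1/chat",
    ] {
        assert_eq!(cohere_chat_url(base.to_owned()), "https://api.cohere.com/v1/chat");
    }
}
```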