
feat: add cohere backend #104

Open. Wants to merge 1 commit into main.

README.md (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ Note that **llm-ls** does not export any data anywhere (other than setting a use

### Multiple backends

**llm-ls** is compatible with Hugging Face's [Inference API](https://huggingface.co/docs/api-inference/en/index), Hugging Face's [text-generation-inference](https://github.com/huggingface/text-generation-inference), [ollama](https://github.com/ollama/ollama) and OpenAI compatible APIs, like the [python llama.cpp server bindings](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#openai-compatible-web-server).
**llm-ls** is compatible with Hugging Face's [Inference API](https://huggingface.co/docs/api-inference/en/index), Hugging Face's [text-generation-inference](https://github.com/huggingface/text-generation-inference), [Cohere](https://docs.cohere.com/reference/chat), [ollama](https://github.com/ollama/ollama) and OpenAI compatible APIs, like the [python llama.cpp server bindings](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#openai-compatible-web-server).

## Compatible extensions

crates/custom-types/src/llm_ls.rs (4 additions, 0 deletions)
@@ -63,6 +63,9 @@ fn hf_default_url() -> String {
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "lowercase", tag = "backend")]
pub enum Backend {
Cohere {
url: String,
},
HuggingFace {
#[serde(default = "hf_default_url", deserialize_with = "parse_url")]
url: String,
@@ -99,6 +102,7 @@ impl Backend {

pub fn url(self) -> String {
match self {
Self::Cohere { url } => url,
Self::HuggingFace { url } => url,
Self::LlamaCpp { url } => url,
Self::Ollama { url } => url,
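For context on how this variant is selected: the `#[serde(rename_all = "lowercase", tag = "backend")]` attributes on `Backend` mean the client picks Cohere by sending `"backend": "cohere"` in its configuration. Below is a minimal, self-contained sketch of that deserialization (a trimmed copy of the enum, with an example URL only). Note that, unlike `HuggingFace`, the new `Cohere` variant declares neither a default `url` nor the `parse_url` validation, so the client must always supply one.

```rust
// Minimal sketch, assuming serde + serde_json; `Backend` is a trimmed copy of the
// enum above, not the crate's actual type.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "lowercase", tag = "backend")]
enum Backend {
    Cohere { url: String },
    Ollama { url: String },
}

fn main() -> serde_json::Result<()> {
    // The "backend" field picks the variant (lowercased); "url" has no default here.
    let config = r#"{ "backend": "cohere", "url": "https://api.cohere.com" }"#;
    let backend: Backend = serde_json::from_str(config)?;
    println!("{backend:?}"); // Cohere { url: "https://api.cohere.com" }
    Ok(())
}
```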
crates/llm-ls/src/backend.rs (55 additions, 0 deletions)
@@ -67,6 +67,54 @@ fn parse_api_text(text: &str) -> Result<Vec<Generation>> {
}
}

#[derive(Debug, Serialize, Deserialize)]
struct CohereGeneration {
text: String,
}

impl From<CohereGeneration> for Generation {
fn from(value: CohereGeneration) -> Self {
Generation {
generated_text: value.text,
}
}
}

#[derive(Debug, Deserialize)]
pub struct CohereError {
message: String,
}

impl std::error::Error for CohereError {
fn description(&self) -> &str {
&self.message
}
}

impl Display for CohereError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.message)
}
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum CohereAPIResponse {
Generation(CohereGeneration),
Error(CohereError),
}

fn build_cohere_headers(api_token: Option<&String>, ide: Ide) -> Result<HeaderMap> {
build_api_headers(api_token, ide)
}

fn parse_cohere_text(text: &str) -> Result<Vec<Generation>> {
match serde_json::from_str(text)? {
CohereAPIResponse::Generation(gen) => Ok(vec![gen.into()]),
CohereAPIResponse::Error(err) => Err(Error::Cohere(err)),
}
}

#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppGeneration {
content: String,
@@ -214,6 +262,11 @@ pub(crate) fn build_body(
mut request_body: Map<String, Value>,
) -> Map<String, Value> {
match backend {
Backend::Cohere { .. } => {
request_body.insert("message".to_owned(), Value::String(prompt));
request_body.insert("model".to_owned(), Value::String(model));
request_body.insert("stream".to_owned(), Value::Bool(false));
}
Backend::HuggingFace { .. } | Backend::Tgi { .. } => {
request_body.insert("inputs".to_owned(), Value::String(prompt));
if let Some(Value::Object(params)) = request_body.get_mut("parameters") {
@@ -241,6 +294,7 @@ pub(crate) fn build_headers(
ide: Ide,
) -> Result<HeaderMap> {
match backend {
Backend::Cohere { .. } => build_cohere_headers(api_token, ide),
Backend::HuggingFace { .. } => build_api_headers(api_token, ide),
Backend::LlamaCpp { .. } => Ok(build_llamacpp_headers()),
Backend::Ollama { .. } => Ok(build_ollama_headers()),
@@ -251,6 +305,7 @@ pub(crate) fn build_headers(

pub(crate) fn parse_generations(backend: &Backend, text: &str) -> Result<Vec<Generation>> {
match backend {
Backend::Cohere { .. } => parse_cohere_text(text),
Backend::HuggingFace { .. } => parse_api_text(text),
Backend::LlamaCpp { .. } => parse_llamacpp_text(text),
Backend::Ollama { .. } => parse_ollama_text(text),
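On the request side, `build_body` maps the prompt to Cohere's `message` field, sets `model`, and forces `stream` to `false`; `build_cohere_headers` simply delegates to `build_api_headers`, so Cohere requests get the same headers as the Hugging Face API path. On the response side, the `#[serde(untagged)]` enum makes `parse_cohere_text` treat a body with a top-level `text` field as a generation and fall through to the error variant when only `message` is present. A self-contained sketch of that parsing (the sample payloads are illustrative, not captured Cohere responses):

```rust
// Self-contained sketch of the untagged response parsing, assuming serde + serde_json;
// the types are trimmed copies of the ones added above.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct CohereGeneration {
    text: String,
}

#[derive(Debug, Deserialize)]
struct CohereError {
    message: String,
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum CohereAPIResponse {
    Generation(CohereGeneration),
    Error(CohereError),
}

fn main() -> serde_json::Result<()> {
    // A successful response carries a top-level "text" field; any extra fields
    // in the real payload are ignored by serde.
    let ok: CohereAPIResponse =
        serde_json::from_str(r#"{ "text": "fn add(a: i32, b: i32) -> i32 { a + b }" }"#)?;
    // Without "text", the Generation variant fails to match and Error is used instead.
    let err: CohereAPIResponse = serde_json::from_str(r#"{ "message": "invalid api token" }"#)?;
    println!("{ok:?}");
    println!("{err:?}");
    Ok(())
}
```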
crates/llm-ls/src/error.rs (2 additions, 0 deletions)
@@ -33,6 +33,8 @@ pub enum Error {
InvalidRepositoryId,
#[error("invalid tokenizer path")]
InvalidTokenizerPath,
#[error("cohere error: {0}")]
Cohere(crate::backend::CohereError),
#[error("llama.cpp error: {0}")]
LlamaCpp(crate::backend::APIError),
#[error("ollama error: {0}")]
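The `#[error("cohere error: {0}")]` attribute (thiserror-style, assuming that is the derive used for the rest of this enum) formats the new variant by delegating `{0}` to `CohereError`'s `Display` impl, so the upstream message is surfaced verbatim. A standalone sketch of that behaviour:

```rust
// Standalone sketch, assuming the thiserror crate; CohereError is a trimmed copy
// of the type added in backend.rs.
use std::fmt;

#[derive(Debug)]
struct CohereError {
    message: String,
}

impl fmt::Display for CohereError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.message)
    }
}

#[derive(Debug, thiserror::Error)]
enum Error {
    #[error("cohere error: {0}")]
    Cohere(CohereError),
}

fn main() {
    let e = Error::Cohere(CohereError {
        message: "invalid api token".to_owned(),
    });
    // {0} delegates to CohereError's Display impl.
    assert_eq!(e.to_string(), "cohere error: invalid api token");
}
```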
crates/llm-ls/src/main.rs (17 additions, 0 deletions)
@@ -428,6 +428,23 @@ fn build_url(backend: Backend, model: &str, disable_url_path_completion: bool) -
}

match backend {
Backend::Cohere { mut url } => {
if url.ends_with("/v1/chat") {
url
} else if url.ends_with("/v1/") {
url.push_str("chat");
url
} else if url.ends_with("/v1") {
url.push_str("/chat");
url
} else if url.ends_with('/') {
url.push_str("v1/chat");
url
} else {
url.push_str("/v1/chat");
url
}
}
Backend::HuggingFace { url } => format!("{url}/models/{model}"),
Backend::LlamaCpp { mut url } => {
if url.ends_with("/completions") {
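For the Cohere arm of `build_url`, whatever shape the configured URL takes, it is normalized to end in `/v1/chat`; unlike the Hugging Face branch, the model is not embedded in the URL but sent in the request body by `build_body`. A minimal sketch of that normalization, extracted as a free function for illustration (the base URL is only an example):

```rust
// Trimmed copy of the Cohere arm of build_url, for illustration only.
fn cohere_chat_url(mut url: String) -> String {
    if url.ends_with("/v1/chat") {
        url
    } else if url.ends_with("/v1/") {
        url.push_str("chat");
        url
    } else if url.ends_with("/v1") {
        url.push_str("/chat");
        url
    } else if url.ends_with('/') {
        url.push_str("v1/chat");
        url
    } else {
        url.push_str("/v1/chat");
        url
    }
}

fn main() {
    // All of these end up pointing at the /v1/chat endpoint.
    for base in [
        "https://api.cohere.com",
        "https://api.cohere.com/",
        "https://api.cohere.com/v1",
        "https://api.cohere.com/v1/chat",
    ] {
        assert_eq!(cohere_chat_url(base.to_owned()), "https://api.cohere.com/v1/chat");
    }
}
```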