diff --git a/Cargo.lock b/Cargo.lock index 061ad06..65e7513 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,56 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "anyhow" version = "1.0.100" @@ -52,6 +102,28 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -226,6 +298,52 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "clap" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "compact_str" version = "0.9.0" @@ -254,6 +372,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -421,6 +548,29 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_more" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", + "unicode-xid", +] + [[package]] name = "dirs" version = "6.0.0" @@ -453,6 +603,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "dyn-stack" version = "0.10.0" @@ -1291,6 +1447,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -1316,6 +1478,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.178" @@ -1382,6 +1550,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.7.6" @@ -1480,6 +1657,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num" version = "0.4.3" @@ -1599,6 +1785,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "onig" version = "6.5.1" @@ -1889,6 +2081,26 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" version = "1.12.2" @@ -1975,6 +2187,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.2" @@ -2063,6 +2284,31 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301858a4023d78debd2353c7426dc486001bddc91ae31a76fb1f55132f7e2633" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -2086,6 +2332,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "seq-macro" version = "0.3.6" @@ -2122,6 +2374,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_json" version = "1.0.145" @@ -2144,6 +2407,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" +dependencies = [ + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2156,6 +2428,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2370,6 +2651,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -2473,6 +2763,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.9.9+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb5238e643fc34a1d5d7e753e1532a91912d74b63b92b3ea51fde8d1b7bc79dd" +dependencies = [ + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + 
[[package]] name = "toml_datetime" version = "0.7.4+spec-1.0.0" @@ -2503,6 +2808,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "toml_writer" +version = "1.0.5+spec-1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9cd6190959dce0994aa8970cd32ab116d1851ead27e866039acaf2524ce44fa" + [[package]] name = "tower" version = "0.5.2" @@ -2577,6 +2888,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -2606,6 +2947,50 @@ dependencies = [ "yoke 0.7.5", ] +[[package]] +name = "ullm" +version = "0.0.9" +dependencies = [ + "anyhow", + "clap", + "dirs", + "futures-util", + "serde", + "tokio", + "toml", + "tracing", + "tracing-subscriber", + "ullm-core", + "ullm-deepseek", +] + +[[package]] +name = "ullm-core" +version = "0.0.9" +dependencies = [ + "anyhow", + "derive_more", + "futures-core", + "reqwest", + "schemars", + "serde", + "serde_json", +] + +[[package]] +name = "ullm-deepseek" +version = "0.0.9" +dependencies = [ + "anyhow", + "async-stream", + "futures-core", + "futures-util", + "serde", + "serde_json", + "tracing", + "ullm-core", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -2633,6 +3018,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -2683,6 +3074,18 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index fe7fea1..6e8efad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "3" -members = ["legacy/*"] +members = ["crates/*", "llm/*", "legacy/*"] [workspace.package] version = "0.0.9" @@ -8,10 +8,15 @@ edition = "2024" authors = ["clearloop "] license = "MIT" repository = "https://github.com/clearloop/cydonia" +documentation = "https://cydonia.docs.rs" +keywords = ["llm", "agent", "ai"] [workspace.dependencies] model = { path = "legacy/model", 
package = "cydonia-model" } candle = { path = "crates/candle", package = "cydonia-candle" } +ucore = { path = "crates/core", package = "ullm-core" } +deepseek = { path = "llm/deepseek", package = "ullm-deepseek" } + # crates.io anyhow = "1" @@ -31,6 +36,7 @@ toml = "0.9.8" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } + # legacy dependencies candle-core = "0.8.1" candle-nn = "0.8.1" @@ -40,5 +46,3 @@ llamac-sys = { version = "0.1.86", package = "llama-cpp-sys-2" } once_cell = "1.21" rand = "0.9.2" tokenizers = "0.21.0" - - diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 056703a..e10ef4b 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -15,3 +15,6 @@ serde.workspace = true futures-core.workspace = true reqwest.workspace = true schemars.workspace = true + +[dev-dependencies] +serde_json.workspace = true diff --git a/crates/core/src/chat.rs b/crates/core/src/chat.rs new file mode 100644 index 0000000..4b745fa --- /dev/null +++ b/crates/core/src/chat.rs @@ -0,0 +1,70 @@ +//! Chat abstractions for the unified LLM Interfaces + +use crate::{ + LLM, Response, Role, StreamChunk, + message::{AssistantMessage, Message, ToolMessage}, +}; +use anyhow::Result; +use futures_core::Stream; +use serde::Serialize; + +/// A chat for the LLM +pub struct Chat { + /// The chat configuration + pub config: P::ChatConfig, + + /// Chat history in memory + pub messages: Vec, + + /// The LLM provider + pub provider: P, +} + +impl Chat

{ + /// Send a message to the LLM + pub async fn send(&mut self, message: Message) -> Result<Response> { + self.messages.push(message.into()); + self.provider.send(&self.config, &self.messages).await + } + + /// Send a message to the LLM with streaming + pub fn stream(&mut self, message: Message) -> impl Stream<Item = Result<StreamChunk>> { + self.messages.push(message.into()); + self.provider.stream(&self.config, &self.messages) + } +} + +/// A chat message in memory +#[derive(Debug, Clone, Serialize)] +#[serde(untagged)] +pub enum ChatMessage { + /// A user message + User(Message), + + /// An assistant message + Assistant(AssistantMessage), + + /// A tool message + Tool(ToolMessage), + + /// A system message + System(Message), +} + +impl From<Message> for ChatMessage { + fn from(message: Message) -> Self { + match message.role { + Role::User => ChatMessage::User(message), + Role::Assistant => ChatMessage::Assistant(AssistantMessage { + message, + prefix: false, + reasoning: String::new(), + }), + Role::System => ChatMessage::System(message), + Role::Tool => ChatMessage::Tool(ToolMessage { + tool: String::new(), + message, + }), + } + } +} diff --git a/crates/core/src/config.rs b/crates/core/src/config.rs index 566f59c..6e43d7f 100644 --- a/crates/core/src/config.rs +++ b/crates/core/src/config.rs @@ -1,14 +1,11 @@ //! Configuration for a chat +use crate::{Tool, ToolChoice}; +use serde::{Deserialize, Serialize}; + /// Chat configuration -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Config { - /// The model to use - pub model: &'static str, - - /// Whether to enable thinking - pub think: bool, - /// The frequency penalty of the model pub frequency: i8, @@ -18,15 +15,27 @@ pub struct Config { /// Whether to return the log probabilities pub logprobs: bool, + /// The model to use + pub model: String, + /// The presence penalty of the model pub presence: i8, - /// Whether to stream the response - pub stream: bool, + /// Stop sequences to halt generation + pub stop: Vec<String>, /// The temperature of the model pub temperature: f32, + /// Whether to enable thinking + pub think: bool, + + /// Controls which tool is called by the model + pub tool_choice: ToolChoice, + + /// A list of tools the model may call + pub tools: Vec<Tool>, + /// The top probability of the model pub top_p: f32, @@ -36,6 +45,61 @@ pub struct Config { /// The number of max tokens to generate pub tokens: usize, - /// Whether to return the usage information + /// Whether to return the usage information in stream mode pub usage: bool, } + +impl Config { + /// Create a new configuration + pub fn new(model: impl Into<String>) -> Self { + Self { + model: model.into(), + ..Default::default() + } + } + + /// Add a tool to the configuration + pub fn tool(mut self, tool: Tool) -> Self { + self.tools.push(tool); + self + } + + /// Set tools for the configuration + pub fn tools(mut self, tools: Vec<Tool>) -> Self { + self.tools = tools; + self + } + + /// Set the tool choice for the configuration + pub fn tool_choice(mut self, choice: ToolChoice) -> Self { + self.tool_choice = choice; + self + } + + /// Set stop sequences for the configuration + pub fn stop(mut self, sequences: Vec<String>) -> Self { + self.stop = sequences; + self + } +} + +impl Default for Config { + fn default() -> Self { + Self { + frequency: 0, + json: false, + logprobs: false, + model: "deepseek-chat".into(), + presence: 0, + stop: Vec::new(), + temperature: 1.0, + think: false, + tool_choice: ToolChoice::None, + tools: Vec::new(), + top_logprobs: 0, + top_p: 1.0, + tokens: 1000, + usage: true, + } + } +}
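As a quick orientation for reviewers, here is a minimal sketch of how the `Config` builder above is meant to compose (the model name, stop sequence, and assertion are illustrative only, not part of this change):

```rust
use ullm_core::{Config, ToolChoice};

fn main() {
    // `new` sets the model and fills the remaining fields from `Default`
    // (deepseek-chat, temperature 1.0, 1000 max tokens, no tools).
    let config = Config::new("deepseek-chat")
        .tool_choice(ToolChoice::Auto) // let the model decide whether to call tools
        .stop(vec!["END".to_string()]); // hypothetical stop sequence
    assert_eq!(config.tokens, 1000);
}
```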
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 5800ab1..1e2aa12 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -1,13 +1,25 @@ //! Core abstractions for Unified LLM Interface pub use { + chat::{Chat, ChatMessage}, config::Config, message::{Message, Role}, provider::LLM, - tool::Tool, + reqwest::{self, Client}, + response::{ + Choice, CompletionTokensDetails, FinishReason, LogProb, LogProbs, Response, + ResponseMessage, TopLogProb, Usage, + }, + stream::{Delta, StreamChoice, StreamChunk}, + template::Template, + tool::{FunctionCall, Tool, ToolCall, ToolChoice}, }; +mod chat; mod config; mod message; mod provider; +mod response; +mod stream; +mod template; mod tool; diff --git a/crates/core/src/message.rs b/crates/core/src/message.rs index 4b2e7cf..c9434a2 100644 --- a/crates/core/src/message.rs +++ b/crates/core/src/message.rs @@ -1,15 +1,18 @@ //! Turbofish LLM message -use derive_more::Display; +use serde::{Deserialize, Serialize}; /// A message in the chat -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Message { - /// The role of the message - pub role: Role, - /// The content of the message pub content: String, + + /// The name of the message author + pub name: String, + + /// The role of the message + pub role: Role, } impl Message { @@ -17,6 +20,7 @@ impl Message { pub fn system(content: impl Into<String>) -> Self { Self { role: Role::System, + name: String::new(), content: content.into(), } } @@ -25,6 +29,7 @@ impl Message { pub fn user(content: impl Into<String>) -> Self { Self { role: Role::User, + name: String::new(), content: content.into(), } } @@ -33,32 +38,52 @@ impl Message { pub fn assistant(content: impl Into<String>) -> Self { Self { role: Role::Assistant, + name: String::new(), content: content.into(), } } +} - /// Create a new tool message - pub fn tool(content: impl Into<String>) -> Self { - Self { - role: Role::Tool, - content: content.into(), - } - } +/// A tool message in the chat +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ToolMessage { + /// The message + #[serde(flatten)] + pub message: Message, + + /// The tool call id + #[serde(alias = "tool_call_id")] + pub tool: String, +} + +/// An assistant message in the chat +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct AssistantMessage { + /// The message + #[serde(flatten)] + pub message: Message, + + /// Whether to prefix the message + pub prefix: bool, + + /// The reasoning content + #[serde(alias = "reasoning_content")] + pub reasoning: String, +} /// The role of a message -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)] pub enum Role { /// The user role - #[display("user")] + #[serde(rename = "user")] User, /// The assistant role - #[display("assistant")] + #[serde(rename = "assistant")] Assistant, /// The system role - #[display("system")] + #[serde(rename = "system")] System, /// The tool role - #[display("tool")] + #[serde(rename = "tool")] Tool, } diff --git a/crates/core/src/provider.rs b/crates/core/src/provider.rs index f4aa5d3..6fbbe91 100644 --- a/crates/core/src/provider.rs +++ b/crates/core/src/provider.rs @@ -1,19 +1,40 @@ -//!
Provider abstractions for the unified LLM Interfaces -use crate::Message; +use crate::{Chat, ChatMessage, Config, Response, StreamChunk}; use anyhow::Result; +use futures_core::Stream; use reqwest::Client; /// A trait for LLM providers -pub trait LLM: Sized { - /// The model of the LLM - type Model; +pub trait LLM: Sized + Clone { + /// The chat configuration. + type ChatConfig: From<Config>; /// Create a new LLM provider - fn new(client: Client, key: &str, model: Self::Model) -> Result<Self> + fn new(client: Client, key: &str) -> Result<Self> where Self: Sized; + /// Create a new chat + fn chat(&self, config: Config) -> Chat<Self> { + Chat { + config: config.into(), + messages: Vec::new(), + provider: self.clone(), + } + } + /// Send a message to the LLM - fn send(&mut self, _message: Message) -> impl Future>; + fn send( + &mut self, + config: &Self::ChatConfig, + messages: &[ChatMessage], + ) -> impl Future<Output = Result<Response>>; + + /// Send a message to the LLM with streaming + fn stream( + &mut self, + config: &Self::ChatConfig, + messages: &[ChatMessage], + ) -> impl Stream<Item = Result<StreamChunk>>; } diff --git a/crates/core/src/response.rs b/crates/core/src/response.rs new file mode 100644 index 0000000..dce5960 --- /dev/null +++ b/crates/core/src/response.rs @@ -0,0 +1,165 @@ +//! Chat response abstractions for the unified LLM Interfaces + +use crate::{Role, tool::ToolCall}; +use serde::{Deserialize, Serialize}; + +/// A chat completion response from the LLM +#[derive(Debug, Clone, Deserialize)] +pub struct Response { + /// A unique identifier for the chat completion + pub id: String, + + /// The object type, always "chat.completion" + pub object: String, + + /// Unix timestamp (in seconds) of when the response was created + pub created: u64, + + /// The model used for the completion + pub model: String, + + /// Backend configuration identifier + pub system_fingerprint: Option<String>, + + /// The list of completion choices + pub choices: Vec<Choice>, + + /// Token usage statistics + pub usage: Usage, +} + +impl Response { + /// Get the first message from the response + pub fn message(&self) -> Option<&String> { + self.choices + .first() + .and_then(|choice| choice.message.content.as_ref()) + } + + /// Get the reasoning content of the first choice + pub fn reasoning(&self) -> Option<&String> { + self.choices + .first() + .and_then(|choice| choice.message.reasoning_content.as_ref()) + } +} + +/// A completion choice in a non-streaming response +#[derive(Debug, Clone, Deserialize)] +pub struct Choice { + /// The index of this choice in the list + pub index: u32, + + /// The generated message + pub message: ResponseMessage, + + /// The reason the model stopped generating + pub finish_reason: Option<FinishReason>, + + /// Log probability information + pub logprobs: Option<LogProbs>, +} + +/// A message in the response +#[derive(Debug, Clone, Deserialize)] +pub struct ResponseMessage { + /// The role of the message author (always "assistant") + pub role: Role, + + /// The content of the message + pub content: Option<String>, + + /// The reasoning content (for deepseek-reasoner model) + pub reasoning_content: Option<String>, + + /// Tool calls made by the model + pub tool_calls: Option<Vec<ToolCall>>, +} + +/// The reason the model stopped generating +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FinishReason { + /// The model finished naturally + Stop, + + /// The model hit the max token limit + Length, + + /// Content was filtered + ContentFilter, + + /// The model is making tool calls + ToolCalls, + + /// Insufficient system resources + InsufficientSystemResource,
+} + +/// Token usage statistics +#[derive(Debug, Clone, Deserialize)] +pub struct Usage { + /// Number of tokens in the prompt + pub prompt_tokens: u32, + + /// Number of tokens in the completion + pub completion_tokens: u32, + + /// Total number of tokens used + pub total_tokens: u32, + + /// Number of prompt tokens from cache hits + pub prompt_cache_hit_tokens: Option<u32>, + + /// Number of prompt tokens not in cache + pub prompt_cache_miss_tokens: Option<u32>, + + /// Detailed breakdown of completion tokens + pub completion_tokens_details: Option<CompletionTokensDetails>, +} + +/// Detailed breakdown of completion tokens +#[derive(Debug, Clone, Deserialize)] +pub struct CompletionTokensDetails { + /// Number of tokens used for reasoning + pub reasoning_tokens: Option<u32>, +} + +/// Log probability information +#[derive(Debug, Clone, Deserialize)] +pub struct LogProbs { + /// Log probabilities for each token + pub content: Option<Vec<LogProb>>, +} + +/// Log probability for a single token +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct LogProb { + /// The token string + pub token: String, + + /// The log probability of this token + pub logprob: f64, + + /// Byte representation of the token + #[serde(skip_serializing_if = "Option::is_none")] + pub bytes: Option<Vec<u8>>, + + /// Top log probabilities for this position + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option<Vec<TopLogProb>>, +} + +/// Top log probability entry +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct TopLogProb { + /// The token string + pub token: String, + + /// The log probability + pub logprob: f64, + + /// Byte representation of the token + #[serde(skip_serializing_if = "Option::is_none")] + pub bytes: Option<Vec<u8>>, +} diff --git a/crates/core/src/stream.rs b/crates/core/src/stream.rs new file mode 100644 index 0000000..5a744e1 --- /dev/null +++ b/crates/core/src/stream.rs @@ -0,0 +1,77 @@ +//!
Streaming response abstractions for the unified LLM Interfaces + +use crate::{Role, tool::ToolCall}; +use serde::Deserialize; + +/// A streaming chat completion chunk +#[derive(Debug, Clone, Deserialize)] +pub struct StreamChunk { + /// A unique identifier for the chat completion + pub id: String, + + /// The object type, always "chat.completion.chunk" + pub object: String, + + /// Unix timestamp (in seconds) of when the chunk was created + pub created: u64, + + /// The model used for the completion + pub model: String, + + /// Backend configuration identifier + pub system_fingerprint: Option<String>, + + /// The list of completion choices (with delta content) + pub choices: Vec<StreamChoice>, + + /// Token usage statistics (only in final chunk) + pub usage: Option<Usage>, +} + +impl StreamChunk { + /// Get the content of the first choice + pub fn content(&self) -> Option<&str> { + self.choices + .first() + .and_then(|choice| choice.delta.content.as_deref()) + } + + /// Get the reasoning content of the first choice + pub fn reasoning_content(&self) -> Option<&str> { + self.choices + .first() + .and_then(|choice| choice.delta.reasoning_content.as_deref()) + } +} + +/// A completion choice in a streaming response +#[derive(Debug, Clone, Deserialize)] +pub struct StreamChoice { + /// The index of this choice in the list + pub index: u32, + + /// The delta content for this chunk + pub delta: Delta, + + /// The reason the model stopped generating + pub finish_reason: Option<FinishReason>, + + /// Log probability information + pub logprobs: Option<LogProbs>, +} + +/// Delta content in a streaming response +#[derive(Debug, Clone, Deserialize)] +pub struct Delta { + /// The role of the message author + pub role: Option<Role>, + + /// The content delta + pub content: Option<String>, + + /// The reasoning content delta (for deepseek-reasoner model) + pub reasoning_content: Option<String>, + + /// Tool calls delta + pub tool_calls: Option<Vec<ToolCall>>, +} diff --git a/crates/core/src/template.rs b/crates/core/src/template.rs new file mode 100644 index 0000000..5510338 --- /dev/null +++ b/crates/core/src/template.rs @@ -0,0 +1,37 @@ +//! Prompt templates for the Turbofish agent + +use crate::{Message, Role}; +use serde::{Deserialize, Serialize}; + +/// A template of the system prompt +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Template { + /// The system prompt for the agent + pub system: String, + + /// The input example + pub input: String, + + /// The output JSON example + pub output: String, +} + +impl Template { + /// Create a new message from the template + pub fn message(&self) -> Message { + Message { + content: format!( + r#"{} + + EXAMPLE INPUT: + {} + + EXAMPLE JSON OUTPUT: + {}"#, + self.system, self.input, self.output + ), + name: String::new(), + role: Role::System, + } + } +} diff --git a/crates/core/src/tool.rs b/crates/core/src/tool.rs index 6ad3426..c36a607 100644 --- a/crates/core/src/tool.rs +++ b/crates/core/src/tool.rs @@ -1,14 +1,16 @@ -//!
Tool abstractions for the unified LLM Interfaces use schemars::Schema; +use serde::{Deserialize, Serialize}; /// A tool for the LLM +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Tool { /// The name of the tool - pub name: &'static str, + pub name: String, /// The description of the tool - pub description: &'static str, + pub description: String, /// The parameters of the tool pub parameters: Schema, @@ -16,3 +18,66 @@ /// Whether to strictly validate the parameters pub strict: bool, } + +/// A tool call made by the model +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ToolCall { + /// The ID of the tool call + pub id: String, + + /// The type of tool (currently only "function") + #[serde(rename = "type")] + pub call_type: String, + + /// The function to call + pub function: FunctionCall, +} + +/// A function call within a tool call +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct FunctionCall { + /// The name of the function to call + pub name: String, + + /// The arguments to pass to the function (JSON string) + pub arguments: String, +} + +/// Controls which tool is called by the model +#[derive(Debug, Clone, Deserialize, Serialize)] +pub enum ToolChoice { + /// Model will not call any tool + #[serde(rename = "none")] + None, + + /// Model can pick between generating a message or calling tools + #[serde(rename = "auto")] + Auto, + + /// Model must call one or more tools + #[serde(rename = "required")] + Required, + + /// Model must call the specified function + Function { + r#type: String, + function: ToolChoiceFunction, + }, +} + +/// A specific function to call +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ToolChoiceFunction { + /// The name of the function to call + pub name: String, +} + +impl ToolChoice { + /// Create a tool choice for a specific function + pub fn function(name: impl Into<String>) -> Self { + ToolChoice::Function { + r#type: "function".into(), + function: ToolChoiceFunction { name: name.into() }, + } + } +} diff --git a/crates/core/templates/deepseek/response.json b/crates/core/templates/deepseek/response.json new file mode 100644 index 0000000..7b7d3b8 --- /dev/null +++ b/crates/core/templates/deepseek/response.json @@ -0,0 +1,68 @@ +{ + "id": "string", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "string", + "reasoning_content": "string", + "tool_calls": [ + { + "id": "string", + "type": "function", + "function": { + "name": "string", + "arguments": "string" + } + } + ], + "role": "assistant" + }, + "logprobs": { + "content": [ + { + "token": "string", + "logprob": 0, + "bytes": [0], + "top_logprobs": [ + { + "token": "string", + "logprob": 0, + "bytes": [0] + } + ] + } + ], + "reasoning_content": [ + { + "token": "string", + "logprob": 0, + "bytes": [0], + "top_logprobs": [ + { + "token": "string", + "logprob": 0, + "bytes": [0] + } + ] + } + ] + } + } + ], + "created": 0, + "model": "string", + "system_fingerprint": "string", + "object": "chat.completion", + "usage": { + "completion_tokens": 0, + "prompt_tokens": 0, + "prompt_cache_hit_tokens": 0, + "prompt_cache_miss_tokens": 0, + "total_tokens": 0, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + } +} diff --git a/crates/core/templates/deepseek/stream.json b/crates/core/templates/deepseek/stream.json new file mode 100644 index 0000000..9198e9b --- /dev/null +++ b/crates/core/templates/deepseek/stream.json @@ -0,0 +1,183 @@ +[ + { + "id": "1f633d8bfc032625086f14113c411638", + "choices": [ + {
+ "index": 0, + "delta": { "content": "", "role": "assistant" }, + "finish_reason": null, + "logprobs": null + } + ], + "created": 1718345013, + "model": "deepseek-chat", + "system_fingerprint": "fp_a49d71b8a1", + "object": "chat.completion.chunk", + "usage": null + }, + + { + "choices": [ + { + "delta": { "content": "Hello", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": "!", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " How", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " can", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " I", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " assist", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " you", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": " today", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": "?", "role": "assistant" }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1" + }, + + { + "choices": [ + { + "delta": { "content": "", "role": null }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1718345013, + "id": "1f633d8bfc032625086f14113c411638", + "model": "deepseek-chat", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_a49d71b8a1", + "usage": { + "completion_tokens": 9, 
+ "prompt_tokens": 17, + "total_tokens": 26 + } + } +] diff --git a/crates/core/tests/response.rs b/crates/core/tests/response.rs new file mode 100644 index 0000000..607b42f --- /dev/null +++ b/crates/core/tests/response.rs @@ -0,0 +1,17 @@ +//! Tests for the response module + +use ullm_core::{Response, StreamChunk}; + +const DEEPSEEK_RESPONSE_JSON: &str = include_str!("../templates/deepseek/response.json"); +const DEEPSEEK_STREAM_CHUNK_JSON: &str = include_str!("../templates/deepseek/stream.json"); + +#[test] +fn parse_response() { + let _response: Response = serde_json::from_str(DEEPSEEK_RESPONSE_JSON).unwrap(); +} + +#[test] +fn parse_stream_chunk() { + let _stream_chunks: Vec = + serde_json::from_str(DEEPSEEK_STREAM_CHUNK_JSON).unwrap(); +} diff --git a/crates/ullm/Cargo.toml b/crates/ullm/Cargo.toml index f30afae..424190d 100644 --- a/crates/ullm/Cargo.toml +++ b/crates/ullm/Cargo.toml @@ -9,3 +9,16 @@ documentation.workspace = true keywords.workspace = true [dependencies] +deepseek.workspace = true +ucore.workspace = true + +# crates-io dependencies +anyhow.workspace = true +clap.workspace = true +dirs.workspace = true +futures-util.workspace = true +serde.workspace = true +tokio.workspace = true +toml.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true diff --git a/crates/ullm/src/bin/ullm.rs b/crates/ullm/src/bin/ullm.rs new file mode 100644 index 0000000..6aef0fa --- /dev/null +++ b/crates/ullm/src/bin/ullm.rs @@ -0,0 +1,16 @@ +use anyhow::Result; +use clap::Parser; +use ullm::cmd::{App, Command, Config}; + +#[tokio::main] +async fn main() -> Result<()> { + let app = App::parse(); + app.init_tracing(); + + match app.command { + Command::Chat(chat) => chat.run(app.stream).await?, + Command::Generate => Config::default().save()?, + } + + Ok(()) +} diff --git a/crates/ullm/src/cmd/chat.rs b/crates/ullm/src/cmd/chat.rs new file mode 100644 index 0000000..2f27c3d --- /dev/null +++ b/crates/ullm/src/cmd/chat.rs @@ -0,0 +1,127 @@ +//! Chat command + +use super::Config; +use crate::DeepSeek; +use anyhow::Result; +use clap::{Args, ValueEnum}; +use futures_util::StreamExt; +use std::{ + fmt::{Display, Formatter}, + io::{BufRead, Write}, +}; +use ucore::{Chat, Client, LLM, Message}; + +/// Chat command arguments +#[derive(Debug, Args)] +pub struct ChatCmd { + /// The model provider to use + #[arg(short, long, default_value = "deepseek")] + pub model: Model, + + /// The message to send (if empty, starts interactive mode) + pub message: Option, +} + +impl ChatCmd { + /// Run the chat command + pub async fn run(&self, stream: bool) -> Result<()> { + let config = Config::load()?; + let key = config + .key + .get(&self.model.to_string()) + .ok_or_else(|| anyhow::anyhow!("missing {:?} API key in config", self.model))?; + let provider = match self.model { + Model::Deepseek => DeepSeek::new(Client::new(), key)?, + }; + + let mut chat = provider.chat(config.config().clone()); + if let Some(msg) = &self.message { + Self::send(&mut chat, Message::user(msg), stream).await?; + } else { + let stdin = std::io::stdin(); + let mut stdout = std::io::stdout(); + loop { + print!("> "); + stdout.flush()?; + + let mut input = String::new(); + if stdin.lock().read_line(&mut input)? 
== 0 { + break; + } + + let input = input.trim(); + if input.is_empty() { + continue; + } + if input == "/quit" || input == "/exit" { + break; + } + + Self::send(&mut chat, Message::user(input), stream).await?; + } + } + + Ok(()) + } + + async fn send(chat: &mut Chat<DeepSeek>, message: Message, stream: bool) -> Result<()> { + if stream { + let mut response_content = String::new(); + { + let mut reasoning = false; + let mut stream = std::pin::pin!(chat.stream(message)); + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + if let Some(content) = chunk.content() { + if reasoning { + print!("\ncontent: "); + reasoning = false; + } + print!("{content}"); + response_content.push_str(content); + } + + if let Some(reasoning_content) = chunk.reasoning_content() { + if !reasoning { + print!("thinking: "); + reasoning = true; + } + print!("{reasoning_content}"); + response_content.push_str(reasoning_content); + } + } + } + println!(); + chat.messages + .push(Message::assistant(&response_content).into()); + } else { + let response = chat.send(message).await?; + if let Some(reasoning_content) = response.reasoning() { + println!("reasoning: {reasoning_content}"); + } + + if let Some(content) = response.message() { + println!("{content}"); + } + chat.messages + .push(Message::assistant(response.message().unwrap_or(&String::new())).into()); + } + Ok(()) + } +} + +/// Available model providers +#[derive(Debug, Clone, Copy, Default, ValueEnum)] +pub enum Model { + /// DeepSeek model + #[default] + Deepseek, +} + +impl Display for Model { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Model::Deepseek => write!(f, "deepseek"), + } + } +} diff --git a/crates/ullm/src/cmd/config.rs b/crates/ullm/src/cmd/config.rs new file mode 100644 index 0000000..5935d52 --- /dev/null +++ b/crates/ullm/src/cmd/config.rs @@ -0,0 +1,48 @@ +//! Configuration for the CLI + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::{collections::BTreeMap, path::PathBuf, sync::LazyLock}; + +static CONFIG: LazyLock<PathBuf> = + LazyLock::new(|| dirs::home_dir().unwrap().join(".config/ullm.toml")); + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct Config { + /// The core chat configuration + config: ucore::Config, + + /// The API keys for LLMs + pub key: BTreeMap<String, String>, +} + +impl Config { + /// Load the configuration from the file + pub fn load() -> Result<Self> { + let config = toml::from_str(&std::fs::read_to_string(CONFIG.as_path())?)?; + Ok(config) + } + + /// Save the configuration to the file + pub fn save(&self) -> Result<()> { + std::fs::write(CONFIG.as_path(), toml::to_string(self)?)?; + tracing::info!("Configuration saved to {}", CONFIG.display()); + Ok(()) + } + + /// Get the core config + pub fn config(&self) -> &ucore::Config { + &self.config + } +} + +impl Default for Config { + fn default() -> Self { + Self { + config: ucore::Config::default(), + key: [("deepseek".to_string(), "YOUR_API_KEY".to_string())] + .into_iter() + .collect(), + } + } +} diff --git a/crates/ullm/src/cmd/mod.rs b/crates/ullm/src/cmd/mod.rs new file mode 100644 index 0000000..5fcd4df --- /dev/null +++ b/crates/ullm/src/cmd/mod.rs @@ -0,0 +1,57 @@ +//!
CLI commands for ullm + +mod chat; +mod config; + +use clap::{Parser, Subcommand}; +use tracing_subscriber::{EnvFilter, fmt}; +pub use {chat::ChatCmd, config::Config}; + +/// Unified LLM Interface CLI +#[derive(Debug, Parser)] +#[command(name = "ullm", version, about)] +pub struct App { + /// Enable streaming mode + #[arg(short, long, global = true)] + pub stream: bool, + + /// Verbosity level (use -v, -vv, -vvv, etc.) + #[arg(short, long, action = clap::ArgAction::Count, global = true)] + pub verbose: u8, + + /// Subcommand to run + #[command(subcommand)] + pub command: Command, +} + +/// Available commands +#[derive(Debug, Subcommand)] +pub enum Command { + /// Chat with an LLM + Chat(chat::ChatCmd), + + /// Generate the configuration file + Generate, +} + +impl App { + /// Initialize tracing subscriber based on verbosity + pub fn init_tracing(&self) { + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { + let directive = match self.verbose { + 0 => "info", + 1 => "ullm=debug", + 2 => "ullm=trace", + 3 => "debug", + _ => "trace", + }; + EnvFilter::new(directive) + }); + + fmt() + .without_time() + .with_env_filter(filter) + .with_target(self.verbose != 0) + .init(); + } +} diff --git a/crates/ullm/src/lib.rs b/crates/ullm/src/lib.rs index c0c6a6b..ea218b1 100644 --- a/crates/ullm/src/lib.rs +++ b/crates/ullm/src/lib.rs @@ -1,2 +1,6 @@ //! Unified LLM Interface +pub mod cmd; + +pub use deepseek::DeepSeek; +pub use ucore::{Chat, ChatMessage, Client, Config, LLM, Message, Response, StreamChunk}; diff --git a/llm/deepseek/Cargo.toml b/llm/deepseek/Cargo.toml index 0339847..613940f 100644 --- a/llm/deepseek/Cargo.toml +++ b/llm/deepseek/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "deepseek" +name = "ullm-deepseek" version.workspace = true edition.workspace = true authors.workspace = true @@ -13,3 +13,9 @@ ucore.workspace = true # crates-io dependencies anyhow.workspace = true +async-stream.workspace = true +futures-core.workspace = true +futures-util.workspace = true +serde.workspace = true +serde_json.workspace = true +tracing.workspace = true diff --git a/llm/deepseek/src/lib.rs b/llm/deepseek/src/lib.rs index 8b13789..c4c9461 100644 --- a/llm/deepseek/src/lib.rs +++ b/llm/deepseek/src/lib.rs @@ -1 +1,18 @@ +//! The DeepSeek LLM provider +pub use request::Request; +use ucore::{Client, reqwest::header::HeaderMap}; + +mod llm; +mod request; + +/// The DeepSeek LLM provider +#[derive(Clone)] +pub struct DeepSeek { + /// The HTTP client + pub client: Client, + + /// The request headers + headers: HeaderMap, +} diff --git a/llm/deepseek/src/llm.rs b/llm/deepseek/src/llm.rs new file mode 100644 index 0000000..c2bc608 --- /dev/null +++ b/llm/deepseek/src/llm.rs @@ -0,0 +1,90 @@ +//! The LLM implementation + +use crate::{DeepSeek, Request}; +use anyhow::Result; +use async_stream::try_stream; +use futures_core::Stream; +use futures_util::StreamExt; +use ucore::{ + Chat, ChatMessage, Client, Config, LLM, Response, StreamChunk, + reqwest::{ + Method, + header::{self, HeaderMap}, + }, +}; + +const ENDPOINT: &str = "https://api.deepseek.com/chat/completions"; + +impl LLM for DeepSeek { + /// The chat configuration.
+ type ChatConfig = Config; + + /// Create a new LLM provider + fn new(client: Client, key: &str) -> Result<Self> { + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, "application/json".parse()?); + headers.insert(header::ACCEPT, "application/json".parse()?); + headers.insert(header::AUTHORIZATION, format!("Bearer {}", key).parse()?); + Ok(Self { client, headers }) + } + + /// Create a new chat + fn chat(&self, config: Config) -> Chat<Self> { + Chat { + config, + messages: Vec::new(), + provider: self.clone(), + } + } + + /// Send a message to the LLM + async fn send(&mut self, config: &Config, messages: &[ChatMessage]) -> Result<Response> { + // Read the body as text first so a malformed payload surfaces in the parse error. + let text = self + .client + .request(Method::POST, ENDPOINT) + .headers(self.headers.clone()) + .json(&Request::from(config).messages(messages)) + .send() + .await? + .text() + .await?; + + serde_json::from_str(&text).map_err(Into::into) + } + + /// Send a message to the LLM with streaming + fn stream( + &mut self, + config: &Config, + messages: &[ChatMessage], + ) -> impl Stream<Item = Result<StreamChunk>> { + let request = self + .client + .request(Method::POST, ENDPOINT) + .headers(self.headers.clone()) + .json( + &Request::from(config) + .messages(messages) + .stream(config.usage), + ); + + try_stream! { + let mut stream = request.send().await?.bytes_stream(); + while let Some(chunk) = stream.next().await { + let text = String::from_utf8_lossy(&chunk?).into_owned(); + // NOTE: assumes each SSE "data:" event arrives whole within a single chunk. + for data in text.split("data: ").skip(1).filter(|s| !s.starts_with("[DONE]")) { + yield serde_json::from_str(data.trim())?; + } + } + } + } +} diff --git a/llm/deepseek/src/request.rs b/llm/deepseek/src/request.rs new file mode 100644 index 0000000..f428ec3 --- /dev/null +++ b/llm/deepseek/src/request.rs @@ -0,0 +1,156 @@ +//!
The request body for the DeepSeek API + +use serde::Serialize; +use serde_json::{Number, Value, json}; +use ucore::{ChatMessage, Config, Tool}; + +/// The request body for the DeepSeek API +#[derive(Debug, Clone, Serialize)] +pub struct Request { + /// The frequency penalty to use for the response + #[serde(skip_serializing_if = "Value::is_null")] + pub frequency_penalty: Value, + + /// Whether to return the log probabilities + #[serde(skip_serializing_if = "Value::is_null")] + pub logprobs: Value, + + /// The maximum number of tokens to generate + pub max_tokens: usize, + + /// The messages to send to the API + pub messages: Vec<ChatMessage>, + + /// The model we are using + pub model: String, + + /// The presence penalty to use for the response + #[serde(skip_serializing_if = "Value::is_null")] + pub presence_penalty: Value, + + /// The response format to use + #[serde(skip_serializing_if = "Value::is_null")] + pub response_format: Value, + + /// Stop sequences + #[serde(skip_serializing_if = "Value::is_null")] + pub stop: Value, + + /// Whether to stream the response + pub stream: bool, + + /// Stream options + #[serde(skip_serializing_if = "Value::is_null")] + pub stream_options: Value, + + /// Whether to enable thinking + #[serde(skip_serializing_if = "Value::is_null")] + pub thinking: Value, + + /// The temperature to use for the response + #[serde(skip_serializing_if = "Value::is_null")] + pub temperature: Value, + + /// Controls which (if any) tool is called by the model + #[serde(skip_serializing_if = "Value::is_null")] + pub tool_choice: Value, + + /// A list of tools the model may call + #[serde(skip_serializing_if = "Value::is_null")] + pub tools: Value, + + /// An integer between 0 and 20 specifying the number of most likely tokens to + /// return at each token position, each with an associated log probability.
+ #[serde(skip_serializing_if = "Value::is_null")] + pub top_logprobs: Value, + + /// The top probability to use for the response + #[serde(skip_serializing_if = "Value::is_null")] + pub top_p: Value, +} + +impl Request { + /// Set the messages for the request + pub fn messages(self, messages: &[ChatMessage]) -> Self { + Self { + messages: messages.to_vec(), + ..self + } + } + + /// Enable streaming for the request + pub fn stream(mut self, usage: bool) -> Self { + self.stream = true; + self.stream_options = if usage { + json!({ "include_usage": true }) + } else { + Value::Null + }; + self + } +} + +impl From<&Config> for Request { + fn from(config: &Config) -> Self { + Self { + frequency_penalty: Number::from_f64(config.frequency as f64) + .map(Value::Number) + .unwrap_or(Value::Null), + logprobs: if config.logprobs { + Value::Bool(true) + } else { + Value::Null + }, + max_tokens: config.tokens, + messages: Vec::new(), + model: config.model.clone(), + presence_penalty: Number::from_f64(config.presence as f64) + .map(Value::Number) + .unwrap_or(Value::Null), + response_format: if config.json { + json!({ "type": "json_object" }) + } else { + Value::Null + }, + stop: if config.stop.is_empty() { + Value::Null + } else { + config.stop.iter().map(|s| json!(s)).collect() + }, + stream: false, + stream_options: Value::Null, + temperature: Number::from_f64(config.temperature as f64) + .map(Value::Number) + .unwrap_or(Value::Null), + thinking: if config.think { + json!({ "type": "enabled" }) + } else { + Value::Null + }, + tool_choice: serde_json::to_value(&config.tool_choice).unwrap_or(Value::Null), + tools: serialize_tools(&config.tools), + top_logprobs: if config.logprobs { + Value::Number(config.top_logprobs.into()) + } else { + Value::Null + }, + top_p: Number::from_f64(config.top_p as f64) + .map(Value::Number) + .unwrap_or(Value::Null), + } + } +} + +/// Serialize tools to JSON value +fn serialize_tools(tools: &[Tool]) -> Value { + if tools.is_empty() { + return Value::Null; + } + + let tools: Vec<Value> = tools + .iter() + .map(|tool| json!({ "type": "function", "function": tool })) + .collect(); + + Value::Array(tools) +}
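End to end, the new crates are intended to fit together roughly as below — a hedged sketch, assuming a `DEEPSEEK_API_KEY` environment variable stands in for the key the CLI normally loads from `~/.config/ullm.toml`:

```rust
use anyhow::Result;
use ullm::{Client, Config, DeepSeek, LLM, Message};

#[tokio::main]
async fn main() -> Result<()> {
    // Illustrative key source; the ullm CLI reads keys from its TOML config instead.
    let key = std::env::var("DEEPSEEK_API_KEY")?;
    let provider = DeepSeek::new(Client::new(), &key)?;

    // `chat` is the default method on the `LLM` trait; `Config::default()`
    // targets deepseek-chat with usage reporting enabled.
    let mut chat = provider.chat(Config::default());
    let response = chat.send(Message::user("Hello!")).await?;
    println!("{}", response.message().unwrap_or(&String::new()));
    Ok(())
}
```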