. prep for v0.1.3

Jeremy Chone
2024-07-18 10:46:09 -07:00
parent 5dac587749
commit 803df63924
4 changed files with 72 additions and 52 deletions

View File

@@ -1,5 +1,25 @@
`.` Minor | `-` Fix | `+` Addition | `^` Improvement | `!` Change | `*` Refactor
> **IMPORTANT:** `0.1.x` will still have some breaking changes in patches.
> - Make sure to **lock** your version, e.g., `genai = "=0.1.3"`.
> - Version `0.2.x` will follow semver more strictly.
> - API changes will be denoted as "`!` - **API CHANGE** ..."
## 2024-07-18 - `0.1.3`
- `!` **Error refactor** - added `genai::adapter::Error` and `genai::resolver::Error`, and updated `genai::Error` with the appropriate `From` implementations
- `+` **Added token usage** for ALL adapters/providers - `ChatResponse.usage`, plus `ChatRequestOptions` `.capture_usage`/`.capture_content` support for streaming, across all adapters (see the Ollama streaming note in the README; a hedged sketch follows this list)
- `!` **API CHANGE**: `ClientConfig::with_chat_request_options` (was `with_default_chat_request_options`)
- `!` **API CHANGE**: `PrintChatStreamOptions::from_print_events` (was `from_stream_events`)
- `^` `AdapterKind` - added `as_str` and `as_lower_str`
- `^` `ChatRequest` - added `.iter_systems()` and `.combine_systems()` (includes `chat_req.system`, when set, as part of the system messages)
- `!` **API CHANGE**: `Client::all_model_names(..)` (was `Client::list_models(..)`)
- `^` **groq** - added `gemma2-9b-it` to the list of Groq models
- `!` **API CHANGE**: `genai::Client` (was `genai::client::Client`, same for `ClientBuilder` `ClientConfig`)
- `-` **groq** - removed the Groq whisper model from `list_models`, since it is not a chat-completion model
- `^` **ollama** - implemented live `list_models` for Ollama
- `!` Made `AdapterDispatcher` crate-only (intended to be internal)
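
Below is a minimal, hedged sketch of the new usage-capture surface. Only `genai::Client`, `ChatResponse.usage`, `ChatRequestOptions`, and the `.capture_usage`/`.capture_content` names are confirmed by the entries above; the `genai::chat` import path, the `Default` impl, and the field types are assumptions:

```rust
use genai::chat::{ChatMessage, ChatRequest, ChatRequestOptions}; // assumed path
use genai::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
	let client = Client::default();
	let chat_req = ChatRequest::new(vec![ChatMessage::user("Why is the sky red?")]);

	// Assumed shape: optional flags on `ChatRequestOptions`. Per the entry
	// above, `capture_usage`/`capture_content` matter for the streaming path.
	let options = ChatRequestOptions {
		capture_usage: Some(true),
		..Default::default()
	};

	let chat_res = client.exec_chat("gpt-3.5-turbo", chat_req, Some(&options)).await?;

	// `ChatResponse.usage` is the normalized token usage (see the README table).
	println!("usage: {:?}", chat_res.usage);
	Ok(())
}
```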
## 2024-07-08 - `0.1.2`
- `+` `ChatRequestOptions` - added `temperature`, `max_tokens`, `top_p` for all adapters (see the README for the property mapping; a hedged sketch follows).
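
For illustration, given the `client`, `model`, and `chat_req` from the README quick-start, setting these options might look like the sketch below; only the three property names are confirmed by the entry above, while the `Option` field types and the `Default` impl are assumptions:

```rust
// Hedged sketch - field names per the entry above; types assumed.
let options = ChatRequestOptions {
	temperature: Some(0.7), // sampling temperature
	max_tokens: Some(256),  // cap on generated tokens
	top_p: Some(0.9),       // nucleus sampling
	..Default::default()
};
let chat_res = client.exec_chat(model, chat_req.clone(), Some(&options)).await?;
```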

View File

@@ -12,14 +12,14 @@ Currently supports natively: **Ollama**, **OpenAI**, **Gemini**, **Anthropic**,
```toml
# Cargo.toml
genai = "=0.1.3" # Version lock for `0.1.x`
```
<br />
The goal of this library is to provide a common and ergonomic single API to many generative AI Providers, such as OpenAI, Anthropic, Cohere, and Ollama.
- **IMPORTANT 1** `0.1.x` will still have some breaking changes in patches, so make sure to **lock** your version, e.g., `genai = "=0.1.3"`. In short, `0.1.x` can be considered "beta releases." Version `0.2.x` will follow semver more strictly.
- **IMPORTANT 2** `genai` is focused on normalizing chat completion APIs across AI providers and is not intended to be a full representation of a given AI provider. For this, there are excellent libraries such as [async-openai](https://crates.io/search?q=async-openai) for OpenAI and [ollama-rs](https://crates.io/crates/ollama-rs) for Ollama.
@@ -38,18 +38,19 @@ const MODEL_OPENAI: &str = "gpt-3.5-turbo";
const MODEL_ANTHROPIC: &str = "claude-3-haiku-20240307";
const MODEL_COHERE: &str = "command-light";
const MODEL_GEMINI: &str = "gemini-1.5-flash-latest";
const MODEL_GROQ: &str = "llama3-8b-8192";
const MODEL_OLLAMA: &str = "mixtral";
const MODEL_GROQ: &str = "gemma-7b-it";
const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b`
// NOTE: Those are the default environment keys for each AI Adapter Type.
// Can be customized, see `examples/c02-auth.rs`
const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
	// -- de/activate models/providers
	(MODEL_OPENAI, "OPENAI_API_KEY"),
	(MODEL_ANTHROPIC, "ANTHROPIC_API_KEY"),
	(MODEL_COHERE, "COHERE_API_KEY"),
	(MODEL_GEMINI, "GEMINI_API_KEY"),
	(MODEL_GROQ, "GROQ_API_KEY"),
	(MODEL_OLLAMA, ""),
];
// NOTE: Model to AdapterKind (AI Provider) type mapping rule
@@ -60,47 +61,47 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
// - model in Groq models -> Groq
// - For anything else -> Ollama
//
// Can be customized, see `examples/c03-kind.rs`
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
	let question = "Why is the sky red?";

	let chat_req = ChatRequest::new(vec![
		// -- Messages (de/activate to see the differences)
		ChatMessage::system("Answer in one sentence"),
		ChatMessage::user(question),
	]);

	let client = Client::default();

	let print_options = PrintChatStreamOptions::from_print_events(false);

	for (model, env_name) in MODEL_AND_KEY_ENV_NAME_LIST {
		// Skip if the key's environment variable is not set
		if !env_name.is_empty() && std::env::var(env_name).is_err() {
			println!("===== Skipping model: {model} (env var not set: {env_name})");
			continue;
		}

		let adapter_kind = client.resolve_adapter_kind(model)?;

		println!("\n===== MODEL: {model} ({adapter_kind}) =====");

		println!("\n--- Question:\n{question}");

		println!("\n--- Answer:");
		let chat_res = client.exec_chat(model, chat_req.clone(), None).await?;
		println!("{}", chat_res.content.as_deref().unwrap_or("NO ANSWER"));

		println!("\n--- Answer: (streaming)");
		let chat_res = client.exec_chat_stream(model, chat_req.clone(), None).await?;
		print_chat_stream(chat_res, Some(&print_options)).await?;

		println!();
	}

	Ok(())
}
```
@@ -143,13 +144,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
## MetaUsage
_NoImpl_ - Not Implemented Yet
| Property        | OpenAI <br />`usage.` | Ollama <br />`usage.`   | Groq `x_groq.usage.` | Anthropic `usage.`      | Gemini `usageMetadata.`    | Cohere `meta.tokens.` |
|-----------------|-----------------------|-------------------------|----------------------|-------------------------|----------------------------|-----------------------|
| `input_tokens`  | `prompt_tokens`       | `prompt_tokens` (1)     | `prompt_tokens`      | `input_tokens` (added)  | `promptTokenCount` (2)     | `input_tokens`        |
| `output_tokens` | `completion_tokens`   | `completion_tokens` (1) | `completion_tokens`  | `output_tokens` (added) | `candidatesTokenCount` (2) | `output_tokens`       |
| `total_tokens`  | `total_tokens`        | `total_tokens` (1)      | `total_tokens`       | (computed)              | `totalTokenCount` (2)      | (computed)            |
> **Note (1)**: At this point, `Ollama` does not emit input/output tokens when streaming due to the Ollama OpenAI compatibility layer limitation. (see [ollama #4448 - Streaming Chat Completion via OpenAI API should support stream option to include Usage](https://github.com/ollama/ollama/issues/4448))
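
As a complementary sketch, reading the normalized values (given the `client`, `model`, and `chat_req` from the example above) could look like this; the three field names come from the `Property` column, while the `Option<i32>` types are an assumption:

```rust
// Hedged sketch - assumes `chat_res.usage` exposes the table's three
// properties as Option<i32> fields.
let chat_res = client.exec_chat(model, chat_req.clone(), None).await?;
let usage = &chat_res.usage;
println!(
	"input: {:?}, output: {:?}, total: {:?}",
	usage.input_tokens, usage.output_tokens, usage.total_tokens
);
```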

View File

@@ -9,7 +9,8 @@ const MODEL_GEMINI: &str = "gemini-1.5-flash-latest";
const MODEL_GROQ: &str = "gemma-7b-it";
const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b`
// NOTE: Those are the default environment keys for each AI Adapter Type.
// Can be customized, see `examples/c02-auth.rs`
const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
// -- de/activate models/providers
(MODEL_OPENAI, "OPENAI_API_KEY"),
@@ -28,7 +29,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
// - model in Groq models -> Groq
// - For anything else -> Ollama
//
// Can be customized, see `examples/c03-kind.rs`
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {

View File

@@ -62,7 +62,7 @@ impl Adapter for AnthropicAdapter {
("anthropic-version".to_string(), ANTRHOPIC_VERSION.to_string()),
];
let AnthropicRequestParts { system, messages } = Self::into_anthropic_request_parts(chat_req)?;
// -- Build the basic payload
let mut payload = json!({
@@ -146,7 +146,7 @@ impl AnthropicAdapter {
/// Takes the genai ChatMessages and builds the system string and JSON messages for Anthropic.
/// - Pushes `ChatRequest.system` and the system messages into `AnthropicRequestParts.system`
fn into_anthropic_request_parts(chat_req: ChatRequest) -> Result<AnthropicRequestParts> {
let mut messages: Vec<Value> = Vec::new();
let mut systems: Vec<String> = Vec::new();
@@ -170,11 +170,11 @@
None
};
Ok(AnthropicRequestParts { system, messages })
}
}
struct AnthropicRequestParts {
system: Option<String>,
messages: Vec<Value>,
// TODO: need to add tools
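
For orientation, here is a hedged, self-contained sketch of how parts like these typically feed an Anthropic Messages API payload; it mirrors the fields shown above but is illustrative, not the adapter's actual code (the `max_tokens` requirement and the top-level `system` field are Anthropic API facts):

```rust
use serde_json::{json, Value};

struct AnthropicRequestParts {
	system: Option<String>,
	messages: Vec<Value>,
}

// Illustrative only: map the parts onto an Anthropic Messages API payload.
fn to_payload(model: &str, parts: AnthropicRequestParts, max_tokens: u32) -> Value {
	let mut payload = json!({
		"model": model,
		// e.g., [{"role": "user", "content": "..."}]
		"messages": parts.messages,
		// `max_tokens` is required by the Anthropic Messages API.
		"max_tokens": max_tokens,
	});
	// The combined system string travels as a top-level `system` field,
	// not as a message.
	if let Some(system) = parts.system {
		payload["system"] = json!(system);
	}
	payload
}
```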