From d946290be45dea816c765bafd6d508b69d035b37 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 4 Nov 2024 18:39:40 -0800 Subject: [PATCH] ^ anthropic - updated the default max_tokens to the max for the given model (i.e. 3-5 will be 8k) --- CHANGELOG.md | 7 +++++++ Cargo.toml | 2 +- .../adapters/anthropic/adapter_impl.rs | 20 +++++++++++++++---- tests/tests_p_anthropic.rs | 3 +++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0024b2d..59a4f5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ > - Version `0.2.x` will follow semver more strictly. > - API changes will be denoted as "`!` - **API CHANGE** ...." +## 2024-11-04 - `0.1.11` + +- `^` anthropic - updated the default max_tokens to the max for the given model (i.e. 3-5 will be 8k) +- `+` tool - First pass at adding Function Calling for OpenAI and Anthropic (rel #24) + - **NOTE**: The tool is still work in progress, but this should be a good first start. +- `.` update version to 0.1.11-WIP + ## 2024-10-05 - `0.1.10` (minor release) diff --git a/Cargo.toml b/Cargo.toml index 08986e2..cea63ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,5 +28,5 @@ reqwest-eventsource = "0.6" eventsource-stream = "0.2" bytes = "1.6" # -- Others -derive_more = { version = "1.0.0-beta", features = ["from", "display"] } +derive_more = { version = "1.0.0", features = ["from", "display"] } value-ext = "0.0.3" # JC Authored. Early release (API might change). Be cautious when using in other projects. 
\ No newline at end of file diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 3ce6f71..36a8c70 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -16,11 +16,17 @@ use value_ext::JsonValueExt; pub struct AnthropicAdapter; const BASE_URL: &str = "https://api.anthropic.com/v1/"; -const MAX_TOKENS: u32 = 1024; + +// NOTE: For Anthropic, the max_tokens must be specified. +// To avoid surprises, the default value for genai is the maximum for a given model. +// The 3-5 models have an 8k max token limit, while the 3 models have a 4k limit. +const MAX_TOKENS_8K: u32 = 8192; +const MAX_TOKENS_4K: u32 = 4096; + const ANTRHOPIC_VERSION: &str = "2023-06-01"; const MODELS: &[&str] = &[ "claude-3-5-sonnet-20241022", - "claude-3-5-sonnet-20240620", + "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-haiku-20240307", ]; @@ -66,7 +72,7 @@ impl Adapter for AnthropicAdapter { system, messages, tools, - } = Self::into_anthropic_request_parts(model_iden, chat_req)?; + } = Self::into_anthropic_request_parts(model_iden.clone(), chat_req)?; // -- Build the basic payload let mut payload = json!({ @@ -88,7 +94,13 @@ impl Adapter for AnthropicAdapter { payload.x_insert("temperature", temperature)?; } - let max_tokens = options_set.max_tokens().unwrap_or(MAX_TOKENS); + let max_tokens = options_set.max_tokens().unwrap_or_else(|| { + if model_iden.model_name.contains("3-5") { + MAX_TOKENS_8K + } else { + MAX_TOKENS_4K + } + }); payload.x_insert("max_tokens", max_tokens)?; // required for Anthropic if let Some(top_p) = options_set.top_p() { diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index c54cc57..ab83375 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -5,7 +5,10 @@ use genai::resolver::AuthData; type Result = core::result::Result>; // For tests. 
+// 4k output context const MODEL: &str = "claude-3-haiku-20240307"; +// 8k output context +// const MODEL: &str = "claude-3-5-haiku-20241022"; // region: --- Chat