mirror of
https://github.com/mii443/rust-genai.git
synced 2025-08-22 16:25:27 +00:00
Initial image support
Fix image structure for OpenAI API. Implement base64 image support for OpenAI. Image Support: Some APIs (Gemini) require a mime type for both URL and Base64 formats. Image Support: Update OpenAI and Anthropic API to support the new image structure. Image Support: Add Gemini 2.0 Flash Experimental support and implement Image support. Image Support: Create an example with Image support. Image Support: Fix rebase issue. Image Support: Fix the example and make it runnable from cargo.
This commit is contained in:
committed by
Jeremy Chone
parent
974489e69a
commit
59f0b149b0
@ -9,6 +9,10 @@ keywords = ["generative-ai","openai","chatgpt","gemini","ollama"]
|
||||
homepage = "https://github.com/jeremychone/rust-genai"
|
||||
repository = "https://github.com/jeremychone/rust-genai"
|
||||
|
||||
[[example]]
|
||||
name = "images"
|
||||
path = "examples/c07-image.rs"
|
||||
|
||||
[lints.rust]
|
||||
unsafe_code = "forbid"
|
||||
# unused = { level = "allow", priority = -1 } # For exploratory dev.
|
||||
|
35
examples/c07-image.rs
Normal file
35
examples/c07-image.rs
Normal file
@ -0,0 +1,35 @@
|
||||
//! This example demonstrates how to attach an image to a conversation.
|
||||
|
||||
use genai::chat::printer::print_chat_stream;
|
||||
use genai::chat::{ChatMessage, ChatRequest, ContentPart, ImageSource};
|
||||
use genai::Client;
|
||||
|
||||
const MODEL: &str = "gpt-4o-mini";
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let client = Client::default();
|
||||
|
||||
let question = "What is in this picture?";
|
||||
|
||||
let mut chat_req = ChatRequest::default().with_system("Answer in one sentence");
|
||||
// This is similar to sending initial system chat messages (which will be cumulative with system chat messages)
|
||||
chat_req = chat_req.append_message(ChatMessage::user(
|
||||
vec![
|
||||
ContentPart::Text(question.to_string()),
|
||||
ContentPart::Image {
|
||||
content: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg".to_string(),
|
||||
content_type: "image/png".to_string(),
|
||||
source: ImageSource::Url,
|
||||
}
|
||||
]
|
||||
));
|
||||
|
||||
println!("\n--- Question:\n{question}");
|
||||
let chat_res = client.exec_chat_stream(MODEL, chat_req.clone(), None).await?;
|
||||
|
||||
println!("\n--- Answer: (streaming)");
|
||||
let assistant_answer = print_chat_stream(chat_res, None).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
@ -3,7 +3,7 @@ use crate::adapter::anthropic::AnthropicStreamer;
|
||||
use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData};
|
||||
use crate::chat::{
|
||||
ChatOptionsSet, ChatRequest, ChatResponse, ChatRole, ChatStream, ChatStreamResponse, MessageContent, MetaUsage,
|
||||
ToolCall,
|
||||
ToolCall, ContentPart, ImageSource,
|
||||
};
|
||||
use crate::resolver::{AuthData, Endpoint};
|
||||
use crate::webc::WebResponse;
|
||||
@ -236,10 +236,35 @@ impl AnthropicAdapter {
|
||||
// TODO: Needs to trace/warn that other type are not supported
|
||||
}
|
||||
ChatRole::User => {
|
||||
if let MessageContent::Text(content) = msg.content {
|
||||
messages.push(json! ({"role": "user", "content": content}))
|
||||
}
|
||||
// TODO: Needs to trace/warn that other type are not supported
|
||||
let content = match msg.content {
|
||||
MessageContent::Text(content) => json!(content),
|
||||
MessageContent::Parts(parts) => {
|
||||
json!(parts.iter().map(|part| match part {
|
||||
ContentPart::Text(text) => json!({"type": "text", "text": text.clone()}),
|
||||
ContentPart::Image{content, content_type, source} => {
|
||||
match source {
|
||||
ImageSource::Url => todo!("Anthropic doesn't support images from URL, need to handle it gracefully"),
|
||||
ImageSource::Base64 => json!({
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": content_type,
|
||||
"data": content,
|
||||
},
|
||||
}),
|
||||
}
|
||||
},
|
||||
}).collect::<Vec<Value>>())
|
||||
},
|
||||
// Use `match` instead of `if let`. This will allow to future-proof this
|
||||
// implementation in case some new message content types would appear,
|
||||
// this way library would not compile if not all methods are implemented
|
||||
// continue would allow to gracefully skip pushing unserializable message
|
||||
// TODO: Probably need to warn if it is a ToolCalls type of content
|
||||
MessageContent::ToolCalls(_) => continue,
|
||||
MessageContent::ToolResponses(_) => continue,
|
||||
};
|
||||
messages.push(json! ({"role": "user", "content": content}));
|
||||
}
|
||||
ChatRole::Assistant => {
|
||||
//
|
||||
@ -266,6 +291,7 @@ impl AnthropicAdapter {
|
||||
}));
|
||||
}
|
||||
// TODO: Probably need to trace/warn that this will be ignored
|
||||
MessageContent::Parts(_) => (),
|
||||
MessageContent::ToolResponses(_) => (),
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ use crate::adapter::gemini::GeminiStreamer;
|
||||
use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData};
|
||||
use crate::chat::{
|
||||
ChatOptionsSet, ChatRequest, ChatResponse, ChatResponseFormat, ChatRole, ChatStream, ChatStreamResponse,
|
||||
MessageContent, MetaUsage,
|
||||
MessageContent, MetaUsage, ContentPart, ImageSource
|
||||
};
|
||||
use crate::resolver::{AuthData, Endpoint};
|
||||
use crate::webc::{WebResponse, WebStream};
|
||||
@ -21,6 +21,7 @@ const MODELS: &[&str] = &[
|
||||
"gemini-1.5-flash-8b",
|
||||
"gemini-1.0-pro",
|
||||
"gemini-1.5-flash-latest",
|
||||
"gemini-2.0-flash-exp"
|
||||
];
|
||||
|
||||
// curl \
|
||||
@ -214,19 +215,61 @@ impl GeminiAdapter {
|
||||
|
||||
// -- Build
|
||||
for msg in chat_req.messages {
|
||||
// TODO: Needs to implement tool_calls
|
||||
let MessageContent::Text(content) = msg.content else {
|
||||
return Err(Error::MessageContentTypeNotSupported {
|
||||
model_iden,
|
||||
cause: "Only MessageContent::Text supported for this model (for now)",
|
||||
});
|
||||
};
|
||||
|
||||
match msg.role {
|
||||
// For now, system goes as "user" (later, we might have adapter_config.system_to_user_impl)
|
||||
ChatRole::System => systems.push(content),
|
||||
ChatRole::User => contents.push(json! ({"role": "user", "parts": [{"text": content}]})),
|
||||
ChatRole::Assistant => contents.push(json! ({"role": "model", "parts": [{"text": content}]})),
|
||||
ChatRole::System => {
|
||||
let MessageContent::Text(content) = msg.content else {
|
||||
return Err(Error::MessageContentTypeNotSupported {
|
||||
model_iden,
|
||||
cause: "Only MessageContent::Text supported for this model (for now)",
|
||||
});
|
||||
};
|
||||
systems.push(content)
|
||||
},
|
||||
ChatRole::User => {
|
||||
let content = match msg.content {
|
||||
MessageContent::Text(content) => json!([{"text": content}]),
|
||||
MessageContent::Parts(parts) => {
|
||||
json!(parts.iter().map(|part| match part {
|
||||
ContentPart::Text(text) => json!({"text": text.clone()}),
|
||||
ContentPart::Image{content, content_type, source} => {
|
||||
match source {
|
||||
ImageSource::Url => json!({
|
||||
"file_data": {
|
||||
"mime_type": content_type,
|
||||
"file_uri": content
|
||||
}
|
||||
}),
|
||||
ImageSource::Base64 => json!({
|
||||
"inline_data": {
|
||||
"mime_type": content_type,
|
||||
"data": content
|
||||
}
|
||||
}),
|
||||
}
|
||||
},
|
||||
}).collect::<Vec<Value>>())
|
||||
},
|
||||
// Use `match` instead of `if let`. This will allow to future-proof this
|
||||
// implementation in case some new message content types would appear,
|
||||
// this way library would not compile if not all methods are implemented
|
||||
// continue would allow to gracefully skip pushing unserializable message
|
||||
// TODO: Probably need to warn if it is a ToolCalls type of content
|
||||
MessageContent::ToolCalls(_) => continue,
|
||||
MessageContent::ToolResponses(_) => continue,
|
||||
};
|
||||
|
||||
contents.push(json!({"role": "user", "parts": content}));
|
||||
},
|
||||
ChatRole::Assistant => {
|
||||
let MessageContent::Text(content) = msg.content else {
|
||||
return Err(Error::MessageContentTypeNotSupported {
|
||||
model_iden,
|
||||
cause: "Only MessageContent::Text supported for this model (for now)",
|
||||
});
|
||||
};
|
||||
contents.push(json!({"role": "model", "parts": [{"text": content}]}))
|
||||
},
|
||||
ChatRole::Tool => {
|
||||
return Err(Error::MessageRoleNotSupported {
|
||||
model_iden,
|
||||
|
@ -3,7 +3,7 @@ use crate::adapter::openai::OpenAIStreamer;
|
||||
use crate::adapter::{Adapter, AdapterDispatcher, AdapterKind, ServiceType, WebRequestData};
|
||||
use crate::chat::{
|
||||
ChatOptionsSet, ChatRequest, ChatResponse, ChatResponseFormat, ChatRole, ChatStream, ChatStreamResponse,
|
||||
MessageContent, MetaUsage, ToolCall,
|
||||
MessageContent, MetaUsage, ToolCall, ContentPart, ImageSource
|
||||
};
|
||||
use crate::resolver::{AuthData, Endpoint};
|
||||
use crate::webc::WebResponse;
|
||||
@ -250,10 +250,31 @@ impl OpenAIAdapter {
|
||||
// TODO: Probably need to warn if it is a ToolCalls type of content
|
||||
}
|
||||
ChatRole::User => {
|
||||
if let MessageContent::Text(content) = msg.content {
|
||||
messages.push(json! ({"role": "user", "content": content}));
|
||||
}
|
||||
// TODO: Probably need to warn if it is a ToolCalls type of content
|
||||
let content = match msg.content {
|
||||
MessageContent::Text(content) => json!(content),
|
||||
MessageContent::Parts(parts) => {
|
||||
json!(parts.iter().map(|part| match part {
|
||||
ContentPart::Text(text) => json!({"type": "text", "text": text.clone()}),
|
||||
ContentPart::Image{content, content_type, source} => {
|
||||
match source {
|
||||
ImageSource::Url => json!({"type": "image_url", "image_url": {"url": content}}),
|
||||
ImageSource::Base64 => {
|
||||
let image_url = format!("data:{content_type};base64,{content}");
|
||||
json!({"type": "image_url", "image_url": {"url": image_url}})
|
||||
},
|
||||
}
|
||||
},
|
||||
}).collect::<Vec<Value>>())
|
||||
},
|
||||
// Use `match` instead of `if let`. This will allow to future-proof this
|
||||
// implementation in case some new message content types would appear,
|
||||
// this way library would not compile if not all methods are implemented
|
||||
// continue would allow to gracefully skip pushing unserializable message
|
||||
// TODO: Probably need to warn if it is a ToolCalls type of content
|
||||
MessageContent::ToolCalls(_) => continue,
|
||||
MessageContent::ToolResponses(_) => continue,
|
||||
};
|
||||
messages.push(json! ({"role": "user", "content": content}));
|
||||
}
|
||||
|
||||
ChatRole::Assistant => match msg.content {
|
||||
@ -275,6 +296,7 @@ impl OpenAIAdapter {
|
||||
messages.push(json! ({"role": "assistant", "tool_calls": tool_calls}))
|
||||
}
|
||||
// TODO: Probably need to trace/warn that this will be ignored
|
||||
MessageContent::Parts(_) => (),
|
||||
MessageContent::ToolResponses(_) => (),
|
||||
},
|
||||
|
||||
|
@ -2,13 +2,14 @@ use crate::chat::{ToolCall, ToolResponse};
|
||||
use derive_more::derive::From;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The content of a chat message: plain text, multi-part content (text and images),
|
||||
/// tool calls, or tool responses.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, From)]
|
||||
pub enum MessageContent {
|
||||
/// Text content
|
||||
Text(String),
|
||||
|
||||
/// Content parts
|
||||
Parts(Vec<ContentPart>),
|
||||
|
||||
/// Tool calls
|
||||
#[from]
|
||||
ToolCalls(Vec<ToolCall>),
|
||||
@ -25,6 +26,9 @@ impl MessageContent {
|
||||
MessageContent::Text(content.into())
|
||||
}
|
||||
|
||||
/// Create a new MessageContent from provided content parts
|
||||
pub fn from_parts(parts: impl Into<Vec<ContentPart>>) -> Self { MessageContent::Parts(parts.into()) }
|
||||
|
||||
/// Create a new MessageContent with the ToolCalls variant
|
||||
pub fn from_tool_calls(tool_calls: Vec<ToolCall>) -> Self {
|
||||
MessageContent::ToolCalls(tool_calls)
|
||||
@ -40,6 +44,12 @@ impl MessageContent {
|
||||
pub fn text_as_str(&self) -> Option<&str> {
|
||||
match self {
|
||||
MessageContent::Text(content) => Some(content.as_str()),
|
||||
MessageContent::Parts(parts) => {
|
||||
Some(parts.iter().filter_map(|part| match part {
|
||||
ContentPart::Text(content) => Some(content.clone()),
|
||||
_ => None,
|
||||
}).collect::<Vec<String>>().join("\n").leak()) // TODO revisit this, should we leak &str?
|
||||
},
|
||||
MessageContent::ToolCalls(_) => None,
|
||||
MessageContent::ToolResponses(_) => None,
|
||||
}
|
||||
@ -53,6 +63,12 @@ impl MessageContent {
|
||||
pub fn text_into_string(self) -> Option<String> {
|
||||
match self {
|
||||
MessageContent::Text(content) => Some(content),
|
||||
MessageContent::Parts(parts) => {
|
||||
Some(parts.into_iter().filter_map(|part| match part {
|
||||
ContentPart::Text(content) => Some(content),
|
||||
_ => None,
|
||||
}).collect::<Vec<String>>().join("\n"))
|
||||
},
|
||||
MessageContent::ToolCalls(_) => None,
|
||||
MessageContent::ToolResponses(_) => None,
|
||||
}
|
||||
@ -62,6 +78,7 @@ impl MessageContent {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
MessageContent::Text(content) => content.is_empty(),
|
||||
MessageContent::Parts(parts) => parts.is_empty(),
|
||||
MessageContent::ToolCalls(tool_calls) => tool_calls.is_empty(),
|
||||
MessageContent::ToolResponses(tool_responses) => tool_responses.is_empty(),
|
||||
}
|
||||
@ -94,27 +111,39 @@ impl From<ToolResponse> for MessageContent {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a list of content parts into the `Parts` variant.
impl From<Vec<ContentPart>> for MessageContent {
    fn from(content_parts: Vec<ContentPart>) -> Self {
        Self::Parts(content_parts)
    }
}
|
||||
|
||||
// endregion: --- Froms
|
||||
|
||||
// NOTE: The goal is to add a Parts variant with ContentPart for multipart support
|
||||
//
|
||||
// ```
|
||||
// pub enum MessageContent {
|
||||
// Text(String),
|
||||
// Parts(Vec<ContentPart>),
|
||||
// }
|
||||
// ```
|
||||
//
|
||||
// With something like this:
|
||||
// ```
|
||||
// pub enum ContentPart {
|
||||
// Text(String),
|
||||
// Image(ImagePart)
|
||||
// }
|
||||
//
|
||||
// pub enum ImagePart {
|
||||
// Local(PathBuf),
|
||||
// Remote(Url),
|
||||
// Base64(String)
|
||||
// }
|
||||
// ```
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, From)]
|
||||
pub enum ContentPart {
|
||||
Text(String),
|
||||
Image {
|
||||
content: String,
|
||||
content_type: String,
|
||||
source: ImageSource,
|
||||
},
|
||||
}
|
||||
|
||||
// region: --- Froms
|
||||
|
||||
impl<'a> From<&'a str> for ContentPart {
|
||||
fn from(s: &'a str) -> Self {
|
||||
ContentPart::Text(s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
// endregion: --- Froms
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, From)]
|
||||
pub enum ImageSource {
|
||||
Url,
|
||||
Base64
|
||||
|
||||
// No `Local` location, this would require handling errors like "file not found" etc.
|
||||
// Such file can be easily provided by user as Base64, also can implement convenient
|
||||
// TryFrom<File> to Base64 version. All LLMs accepts local Images only as Base64
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use genai::chat::{ChatMessage, ChatRequest, Tool};
|
||||
use genai::chat::{ChatMessage, ChatRequest, ContentPart, ImageSource, Tool};
|
||||
use serde_json::json;
|
||||
|
||||
pub fn seed_chat_req_simple() -> ChatRequest {
|
||||
@ -9,6 +9,21 @@ pub fn seed_chat_req_simple() -> ChatRequest {
|
||||
])
|
||||
}
|
||||
|
||||
pub fn seed_chat_req_with_image() -> ChatRequest {
|
||||
ChatRequest::new(vec![
|
||||
// -- Messages (deactivate to see the differences)
|
||||
ChatMessage::system("Answer in one sentence"),
|
||||
ChatMessage::user(vec![
|
||||
ContentPart::from("What is in this image?"),
|
||||
ContentPart::Image {
|
||||
content: "BASE64 ENCODED IMAGE".to_string(),
|
||||
content_type:"image/png".to_string(),
|
||||
source: ImageSource::Base64,
|
||||
}
|
||||
]),
|
||||
])
|
||||
}
|
||||
|
||||
pub fn seed_chat_req_tool_simple() -> ChatRequest {
|
||||
ChatRequest::new(vec![
|
||||
// -- Messages (deactivate to see the differences)
|
||||
|
Reference in New Issue
Block a user