mirror of
https://github.com/mii443/openai-api-rs.git
synced 2025-08-22 15:15:34 +00:00
Add realtime api
This commit is contained in:
@ -32,3 +32,11 @@ version = "1"
|
||||
|
||||
[dependencies.bytes]
|
||||
version = "1.7.1"
|
||||
|
||||
[dependencies.tokio-tungstenite]
|
||||
version = "0.24.0"
|
||||
features = ["connect", "native-tls"]
|
||||
|
||||
[dependencies.futures-util]
|
||||
version = "0.3.31"
|
||||
features = ["sink", "std"]
|
||||
|
@ -94,6 +94,7 @@ Check out the [full API documentation](https://platform.openai.com/docs/api-refe
|
||||
- [x] [Function calling](https://platform.openai.com/docs/guides/gpt/function-calling)
|
||||
- [x] [Assistants](https://platform.openai.com/docs/assistants/overview)
|
||||
- [x] [Batch](https://platform.openai.com/docs/api-reference/batch)
|
||||
- [x] [Realtime](https://platform.openai.com/docs/api-reference/realtime)
|
||||
|
||||
## License
|
||||
This project is licensed under [MIT license](https://github.com/dongri/openai-api-rs/blob/main/LICENSE).
|
||||
|
1
examples/realtime/.gitignore
vendored
Normal file
1
examples/realtime/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
target
|
1543
examples/realtime/Cargo.lock
generated
Normal file
1543
examples/realtime/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
13
examples/realtime/Cargo.toml
Normal file
13
examples/realtime/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "realtime"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
openai-api-rs = { path = "../../../openai-api-rs" }
|
||||
serde = { version = "1.0.210", features = ["derive"] }
|
||||
serde_json = "1.0.128"
|
||||
tokio = { version = "1.40.0", features = ["full"] }
|
||||
tokio-tungstenite = { version = "0.24.0", features = ["connect", "native-tls"] }
|
||||
futures-util = { version = "0.3.31", features = ["sink", "std"] }
|
||||
futures-channel = "0.3.31"
|
91
examples/realtime/src/main.rs
Normal file
91
examples/realtime/src/main.rs
Normal file
@ -0,0 +1,91 @@
|
||||
use std::process::exit;
|
||||
use std::env;
|
||||
|
||||
use futures_util::{future, pin_mut, StreamExt};
|
||||
use openai_api_rs::realtime::api::RealtimeClient;
|
||||
use openai_api_rs::realtime::client_event::{ConversationItemCreate, ResponseCreate};
|
||||
use openai_api_rs::realtime::server_event::ServerEvent;
|
||||
use openai_api_rs::realtime::types::Item;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio_tungstenite::tungstenite::protocol::Message;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let api_key = env::var("OPENAI_API_KEY").unwrap().to_string();
|
||||
let model = "gpt-4o-realtime-preview-2024-10-01".to_string();
|
||||
|
||||
let (stdin_tx, stdin_rx) = futures_channel::mpsc::unbounded();
|
||||
tokio::spawn(read_stdin(stdin_tx));
|
||||
|
||||
let realtime_client = RealtimeClient::new(api_key, model);
|
||||
|
||||
let (write, read) = realtime_client.connect().await.unwrap();
|
||||
println!("WebSocket handshake complete");
|
||||
|
||||
let stdin_to_ws = stdin_rx.map(Ok).forward(write);
|
||||
|
||||
let ws_to_stdout = {
|
||||
read.for_each(|message| async {
|
||||
let message = message.unwrap();
|
||||
match message {
|
||||
Message::Text(_) => {
|
||||
let data = message.clone().into_data();
|
||||
let server_event: ServerEvent = serde_json::from_slice(&data).unwrap();
|
||||
match server_event {
|
||||
ServerEvent::ResponseOutputItemDone(_event) => {
|
||||
eprintln!();
|
||||
}
|
||||
ServerEvent::ResponseAudioTranscriptDelta(event) => {
|
||||
eprint!("{}", event.delta.trim());
|
||||
}
|
||||
ServerEvent::Error(e) => {
|
||||
eprint!("{e:?}");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Message::Close(_) => {
|
||||
eprintln!("Close");
|
||||
exit(0);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
pin_mut!(stdin_to_ws, ws_to_stdout);
|
||||
future::select(stdin_to_ws, ws_to_stdout).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_stdin(tx: futures_channel::mpsc::UnboundedSender<Message>) {
|
||||
let mut stdin = tokio::io::stdin();
|
||||
loop {
|
||||
let mut buf = vec![0; 2048];
|
||||
let n = match stdin.read(&mut buf).await {
|
||||
Err(_) | Ok(0) => break,
|
||||
Ok(n) => n,
|
||||
};
|
||||
buf.truncate(n);
|
||||
let text = String::from_utf8_lossy(&buf).into_owned();
|
||||
let item = Item::try_from(serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": text.trim()
|
||||
}
|
||||
]
|
||||
}))
|
||||
.unwrap();
|
||||
let event = ConversationItemCreate {
|
||||
item,
|
||||
..Default::default()
|
||||
};
|
||||
let message: Message = event.into();
|
||||
tx.unbounded_send(message).unwrap();
|
||||
tx.unbounded_send(ResponseCreate::default().into()).unwrap();
|
||||
}
|
||||
}
|
@ -1 +1,2 @@
|
||||
pub mod realtime;
|
||||
pub mod v1;
|
||||
|
54
src/realtime/api.rs
Normal file
54
src/realtime/api.rs
Normal file
@ -0,0 +1,54 @@
|
||||
use futures_util::stream::{SplitSink, SplitStream};
|
||||
use futures_util::StreamExt;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio_tungstenite::{
|
||||
connect_async,
|
||||
tungstenite::{client::IntoClientRequest, protocol::Message},
|
||||
MaybeTlsStream, WebSocketStream,
|
||||
};
|
||||
|
||||
/// Endpoint used by `RealtimeClient::new` when the `WSS_URL` environment variable is not set.
const WSS_URL: &str = "wss://api.openai.com/v1/realtime";
|
||||
|
||||
/// Connection settings for a Realtime WebSocket session.
pub struct RealtimeClient {
    /// Base WebSocket URL; the `model` query parameter is appended when connecting.
    pub wss_url: String,
    /// Key sent as `Authorization: Bearer <api_key>` during the handshake.
    pub api_key: String,
    /// Model name passed as the `model` query parameter.
    pub model: String,
}
|
||||
|
||||
impl RealtimeClient {
|
||||
pub fn new(api_key: String, model: String) -> Self {
|
||||
let wss_url = std::env::var("WSS_URL").unwrap_or_else(|_| WSS_URL.to_owned());
|
||||
Self::new_with_endpoint(wss_url, api_key, model)
|
||||
}
|
||||
|
||||
pub fn new_with_endpoint(wss_url: String, api_key: String, model: String) -> Self {
|
||||
Self {
|
||||
wss_url,
|
||||
api_key,
|
||||
model,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn connect(
|
||||
&self,
|
||||
) -> Result<
|
||||
(
|
||||
SplitSink<WebSocketStream<MaybeTlsStream<TcpStream>>, Message>,
|
||||
SplitStream<WebSocketStream<MaybeTlsStream<TcpStream>>>,
|
||||
),
|
||||
Box<dyn std::error::Error>,
|
||||
> {
|
||||
let url = format!("{}?model={}", self.wss_url, self.model);
|
||||
let mut request = url.into_client_request()?;
|
||||
let api_key = self.api_key.clone();
|
||||
request
|
||||
.headers_mut()
|
||||
.insert("Authorization", format!("Bearer {api_key}").parse()?);
|
||||
request
|
||||
.headers_mut()
|
||||
.insert("OpenAI-Beta", "realtime=v1".parse()?);
|
||||
let (ws_stream, _) = connect_async(request).await?;
|
||||
let (write, read) = ws_stream.split();
|
||||
Ok((write, read))
|
||||
}
|
||||
}
|
157
src/realtime/client_event.rs
Normal file
157
src/realtime/client_event.rs
Normal file
@ -0,0 +1,157 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_tungstenite::tungstenite::Message;
|
||||
|
||||
use crate::realtime::types::{Item, Session};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct SessionUpdate {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
pub session: Session,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct InputAudioBufferAppend {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
pub audio: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct InputAudioBufferCommit {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct InputAudioBufferClear {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct ConversationItemCreate {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub previous_item_id: Option<String>,
|
||||
pub item: Item,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct ConversationItemTruncate {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
pub item_id: String,
|
||||
pub content_index: u32,
|
||||
pub audio_end_ms: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct ConversationItemDelete {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
pub item_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct ResponseCreate {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
pub response: Option<Session>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct ResponseCancel {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub event_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ClientEvent {
|
||||
#[serde(rename = "session.update")]
|
||||
SessionUpdate(SessionUpdate),
|
||||
#[serde(rename = "input_audio_buffer.append")]
|
||||
InputAudioBufferAppend(InputAudioBufferAppend),
|
||||
#[serde(rename = "input_audio_buffer.commit")]
|
||||
InputAudioBufferCommit(InputAudioBufferCommit),
|
||||
#[serde(rename = "input_audio_buffer.clear")]
|
||||
InputAudioBufferClear(InputAudioBufferClear),
|
||||
#[serde(rename = "conversation.item.create")]
|
||||
ConversationItemCreate(ConversationItemCreate),
|
||||
#[serde(rename = "conversation.item.truncate")]
|
||||
ConversationItemTruncate(ConversationItemTruncate),
|
||||
#[serde(rename = "conversation.item.delete")]
|
||||
ConversationItemDelete(ConversationItemDelete),
|
||||
#[serde(rename = "response.create")]
|
||||
ResponseCreate(ResponseCreate),
|
||||
#[serde(rename = "response.cancel")]
|
||||
ResponseCancel(ResponseCancel),
|
||||
}
|
||||
|
||||
impl From<ClientEvent> for Message {
|
||||
fn from(value: ClientEvent) -> Self {
|
||||
Message::Text(String::from(&value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ClientEvent> for String {
|
||||
fn from(value: &ClientEvent) -> Self {
|
||||
serde_json::to_string(value).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ConversationItemCreate> for Message {
|
||||
fn from(value: ConversationItemCreate) -> Self {
|
||||
Self::from(ClientEvent::ConversationItemCreate(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<InputAudioBufferAppend> for Message {
|
||||
fn from(value: InputAudioBufferAppend) -> Self {
|
||||
Self::from(ClientEvent::InputAudioBufferAppend(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<InputAudioBufferCommit> for Message {
|
||||
fn from(value: InputAudioBufferCommit) -> Self {
|
||||
Self::from(ClientEvent::InputAudioBufferCommit(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<InputAudioBufferClear> for Message {
|
||||
fn from(value: InputAudioBufferClear) -> Self {
|
||||
Self::from(ClientEvent::InputAudioBufferClear(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SessionUpdate> for Message {
|
||||
fn from(value: SessionUpdate) -> Self {
|
||||
Self::from(ClientEvent::SessionUpdate(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ConversationItemTruncate> for Message {
|
||||
fn from(value: ConversationItemTruncate) -> Self {
|
||||
Self::from(ClientEvent::ConversationItemTruncate(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ConversationItemDelete> for Message {
|
||||
fn from(value: ConversationItemDelete) -> Self {
|
||||
Self::from(ClientEvent::ConversationItemDelete(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ResponseCreate> for Message {
|
||||
fn from(value: ResponseCreate) -> Self {
|
||||
Self::from(ClientEvent::ResponseCreate(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ResponseCancel> for Message {
|
||||
fn from(value: ResponseCancel) -> Self {
|
||||
Self::from(ClientEvent::ResponseCancel(value))
|
||||
}
|
||||
}
|
4
src/realtime/mod.rs
Normal file
4
src/realtime/mod.rs
Normal file
@ -0,0 +1,4 @@
|
||||
pub mod api;
|
||||
pub mod client_event;
|
||||
pub mod server_event;
|
||||
pub mod types;
|
288
src/realtime/server_event.rs
Normal file
288
src/realtime/server_event.rs
Normal file
@ -0,0 +1,288 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::realtime::types::{
|
||||
APIError, ContentPart, Conversation, Item, RateLimit, Response, Session,
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Error {
|
||||
pub event_id: String,
|
||||
pub error: APIError,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct SessionCreated {
|
||||
pub event_id: String,
|
||||
pub session: Session,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct SessionUpdated {
|
||||
pub event_id: String,
|
||||
pub session: Session,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationCreated {
|
||||
pub event_id: String,
|
||||
pub conversation: Conversation,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct InputAudioBufferCommited {
|
||||
pub event_id: String,
|
||||
pub previous_item_id: String,
|
||||
pub item_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct InputAudioBufferCleared {
|
||||
pub event_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct InputAudioBufferSpeechStarted {
|
||||
pub event_id: String,
|
||||
pub audio_start_ms: u32,
|
||||
pub item_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct InputAudioBufferSpeechStopped {
|
||||
pub event_id: String,
|
||||
pub audio_end_ms: u32,
|
||||
pub item_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationItemCreated {
|
||||
pub event_id: String,
|
||||
pub previous_item_id: Option<String>,
|
||||
pub item: Item,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationItemInputAudioTranscriptionCompleted {
|
||||
pub event_id: String,
|
||||
pub item_id: String,
|
||||
pub content_index: u32,
|
||||
pub transcript: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationItemInputAudioTranscriptionFailed {
|
||||
pub event_id: String,
|
||||
pub item_id: String,
|
||||
pub content_index: u32,
|
||||
pub error: APIError,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationItemTruncated {
|
||||
pub event_id: String,
|
||||
pub item_id: String,
|
||||
pub content_index: u32,
|
||||
pub audio_end_ms: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ConversationItemDeleted {
|
||||
pub event_id: String,
|
||||
pub item_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseCreated {
|
||||
pub event_id: String,
|
||||
pub response: Response,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseDone {
|
||||
pub event_id: String,
|
||||
pub response: Response,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseOutputItemAdded {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub output_index: u32,
|
||||
pub item: Item,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseOutputItemDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub output_index: u32,
|
||||
pub item: Item,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseContentPartAdded {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub part: ContentPart,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseContentPartDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub part: ContentPart,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseTextDelta {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseTextDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseAudioTranscriptDelta {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseAudioTranscriptDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub transcript: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseAudioDelta {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseAudioDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub content_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseFunctionCallArgumentsDelta {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub call_id: String,
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResponseFunctionCallArgumentsDone {
|
||||
pub event_id: String,
|
||||
pub response_id: String,
|
||||
pub item_id: String,
|
||||
pub output_index: u32,
|
||||
pub call_id: String,
|
||||
pub arguments: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RateLimitsUpdated {
|
||||
pub event_id: String,
|
||||
pub rate_limits: Vec<RateLimit>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ServerEvent {
|
||||
#[serde(rename = "error")]
|
||||
Error(Error),
|
||||
#[serde(rename = "session.created")]
|
||||
SessionCreated(SessionCreated),
|
||||
#[serde(rename = "session.updated")]
|
||||
SessionUpdated(SessionUpdated),
|
||||
#[serde(rename = "conversation.created")]
|
||||
ConversationCreated(ConversationCreated),
|
||||
#[serde(rename = "input_audio_buffer.committed")]
|
||||
InputAudioBufferCommited(InputAudioBufferCommited),
|
||||
#[serde(rename = "input_audio_buffer.cleared")]
|
||||
InputAudioBufferCleared(InputAudioBufferCleared),
|
||||
#[serde(rename = "input_audio_buffer.speech_started")]
|
||||
InputAudioBufferSpeechStarted(InputAudioBufferSpeechStarted),
|
||||
#[serde(rename = "input_audio_buffer.speech_stopped")]
|
||||
InputAudioBufferSpeechStopped(InputAudioBufferSpeechStopped),
|
||||
#[serde(rename = "conversation.item.created")]
|
||||
ConversationItemCreated(ConversationItemCreated),
|
||||
#[serde(rename = "conversation.item.input_audio_transcription.completed")]
|
||||
ConversationItemInputAudioTranscriptionCompleted(
|
||||
ConversationItemInputAudioTranscriptionCompleted,
|
||||
),
|
||||
#[serde(rename = "conversation.item.input_audio_transcription.failed")]
|
||||
ConversationItemInputAudioTranscriptionFailed(ConversationItemInputAudioTranscriptionFailed),
|
||||
#[serde(rename = "conversation.item.truncated")]
|
||||
ConversationItemTruncated(ConversationItemTruncated),
|
||||
#[serde(rename = "conversation.item.deleted")]
|
||||
ConversationItemDeleted(ConversationItemDeleted),
|
||||
#[serde(rename = "response.created")]
|
||||
ResponseCreated(ResponseCreated),
|
||||
#[serde(rename = "response.done")]
|
||||
ResponseDone(ResponseDone),
|
||||
#[serde(rename = "response.output_item.added")]
|
||||
ResponseOutputItemAdded(ResponseOutputItemAdded),
|
||||
#[serde(rename = "response.output_item.done")]
|
||||
ResponseOutputItemDone(ResponseOutputItemDone),
|
||||
#[serde(rename = "response.content_part.added")]
|
||||
ResponseContentPartAdded(ResponseContentPartAdded),
|
||||
#[serde(rename = "response.content_part.done")]
|
||||
ResponseContentPartDone(ResponseContentPartDone),
|
||||
#[serde(rename = "response.text.delta")]
|
||||
ResponseTextDelta(ResponseTextDelta),
|
||||
#[serde(rename = "response.text.done")]
|
||||
ResponseTextDone(ResponseTextDone),
|
||||
#[serde(rename = "response.audio_transcript.delta")]
|
||||
ResponseAudioTranscriptDelta(ResponseAudioTranscriptDelta),
|
||||
#[serde(rename = "response.audio_transcript.done")]
|
||||
ResponseAudioTranscriptDone(ResponseAudioTranscriptDone),
|
||||
#[serde(rename = "response.audio.delta")]
|
||||
ResponseAudioDelta(ResponseAudioDelta),
|
||||
#[serde(rename = "response.audio.done")]
|
||||
ResponseAudioDone(ResponseAudioDone),
|
||||
#[serde(rename = "response.function_call_arguments.delta")]
|
||||
ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta),
|
||||
#[serde(rename = "response.function_call_arguments.done")]
|
||||
ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone),
|
||||
#[serde(rename = "rate_limits.updated")]
|
||||
RateLimitsUpdated(RateLimitsUpdated),
|
||||
}
|
259
src/realtime/types.rs
Normal file
259
src/realtime/types.rs
Normal file
@ -0,0 +1,259 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct Session {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub modalities: Option<Vec<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instructions: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub voice: Option<RealtimeVoice>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub input_audio_format: Option<AudioFormat>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub output_audio_format: Option<AudioFormat>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub input_audio_transcription: Option<AudioTranscription>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub turn_detection: Option<TurnDetection>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tools: Option<Vec<ToolDefinition>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_choice: Option<ToolChoice>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<MaxOutputTokens>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum RealtimeVoice {
|
||||
Alloy,
|
||||
Shimmer,
|
||||
Echo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum AudioFormat {
|
||||
#[serde(rename = "pcm16")]
|
||||
PCM16,
|
||||
#[serde(rename = "g711-ulaw")]
|
||||
G711ULAW,
|
||||
#[serde(rename = "g711-alaw")]
|
||||
G711ALAW,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct AudioTranscription {
|
||||
pub enabled: bool,
|
||||
pub model: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum TurnDetection {
|
||||
#[serde(rename = "server_vad")]
|
||||
ServerVAD {
|
||||
threshold: f32,
|
||||
prefix_padding_ms: u32,
|
||||
silence_duration_ms: u32,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ToolDefinition {
|
||||
#[serde(rename = "function")]
|
||||
Function {
|
||||
name: String,
|
||||
description: String,
|
||||
parameters: serde_json::Value,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ToolChoice {
|
||||
Auto,
|
||||
None,
|
||||
Required,
|
||||
#[serde(untagged)]
|
||||
Function {
|
||||
r#type: FunctionType,
|
||||
name: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum FunctionType {
|
||||
Function,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum MaxOutputTokens {
|
||||
Num(u16),
|
||||
#[serde(rename = "inf")]
|
||||
Inf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ItemType {
|
||||
Message,
|
||||
FunctionCall,
|
||||
FunctionCallOutput,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ItemStatus {
|
||||
Completed,
|
||||
InProgress,
|
||||
Incomplete,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ItemRole {
|
||||
User,
|
||||
Assistant,
|
||||
System,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ItemContentType {
|
||||
InputText,
|
||||
InputAudio,
|
||||
Text,
|
||||
Audio,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ItemContent {
|
||||
pub r#type: ItemContentType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub text: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub audio: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub transcript: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct Item {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub r#type: Option<ItemType>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub status: Option<ItemStatus>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub role: Option<ItemRole>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub content: Option<Vec<ItemContent>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub call_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub name: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub arguments: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub output: Option<String>,
|
||||
}
|
||||
|
||||
impl TryFrom<serde_json::Value> for Item {
|
||||
type Error = serde_json::Error;
|
||||
|
||||
fn try_from(value: serde_json::Value) -> Result<Self, Self::Error> {
|
||||
serde_json::from_value(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct APIError {
|
||||
pub r#type: String,
|
||||
pub code: Option<String>,
|
||||
pub message: String,
|
||||
pub param: Option<String>,
|
||||
pub event_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Conversation {
|
||||
pub id: String,
|
||||
pub object: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Response {
|
||||
pub id: String,
|
||||
pub object: String,
|
||||
pub status: ResponseStatus,
|
||||
pub status_details: Option<ResponseStatusDetail>,
|
||||
pub output: Vec<Item>,
|
||||
pub usage: Option<Usage>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Usage {
|
||||
pub total_tokens: u32,
|
||||
pub input_tokens: u32,
|
||||
pub output_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ResponseStatus {
|
||||
InProgress,
|
||||
Completed,
|
||||
Cancelled,
|
||||
Failed,
|
||||
Incomplete,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ResponseStatusDetail {
|
||||
#[serde(rename = "incomplete")]
|
||||
Incomplete { reason: IncompleteReason },
|
||||
#[serde(rename = "failed")]
|
||||
Failed { error: Option<FailedError> },
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FailedError {
|
||||
pub code: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum IncompleteReason {
|
||||
Interruption,
|
||||
MaxOutputTokens,
|
||||
ContentFilter,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ContentPart {
|
||||
#[serde(rename = "text")]
|
||||
Text { text: String },
|
||||
#[serde(rename = "audio")]
|
||||
Audio {
|
||||
audio: Option<String>,
|
||||
transcript: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RateLimit {
|
||||
pub name: String,
|
||||
pub limit: u32,
|
||||
pub remaining: u32,
|
||||
pub reset_seconds: f32,
|
||||
}
|
Reference in New Issue
Block a user