Addressed comments and added a basic doc

This commit is contained in:
Johnathan Sharratt
2023-10-23 07:27:12 +11:00
parent ec74b83f25
commit 8f11ac78b0
12 changed files with 218 additions and 70 deletions

78
docs/snapshots.md Normal file
View File

@@ -0,0 +1,78 @@
# WASM Snapshot Functionality
Wasmer now supports taking snapshots of the currently running process into a journal
log file, which allows execution to be resumed from an earlier point in time.
# Triggers
Various triggers can cause a snapshot to be taken at a specific point in time.
These are:
## On Idle
Triggered when all the threads in the process go idle.
## On Listen
Triggered when a listen syscall is invoked on a socket.
## On Stdin
Triggered when the process reads stdin for the first time.
## On Timer
Triggered periodically based on a timer (default 10 seconds), which can be specified using the `snapshot-timer` option.
## On Sigint (Ctrl+C)
Issued if the user sends an interrupt signal (Ctrl + C).
## On Sigalrm
Issued when the alarm clock signal is raised (used for timers);
see `man alarm`.
## On Sigtstp
The SIGTSTP signal is sent to a process by its controlling terminal to request it to stop temporarily. It is commonly initiated by the user pressing Ctrl-Z.
## On Sigstop
The SIGSTOP signal instructs the operating system to stop a process for later resumption.
## On Non Deterministic Call
Triggered when a non-deterministic call is made from WASM.
# Limitations
- The WASM process must have had the `asyncify` post processing step applied to the binary.
- Taking a snapshot can consume large amounts of memory while it is being processed.
- Snapshots are not instant and have overhead when generating.
- The layout of the memory must be known by the runtime in order to take snapshots.
# Design
On startup, if a snapshot file to restore from is specified, the runtime will restore the
state of the WASM process by reading and processing the log entries in the snapshot
journal. This restoration brings the memory and the thread stacks back to a previous
point in time and then resumes all the threads.
When a trigger occurs, a new snapshot of the WASM process is taken using the
following steps:
1. Pause all threads
2. Capture the stack of each thread
3. Write the thread state to the journal
4. Write the memory (excluding stacks) to the journal
5. Resume execution.
The implementation is currently able to save and restore the following:
- WASM Memory
- Stack memory
- Call stack
- Open sockets
- Open files
- Terminal text

View File

@@ -160,11 +160,11 @@ default = [
"wat",
"wast",
"compiler",
"snapshooter",
"snapshot",
"wasmer-artifact-create",
"static-artifact-create",
]
snapshooter = ["wasmer-wasix/snapshooter"]
snapshot = ["wasmer-wasix/snapshot"]
backend = []
coredump = ["wasm-coredump-builder"]
sys = ["compiler", "wasmer-vm"]

View File

@@ -109,7 +109,7 @@ pub struct Wasi {
/// Specifies the snapshot file that Wasmer will use to store
/// the state of the WASM process so that it can be later restored
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
#[clap(long = "snapshot-to")]
pub snapshot_to: Option<PathBuf>,
@@ -119,19 +119,20 @@ pub struct Wasi {
/// If not specified, the default is to snapshot on idle plus if a
/// snapshot period is provided it will also default to periodic snapshots
/// as well.
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
#[clap(long = "snapshot-on")]
pub snapshot_on: Vec<SnapshotTrigger>,
/// Time in seconds between taking snapshots of the process and dumping
/// them to the snapshot file.
#[cfg(feature = "snapshooter")]
#[clap(long = "snapshot-period")]
pub snapshot_period: Option<u64>,
/// Adds a timer (measured in seconds) that takes snapshots of the process and dumps the
/// journal of events to the snapshot file. When specifying this parameter it implies
/// that `--snapshot-on timer` has also been specified.
#[cfg(feature = "snapshot")]
#[clap(long = "snapshot-timer")]
pub snapshot_timer: Option<u64>,
/// When specified, the runtime will restore a previous snapshot
/// using the supplied file.
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
#[clap(long = "resume-from")]
pub resume_from: Option<PathBuf>,
@@ -156,8 +157,8 @@ pub enum SnapshotTrigger {
Listen,
/// Triggered when the process reads stdin for the first time
Stdin,
/// Triggered periodically (default 10 seconds) which can be specified using the `snapshot-period` option
Periodic,
/// Triggered periodically based on a timer (default 10 seconds) which can be specified using the `snapshot-timer` option
Timer,
/// Issued if the user sends an interrupt signal (Ctrl + C).
Sigint,
/// Alarm clock signal (used for timers)
@@ -166,6 +167,8 @@ pub enum SnapshotTrigger {
Sigtstp,
/// The SIGSTOP signal instructs the operating system to stop a process for later resumption.
Sigstop,
/// When a non-deterministic call is made
NonDeterministicCall,
}
impl FromStr for SnapshotTrigger {
@@ -177,7 +180,7 @@ impl FromStr for SnapshotTrigger {
"idle" => Self::Idle,
"listen" => Self::Listen,
"stdin" => Self::Stdin,
"periodic" => Self::Periodic,
"periodic" => Self::Timer,
"intr" | "sigint" | "ctrlc" | "ctrl-c" => Self::Sigint,
"alarm" | "timer" | "sigalrm" => Self::Sigalrm,
"sigtstp" | "ctrlz" | "ctrl-z" => Self::Sigtstp,
@@ -338,9 +341,9 @@ impl Wasi {
rt.set_networking_implementation(virtual_net::UnsupportedVirtualNetworking::default());
}
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
if let Some(path) = &self.resume_from {
rt.set_snapshooter(Arc::new(snapshot::LogFileSnapShooter::new_std(path)?));
rt.set_snapshot_capturer(Arc::new(snapshot::LogFileSnapshotCapturer::new_std(path)?));
}
if !self.no_tty {

View File

@@ -17,17 +17,28 @@ cfg-if = "1.0"
thiserror = "1"
tracing = { version = "0.1.37" }
getrandom = "0.2"
wasmer-wasix-types = { path = "../wasi-types", version = "0.15.0", features = [ "enable-serde" ] }
wasmer-wasix-types = { path = "../wasi-types", version = "0.15.0", features = [
"enable-serde",
] }
wasmer-types = { path = "../types", version = "=4.2.2", default-features = false }
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = ["wat", "js-serializable-module"] }
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = [
"wat",
"js-serializable-module",
] }
virtual-mio = { path = "../virtual-io", version = "0.3.0", default-features = false }
virtual-fs = { path = "../virtual-fs", version = "0.9.0", default-features = false, features = ["webc-fs"] }
virtual-fs = { path = "../virtual-fs", version = "0.9.0", default-features = false, features = [
"webc-fs",
] }
virtual-net = { path = "../virtual-net", version = "0.6.1", default-features = false }
wasmer-emscripten = { path = "../emscripten", version = "=4.2.2", optional = true }
typetag = { version = "0.1", optional = true }
serde = { version = "1.0", default-features = false, features = ["derive"] }
bincode = { version = "1.3" }
chrono = { version = "^0.4", default-features = false, features = [ "wasmbind", "std", "clock" ], optional = true }
chrono = { version = "^0.4", default-features = false, features = [
"wasmbind",
"std",
"clock",
], optional = true }
derivative = { version = "^2" }
bytes = "1"
webc = { workspace = true }
@@ -38,7 +49,12 @@ sha2 = { version = "0.10" }
waker-fn = { version = "1.1" }
cooked-waker = "^5"
rand = "0.8"
tokio = { version = "1", features = ["sync", "macros", "time", "rt"], default_features = false }
tokio = { version = "1", features = [
"sync",
"macros",
"time",
"rt",
], default_features = false }
futures = { version = "0.3" }
# used by feature='os'
async-trait = { version = "^0.1" }
@@ -51,7 +67,9 @@ hex = { version = "^0.4" }
term_size = { version = "0.3" }
linked_hash_set = { version = "0.1" }
http = "0.2.8"
wai-bindgen-wasmer = { path = "../wai-bindgen-wasmer", version = "0.15.0", features = ["tracing"] }
wai-bindgen-wasmer = { path = "../wai-bindgen-wasmer", version = "0.15.0", features = [
"tracing",
] }
heapless = "0.7.16"
once_cell = "1.17.0"
pin-project = "1.0.12"
@@ -62,7 +80,12 @@ tempfile = "3.6.0"
hyper = { version = "0.14", features = ["server", "stream"], optional = true }
wcgi = { version = "0.1.2", optional = true }
wcgi-host = { version = "0.1.2", optional = true }
tower-http = { version = "0.4.0", features = ["trace", "util", "catch-panic", "cors"], optional = true }
tower-http = { version = "0.4.0", features = [
"trace",
"util",
"catch-panic",
"cors",
], optional = true }
tower = { version = "0.4.13", features = ["make", "util"], optional = true }
url = "2.3.1"
petgraph = "0.6.3"
@@ -70,7 +93,15 @@ rayon = { version = "1.7.0", optional = true }
wasm-bindgen = { version = "0.2.87", optional = true }
js-sys = { version = "0.3.64", optional = true }
wasm-bindgen-futures = { version = "0.4.37", optional = true }
web-sys = { version = "0.3.64", features = ["Request", "RequestInit", "Window", "WorkerGlobalScope", "RequestMode", "Response", "Headers"], optional = true }
web-sys = { version = "0.3.64", features = [
"Request",
"RequestInit",
"Window",
"WorkerGlobalScope",
"RequestMode",
"Response",
"Headers",
], optional = true }
[target.'cfg(not(target_arch = "riscv64"))'.dependencies.reqwest]
version = "0.11"
@@ -94,8 +125,15 @@ termios = { version = "0.3" }
winapi = "0.3"
[dev-dependencies]
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = ["wat", "js-serializable-module"] }
tokio = { version = "1", features = [ "sync", "macros", "rt" ], default_features = false }
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = [
"wat",
"js-serializable-module",
] }
tokio = { version = "1", features = [
"sync",
"macros",
"rt",
], default_features = false }
pretty_assertions = "1.3.0"
wasm-bindgen-test = "0.3.0"
@@ -105,7 +143,11 @@ tracing-wasm = "0.2"
[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
tracing-subscriber = { version = "^0.3" }
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = ["wat", "js-serializable-module", "cranelift"] }
wasmer = { path = "../api", version = "=4.2.2", default-features = false, features = [
"wat",
"js-serializable-module",
"cranelift",
] }
[features]
default = ["sys-default"]
@@ -116,15 +158,32 @@ webc_runner_rt_wcgi = ["hyper", "wcgi", "wcgi-host", "tower", "tower-http"]
webc_runner_rt_emscripten = ["wasmer-emscripten"]
sys = ["webc/mmap", "time", "virtual-mio/sys"]
sys-default = ["sys", "logging", "host-fs", "sys-poll", "sys-thread", "host-vnet", "host-threads", "host-reqwest"]
sys-default = [
"sys",
"logging",
"host-fs",
"sys-poll",
"sys-thread",
"host-vnet",
"host-threads",
"host-reqwest",
]
sys-poll = []
sys-thread = ["tokio/rt", "tokio/time", "tokio/rt-multi-thread", "rayon"]
snapshooter = []
snapshot = []
# Deprecated. Kept it for compatibility
compiler = []
js = ["virtual-fs/no-time", "getrandom/js", "chrono", "js-sys", "wasm-bindgen", "wasm-bindgen-futures", "web-sys"]
js = [
"virtual-fs/no-time",
"getrandom/js",
"chrono",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
js-default = ["js"]
test-js = ["js", "wasmer/wat"]
@@ -136,11 +195,17 @@ remote-vnet = ["virtual-net/remote"]
logging = ["tracing/log"]
disable-all-logging = ["tracing/release_max_level_off", "tracing/max_level_off"]
enable-serde = ["typetag", "virtual-fs/enable-serde", "wasmer-wasix-types/enable-serde"]
enable-serde = [
"typetag",
"virtual-fs/enable-serde",
"wasmer-wasix-types/enable-serde",
]
[package.metadata.docs.rs]
features = [
"wasmer/sys", "webc_runner_rt_wcgi",
"webc_runner_rt_emscripten", "sys-default",
"wasmer/sys",
"webc_runner_rt_wcgi",
"webc_runner_rt_emscripten",
"sys-default",
]
rustc-args = ["--cfg", "docsrs"]

View File

@@ -50,7 +50,7 @@ pub mod http;
mod rewind;
pub mod runners;
pub mod runtime;
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
pub mod snapshot;
mod state;
mod syscalls;

View File

@@ -8,7 +8,7 @@ use self::{
module_cache::{CacheError, ModuleHash},
task_manager::InlineWaker,
};
use crate::snapshot::UNSUPPORTED_SNAP_SHOOTER;
use crate::snapshot::UNSUPPORTED_SNAPSHOT_CAPTURER;
use std::{
fmt,
@@ -20,8 +20,8 @@ use futures::future::BoxFuture;
use virtual_net::{DynVirtualNetworking, VirtualNetworking};
use wasmer::Module;
#[cfg(feature = "snapshooter")]
use crate::snapshot::{DynSnapShooter, UnsupportedSnapShooter};
#[cfg(feature = "snapshot")]
use crate::snapshot::{DynSnapshotCapturer, UnsupportedSnapshotCapturer};
use crate::{
http::{DynHttpClient, HttpClient},
os::TtyBridge,
@@ -109,11 +109,11 @@ where
InlineWaker::block_on(self.load_module(wasm))
}
/// The snap shooter takes and restores snapshots of the WASM process at specific
/// The snapshot capturer takes and restores snapshots of the WASM process at specific
/// points in time by reading and writing log entries
#[cfg(feature = "snapshooter")]
fn snap_shooter<'a>(&'a self) -> &'_ DynSnapShooter {
&UNSUPPORTED_SNAP_SHOOTER
#[cfg(feature = "snapshot")]
fn snapshot_capturer<'a>(&'a self) -> &'_ DynSnapshotCapturer {
&UNSUPPORTED_SNAPSHOT_CAPTURER
}
}
@@ -190,9 +190,9 @@ pub struct PluggableRuntime {
pub module_cache: Arc<dyn ModuleCache + Send + Sync>,
#[derivative(Debug = "ignore")]
pub tty: Option<Arc<dyn TtyBridge + Send + Sync>>,
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
#[derivative(Debug = "ignore")]
pub snapshooter: Arc<DynSnapShooter>,
pub snapshot_capturer: Arc<DynSnapshotCapturer>,
}
impl PluggableRuntime {
@@ -227,7 +227,8 @@ impl PluggableRuntime {
source: Arc::new(source),
package_loader: Arc::new(loader),
module_cache: Arc::new(module_cache::in_memory()),
snapshooter: Arc::new(UnsupportedSnapShooter::default()) as Arc<DynSnapShooter>,
snapshot_capturer: Arc::new(UnsupportedSnapshotCapturer::default())
as Arc<DynSnapshotCapturer>,
}
}
@@ -278,8 +279,8 @@ impl PluggableRuntime {
self
}
pub fn set_snapshooter(&mut self, snapshooter: Arc<DynSnapShooter>) -> &mut Self {
self.snapshooter = snapshooter;
pub fn set_snapshot_capturer(&mut self, capturer: Arc<DynSnapshotCapturer>) -> &mut Self {
self.snapshot_capturer = capturer;
self
}
}
@@ -328,8 +329,8 @@ impl Runtime for PluggableRuntime {
self.module_cache.clone()
}
#[cfg(feature = "snapshooter")]
fn snap_shooter<'a>(&'a self) -> &DynSnapShooter {
self.snapshooter.as_ref()
#[cfg(feature = "snapshot")]
fn snapshot_capturer<'a>(&'a self) -> &DynSnapshotCapturer {
self.snapshot_capturer.as_ref()
}
}

View File

@@ -66,11 +66,11 @@ pub enum SnapshotLog<'a> {
Snapshot,
}
/// The snap shooter will take a series of objects that represents the state of
/// The snapshot capturer will take a series of objects that represents the state of
/// a WASM process at a point in time and saves it so that it can be restored.
/// It also allows for the restoration of that state at a later moment
#[allow(unused_variables)]
pub trait SnapShooter {
pub trait SnapshotCapturer {
/// Takes in a stream of snapshot log entries and saves them so that they
/// may be restored at a later moment
fn write<'a>(&'a self, entry: SnapshotLog<'a>) -> BoxFuture<'a, anyhow::Result<()>>;
@@ -80,4 +80,4 @@ pub trait SnapShooter {
fn read<'a>(&'a self) -> BoxFuture<'a, anyhow::Result<Option<SnapshotLog<'a>>>>;
}
pub type DynSnapShooter = dyn SnapShooter + Send + Sync;
pub type DynSnapshotCapturer = dyn SnapshotCapturer + Send + Sync;

View File

@@ -35,7 +35,7 @@ impl SnapshotEffector {
.map_err(mem_error_to_wasi)?;
ctx.data()
.runtime()
.snap_shooter()
.snapshot_capturer()
.write(SnapshotLog::TerminalData {
data: Cow::Borrowed(buf.as_ref()),
})
@@ -57,7 +57,7 @@ impl SnapshotEffector {
wasi_try_ok_ok!(__asyncify_light(env, None, async {
ctx.data()
.runtime()
.snap_shooter()
.snapshot_capturer()
.write(SnapshotLog::SetThread {
id,
call_stack: Cow::Owned(rewind_stack.into()),
@@ -110,7 +110,7 @@ impl SnapshotEffector {
// file in an orderly manner.
wasi_try_ok_ok!(__asyncify_light(env, None, async {
let memory = unsafe { env.memory_view(ctx) };
let shooter = ctx.data().runtime().snap_shooter();
let capturer = ctx.data().runtime().snapshot_capturer();
for region in regions {
// We grab this region of memory as a vector and hash
@@ -120,8 +120,8 @@ impl SnapshotEffector {
.copy_range_to_vec(region.clone())
.map_err(mem_error_to_wasi)?;
// Now we write it to the snap shooter
shooter
// Now we write it to the snapshot capturer
capturer
.write(SnapshotLog::UpdateMemoryRegion {
region,
data: data.into(),
@@ -132,7 +132,7 @@ impl SnapshotEffector {
// Finally we mark the end of the snapshot so that
// it can act as a restoration point
shooter
capturer
.write(SnapshotLog::Snapshot)
.await
.map_err(map_snapshot_err)?;

View File

@@ -163,7 +163,7 @@ struct State {
at_end: bool,
}
/// The LogFile snap shooter will write its snapshots to a linear journal
/// The LogFile snapshot capturer will write its snapshots to a linear journal
/// and read them when restoring. It uses the `bincode` serializer which
/// means that forwards and backwards compatibility must be dealt with
/// carefully.
@@ -172,13 +172,13 @@ struct State {
/// then new entries will be added to the end regardless of if
/// its been read.
///
/// The logfile snapshooter uses a 64bit number as a entry encoding
/// The logfile snapshot capturer uses a 64-bit number as an entry encoding
/// delimiter.
pub struct LogFileSnapShooter {
pub struct LogFileSnapshotCapturer {
state: tokio::sync::Mutex<State>,
}
impl LogFileSnapShooter {
impl LogFileSnapshotCapturer {
pub async fn new(path: impl AsRef<Path>) -> io::Result<Self> {
let state = State {
file: tokio::fs::File::options()
@@ -211,7 +211,7 @@ impl LogFileSnapShooter {
}
#[async_trait::async_trait]
impl SnapShooter for LogFileSnapShooter {
impl SnapshotCapturer for LogFileSnapshotCapturer {
fn write<'a>(&'a self, entry: SnapshotLog<'a>) -> BoxFuture<'a, anyhow::Result<()>> {
Box::pin(async {
let entry: SnapshotLogEntry = entry.into();

View File

@@ -1,9 +1,9 @@
mod capturer;
mod effector;
mod log_file;
mod shooter;
mod unsupported;
pub use capturer::*;
pub use effector::*;
pub use log_file::*;
pub use shooter::*;
pub use unsupported::*;

View File

@@ -2,14 +2,15 @@ use futures::future::BoxFuture;
use super::*;
pub static UNSUPPORTED_SNAP_SHOOTER: UnsupportedSnapShooter = UnsupportedSnapShooter {};
pub static UNSUPPORTED_SNAPSHOT_CAPTURER: UnsupportedSnapshotCapturer =
UnsupportedSnapshotCapturer {};
/// The default for runtime is to use the unsupported snap-shooter
/// The default for runtime is to use the unsupported snapshot capturer
/// which will fail to snapshot if one attempts to do so.
#[derive(Debug, Default)]
pub struct UnsupportedSnapShooter {}
pub struct UnsupportedSnapshotCapturer {}
impl SnapShooter for UnsupportedSnapShooter {
impl SnapshotCapturer for UnsupportedSnapshotCapturer {
fn write<'a>(&'a self, _entry: SnapshotLog<'a>) -> BoxFuture<'a, anyhow::Result<()>> {
Box::pin(async { Err(anyhow::format_err!("unsupported")) })
}

View File

@@ -106,7 +106,7 @@ pub(crate) fn fd_write_internal<M: MemorySize>(
// If snap-shooting is enabled and this is to stdio then we
// will record a terminal event.
#[cfg(feature = "snapshooter")]
#[cfg(feature = "snapshot")]
if is_stdio && ctx.data().enable_snapshot_feed {
SnapshotEffector::write_terminal_data(&mut ctx, iovs, iovs_len)?;
env = ctx.data();