diff --git a/lib/cli/src/commands/run2.rs b/lib/cli/src/commands/run2.rs index 414046c85..57c320963 100644 --- a/lib/cli/src/commands/run2.rs +++ b/lib/cli/src/commands/run2.rs @@ -7,10 +7,12 @@ use std::{ net::SocketAddr, path::{Path, PathBuf}, str::FromStr, + time::{Duration, SystemTime}, }; use anyhow::{Context, Error}; use clap::Parser; +use sha2::{Digest, Sha256}; use tempfile::NamedTempFile; use url::Url; use wasmer::{Function, Imports, Instance, Module, Store, Type, TypedFunction, Value}; @@ -19,7 +21,11 @@ use wasmer_registry::Package; use wasmer_wasix::runners::{Runner, WapmContainer}; use webc::metadata::Manifest; -use crate::store::StoreOptions; +use crate::{ + commands::Cache, + store::StoreOptions, + wasmer_home::{DownloadCached, WasmerHome}, +}; /// The `wasmer run` subcommand. #[derive(Debug, Parser)] @@ -325,76 +331,6 @@ fn get_cached_package(pkg: &Package, home: &WasmerHome) -> Result, Error todo!(); } -/// Something which can fetch resources from the internet and will cache them -/// locally. -trait DownloadCached { - fn download_package(&self, pkg: &Package) -> Result; - fn download_url(&self, url: &url::Url) -> Result; -} - -#[derive(Debug, Parser)] -struct WasmerHome { - /// The Wasmer home directory. - #[clap(long = "wasmer-dir", env = "WASMER_DIR")] - home: Option, - /// Override the registry packages are downloaded from. - #[clap(long, env = "WASMER_REGISTRY")] - registry: Option, - /// Skip all caching. - #[clap(long)] - no_cache: bool, -} - -impl WasmerHome { - fn wasmer_home(&self) -> Result { - if let Some(wasmer_home) = &self.home { - return Ok(wasmer_home.clone()); - } - - if let Some(user_home) = dirs::home_dir() { - return Ok(user_home.join(".wasmer")); - } - - anyhow::bail!("Unable to determine the Wasmer directory"); - } -} - -impl DownloadCached for WasmerHome { - fn download_package(&self, pkg: &Package) -> Result { - let home = self.wasmer_home()?; - let checkouts = wasmer_registry::get_checkouts_dir(&home); - todo!(); - } - - fn download_url(&self, url: &url::Url) -> Result { - let home = self.wasmer_home()?; - let checkouts = wasmer_registry::get_checkouts_dir(&home); - let temp = NamedTempFile::new()?; - todo!(); - } -} - -impl wasmer_cache::Cache for WasmerHome { - type SerializeError = wasmer::SerializeError; - type DeserializeError = wasmer::DeserializeError; - - unsafe fn load( - &self, - engine: &impl wasmer::AsEngineRef, - key: wasmer_cache::Hash, - ) -> Result { - todo!() - } - - fn store( - &mut self, - key: wasmer_cache::Hash, - module: &wasmer::Module, - ) -> Result<(), Self::SerializeError> { - todo!() - } -} - /// A file/directory on disk that will be executed. /// /// Depending on the type of target and the command-line arguments, this might @@ -561,3 +497,4 @@ impl Default for WcgiOptions { } } } + diff --git a/lib/cli/src/lib.rs b/lib/cli/src/lib.rs index 5e9d4597a..12c0f8f95 100644 --- a/lib/cli/src/lib.rs +++ b/lib/cli/src/lib.rs @@ -27,6 +27,7 @@ pub mod package_source; pub mod store; pub mod suggestions; pub mod utils; +pub mod wasmer_home; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/lib/cli/src/wasmer_home.rs b/lib/cli/src/wasmer_home.rs new file mode 100644 index 000000000..0086df420 --- /dev/null +++ b/lib/cli/src/wasmer_home.rs @@ -0,0 +1,370 @@ +#![allow(missing_docs)] + +use std::{ + io::Write, + path::{Path, PathBuf}, + time::{Duration, SystemTime}, +}; + +use anyhow::{Context, Error}; +use reqwest::{blocking::Client, Url}; +use tempfile::NamedTempFile; +use wasmer::{AsEngineRef, DeserializeError, Module, SerializeError}; +use wasmer_cache::Hash; +use wasmer_registry::Package; + +/// Something which can fetch resources from the internet and will cache them +/// locally. +pub trait DownloadCached { + fn download_url(&self, url: &Url) -> Result; + fn download_package(&self, pkg: &Package) -> Result; +} + +#[derive(Debug, clap::Parser)] +pub struct WasmerHome { + /// The Wasmer home directory. + #[clap(long = "wasmer-dir", env = "WASMER_DIR")] + pub home: Option, + /// Override the registry packages are downloaded from. + #[clap(long, env = "WASMER_REGISTRY")] + registry: Option, + /// Skip all caching. + #[clap(long)] + pub no_cache: bool, +} + +impl WasmerHome { + pub fn wasmer_home(&self) -> Result { + if let Some(wasmer_home) = &self.home { + return Ok(wasmer_home.clone()); + } + + if let Some(user_home) = dirs::home_dir() { + return Ok(user_home.join(".wasmer")); + } + + anyhow::bail!("Unable to determine the Wasmer directory"); + } +} + +impl DownloadCached for WasmerHome { + #[tracing::instrument(skip_all)] + fn download_url(&self, url: &Url) -> Result { + tracing::debug!(%url, "Downloading"); + + let home = self.wasmer_home()?; + let checkouts = wasmer_registry::get_checkouts_dir(&home); + + // This function is a bit tricky because we go to great lengths to avoid + // unnecessary downloads. + + let cache_key = Hash::generate(url.to_string().as_bytes()); + + // First, we figure out some basic information about the item + let cache_info = CacheInfo::for_url(&cache_key, &checkouts, self.no_cache); + + // Next we check if we definitely got a cache hit + let state = match classify_cache_using_mtime(cache_info) { + Ok(path) => { + tracing::debug!(path=%path.display(), "Cache hit"); + return Ok(path); + } + Err(s) => s, + }; + + // Okay, looks like we're going to have to download the item + tracing::debug!(%url, "Sending a GET request"); + + let client = Client::new(); + + let request = client.get(url.clone()).header("Accept", "application/webc"); + + let mut response = match request.send() { + Ok(r) => r + .error_for_status() + .with_context(|| format!("The GET request to \"{url}\" was unsuccessful"))?, + Err(e) => { + // Something went wrong. If it was a connection issue and we've + // got a cached file, let's use that and emit a warning. + if e.is_connect() { + if let Some(path) = state.take_path() { + tracing::warn!( + path=%path.display(), + error=&e as &dyn std::error::Error, + "An error occurred while connecting to the resource. Falling back to a cached version." + ); + return Ok(path); + } + } + + // Oh well, we tried. + let msg = format!("Unable to send a GET request to \"{url}\""); + return Err(Error::from(e).context(msg)); + } + }; + + tracing::debug!( + status_code=%response.status(), + url=%response.url(), + content_length=response.content_length(), + "Download started", + ); + tracing::trace!(headers=?response.headers()); + + // Now there is one last chance to avoid downloading the full file. If + // it has an ETag header, we can use that to see whether the (possibly) + // cached file is outdated. + let etag = response + .headers() + .get("Etag") + .and_then(|v| v.to_str().ok()) + .map(|etag| etag.trim().to_string()); + + if let Some(cached) = state.use_etag_to_resolve_cached_file(etag.as_deref()) { + tracing::debug!( + path=%cached.display(), + "Reusing the cached file because the ETag header is still valid", + ); + return Ok(cached); + } + + // Note: we want to copy directly into a file so we don't hold + // everything in memory. + let (mut f, path) = if self.no_cache { + // Leave the temporary file where it is. The OS will clean it up + // for us later, and hopefully the caller will open it before the + // temp file cleaner comes along. + let temp = NamedTempFile::new().context("Unable to create a temporary file")?; + temp.keep() + .context("Unable to persist the temporary file")? + } else { + let cached_path = checkouts.join(cache_key.to_string()); + let f = std::fs::File::create(&cached_path).with_context(|| { + format!("Unable to open \"{}\" for writing", cached_path.display()) + })?; + + (f, cached_path) + }; + + std::io::copy(&mut response, &mut f) + .and_then(|_| f.flush()) + .with_context(|| format!("Unable to save the response to \"{}\"", path.display()))?; + + if !self.no_cache { + if let Some(etag) = etag { + let etag_path = path.with_extension("etag"); + tracing::debug!( + path=%etag_path.display(), + %etag, + "Saving the ETag to disk", + ); + + if let Err(e) = std::fs::write(&etag_path, etag.as_bytes()) { + tracing::warn!( + error=&e as &dyn std::error::Error, + path=%etag_path.display(), + %etag, + "Unable to save the ETag to disk", + ); + } + } + } + + Ok(path) + } + + fn download_package(&self, pkg: &Package) -> Result { + const DEFAULT_REGISTRY: &str = "https://wapm.io/"; + let registry = self.registry.as_deref().unwrap_or(DEFAULT_REGISTRY); + let url = package_url(registry, pkg)?; + + self.download_url(&url) + } +} + +#[derive(Debug, Clone, PartialEq)] +enum CacheInfo { + /// Caching has been disabled. + Disabled, + /// An item isn't in the cache, but could be cached later on. + Miss, + /// An item in the cache. + Hit { + path: PathBuf, + etag: Option, + last_modified: Option, + }, +} + +impl CacheInfo { + fn for_url(key: &Hash, checkout_dir: &Path, disabled: bool) -> CacheInfo { + if disabled { + return CacheInfo::Disabled; + } + + let path = checkout_dir.join(key.to_string()); + + if !path.exists() { + return CacheInfo::Miss; + } + + let etag = std::fs::read_to_string(path.with_extension("etag")).ok(); + let last_modified = path.metadata().and_then(|m| m.modified()).ok(); + + CacheInfo::Hit { + etag, + last_modified, + path, + } + } +} + +fn classify_cache_using_mtime(info: CacheInfo) -> Result { + const CACHE_INVALIDATION_THRESHOLD: Duration = Duration::from_secs(5 * 60); + + let (path, last_modified, etag) = match info { + CacheInfo::Hit { + path, + last_modified: Some(last_modified), + etag, + .. + } => (path, last_modified, etag), + CacheInfo::Hit { + path, + last_modified: None, + etag: Some(etag), + .. + } => return Err(CacheState::PossiblyDirty { etag, path }), + CacheInfo::Hit { + etag: None, + last_modified: None, + path, + .. + } => { + return Err(CacheState::UnableToVerify { path }); + } + CacheInfo::Disabled | CacheInfo::Miss { .. } => return Err(CacheState::Miss), + }; + + if let Ok(time_since_last_modified) = last_modified.elapsed() { + if time_since_last_modified <= CACHE_INVALIDATION_THRESHOLD { + return Ok(path); + } + } + + match etag { + Some(etag) => Err(CacheState::PossiblyDirty { etag, path }), + None => Err(CacheState::UnableToVerify { path }), + } +} + +/// Classification of how valid an item is based on filesystem metadata. +#[derive(Debug)] +enum CacheState { + /// The item isn't in the cache. + Miss, + /// The cached item might be invalid, but it has an ETag we can use for + /// further validation. + PossiblyDirty { etag: String, path: PathBuf }, + /// The cached item exists on disk, but we weren't able to tell whether it is still + /// valid, and there aren't any other ways to validate it further. You can + /// probably reuse this if you are having internet issues. + UnableToVerify { path: PathBuf }, +} + +impl CacheState { + fn take_path(self) -> Option { + match self { + CacheState::PossiblyDirty { path, .. } | CacheState::UnableToVerify { path } => { + Some(path) + } + _ => None, + } + } + + fn use_etag_to_resolve_cached_file(self, new_etag: Option<&str>) -> Option { + match (new_etag, self) { + ( + Some(new_etag), + CacheState::PossiblyDirty { + etag: cached_etag, + path, + }, + ) if cached_etag == new_etag => Some(path), + _ => None, + } + } +} + +fn package_url(registry: &str, pkg: &Package) -> Result { + let registry: Url = registry + .parse() + .with_context(|| format!("Unable to parse \"{registry}\" as a URL"))?; + + let Package { + name, + namespace, + version, + } = pkg; + + let mut path = format!("{namespace}/{name}"); + if let Some(version) = version { + path.push('@'); + path.push_str(version); + } + + let url = registry + .join(&path) + .context("Unable to construct the package URL")?; + Ok(url) +} + +impl wasmer_cache::Cache for WasmerHome { + type SerializeError = SerializeError; + type DeserializeError = DeserializeError; + + unsafe fn load( + &self, + _engine: &impl AsEngineRef, + _key: Hash, + ) -> Result { + todo!() + } + + fn store(&mut self, _key: Hash, _module: &Module) -> Result<(), Self::SerializeError> { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn construct_package_urls() { + let inputs = [ + ( + "https://wapm.io/", + "syrusakbary/python", + "https://wapm.io/syrusakbary/python", + ), + ( + "https://wapm.dev", + "syrusakbary/python@1.2.3", + "https://wapm.dev/syrusakbary/python@1.2.3", + ), + ( + "https://localhost:8000/path/to/nested/dir/", + "syrusakbary/python", + "https://localhost:8000/path/to/nested/dir/syrusakbary/python", + ), + ]; + + for (registry, package, expected) in inputs { + let package: Package = package.parse().unwrap(); + + let got = package_url(registry, &package).unwrap(); + assert_eq!(got.to_string(), expected); + } + } +} diff --git a/tests/integration/cli/tests/run2.rs b/tests/integration/cli/tests/run2.rs index 3a6328899..85a008ad4 100644 --- a/tests/integration/cli/tests/run2.rs +++ b/tests/integration/cli/tests/run2.rs @@ -186,32 +186,32 @@ mod remote_webc { use super::*; #[test] - #[ignore] fn quickjs_as_package_name() { let assert = Command::new(wasmer_executable()) .arg("run2") .arg("saghul/quickjs") + .arg("--entrypoint=quickjs") .arg("--registry=https://wapm.io/") .arg("--") .arg("--eval") .arg("console.log('Hello, World!')") .assert(); - assert.success().stdout("Hello, World!"); + assert.success().stdout(contains("Hello, World!")); } #[test] - #[ignore] fn quickjs_as_url() { let assert = Command::new(wasmer_executable()) .arg("run2") .arg("https://wapm.io/saghul/quickjs") + .arg("--entrypoint=quickjs") .arg("--") .arg("--eval") .arg("console.log('Hello, World!')") .assert(); - assert.success().stdout("Hello, World!"); + assert.success().stdout(contains("Hello, World!")); } }