mirror of
https://github.com/mii443/wasmer.git
synced 2025-12-06 12:48:20 +00:00
Implemented URL downloading & caching
This commit is contained in:
@@ -7,10 +7,12 @@ use std::{
|
||||
net::SocketAddr,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use anyhow::{Context, Error};
|
||||
use clap::Parser;
|
||||
use sha2::{Digest, Sha256};
|
||||
use tempfile::NamedTempFile;
|
||||
use url::Url;
|
||||
use wasmer::{Function, Imports, Instance, Module, Store, Type, TypedFunction, Value};
|
||||
@@ -19,7 +21,11 @@ use wasmer_registry::Package;
|
||||
use wasmer_wasix::runners::{Runner, WapmContainer};
|
||||
use webc::metadata::Manifest;
|
||||
|
||||
use crate::store::StoreOptions;
|
||||
use crate::{
|
||||
commands::Cache,
|
||||
store::StoreOptions,
|
||||
wasmer_home::{DownloadCached, WasmerHome},
|
||||
};
|
||||
|
||||
/// The `wasmer run` subcommand.
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -325,76 +331,6 @@ fn get_cached_package(pkg: &Package, home: &WasmerHome) -> Result<Vec<u8>, Error
|
||||
todo!();
|
||||
}
|
||||
|
||||
/// Something which can fetch resources from the internet and will cache them
|
||||
/// locally.
|
||||
trait DownloadCached {
|
||||
fn download_package(&self, pkg: &Package) -> Result<PathBuf, Error>;
|
||||
fn download_url(&self, url: &url::Url) -> Result<PathBuf, Error>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct WasmerHome {
|
||||
/// The Wasmer home directory.
|
||||
#[clap(long = "wasmer-dir", env = "WASMER_DIR")]
|
||||
home: Option<PathBuf>,
|
||||
/// Override the registry packages are downloaded from.
|
||||
#[clap(long, env = "WASMER_REGISTRY")]
|
||||
registry: Option<String>,
|
||||
/// Skip all caching.
|
||||
#[clap(long)]
|
||||
no_cache: bool,
|
||||
}
|
||||
|
||||
impl WasmerHome {
|
||||
fn wasmer_home(&self) -> Result<PathBuf, Error> {
|
||||
if let Some(wasmer_home) = &self.home {
|
||||
return Ok(wasmer_home.clone());
|
||||
}
|
||||
|
||||
if let Some(user_home) = dirs::home_dir() {
|
||||
return Ok(user_home.join(".wasmer"));
|
||||
}
|
||||
|
||||
anyhow::bail!("Unable to determine the Wasmer directory");
|
||||
}
|
||||
}
|
||||
|
||||
impl DownloadCached for WasmerHome {
|
||||
fn download_package(&self, pkg: &Package) -> Result<PathBuf, Error> {
|
||||
let home = self.wasmer_home()?;
|
||||
let checkouts = wasmer_registry::get_checkouts_dir(&home);
|
||||
todo!();
|
||||
}
|
||||
|
||||
fn download_url(&self, url: &url::Url) -> Result<PathBuf, Error> {
|
||||
let home = self.wasmer_home()?;
|
||||
let checkouts = wasmer_registry::get_checkouts_dir(&home);
|
||||
let temp = NamedTempFile::new()?;
|
||||
todo!();
|
||||
}
|
||||
}
|
||||
|
||||
impl wasmer_cache::Cache for WasmerHome {
|
||||
type SerializeError = wasmer::SerializeError;
|
||||
type DeserializeError = wasmer::DeserializeError;
|
||||
|
||||
unsafe fn load(
|
||||
&self,
|
||||
engine: &impl wasmer::AsEngineRef,
|
||||
key: wasmer_cache::Hash,
|
||||
) -> Result<wasmer::Module, Self::DeserializeError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn store(
|
||||
&mut self,
|
||||
key: wasmer_cache::Hash,
|
||||
module: &wasmer::Module,
|
||||
) -> Result<(), Self::SerializeError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/// A file/directory on disk that will be executed.
|
||||
///
|
||||
/// Depending on the type of target and the command-line arguments, this might
|
||||
@@ -561,3 +497,4 @@ impl Default for WcgiOptions {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ pub mod package_source;
|
||||
pub mod store;
|
||||
pub mod suggestions;
|
||||
pub mod utils;
|
||||
pub mod wasmer_home;
|
||||
|
||||
/// Version number of this crate.
|
||||
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
370
lib/cli/src/wasmer_home.rs
Normal file
370
lib/cli/src/wasmer_home.rs
Normal file
@@ -0,0 +1,370 @@
|
||||
#![allow(missing_docs)]
|
||||
|
||||
use std::{
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use anyhow::{Context, Error};
|
||||
use reqwest::{blocking::Client, Url};
|
||||
use tempfile::NamedTempFile;
|
||||
use wasmer::{AsEngineRef, DeserializeError, Module, SerializeError};
|
||||
use wasmer_cache::Hash;
|
||||
use wasmer_registry::Package;
|
||||
|
||||
/// Something which can fetch resources from the internet and will cache them
|
||||
/// locally.
|
||||
pub trait DownloadCached {
|
||||
fn download_url(&self, url: &Url) -> Result<PathBuf, Error>;
|
||||
fn download_package(&self, pkg: &Package) -> Result<PathBuf, Error>;
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
pub struct WasmerHome {
|
||||
/// The Wasmer home directory.
|
||||
#[clap(long = "wasmer-dir", env = "WASMER_DIR")]
|
||||
pub home: Option<PathBuf>,
|
||||
/// Override the registry packages are downloaded from.
|
||||
#[clap(long, env = "WASMER_REGISTRY")]
|
||||
registry: Option<String>,
|
||||
/// Skip all caching.
|
||||
#[clap(long)]
|
||||
pub no_cache: bool,
|
||||
}
|
||||
|
||||
impl WasmerHome {
|
||||
pub fn wasmer_home(&self) -> Result<PathBuf, Error> {
|
||||
if let Some(wasmer_home) = &self.home {
|
||||
return Ok(wasmer_home.clone());
|
||||
}
|
||||
|
||||
if let Some(user_home) = dirs::home_dir() {
|
||||
return Ok(user_home.join(".wasmer"));
|
||||
}
|
||||
|
||||
anyhow::bail!("Unable to determine the Wasmer directory");
|
||||
}
|
||||
}
|
||||
|
||||
impl DownloadCached for WasmerHome {
|
||||
#[tracing::instrument(skip_all)]
|
||||
fn download_url(&self, url: &Url) -> Result<PathBuf, Error> {
|
||||
tracing::debug!(%url, "Downloading");
|
||||
|
||||
let home = self.wasmer_home()?;
|
||||
let checkouts = wasmer_registry::get_checkouts_dir(&home);
|
||||
|
||||
// This function is a bit tricky because we go to great lengths to avoid
|
||||
// unnecessary downloads.
|
||||
|
||||
let cache_key = Hash::generate(url.to_string().as_bytes());
|
||||
|
||||
// First, we figure out some basic information about the item
|
||||
let cache_info = CacheInfo::for_url(&cache_key, &checkouts, self.no_cache);
|
||||
|
||||
// Next we check if we definitely got a cache hit
|
||||
let state = match classify_cache_using_mtime(cache_info) {
|
||||
Ok(path) => {
|
||||
tracing::debug!(path=%path.display(), "Cache hit");
|
||||
return Ok(path);
|
||||
}
|
||||
Err(s) => s,
|
||||
};
|
||||
|
||||
// Okay, looks like we're going to have to download the item
|
||||
tracing::debug!(%url, "Sending a GET request");
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let request = client.get(url.clone()).header("Accept", "application/webc");
|
||||
|
||||
let mut response = match request.send() {
|
||||
Ok(r) => r
|
||||
.error_for_status()
|
||||
.with_context(|| format!("The GET request to \"{url}\" was unsuccessful"))?,
|
||||
Err(e) => {
|
||||
// Something went wrong. If it was a connection issue and we've
|
||||
// got a cached file, let's use that and emit a warning.
|
||||
if e.is_connect() {
|
||||
if let Some(path) = state.take_path() {
|
||||
tracing::warn!(
|
||||
path=%path.display(),
|
||||
error=&e as &dyn std::error::Error,
|
||||
"An error occurred while connecting to the resource. Falling back to a cached version."
|
||||
);
|
||||
return Ok(path);
|
||||
}
|
||||
}
|
||||
|
||||
// Oh well, we tried.
|
||||
let msg = format!("Unable to send a GET request to \"{url}\"");
|
||||
return Err(Error::from(e).context(msg));
|
||||
}
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
status_code=%response.status(),
|
||||
url=%response.url(),
|
||||
content_length=response.content_length(),
|
||||
"Download started",
|
||||
);
|
||||
tracing::trace!(headers=?response.headers());
|
||||
|
||||
// Now there is one last chance to avoid downloading the full file. If
|
||||
// it has an ETag header, we can use that to see whether the (possibly)
|
||||
// cached file is outdated.
|
||||
let etag = response
|
||||
.headers()
|
||||
.get("Etag")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|etag| etag.trim().to_string());
|
||||
|
||||
if let Some(cached) = state.use_etag_to_resolve_cached_file(etag.as_deref()) {
|
||||
tracing::debug!(
|
||||
path=%cached.display(),
|
||||
"Reusing the cached file because the ETag header is still valid",
|
||||
);
|
||||
return Ok(cached);
|
||||
}
|
||||
|
||||
// Note: we want to copy directly into a file so we don't hold
|
||||
// everything in memory.
|
||||
let (mut f, path) = if self.no_cache {
|
||||
// Leave the temporary file where it is. The OS will clean it up
|
||||
// for us later, and hopefully the caller will open it before the
|
||||
// temp file cleaner comes along.
|
||||
let temp = NamedTempFile::new().context("Unable to create a temporary file")?;
|
||||
temp.keep()
|
||||
.context("Unable to persist the temporary file")?
|
||||
} else {
|
||||
let cached_path = checkouts.join(cache_key.to_string());
|
||||
let f = std::fs::File::create(&cached_path).with_context(|| {
|
||||
format!("Unable to open \"{}\" for writing", cached_path.display())
|
||||
})?;
|
||||
|
||||
(f, cached_path)
|
||||
};
|
||||
|
||||
std::io::copy(&mut response, &mut f)
|
||||
.and_then(|_| f.flush())
|
||||
.with_context(|| format!("Unable to save the response to \"{}\"", path.display()))?;
|
||||
|
||||
if !self.no_cache {
|
||||
if let Some(etag) = etag {
|
||||
let etag_path = path.with_extension("etag");
|
||||
tracing::debug!(
|
||||
path=%etag_path.display(),
|
||||
%etag,
|
||||
"Saving the ETag to disk",
|
||||
);
|
||||
|
||||
if let Err(e) = std::fs::write(&etag_path, etag.as_bytes()) {
|
||||
tracing::warn!(
|
||||
error=&e as &dyn std::error::Error,
|
||||
path=%etag_path.display(),
|
||||
%etag,
|
||||
"Unable to save the ETag to disk",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
fn download_package(&self, pkg: &Package) -> Result<PathBuf, Error> {
|
||||
const DEFAULT_REGISTRY: &str = "https://wapm.io/";
|
||||
let registry = self.registry.as_deref().unwrap_or(DEFAULT_REGISTRY);
|
||||
let url = package_url(registry, pkg)?;
|
||||
|
||||
self.download_url(&url)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
enum CacheInfo {
|
||||
/// Caching has been disabled.
|
||||
Disabled,
|
||||
/// An item isn't in the cache, but could be cached later on.
|
||||
Miss,
|
||||
/// An item in the cache.
|
||||
Hit {
|
||||
path: PathBuf,
|
||||
etag: Option<String>,
|
||||
last_modified: Option<SystemTime>,
|
||||
},
|
||||
}
|
||||
|
||||
impl CacheInfo {
|
||||
fn for_url(key: &Hash, checkout_dir: &Path, disabled: bool) -> CacheInfo {
|
||||
if disabled {
|
||||
return CacheInfo::Disabled;
|
||||
}
|
||||
|
||||
let path = checkout_dir.join(key.to_string());
|
||||
|
||||
if !path.exists() {
|
||||
return CacheInfo::Miss;
|
||||
}
|
||||
|
||||
let etag = std::fs::read_to_string(path.with_extension("etag")).ok();
|
||||
let last_modified = path.metadata().and_then(|m| m.modified()).ok();
|
||||
|
||||
CacheInfo::Hit {
|
||||
etag,
|
||||
last_modified,
|
||||
path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn classify_cache_using_mtime(info: CacheInfo) -> Result<PathBuf, CacheState> {
|
||||
const CACHE_INVALIDATION_THRESHOLD: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
let (path, last_modified, etag) = match info {
|
||||
CacheInfo::Hit {
|
||||
path,
|
||||
last_modified: Some(last_modified),
|
||||
etag,
|
||||
..
|
||||
} => (path, last_modified, etag),
|
||||
CacheInfo::Hit {
|
||||
path,
|
||||
last_modified: None,
|
||||
etag: Some(etag),
|
||||
..
|
||||
} => return Err(CacheState::PossiblyDirty { etag, path }),
|
||||
CacheInfo::Hit {
|
||||
etag: None,
|
||||
last_modified: None,
|
||||
path,
|
||||
..
|
||||
} => {
|
||||
return Err(CacheState::UnableToVerify { path });
|
||||
}
|
||||
CacheInfo::Disabled | CacheInfo::Miss { .. } => return Err(CacheState::Miss),
|
||||
};
|
||||
|
||||
if let Ok(time_since_last_modified) = last_modified.elapsed() {
|
||||
if time_since_last_modified <= CACHE_INVALIDATION_THRESHOLD {
|
||||
return Ok(path);
|
||||
}
|
||||
}
|
||||
|
||||
match etag {
|
||||
Some(etag) => Err(CacheState::PossiblyDirty { etag, path }),
|
||||
None => Err(CacheState::UnableToVerify { path }),
|
||||
}
|
||||
}
|
||||
|
||||
/// Classification of how valid an item is based on filesystem metadata.
|
||||
#[derive(Debug)]
|
||||
enum CacheState {
|
||||
/// The item isn't in the cache.
|
||||
Miss,
|
||||
/// The cached item might be invalid, but it has an ETag we can use for
|
||||
/// further validation.
|
||||
PossiblyDirty { etag: String, path: PathBuf },
|
||||
/// The cached item exists on disk, but we weren't able to tell whether it is still
|
||||
/// valid, and there aren't any other ways to validate it further. You can
|
||||
/// probably reuse this if you are having internet issues.
|
||||
UnableToVerify { path: PathBuf },
|
||||
}
|
||||
|
||||
impl CacheState {
|
||||
fn take_path(self) -> Option<PathBuf> {
|
||||
match self {
|
||||
CacheState::PossiblyDirty { path, .. } | CacheState::UnableToVerify { path } => {
|
||||
Some(path)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn use_etag_to_resolve_cached_file(self, new_etag: Option<&str>) -> Option<PathBuf> {
|
||||
match (new_etag, self) {
|
||||
(
|
||||
Some(new_etag),
|
||||
CacheState::PossiblyDirty {
|
||||
etag: cached_etag,
|
||||
path,
|
||||
},
|
||||
) if cached_etag == new_etag => Some(path),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn package_url(registry: &str, pkg: &Package) -> Result<Url, Error> {
|
||||
let registry: Url = registry
|
||||
.parse()
|
||||
.with_context(|| format!("Unable to parse \"{registry}\" as a URL"))?;
|
||||
|
||||
let Package {
|
||||
name,
|
||||
namespace,
|
||||
version,
|
||||
} = pkg;
|
||||
|
||||
let mut path = format!("{namespace}/{name}");
|
||||
if let Some(version) = version {
|
||||
path.push('@');
|
||||
path.push_str(version);
|
||||
}
|
||||
|
||||
let url = registry
|
||||
.join(&path)
|
||||
.context("Unable to construct the package URL")?;
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
impl wasmer_cache::Cache for WasmerHome {
|
||||
type SerializeError = SerializeError;
|
||||
type DeserializeError = DeserializeError;
|
||||
|
||||
unsafe fn load(
|
||||
&self,
|
||||
_engine: &impl AsEngineRef,
|
||||
_key: Hash,
|
||||
) -> Result<Module, Self::DeserializeError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn store(&mut self, _key: Hash, _module: &Module) -> Result<(), Self::SerializeError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn construct_package_urls() {
|
||||
let inputs = [
|
||||
(
|
||||
"https://wapm.io/",
|
||||
"syrusakbary/python",
|
||||
"https://wapm.io/syrusakbary/python",
|
||||
),
|
||||
(
|
||||
"https://wapm.dev",
|
||||
"syrusakbary/python@1.2.3",
|
||||
"https://wapm.dev/syrusakbary/python@1.2.3",
|
||||
),
|
||||
(
|
||||
"https://localhost:8000/path/to/nested/dir/",
|
||||
"syrusakbary/python",
|
||||
"https://localhost:8000/path/to/nested/dir/syrusakbary/python",
|
||||
),
|
||||
];
|
||||
|
||||
for (registry, package, expected) in inputs {
|
||||
let package: Package = package.parse().unwrap();
|
||||
|
||||
let got = package_url(registry, &package).unwrap();
|
||||
assert_eq!(got.to_string(), expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -186,32 +186,32 @@ mod remote_webc {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn quickjs_as_package_name() {
|
||||
let assert = Command::new(wasmer_executable())
|
||||
.arg("run2")
|
||||
.arg("saghul/quickjs")
|
||||
.arg("--entrypoint=quickjs")
|
||||
.arg("--registry=https://wapm.io/")
|
||||
.arg("--")
|
||||
.arg("--eval")
|
||||
.arg("console.log('Hello, World!')")
|
||||
.assert();
|
||||
|
||||
assert.success().stdout("Hello, World!");
|
||||
assert.success().stdout(contains("Hello, World!"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn quickjs_as_url() {
|
||||
let assert = Command::new(wasmer_executable())
|
||||
.arg("run2")
|
||||
.arg("https://wapm.io/saghul/quickjs")
|
||||
.arg("--entrypoint=quickjs")
|
||||
.arg("--")
|
||||
.arg("--eval")
|
||||
.arg("console.log('Hello, World!')")
|
||||
.assert();
|
||||
|
||||
assert.success().stdout("Hello, World!");
|
||||
assert.success().stdout(contains("Hello, World!"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user