Add prefix mapping to create-exe and create-obj

This commit is contained in:
Felix Schütt
2022-12-27 04:56:17 +01:00
parent b06c04b6d6
commit 62bdc08326
5 changed files with 259 additions and 62 deletions

View File

@@ -5,6 +5,7 @@ use crate::store::CompilerOptions;
use anyhow::{Context, Result};
use clap::Parser;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::env;
use std::path::{Path, PathBuf};
use std::process::Command;
@@ -16,7 +17,6 @@ use webc::{ParseOptions, WebCMmap};
/// The `prefixer` returns a String used to prefix each of the
/// functions in the generated static object,
/// so we can ensure there are no collisions.
#[cfg(feature = "static-artifact-create")]
pub type PrefixerFn = Box<dyn Fn(&[u8]) -> String + Send>;
#[derive(Debug, Parser)]
@@ -35,6 +35,20 @@ pub struct CreateExe {
#[clap(long, name = "DEBUG PATH", parse(from_os_str))]
debug_dir: Option<PathBuf>,
/// Prefix for every input file, e.g. "wat2wasm:sha256abc123" would
/// prefix every function in the wat2wasm input object with the "sha256abc123" hash
///
/// If only a single value is given without containing a ":", this value is used for
/// all input files. If no value is given, the prefix is always equal to
/// the sha256 of the input .wasm file
#[clap(
long,
use_value_delimiter = true,
value_delimiter = ',',
name = "FILE:PATH:PREFIX"
)]
precompiled_atom: Vec<String>,
/// Compilation Target triple
///
/// Accepted target triple values must follow the
@@ -50,8 +64,12 @@ pub struct CreateExe {
#[clap(long = "target")]
target_triple: Option<Triple>,
/// Optional object format (serialized | symbols)
#[clap(long)]
/// Object format options
///
/// This flag accepts two options: `symbols` or `serialized`.
/// - (default) `symbols` creates an object where all functions and metadata of the module are regular object symbols
/// - `serialized` creates an object where the module is zero-copy serialized as raw data
#[clap(long = "object-format", name = "OBJECT_FORMAT", verbatim_doc_comment)]
object_format: Option<ObjectFormat>,
#[clap(long, short = 'm', multiple = true, number_of_values = 1)]
@@ -151,6 +169,7 @@ impl CreateExe {
&cross_compilation,
&self.libraries,
true,
&self.precompiled_atom,
)?;
} else if let Ok(pirita) = WebCMmap::parse(input_path.clone(), &ParseOptions::default()) {
// pirita file
@@ -167,6 +186,7 @@ impl CreateExe {
&self.cpu_features,
&cross_compilation.target,
self.object_format.unwrap_or_default(),
&self.precompiled_atom,
)?;
link_exe_from_dir(
&tempdir,
@@ -174,6 +194,7 @@ impl CreateExe {
&cross_compilation,
&self.libraries,
self.debug_dir.is_some(),
&self.precompiled_atom,
)?;
} else {
// wasm file
@@ -190,6 +211,7 @@ impl CreateExe {
&cross_compilation.target,
&self.cpu_features,
self.object_format.unwrap_or_default(),
&self.precompiled_atom,
)?;
link_exe_from_dir(
&tempdir,
@@ -197,6 +219,7 @@ impl CreateExe {
&cross_compilation,
&self.libraries,
self.debug_dir.is_some(),
&self.precompiled_atom,
)?;
}
@@ -225,6 +248,7 @@ pub(super) fn compile_pirita_into_directory(
cpu_features: &[CpuFeature],
triple: &Triple,
object_format: ObjectFormat,
prefixes: &[String],
) -> anyhow::Result<()> {
std::fs::create_dir_all(target_dir)
.map_err(|e| anyhow::anyhow!("cannot create / dir in {}: {e}", target_dir.display()))?;
@@ -282,13 +306,14 @@ pub(super) fn compile_pirita_into_directory(
));
}
let prefix_map = PrefixMapCompilation::from_input(&atoms_from_file, prefixes, false)?;
conpile_atoms(
&atoms_from_file,
&target_dir.join("atoms"),
&target_dir.join("include"),
compiler,
target,
object_format,
&prefix_map,
)?;
// target_dir
@@ -333,67 +358,188 @@ pub(super) fn compile_pirita_into_directory(
Ok(())
}
/// Prefix map used during compilation of object files.
///
/// Maps atom names to the prefix used for the symbols of the object generated
/// for that atom. A manually supplied prefix takes precedence over the
/// sha256-derived one (see `get_prefix_for_atom`).
#[derive(Debug, Default)]
struct PrefixMapCompilation {
/// Atom name -> hex-encoded sha256 hash of the atom's bytes.
/// Only populated when no manual prefixes were supplied.
input_hashes: BTreeMap<String, String>,
/// Atom name -> manually supplied prefix (from `atom:prefix` style input).
manual_prefixes: BTreeMap<String, String>,
/// Atom name -> raw bytes of a compilation object read from a path on disk.
// Currently only written, never read outside of the (also unused) accessor,
// hence the lint suppression.
#[allow(dead_code)]
compilation_objects: BTreeMap<String, Vec<u8>>,
}
impl PrefixMapCompilation {
    /// Builds the prefix map for `atoms` from the user-supplied `prefixes`.
    ///
    /// Every entry of `prefixes` has one of these shapes:
    /// - `"PREFIX"`: only accepted when there is exactly one atom and
    ///   `compilation_object_mode` is `false`
    /// - `"ATOM:PREFIX"`
    /// - `"ATOM:PATH:PREFIX"`: the file at `PATH` is read and cached as the
    ///   compilation object of `ATOM`
    ///
    /// Behavior:
    /// - no atoms: an empty map is returned
    /// - no prefixes: every atom is mapped to the sha256 hash of its bytes
    /// - otherwise there must be exactly one prefix entry per atom
    ///
    /// When `compilation_object_mode` is `true`, every entry must use the
    /// `"ATOM:PATH:PREFIX"` shape.
    ///
    /// # Errors
    ///
    /// Returns an error if the number of prefixes does not match the number of
    /// atoms, if an atom has no matching entry, if an entry is malformed, if a
    /// prefix contains characters other than alphanumerics, `_` or `-`, or if
    /// the file referenced by `PATH` cannot be read.
    fn from_input(
        atoms: &[(String, Vec<u8>)],
        prefixes: &[String],
        compilation_object_mode: bool,
    ) -> Result<Self, anyhow::Error> {
        if atoms.is_empty() {
            return Ok(Self::default());
        }

        // Without manual prefixes every atom is prefixed with the hash of its contents.
        if prefixes.is_empty() {
            return Ok(Self {
                input_hashes: atoms
                    .iter()
                    .map(|(name, bytes)| (name.clone(), Self::hash_for_bytes(bytes)))
                    .collect(),
                manual_prefixes: BTreeMap::new(),
                compilation_objects: BTreeMap::new(),
            });
        }

        if prefixes.len() != atoms.len() {
            return Err(anyhow::anyhow!(
                "invalid mapping of prefix and atoms: expected prefixes for {} atoms, got {} prefixes",
                atoms.len(), prefixes.len()
            ));
        }

        // Only for a single atom may the entry be a raw "sha256hash"; in every other
        // case each entry has to name its atom, e.g. "nameofatom:sha256hash".
        if atoms.len() == 1 && !compilation_object_mode {
            let prefix = &prefixes[0];
            let (atom_name, _atom_bytes) = &atoms[0];
            let atom_prefix = format!("{atom_name}:");

            // The atom name has to lead the entry; a substring match would accept
            // entries that merely contain "name:" somewhere in the middle.
            let symbol_prefix = match prefix.strip_prefix(atom_prefix.as_str()) {
                Some(rest) => rest,
                None if prefix.contains(':') => {
                    return Err(anyhow::anyhow!("invalid prefix in prefix {prefix}"));
                }
                None => prefix.as_str(),
            };

            if !Self::is_valid_prefix(symbol_prefix) {
                return Err(anyhow::anyhow!("invalid prefix {prefix}"));
            }

            return Ok(Self {
                input_hashes: BTreeMap::new(),
                manual_prefixes: BTreeMap::from([(atom_name.clone(), symbol_prefix.to_string())]),
                compilation_objects: BTreeMap::new(),
            });
        }

        let mut manual_prefixes = BTreeMap::new();
        let mut compilation_objects = BTreeMap::new();

        for (atom_name, _atom_bytes) in atoms {
            let prefix_start_str = format!("{atom_name}:");

            // `prefix` is the full user-supplied entry (kept for error messages),
            // `rest` is what follows the leading "atom_name:".
            let (prefix, rest) = prefixes
                .iter()
                .find_map(|p| Some((p, p.strip_prefix(prefix_start_str.as_str())?)))
                .ok_or_else(|| anyhow::anyhow!("could not find prefix for atom {atom_name:?}"))?;

            let symbol_prefix = match rest.split_once(':') {
                // Exactly "PATH:PREFIX": cache the object file and keep only the
                // trailing PREFIX as the symbol prefix of this atom.
                Some((path, symbol_prefix)) if !symbol_prefix.contains(':') => {
                    let bytes = std::fs::read(path).map_err(|e| {
                        anyhow::anyhow!(
                            "could not read file for prefix {symbol_prefix} ({path}): {e}"
                        )
                    })?;
                    compilation_objects.insert(atom_name.clone(), bytes);
                    symbol_prefix
                }
                // In compilation object mode the path component is mandatory.
                _ if compilation_object_mode => {
                    return Err(anyhow::anyhow!("invalid prefix format {prefix}"));
                }
                _ => rest,
            };

            if !Self::is_valid_prefix(symbol_prefix) {
                return Err(anyhow::anyhow!("invalid prefix {prefix}"));
            }

            manual_prefixes.insert(atom_name.clone(), symbol_prefix.to_string());
        }

        Ok(Self {
            input_hashes: BTreeMap::new(),
            manual_prefixes,
            compilation_objects,
        })
    }

    /// Returns `true` if `prefix` consists only of alphanumeric characters, `_` or `-`.
    fn is_valid_prefix(prefix: &str) -> bool {
        prefix
            .chars()
            .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
    }

    /// Returns the hex-encoded sha256 hash of `bytes`.
    fn hash_for_bytes(bytes: &[u8]) -> String {
        use sha2::{Digest, Sha256};
        let mut hasher = Sha256::new();
        hasher.update(bytes);
        let result = hasher.finalize();
        hex::encode(&result[..])
    }

    /// Returns the prefix for `atom_name`: the manual prefix if one was given,
    /// otherwise the hash of the atom's bytes, or `None` if the atom is unknown.
    fn get_prefix_for_atom(&self, atom_name: &str) -> Option<String> {
        self.manual_prefixes
            .get(atom_name)
            .or_else(|| self.input_hashes.get(atom_name))
            .cloned()
    }

    /// Returns the cached compilation object bytes for `atom_name`, if any were loaded.
    #[allow(dead_code)]
    fn get_compilation_object_for_atom(&self, atom_name: &str) -> Option<&[u8]> {
        self.compilation_objects.get(atom_name).map(Vec::as_slice)
    }
}
fn conpile_atoms(
atoms: &[(String, Vec<u8>)],
output_dir: &Path,
header_dir: &Path,
compiler: &CompilerOptions,
target: &Target,
object_format: ObjectFormat,
prefixes: &PrefixMapCompilation,
) -> Result<()> {
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;
for (a, data) in atoms {
let prefix = prefixes
.get_prefix_for_atom(a)
.ok_or_else(|| anyhow::anyhow!("no prefix given for atom {a}"))?;
let (store, _) = compiler.get_store_for_target(target.clone())?;
let atom_name = utils::normalize_atom_name(a);
let atom_name_uppercase = atom_name.to_uppercase();
let output_object_path = output_dir.join(format!("{atom_name}.o"));
let module_name = format!(
"WASMER_MODULE_{}",
utils::normalize_atom_name(&atom_name).to_uppercase()
);
let module_name = format!("WASMER_{prefix}_METADATA");
match object_format {
ObjectFormat::Symbols => {
let output_header_path = header_dir.join(format!("static_defs_{atom_name}.h"));
let engine = store.engine();
let engine_inner = engine.inner();
let compiler = engine_inner.compiler()?;
let features = engine_inner.features();
let tunables = store.tunables();
let atom_name_copy = atom_name.clone();
let prefixer: Option<PrefixerFn> = Some(Box::new(move |_| {
utils::normalize_atom_name(&atom_name_copy)
}));
let (module_info, obj, metadata_length, symbol_registry) =
Artifact::generate_object(
compiler,
data,
&module_name,
prefixer,
target,
tunables,
features,
)?;
let header_file_src = crate::c_gen::staticlib_header::generate_header_file(
&atom_name,
&format!("WASMER_MODULE_{atom_name_uppercase}"),
&module_info,
&*symbol_registry,
metadata_length,
);
let prefix_copy = prefix.to_string();
let prefixer: Option<PrefixerFn> = Some(Box::new(move |_| prefix_copy.to_string()));
let (_, obj, _, _) = Artifact::generate_object(
compiler,
data,
&module_name,
prefixer,
target,
tunables,
features,
)?;
// Write object file with functions
let mut writer = BufWriter::new(File::create(&output_object_path)?);
obj.write_stream(&mut writer)
.map_err(|err| anyhow::anyhow!(err.to_string()))?;
writer.flush()?;
// Write down header file that includes pointer arrays and the deserialize function
let mut writer = BufWriter::new(File::create(output_header_path)?);
writer.write_all(header_file_src.as_bytes())?;
writer.flush()?;
}
ObjectFormat::Serialized => {
let module = Module::from_binary(&store, data).context("failed to compile Wasm")?;
@@ -490,6 +636,7 @@ pub(super) fn prepare_directory_from_single_wasm_file(
triple: &Triple,
cpu_features: &[CpuFeature],
object_format: ObjectFormat,
prefix: &[String],
) -> anyhow::Result<()> {
let bytes = std::fs::read(wasm_file)?;
let target = &utils::target_triple_to_target(triple, cpu_features);
@@ -536,13 +683,14 @@ pub(super) fn prepare_directory_from_single_wasm_file(
target_paths.push((atom_name, atom_path, header_path));
}
let prefix_map = PrefixMapCompilation::from_input(&atoms_from_file, prefix, false)?;
conpile_atoms(
&atoms_from_file,
&target_dir.join("atoms"),
&target_dir.join("include"),
compiler,
target,
object_format,
&prefix_map,
)?;
let mut atoms = Vec::new();
@@ -583,6 +731,7 @@ fn link_exe_from_dir(
cross_compilation: &CrossCompileSetup,
additional_libraries: &[String],
debug: bool,
prefixes: &[String],
) -> anyhow::Result<()> {
let entrypoint_json =
std::fs::read_to_string(directory.join("entrypoint.json")).map_err(|e| {
@@ -603,7 +752,14 @@ fn link_exe_from_dir(
return Err(anyhow::anyhow!("file has no atoms to compile"));
}
let wasmer_main_c = generate_wasmer_main_c(&entrypoint).map_err(|e| {
let prefix_map = entrypoint
.atoms
.iter()
.map(|a| (a.atom.clone(), Vec::new()))
.collect::<Vec<_>>();
let prefixes = PrefixMapCompilation::from_input(&prefix_map, prefixes, true)?;
let wasmer_main_c = generate_wasmer_main_c(&entrypoint, &prefixes).map_err(|e| {
anyhow::anyhow!(
"could not generate wasmer_main.c in dir {}: {e}",
directory.display()
@@ -690,12 +846,12 @@ fn link_exe_from_dir(
object_paths.push(object_path);
return link_objects_system_linker(
&library_path,
library_path,
linker,
&optimization_flag,
optimization_flag,
&object_paths,
&cross_compilation.target,
&additional_libraries,
additional_libraries,
&output_path,
debug,
);
@@ -798,6 +954,7 @@ fn link_exe_from_dir(
}
/// Link compiled objects using the system linker
#[allow(clippy::too_many_arguments)]
fn link_objects_system_linker(
libwasmer_path: &Path,
linker_cmd: &str,
@@ -855,9 +1012,13 @@ fn link_objects_system_linker(
}
Ok(())
}
/// Generate the wasmer_main.c that links all object files together
/// (depending on the object format / atoms number)
fn generate_wasmer_main_c(entrypoint: &Entrypoint) -> Result<String, anyhow::Error> {
fn generate_wasmer_main_c(
entrypoint: &Entrypoint,
prefixes: &PrefixMapCompilation,
) -> Result<String, anyhow::Error> {
use std::fmt::Write;
const WASMER_MAIN_C_SOURCE: &str = include_str!("wasmer_create_exe_main.c");
@@ -877,9 +1038,12 @@ fn generate_wasmer_main_c(entrypoint: &Entrypoint) -> Result<String, anyhow::Err
let mut extra_headers = Vec::new();
for a in atom_names.iter() {
let atom_name = utils::normalize_atom_name(a);
let prefix = prefixes.get_prefix_for_atom(a).ok_or_else(|| {
anyhow::anyhow!("cannot find prefix for atom {a} when generating wasmer_main.c")
})?;
let atom_name = utils::normalize_atom_name(&prefix);
let atom_name_uppercase = atom_name.to_uppercase();
let module_name = format!("WASMER_MODULE_{atom_name_uppercase}");
let module_name = format!("WASMER_{atom_name_uppercase}_METADATA");
write!(
c_code_to_add,
@@ -1053,6 +1217,7 @@ pub(super) mod utils {
.filter_map(|e| {
let path = format!("{}", e.path().display());
if path.ends_with(".tar.gz") {
println!("found {} (target: {target})", e.path().display());
Some(e.path())
} else {
None
@@ -1119,13 +1284,17 @@ pub(super) mod utils {
pub(super) fn filter_tarballs(p: &Path, target: &Triple) -> Option<PathBuf> {
if let Architecture::Aarch64(_) = target.architecture {
if !p.file_name()?.to_str()?.contains("aarch64") {
if !(p.file_name()?.to_str()?.contains("aarch64")
|| p.file_name()?.to_str()?.contains("-arm"))
{
return None;
}
}
if let Architecture::X86_64 = target.architecture {
if !p.file_name()?.to_str()?.contains("x86_64") {
if !(p.file_name()?.to_str()?.contains("x86_64")
|| p.file_name()?.to_str()?.contains("-gnu64"))
{
return None;
}
}