Use trampolines for all libcalls in engine-universal and engine-dylib

In both of these engines, the compiled code may be loaded in memory far
from the Wasmer runtime which means that libcalls may not be reachable
through the normal relocation types. Instead a trampoline is needed to
allow reaching any address in the 64-bit address space.

In the case of engine-dylib, this is even worse since the symbols are
not exported by the executable without some special linker flags. The
solution here is to manually patch in the addresses at load time into
a data table of function pointers.
This commit is contained in:
Amanieu d'Antras
2022-01-11 19:34:35 +00:00
parent 3501e48567
commit ffb9cd33b9
29 changed files with 430 additions and 201 deletions

View File

@@ -1,5 +0,0 @@
[target.'cfg(all(target_os = "linux", target_env = "gnu"))']
rustflags = [
# Put the VM functions in the dynamic symbol table.
"-C", "link-arg=-Wl,-E",
]

View File

@@ -166,6 +166,7 @@ jobs:
shell: bash
- name: Setup Rust target
run: |
mkdir -p .cargo
cat << EOF > .cargo/config.toml
[build]
target = "${{ matrix.target }}"

40
Cargo.lock generated
View File

@@ -121,7 +121,7 @@ dependencies = [
"cfg-if 1.0.0",
"libc",
"miniz_oxide",
"object",
"object 0.27.1",
"rustc-demangle",
]
@@ -779,6 +779,26 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "enum-iterator"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eeac5c5edb79e4e39fe8439ef35207780a11f69c52cbe424ce3dfad4cb78de6"
dependencies = [
"enum-iterator-derive",
]
[[package]]
name = "enum-iterator-derive"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "enumset"
version = "1.0.8"
@@ -1419,8 +1439,18 @@ name = "object"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9"
dependencies = [
"memchr",
]
[[package]]
name = "object"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456"
dependencies = [
"crc32fast",
"hashbrown 0.11.2",
"indexmap",
"memchr",
]
@@ -2940,7 +2970,7 @@ dependencies = [
"lazy_static",
"libc",
"loupe",
"object",
"object 0.28.3",
"rayon",
"regex",
"rustc_version 0.4.0",
@@ -3036,10 +3066,12 @@ name = "wasmer-engine-dylib"
version = "2.1.1"
dependencies = [
"cfg-if 1.0.0",
"enum-iterator",
"enumset",
"leb128",
"libloading",
"loupe",
"object 0.28.3",
"rkyv",
"serde",
"tempfile",
@@ -3077,6 +3109,7 @@ name = "wasmer-engine-universal"
version = "2.1.1"
dependencies = [
"cfg-if 1.0.0",
"enum-iterator",
"enumset",
"leb128",
"loupe",
@@ -3115,7 +3148,7 @@ dependencies = [
name = "wasmer-object"
version = "2.1.1"
dependencies = [
"object",
"object 0.28.3",
"thiserror",
"wasmer-compiler",
"wasmer-types",
@@ -3151,6 +3184,7 @@ dependencies = [
"backtrace",
"cc",
"cfg-if 1.0.0",
"enum-iterator",
"indexmap",
"libc",
"loupe",

View File

@@ -211,14 +211,13 @@ compilers_engines :=
ifeq ($(ENABLE_CRANELIFT), 1)
compilers_engines += cranelift-universal
ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX)))
ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX)))
ifeq ($(IS_AMD64), 1)
ifneq ($(LIBC), musl)
compilers_engines += cranelift-dylib
endif
else ifeq ($(IS_AARCH64), 1)
# The object crate doesn't support yet Darwin + Aarch64 relocations
ifneq ($(IS_DARWIN), 1)
ifneq ($(LIBC), musl)
compilers_engines += cranelift-dylib
endif
endif
@@ -230,7 +229,7 @@ endif
##
ifeq ($(ENABLE_LLVM), 1)
ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX)))
ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX)))
ifeq ($(IS_AMD64), 1)
compilers_engines += llvm-universal
compilers_engines += llvm-dylib
@@ -246,7 +245,7 @@ endif
##
ifeq ($(ENABLE_SINGLEPASS), 1)
ifneq (, $(filter 1, $(IS_DARWIN) $(IS_LINUX)))
ifneq (, $(filter 1, $(IS_WINDOWS) $(IS_DARWIN) $(IS_LINUX)))
ifeq ($(IS_AMD64), 1)
compilers_engines += singlepass-universal
endif

View File

@@ -179,6 +179,7 @@ skip = [
{ name = "semver-parser", version = "=0.7.0" },
{ name = "rustc_version", version = "=0.2.3" },
{ name = "itoa", version = "=0.4.8" },
{ name = "object", version = "=0.27.1" },
]
# Similarly to `skip` allows you to skip certain crates during duplicate
# detection. Unlike skip, it also includes the entire tree of transitive

View File

@@ -295,7 +295,6 @@ impl Compiler for CraneliftCompiler {
function_call_trampolines,
dynamic_function_trampolines,
dwarf,
None,
))
}
}

View File

@@ -135,6 +135,12 @@ impl Cranelift {
flags.enable("is_pic").expect("should be a valid flag");
}
// We set up libcall trampolines in engine-dylib and engine-universal.
// These trampolines are always reachable through short jumps.
flags
.enable("use_colocated_libcalls")
.expect("should be a valid flag");
// Invert cranelift's default-on verification to instead default off.
let enable_verifier = if self.enable_verifier {
"true"

View File

@@ -1132,9 +1132,7 @@ impl<'module_environment> BaseFuncEnvironment for FuncEnvironment<'module_enviro
Ok(func.import_function(ir::ExtFuncData {
name,
signature,
// We currently allocate all code segments independently, so nothing
// is colocated.
colocated: false,
colocated: true,
}))
}

View File

@@ -90,6 +90,7 @@ pub fn irreloc_to_relocationkind(reloc: Reloc) -> RelocationKind {
Reloc::X86CallPCRel4 => RelocationKind::X86CallPCRel4,
Reloc::X86CallPLTRel4 => RelocationKind::X86CallPLTRel4,
Reloc::X86GOTPCRel4 => RelocationKind::X86GOTPCRel4,
Reloc::Arm64Call => RelocationKind::Arm64Call,
_ => panic!("The relocation {} is not yet supported.", reloc),
}
}

View File

@@ -12,12 +12,14 @@ readme = "README.md"
edition = "2018"
[dependencies]
wasmer-compiler = { path = "../compiler", version = "=2.1.1", features = ["translator"] }
wasmer-compiler = { path = "../compiler", version = "=2.1.1", features = [
"translator",
] }
wasmer-vm = { path = "../vm", version = "=2.1.1" }
wasmer-types = { path = "../types", version = "=2.1.1" }
target-lexicon = { version = "0.12.2", default-features = false }
smallvec = "1.6"
object = { version = "0.27", default-features = false, features = ["read"] }
object = { version = "0.28.3", default-features = false, features = ["read"] }
libc = { version = "^0.2", default-features = false }
byteorder = "1"
itertools = "0.10"

View File

@@ -12,10 +12,9 @@ use rayon::iter::ParallelBridge;
use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
use std::sync::Arc;
use wasmer_compiler::{
Architecture, Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection,
CustomSectionProtection, Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState,
RelocationTarget, SectionBody, SectionIndex, Symbol, SymbolRegistry, Target,
TrampolinesSection,
Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, CustomSectionProtection,
Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, RelocationTarget,
SectionBody, SectionIndex, Symbol, SymbolRegistry, Target,
};
use wasmer_types::entity::{EntityRef, PrimaryMap};
use wasmer_types::{FunctionIndex, LocalFunctionIndex, SignatureIndex};
@@ -305,37 +304,6 @@ impl Compiler for LLVMCompiler {
})
.collect::<PrimaryMap<LocalFunctionIndex, _>>();
let trampolines = match target.triple().architecture {
Architecture::Aarch64(_) => {
let nj = 16;
// We create a jump to an absolute 64bits address
// using x17 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register
// but Apple ask to just not use x16
// LDR x17, #8 51 00 00 58
// BR x17 20 02 1f d6
// JMPADDR 00 00 00 00 00 00 00 00
let onejump = [
0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0,
];
let trampolines = Some(TrampolinesSection::new(
SectionIndex::from_u32(module_custom_sections.len() as u32),
nj,
onejump.len(),
));
let mut alljmps = vec![];
for _ in 0..nj {
alljmps.extend(onejump.iter().copied());
}
module_custom_sections.push(CustomSection {
protection: CustomSectionProtection::ReadExecute,
bytes: SectionBody::new_with_vec(alljmps),
relocations: vec![],
});
trampolines
}
_ => None,
};
let dwarf = if !frame_section_bytes.is_empty() {
let dwarf = Some(Dwarf::new(SectionIndex::from_u32(
module_custom_sections.len() as u32,
@@ -400,7 +368,6 @@ impl Compiler for LLVMCompiler {
function_call_trampolines,
dynamic_function_trampolines,
dwarf,
trampolines,
))
}
}

View File

@@ -187,7 +187,6 @@ impl Compiler for SinglepassCompiler {
function_call_trampolines,
dynamic_function_trampolines,
None,
None,
))
}
}

View File

@@ -109,33 +109,6 @@ impl Dwarf {
}
}
/// Trampolines section used by ARM short jump (26bits)
#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
#[cfg_attr(
feature = "enable-rkyv",
derive(RkyvSerialize, RkyvDeserialize, Archive)
)]
#[derive(Debug, PartialEq, Eq, Clone, MemoryUsage)]
pub struct TrampolinesSection {
/// SectionIndex for the actual Trampolines code
pub section_index: SectionIndex,
/// Number of jump slots in the section
pub slots: usize,
/// Slot size
pub size: usize,
}
impl TrampolinesSection {
/// Creates a `Trampolines` struct with the indice for its section, and number of slots and size of slot
pub fn new(section_index: SectionIndex, slots: usize, size: usize) -> Self {
Self {
section_index,
slots,
size,
}
}
}
/// The result of compiling a WebAssembly module's functions.
#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
#[derive(Debug, PartialEq, Eq)]
@@ -182,9 +155,6 @@ pub struct Compilation {
/// Section ids corresponding to the Dwarf debug info
debug: Option<Dwarf>,
/// Trampolines for the arch that needs it
trampolines: Option<TrampolinesSection>,
}
impl Compilation {
@@ -195,7 +165,6 @@ impl Compilation {
function_call_trampolines: PrimaryMap<SignatureIndex, FunctionBody>,
dynamic_function_trampolines: PrimaryMap<FunctionIndex, FunctionBody>,
debug: Option<Dwarf>,
trampolines: Option<TrampolinesSection>,
) -> Self {
Self {
functions,
@@ -203,7 +172,6 @@ impl Compilation {
function_call_trampolines,
dynamic_function_trampolines,
debug,
trampolines,
}
}
@@ -281,11 +249,6 @@ impl Compilation {
pub fn get_debug(&self) -> Option<Dwarf> {
self.debug.clone()
}
/// Returns the Trampilines info.
pub fn get_trampolines(&self) -> Option<TrampolinesSection> {
self.trampolines.clone()
}
}
impl<'a> IntoIterator for &'a Compilation {

View File

@@ -74,7 +74,7 @@ pub use crate::error::{
};
pub use crate::function::{
Compilation, CompiledFunction, CompiledFunctionFrameInfo, CustomSections, Dwarf, FunctionBody,
Functions, TrampolinesSection,
Functions,
};
pub use crate::jump_table::{JumpTable, JumpTableOffsets};
pub use crate::module::CompileModuleInfo;

View File

@@ -26,6 +26,8 @@ which = "4.0"
rkyv = "0.7.20"
loupe = "0.1"
enumset = "1.0"
enum-iterator = "0.7.0"
object = { version = "0.28.3", default-features = false, features = ["write"] }
[features]
# Enable the `compiler` feature if you want the engine to compile

View File

@@ -3,9 +3,11 @@
use crate::engine::{DylibEngine, DylibEngineInner};
use crate::serialize::ModuleMetadata;
use crate::trampoline::{emit_trampolines, fill_trampoline_table, WASMER_TRAMPOLINES_SYMBOL};
use enumset::EnumSet;
use libloading::{Library, Symbol as LibrarySymbol};
use loupe::MemoryUsage;
use object::{write::CoffExportStyle, BinaryFormat};
use std::error::Error;
use std::fs::{self, File};
use std::io::{Read, Write};
@@ -232,8 +234,31 @@ impl DylibArtifact {
&metadata_binary,
);
let mut extra_filepath = None;
let filepath = match maybe_obj_bytes {
Some(obj_bytes) => {
extra_filepath = {
// Create a separate object file with the trampolines.
let mut obj =
get_object_for_target(&target_triple).map_err(to_compile_error)?;
emit_trampolines(&mut obj, engine.target());
if obj.format() == BinaryFormat::Coff {
obj.add_coff_exports(CoffExportStyle::Gnu);
}
let file = tempfile::Builder::new()
.prefix("wasmer_dylib_")
.suffix(".o")
.tempfile()
.map_err(to_compile_error)?;
// Re-open it.
let (mut file, filepath) = file.keep().map_err(to_compile_error)?;
let obj_bytes = obj.write().map_err(to_compile_error)?;
file.write_all(&obj_bytes).map_err(to_compile_error)?;
Some(filepath)
};
// Write the object file generated by the compiler.
let obj_bytes = obj_bytes?;
let file = tempfile::Builder::new()
.prefix("wasmer_dylib_")
@@ -254,6 +279,7 @@ impl DylibArtifact {
function_body_inputs,
)?;
let mut obj = get_object_for_target(&target_triple).map_err(to_compile_error)?;
emit_trampolines(&mut obj, engine.target());
emit_data(
&mut obj,
WASMER_METADATA_SYMBOL,
@@ -263,6 +289,9 @@ impl DylibArtifact {
.map_err(to_compile_error)?;
emit_compilation(&mut obj, compilation, &symbol_registry, &target_triple)
.map_err(to_compile_error)?;
if obj.format() == BinaryFormat::Coff {
obj.add_coff_exports(CoffExportStyle::Gnu);
}
let file = tempfile::Builder::new()
.prefix("wasmer_dylib_")
.suffix(".o")
@@ -371,6 +400,7 @@ impl DylibArtifact {
let linker = engine_inner.linker().executable();
let output = Command::new(linker)
.arg(&filepath)
.args(&extra_filepath)
.arg("-o")
.arg(&output_filepath)
.args(&target_args)
@@ -385,6 +415,11 @@ impl DylibArtifact {
if fs::metadata(&filepath).is_ok() {
fs::remove_file(filepath).map_err(to_compile_error)?;
}
if let Some(filepath) = extra_filepath {
if fs::metadata(&filepath).is_ok() {
fs::remove_file(filepath).map_err(to_compile_error)?;
}
}
let output = output?;
@@ -453,6 +488,13 @@ impl DylibArtifact {
dylib_path: PathBuf,
lib: Library,
) -> Result<Self, CompileError> {
unsafe {
let trampolines_symbol: LibrarySymbol<usize> = lib
.get(WASMER_TRAMPOLINES_SYMBOL)
.expect("missing WASMER_TRAMPOLINES symbol");
fill_trampoline_table(trampolines_symbol.into_raw().into_raw() as *mut usize);
}
let mut finished_functions: PrimaryMap<LocalFunctionIndex, FunctionBodyPtr> =
PrimaryMap::new();
for (function_local_index, _function_len) in metadata.function_body_lengths.iter() {
@@ -621,7 +663,7 @@ impl DylibArtifact {
DeserializeError::CorruptedBinary(format!("Library loading failed: {}", e))
})?;
let shared_path: PathBuf = PathBuf::from(path);
let symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> =
let metadata_symbol: LibrarySymbol<*mut [u8; MetadataHeader::LEN]> =
lib.get(WASMER_METADATA_SYMBOL).map_err(|e| {
DeserializeError::CorruptedBinary(format!(
"The provided object file doesn't seem to be generated by Wasmer: {}",
@@ -630,7 +672,7 @@ impl DylibArtifact {
})?;
use std::slice;
let metadata = &**symbol;
let metadata = &**metadata_symbol;
let metadata_len = MetadataHeader::parse(metadata)?;
let metadata_slice: &'static [u8] =
slice::from_raw_parts(metadata.as_ptr().add(MetadataHeader::LEN), metadata_len);

View File

@@ -27,6 +27,7 @@ mod artifact;
mod builder;
mod engine;
mod serialize;
mod trampoline;
pub use crate::artifact::DylibArtifact;
pub use crate::builder::Dylib;

View File

@@ -0,0 +1,166 @@
//! Trampolines for libcalls.
//!
//! This is needed because the target of libcall relocations are not reachable
//! through normal branch instructions.
//!
//! There is an additional complexity for dynamic libraries: we can't just
//! import the symbol from the host executable because executables don't export
//! dynamic symbols (it's possible but requires special linker options).
//!
//! Instead, we export a table of function pointers in the data section which is
//! manually filled in by the runtime after the dylib is loaded.
use enum_iterator::IntoEnumIterator;
use object::{
elf, macho,
write::{Object, Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection},
BinaryFormat, RelocationEncoding, RelocationKind, SymbolFlags, SymbolKind, SymbolScope,
};
use wasmer_compiler::{Architecture, Target};
use wasmer_vm::libcalls::LibCall;
/// Symbol exported from the dynamic library which points to the trampoline table.
///
/// The table itself is emitted by `emit_trampolines` and populated at load
/// time by `fill_trampoline_table`.
pub const WASMER_TRAMPOLINES_SYMBOL: &[u8] = b"WASMER_TRAMPOLINES";
// AArch64 trampoline template: loads a function pointer from the trampoline
// table and branches to it. The immediate fields of the ADRP and LDR
// instructions are left as zero here; `emit_trampoline` attaches one
// relocation to each of them (at byte offsets 0 and 4 of the template).
//
// SystemV says that both x16 and x17 are available as intra-procedural scratch
// registers but Apple's ABI restricts us to use x17.
// ADRP x17, #... 11 00 00 90
// LDR x17, [x17, #...] 31 02 40 f9
// BR x17 20 02 1f d6
const AARCH64_TRAMPOLINE: [u8; 12] = [
0x11, 0x00, 0x00, 0x90, 0x31, 0x02, 0x40, 0xf9, 0x20, 0x02, 0x1f, 0xd6,
];
// x86-64 trampoline template: an indirect jump through a RIP-relative memory
// operand. The 32-bit displacement (the last 4 bytes) is left as zero here;
// `emit_trampoline` attaches a relocation to it at byte offset 2.
//
// JMP [RIP + ...] FF 25 00 00 00 00
const X86_64_TRAMPOLINE: [u8; 6] = [0xff, 0x25, 0x00, 0x00, 0x00, 0x00];
/// Emits the trampoline for one libcall into the `text` section of `obj`.
///
/// A text symbol named after the libcall (`libcall.to_function_name()`) is
/// defined at the start of the trampoline code. The code is copied from the
/// architecture-specific template and relocated against the libcall's entry
/// in the trampoline table, `trampoline_table_symbols[libcall as usize]`.
///
/// # Panics
///
/// Panics if the target architecture is neither AArch64 nor x86-64, if the
/// object format is neither ELF nor Mach-O on AArch64, or if adding a
/// relocation to the object fails.
fn emit_trampoline(
    obj: &mut Object,
    text: SectionId,
    trampoline_table_symbols: &[SymbolId],
    libcall: LibCall,
    target: &Target,
) {
    let symbol_name = libcall.to_function_name().as_bytes().to_vec();
    let trampoline_symbol = obj.add_symbol(Symbol {
        name: symbol_name,
        value: 0,
        size: 0,
        kind: SymbolKind::Text,
        scope: SymbolScope::Linkage,
        weak: false,
        section: SymbolSection::Section(text),
        flags: SymbolFlags::None,
    });

    match target.triple().architecture {
        Architecture::X86_64 => {
            let start = obj.add_symbol_data(trampoline_symbol, text, &X86_64_TRAMPOLINE, 1);
            let table_entry = trampoline_table_symbols[libcall as usize];
            let displacement = Relocation {
                // Skip the two opcode bytes of the JMP.
                offset: start + 2,
                size: 32,
                kind: RelocationKind::Relative,
                encoding: RelocationEncoding::Generic,
                symbol: table_entry,
                // -4 because RIP-relative addressing starts from the end of
                // the instruction.
                addend: -4,
            };
            obj.add_relocation(text, displacement).unwrap();
        }
        Architecture::Aarch64(_) => {
            // One relocation kind for the ADRP (page address) and one for the
            // LDR (offset within the page); both depend on the object format.
            let (page_kind, page_offset_kind) = match obj.format() {
                BinaryFormat::Elf => (
                    RelocationKind::Elf(elf::R_AARCH64_ADR_PREL_PG_HI21),
                    RelocationKind::Elf(elf::R_AARCH64_LDST64_ABS_LO12_NC),
                ),
                BinaryFormat::MachO => (
                    RelocationKind::MachO {
                        value: macho::ARM64_RELOC_PAGE21,
                        relative: true,
                    },
                    RelocationKind::MachO {
                        value: macho::ARM64_RELOC_PAGEOFF12,
                        relative: false,
                    },
                ),
                _ => panic!("Unsupported binary format on AArch64"),
            };
            let start = obj.add_symbol_data(trampoline_symbol, text, &AARCH64_TRAMPOLINE, 4);
            let table_entry = trampoline_table_symbols[libcall as usize];
            // The ADRP is the first instruction, the LDR the second.
            for &(offset, kind) in &[(start, page_kind), (start + 4, page_offset_kind)] {
                obj.add_relocation(
                    text,
                    Relocation {
                        offset,
                        size: 32,
                        kind,
                        encoding: RelocationEncoding::Generic,
                        symbol: table_entry,
                        addend: 0,
                    },
                )
                .unwrap();
            }
        }
        arch => panic!("Unsupported architecture: {}", arch),
    }
}
/// Emits the libcall trampolines and table to the object file.
pub fn emit_trampolines(obj: &mut Object, target: &Target) {
let text = obj.section_id(StandardSection::Text);
let bss = obj.section_id(StandardSection::UninitializedData);
let trampoline_table = obj.add_symbol(Symbol {
name: WASMER_TRAMPOLINES_SYMBOL.to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Dynamic,
weak: false,
section: SymbolSection::Section(bss),
flags: SymbolFlags::None,
});
let table_offset =
obj.add_symbol_bss(trampoline_table, bss, LibCall::VARIANT_COUNT as u64 * 8, 8);
// Create a symbol for each entry in the table. We could avoid this and use
// an addend, but this isn't supported in all object formats.
let mut trampoline_table_symbols = vec![];
for libcall in LibCall::into_enum_iter() {
trampoline_table_symbols.push(obj.add_symbol(Symbol {
name: format!("__WASMER_TRAMPOLINE{}", libcall as usize).into_bytes(),
value: table_offset + libcall as u64 * 8,
size: 0,
kind: SymbolKind::Data,
scope: SymbolScope::Compilation,
weak: false,
section: SymbolSection::Section(bss),
flags: SymbolFlags::None,
}));
}
for libcall in LibCall::into_enum_iter() {
emit_trampoline(obj, text, &trampoline_table_symbols, libcall, target);
}
}
/// Writes the address of every libcall into the trampoline table at `table`.
///
/// The slot at index `libcall as usize` receives `libcall.function_pointer()`.
///
/// # Safety
///
/// `table` must be properly aligned and valid for writing a `usize` at index
/// `libcall as usize` for every `LibCall` variant.
pub unsafe fn fill_trampoline_table(table: *mut usize) {
    LibCall::into_enum_iter().for_each(|libcall| {
        let address = libcall.function_pointer();
        let slot = table.add(libcall as usize);
        *slot = address;
    });
}

View File

@@ -27,6 +27,7 @@ leb128 = "0.2"
rkyv = "0.7.20"
loupe = "0.1"
enumset = "1.0"
enum-iterator = "0.7.0"
[target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3", features = ["winnt", "impl-default"] }

View File

@@ -6,6 +6,7 @@ use crate::link::link_module;
#[cfg(feature = "compiler")]
use crate::serialize::SerializableCompilation;
use crate::serialize::SerializableModule;
use crate::trampoline::{libcall_trampoline_len, make_libcall_trampolines};
use enumset::EnumSet;
use loupe::MemoryUsage;
use std::mem;
@@ -111,6 +112,14 @@ impl UniversalArtifact {
let frame_infos = compilation.get_frame_info();
// Synthesize a custom section to hold the libcall trampolines.
let mut custom_sections = compilation.get_custom_sections();
let mut custom_section_relocations = compilation.get_custom_section_relocations();
let libcall_trampolines_section = make_libcall_trampolines(engine.target());
custom_section_relocations.push(libcall_trampolines_section.relocations.clone());
let libcall_trampolines = custom_sections.push(libcall_trampolines_section);
let libcall_trampoline_len = libcall_trampoline_len(engine.target()) as u32;
let serializable_compilation = SerializableCompilation {
function_bodies: compilation.get_function_bodies(),
function_relocations: compilation.get_relocations(),
@@ -118,10 +127,11 @@ impl UniversalArtifact {
function_frame_info: frame_infos,
function_call_trampolines,
dynamic_function_trampolines,
custom_sections: compilation.get_custom_sections(),
custom_section_relocations: compilation.get_custom_section_relocations(),
custom_sections,
custom_section_relocations,
debug: compilation.get_debug(),
trampolines: compilation.get_trampolines(),
libcall_trampolines,
libcall_trampoline_len,
};
let serializable = SerializableModule {
compilation: serializable_compilation,
@@ -187,7 +197,8 @@ impl UniversalArtifact {
serializable.compilation.function_relocations.clone(),
&custom_sections,
&serializable.compilation.custom_section_relocations,
&serializable.compilation.trampolines,
serializable.compilation.libcall_trampolines,
serializable.compilation.libcall_trampoline_len as usize,
);
// Compute indices into the shared signature table.

View File

@@ -30,6 +30,7 @@ mod code_memory;
mod engine;
mod link;
mod serialize;
mod trampoline;
mod unwind;
pub use crate::artifact::UniversalArtifact;

View File

@@ -1,86 +1,40 @@
//! Linking for Universal-compiled code.
use std::collections::HashMap;
use crate::trampoline::get_libcall_trampoline;
use std::ptr::{read_unaligned, write_unaligned};
use wasmer_compiler::{
JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations,
SectionIndex, TrampolinesSection,
SectionIndex,
};
use wasmer_engine::FunctionExtent;
use wasmer_types::entity::{EntityRef, PrimaryMap};
use wasmer_types::{LocalFunctionIndex, ModuleInfo};
use wasmer_vm::SectionBodyPtr;
/// Add a new trampoline address, given the base adress of the Section. Return the address of the jump
/// The trampoline itself still have to be writen
fn trampolines_add(
map: &mut HashMap<usize, usize>,
trampoline: &TrampolinesSection,
address: usize,
baseaddress: usize,
) -> usize {
if let Some(target) = map.get(&address) {
return *target;
}
let ret = map.len();
if ret == trampoline.slots {
panic!("No more slot in Trampolines");
}
map.insert(address, baseaddress + ret * trampoline.size);
baseaddress + ret * trampoline.size
}
fn use_trampoline(
address: usize,
allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
trampolines: &Option<TrampolinesSection>,
map: &mut HashMap<usize, usize>,
) -> Option<usize> {
match trampolines {
Some(trampolines) => Some(trampolines_add(
map,
trampolines,
address,
*allocated_sections[trampolines.section_index] as usize,
)),
_ => None,
}
}
fn fill_trampolin_map(
allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
trampolines: &Option<TrampolinesSection>,
) -> HashMap<usize, usize> {
let mut map: HashMap<usize, usize> = HashMap::new();
match trampolines {
Some(trampolines) => {
let baseaddress = *allocated_sections[trampolines.section_index] as usize;
for i in 0..trampolines.size {
let jmpslot: usize = unsafe {
read_unaligned((baseaddress + i * trampolines.size + 8) as *mut usize)
};
if jmpslot != 0 {
map.insert(jmpslot, baseaddress + i * trampolines.size);
}
}
}
_ => {}
};
map
}
fn apply_relocation(
body: usize,
r: &Relocation,
allocated_functions: &PrimaryMap<LocalFunctionIndex, FunctionExtent>,
jt_offsets: &PrimaryMap<LocalFunctionIndex, JumpTableOffsets>,
allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
trampolines: &Option<TrampolinesSection>,
trampolines_map: &mut HashMap<usize, usize>,
libcall_trampolines: SectionIndex,
libcall_trampoline_len: usize,
) {
let target_func_address: usize = match r.reloc_target {
RelocationTarget::LocalFunc(index) => *allocated_functions[index].ptr as usize,
RelocationTarget::LibCall(libcall) => libcall.function_pointer(),
RelocationTarget::LibCall(libcall) => {
// Use the direct target of the libcall if the relocation supports
// a full 64-bit address. Otherwise use a trampoline.
if r.kind == RelocationKind::Abs8 || r.kind == RelocationKind::X86PCRel8 {
libcall.function_pointer()
} else {
get_libcall_trampoline(
libcall,
allocated_sections[libcall_trampolines].0 as usize,
libcall_trampoline_len,
)
}
}
RelocationTarget::CustomSection(custom_section) => {
*allocated_sections[custom_section] as usize
}
@@ -115,26 +69,15 @@ fn apply_relocation(
},
RelocationKind::X86PCRelRodata4 => {}
RelocationKind::Arm64Call => unsafe {
let (reloc_address, mut reloc_delta) = r.for_address(body, target_func_address as u64);
let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
if (reloc_delta as i64).abs() >= 0x1000_0000 {
let new_address = match use_trampoline(
target_func_address,
allocated_sections,
trampolines,
trampolines_map,
) {
Some(new_address) => new_address,
_ => panic!(
"Relocation to big for {:?} for {:?} with {:x}, current val {:x}",
r.kind,
r.reloc_target,
reloc_delta,
read_unaligned(reloc_address as *mut u32)
),
};
write_unaligned((new_address + 8) as *mut u64, target_func_address as u64); // write the jump address
let (_, new_delta) = r.for_address(body, new_address as u64);
reloc_delta = new_delta;
panic!(
"Relocation to big for {:?} for {:?} with {:x}, current val {:x}",
r.kind,
r.reloc_target,
reloc_delta,
read_unaligned(reloc_address as *mut u32)
)
}
let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff)
| read_unaligned(reloc_address as *mut u32);
@@ -180,9 +123,9 @@ pub fn link_module(
function_relocations: Relocations,
allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
section_relocations: &PrimaryMap<SectionIndex, Vec<Relocation>>,
trampolines: &Option<TrampolinesSection>,
libcall_trampolines: SectionIndex,
trampoline_len: usize,
) {
let mut trampolines_map = fill_trampolin_map(allocated_sections, trampolines);
for (i, section_relocs) in section_relocations.iter() {
let body = *allocated_sections[i] as usize;
for r in section_relocs {
@@ -192,8 +135,8 @@ pub fn link_module(
allocated_functions,
jt_offsets,
allocated_sections,
trampolines,
&mut trampolines_map,
libcall_trampolines,
trampoline_len,
);
}
}
@@ -206,8 +149,8 @@ pub fn link_module(
allocated_functions,
jt_offsets,
allocated_sections,
trampolines,
&mut trampolines_map,
libcall_trampolines,
trampoline_len,
);
}
}

View File

@@ -6,7 +6,7 @@ use rkyv::{
};
use wasmer_compiler::{
CompileModuleInfo, CompiledFunctionFrameInfo, CustomSection, Dwarf, FunctionBody,
JumpTableOffsets, Relocation, SectionIndex, TrampolinesSection,
JumpTableOffsets, Relocation, SectionIndex,
};
use wasmer_engine::{DeserializeError, SerializeError};
use wasmer_types::entity::PrimaryMap;
@@ -25,8 +25,10 @@ pub struct SerializableCompilation {
pub custom_section_relocations: PrimaryMap<SectionIndex, Vec<Relocation>>,
// The section indices corresponding to the Dwarf debug info
pub debug: Option<Dwarf>,
// the Trampoline for Arm arch
pub trampolines: Option<TrampolinesSection>,
// Custom section containing libcall trampolines.
pub libcall_trampolines: SectionIndex,
// Length of each libcall trampoline.
pub libcall_trampoline_len: u32,
}
/// Serializable struct that is able to serialize from and to

View File

@@ -0,0 +1,88 @@
//! Trampolines for libcalls.
//!
//! This is needed because the target of libcall relocations are not reachable
//! through normal branch instructions.
use enum_iterator::IntoEnumIterator;
use wasmer_compiler::{
Architecture, CustomSection, CustomSectionProtection, Relocation, RelocationKind,
RelocationTarget, SectionBody, Target,
};
use wasmer_vm::libcalls::LibCall;
// AArch64 trampoline template: loads a 64-bit address stored right after the
// two instructions and branches to it. The trailing 8 zero bytes are the
// address slot; `make_trampoline` attaches an `Abs8` relocation targeting the
// libcall to that slot.
//
// SystemV says that both x16 and x17 are available as intra-procedural scratch
// registers but Apple's ABI restricts us to use x17.
// LDR x17, [PC, #8] 51 00 00 58
// BR x17 20 02 1f d6
// JMPADDR 00 00 00 00 00 00 00 00
const AARCH64_TRAMPOLINE: [u8; 16] = [
0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0,
];
// x86-64 trampoline template: an indirect jump through a 64-bit absolute
// address stored inside the trampoline itself.
//
// Layout (16 bytes):
//   offset 0..6   JMP [RIP + 2]
//   offset 6..8   padding
//   offset 8..16  address slot, filled in by the `Abs8` relocation that
//                 `make_trampoline` places at `len - 8`
//
// RIP-relative addressing is measured from the end of the 6-byte instruction,
// so the displacement must be 2 to skip the padding and reach the address slot
// at offset 8. A displacement of 0 would load the jump target from offset 6,
// i.e. from the padding plus the low 6 bytes of the address.
//
// 2 padding bytes are used to preserve alignment.
// JMP [RIP + 2] FF 25 02 00 00 00 [00 00]
// 64-bit ADDR 00 00 00 00 00 00 00 00
const X86_64_TRAMPOLINE: [u8; 16] = [
    0xff, 0x25, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];
/// Appends the trampoline for `libcall` to `code` and records its relocation.
///
/// The architecture-specific template is copied to the end of `code`, and an
/// `Abs8` relocation targeting the libcall is pushed for the final 8 bytes of
/// the template (its address slot).
///
/// # Panics
///
/// Panics if the target architecture is neither AArch64 nor x86-64.
fn make_trampoline(
    target: &Target,
    libcall: LibCall,
    code: &mut Vec<u8>,
    relocations: &mut Vec<Relocation>,
) {
    let template: &[u8] = match target.triple().architecture {
        Architecture::Aarch64(_) => &AARCH64_TRAMPOLINE,
        Architecture::X86_64 => &X86_64_TRAMPOLINE,
        arch => panic!("Unsupported architecture: {}", arch),
    };
    code.extend_from_slice(template);

    // The address slot is the last 8 bytes of the trampoline just appended.
    let address_slot = code.len() as u32 - 8;
    relocations.push(Relocation {
        kind: RelocationKind::Abs8,
        reloc_target: RelocationTarget::LibCall(libcall),
        offset: address_slot,
        addend: 0,
    });
}
/// Returns the size in bytes of one libcall trampoline for `target`.
///
/// # Panics
///
/// Panics if the target architecture is neither AArch64 nor x86-64.
pub fn libcall_trampoline_len(target: &Target) -> usize {
    let architecture = target.triple().architecture;
    match architecture {
        Architecture::X86_64 => X86_64_TRAMPOLINE.len(),
        Architecture::Aarch64(_) => AARCH64_TRAMPOLINE.len(),
        unsupported => panic!("Unsupported architecture: {}", unsupported),
    }
}
/// Builds the custom section that holds one trampoline per libcall.
///
/// Trampolines are laid out back to back in `LibCall` iteration order. The
/// section is marked read-execute and carries the relocations that fill in
/// each trampoline's address slot.
pub fn make_libcall_trampolines(target: &Target) -> CustomSection {
    let mut trampoline_code = Vec::new();
    let mut trampoline_relocations = Vec::new();
    LibCall::into_enum_iter().for_each(|libcall| {
        make_trampoline(
            target,
            libcall,
            &mut trampoline_code,
            &mut trampoline_relocations,
        )
    });

    CustomSection {
        protection: CustomSectionProtection::ReadExecute,
        bytes: SectionBody::new_with_vec(trampoline_code),
        relocations: trampoline_relocations,
    }
}
/// Computes the address of the trampoline for `libcall`.
///
/// `libcall_trampolines` is the base address of the libcall trampolines
/// section and `libcall_trampoline_len` the size of one trampoline; the
/// trampoline for a libcall sits at index `libcall as usize`.
pub fn get_libcall_trampoline(
    libcall: LibCall,
    libcall_trampolines: usize,
    libcall_trampoline_len: usize,
) -> usize {
    let index = libcall as usize;
    let byte_offset = index * libcall_trampoline_len;
    libcall_trampolines + byte_offset
}

View File

@@ -14,7 +14,7 @@ edition = "2018"
wasmer-types = { path = "../types", version = "=2.1.1" }
wasmer-compiler = { path = "../compiler", version = "=2.1.1", default-features = false, features = [
"std",
"translator"
"translator",
] }
object = { version = "0.27", default-features = false, features = ["write"] }
object = { version = "0.28.3", default-features = false, features = ["write"] }
thiserror = "1.0"

View File

@@ -3,7 +3,8 @@ use object::write::{
Object, Relocation, StandardSection, StandardSegment, Symbol as ObjSymbol, SymbolSection,
};
use object::{
elf, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope,
elf, macho, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind,
SymbolScope,
};
use wasmer_compiler::{
Architecture, BinaryFormat, Compilation, CustomSectionProtection, Endianness,
@@ -292,7 +293,14 @@ pub fn emit_compilation(
// Reloc::X86PCRelRodata4 => {
// }
Reloc::Arm64Call => (
RelocationKind::Elf(elf::R_AARCH64_CALL26),
match obj.format() {
object::BinaryFormat::Elf => RelocationKind::Elf(elf::R_AARCH64_CALL26),
object::BinaryFormat::MachO => RelocationKind::MachO {
value: macho::ARM64_RELOC_BRANCH26,
relative: true,
},
fmt => panic!("unsupported binary format {:?}", fmt),
},
RelocationEncoding::Generic,
32,
),

View File

@@ -23,6 +23,7 @@ backtrace = "0.3"
serde = { version = "1.0", features = ["derive", "rc"] }
rkyv = { version = "0.7.20", optional = true }
loupe = { version = "0.1", features = ["enable-indexmap"] }
enum-iterator = "0.7.0"
[target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3", features = ["winbase", "memoryapi", "errhandlingapi"] }

View File

@@ -43,6 +43,7 @@ use crate::table::{RawTableElement, TableElement};
use crate::trap::{raise_lib_trap, Trap, TrapCode};
use crate::vmcontext::VMContext;
use crate::VMExternRef;
use enum_iterator::IntoEnumIterator;
use loupe::MemoryUsage;
#[cfg(feature = "enable-rkyv")]
use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
@@ -688,7 +689,9 @@ pub static wasmer_vm_probestack: unsafe extern "C" fn() = PROBESTACK;
feature = "enable-rkyv",
derive(RkyvSerialize, RkyvDeserialize, Archive)
)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage)]
#[derive(
Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, MemoryUsage, IntoEnumIterator,
)]
pub enum LibCall {
/// ceil.f32
CeilF32,

View File

@@ -33,14 +33,9 @@ llvm traps::start_trap_pretty
dylib traps::start_trap_pretty
aarch64 traps::start_trap_pretty
cranelift multi_value_imports::dylib # Needs investigation
singlepass multi_value_imports::dylib # Singlepass doesn't support multivalue
singlepass multi_value_imports::dynamic # Singlepass doesn't support multivalue
# LLVM doesn't fully work in macOS M1
llvm+universal+macos+aarch64 * # We are using the object crate, it was not fully supporting aarch64 relocations emitted by LLVM. Needs reassesment
llvm+dylib+macos+aarch64 * # Tests seem to be randomly failing
# TODO: We need to fix this in ARM. The issue is caused by libunwind overflowing
# the stack while creating the stacktrace.
# https://github.com/rust-lang/backtrace-rs/issues/356