feat(compiler) Make CallingConvention a parameter in SinglePass

This commit is contained in:
ptitSeb
2021-09-16 11:03:39 +02:00
parent 7b27a81221
commit 2e5dae0019
5 changed files with 306 additions and 271 deletions

View File

@ -8,9 +8,9 @@ use wasmer_compiler::wasmparser::{
MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType,
};
use wasmer_compiler::{
CompiledFunction, CompiledFunctionFrameInfo, CustomSection, CustomSectionProtection,
FunctionBody, FunctionBodyData, InstructionAddressMap, Relocation, RelocationKind,
RelocationTarget, SectionBody, SectionIndex, SourceLoc, TrapInformation,
CallingConvention, CompiledFunction, CompiledFunctionFrameInfo, CustomSection,
CustomSectionProtection, FunctionBody, FunctionBodyData, InstructionAddressMap, Relocation,
RelocationKind, RelocationTarget, SectionBody, SectionIndex, SourceLoc, TrapInformation,
};
use wasmer_types::{
entity::{EntityRef, PrimaryMap, SecondaryMap},
@ -1010,17 +1010,18 @@ impl<'a> FuncGen<'a> {
self.machine.state.stack_values.push(content);
}
}
let calling_convention = self.config.calling_convention;
#[cfg(target_os = "windows")]
let stack_padding: usize = 32;
#[cfg(not(target_os = "windows"))]
let stack_padding: usize = 0;
let stack_padding: usize = match calling_convention {
CallingConvention::WindowsFastcall => 32,
_ => 0,
};
let mut stack_offset: usize = 0;
// Calculate stack offset.
for (i, _param) in params.iter().enumerate() {
if let Location::Memory(_, _) = Machine::get_param_location(1 + i) {
if let Location::Memory(_, _) = Machine::get_param_location(1 + i, calling_convention) {
stack_offset += 8;
}
}
@ -1043,10 +1044,9 @@ impl<'a> FuncGen<'a> {
}
let mut call_movs: Vec<(Location, GPR)> = vec![];
// Prepare register & stack parameters.
for (i, param) in params.iter().enumerate().rev() {
let loc = Machine::get_param_location(1 + i);
let loc = Machine::get_param_location(1 + i, calling_convention);
match loc {
Location::GPR(x) => {
call_movs.push((*param, x));
@ -1144,7 +1144,7 @@ impl<'a> FuncGen<'a> {
self.assembler.emit_mov(
Size::S64,
Location::GPR(Machine::get_vmctx_reg()),
Machine::get_param_location(0),
Machine::get_param_location(0, calling_convention),
); // vmctx
if (self.machine.state.stack_values.len() % 2) != 1 {
@ -1756,6 +1756,7 @@ impl<'a> FuncGen<'a> {
&mut self.assembler,
self.local_types.len(),
self.signature.params().len(),
self.config.calling_convention,
);
// Mark vmctx register. The actual loading of the vmctx value is handled by init_local.
@ -5403,6 +5404,7 @@ impl<'a> FuncGen<'a> {
self.vmoffsets.vmcaller_checked_anyfunc_func_ptr() as usize;
let vmcaller_checked_anyfunc_vmctx =
self.vmoffsets.vmcaller_checked_anyfunc_vmctx() as usize;
let calling_convention = self.config.calling_convention;
self.emit_call_native(
|this| {
@ -5423,7 +5425,7 @@ impl<'a> FuncGen<'a> {
this.assembler.emit_mov(
Size::S64,
Location::Memory(GPR::RAX, vmcaller_checked_anyfunc_vmctx as i32),
Machine::get_param_location(0),
Machine::get_param_location(0, calling_convention),
);
this.assembler.emit_call_location(Location::Memory(
@ -8802,20 +8804,23 @@ fn sort_call_movs(movs: &mut [(Location, GPR)]) {
}
// Standard entry trampoline.
pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
pub fn gen_std_trampoline(
sig: &FunctionType,
calling_convention: CallingConvention,
) -> FunctionBody {
let mut a = Assembler::new().unwrap();
// Calculate stack offset.
let mut stack_offset: u32 = 0;
for (i, _param) in sig.params().iter().enumerate() {
if let Location::Memory(_, _) = Machine::get_param_location(1 + i) {
if let Location::Memory(_, _) = Machine::get_param_location(1 + i, calling_convention) {
stack_offset += 8;
}
}
#[cfg(target_os = "windows")]
let stack_padding: u32 = 32;
#[cfg(not(target_os = "windows"))]
let stack_padding: u32 = 0;
let stack_padding: u32 = match calling_convention {
CallingConvention::WindowsFastcall => 32,
_ => 0,
};
// Align to 16 bytes. We push two 8-byte registers below, so here we need to ensure stack_offset % 16 == 8.
if stack_offset % 16 != 8 {
@ -8836,12 +8841,12 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
// Arguments
a.emit_mov(
Size::S64,
Machine::get_param_location(1),
Machine::get_param_location(1, calling_convention),
Location::GPR(GPR::R15),
); // func_ptr
a.emit_mov(
Size::S64,
Machine::get_param_location(2),
Machine::get_param_location(2, calling_convention),
Location::GPR(GPR::R14),
); // args_rets
@ -8851,7 +8856,7 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
let mut n_stack_args: usize = 0;
for (i, _param) in sig.params().iter().enumerate() {
let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
let dst_loc = Machine::get_param_location(1 + i);
let dst_loc = Machine::get_param_location(1 + i, calling_convention);
match dst_loc {
Location::GPR(_) => {
@ -8911,15 +8916,16 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
pub fn gen_std_dynamic_import_trampoline(
vmoffsets: &VMOffsets,
sig: &FunctionType,
calling_convention: CallingConvention,
) -> FunctionBody {
let mut a = Assembler::new().unwrap();
// Allocate argument array.
let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding
#[cfg(target_os = "windows")]
let stack_padding: usize = 32;
#[cfg(not(target_os = "windows"))]
let stack_padding: usize = 0;
let stack_padding: usize = match calling_convention {
CallingConvention::WindowsFastcall => 32,
_ => 0,
};
a.emit_sub(
Size::S64,
Location::Imm32((stack_offset + stack_padding) as _),
@ -8929,12 +8935,12 @@ pub fn gen_std_dynamic_import_trampoline(
// Copy arguments.
if !sig.params().is_empty() {
let mut argalloc = ArgumentRegisterAllocator::default();
argalloc.next(Type::I64).unwrap(); // skip VMContext
argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
let mut stack_param_count: usize = 0;
for (i, ty) in sig.params().iter().enumerate() {
let source_loc = match argalloc.next(*ty) {
let source_loc = match argalloc.next(*ty, calling_convention) {
Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
Some(X64Register::XMM(xmm)) => Location::XMM(xmm),
None => {
@ -8965,8 +8971,9 @@ pub fn gen_std_dynamic_import_trampoline(
}
}
match calling_convention {
CallingConvention::WindowsFastcall => {
// Load target address.
#[cfg(target_os = "windows")]
a.emit_mov(
Size::S64,
Location::Memory(
@ -8975,7 +8982,15 @@ pub fn gen_std_dynamic_import_trampoline(
),
Location::GPR(GPR::RAX),
);
#[cfg(target_os = "linux")]
// Load values array.
a.emit_lea(
Size::S64,
Location::Memory(GPR::RSP, stack_padding as i32),
Location::GPR(GPR::RDX),
);
}
_ => {
// Load target address.
a.emit_mov(
Size::S64,
Location::Memory(
@ -8984,16 +8999,10 @@ pub fn gen_std_dynamic_import_trampoline(
),
Location::GPR(GPR::RAX),
);
// Load values array.
#[cfg(target_os = "windows")]
a.emit_lea(
Size::S64,
Location::Memory(GPR::RSP, stack_padding as i32),
Location::GPR(GPR::RDX),
);
#[cfg(target_os = "linux")]
a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI));
}
};
// Call target.
a.emit_call_location(Location::GPR(GPR::RAX));
@ -9029,20 +9038,26 @@ pub fn gen_import_call_trampoline(
vmoffsets: &VMOffsets,
index: FunctionIndex,
sig: &FunctionType,
calling_convention: CallingConvention,
) -> CustomSection {
let mut a = Assembler::new().unwrap();
// TODO: ARM entry trampoline is not emitted.
// Singlepass internally treats all arguments as integers, but the standard Windows calling convention requires
// Singlepass internally treats all arguments as integers.
// The standard Windows calling convention requires
// floating point arguments to be passed in XMM registers for the first 4 arguments only.
// That is the only change needed; other arguments are left unchanged.
#[cfg(target_os = "windows")]
// The standard System V calling convention requires
// floating point arguments to be passed in XMM registers.
// Translation is expensive, so only do it if needed.
if sig
.params()
.iter()
.any(|&x| x == Type::F32 || x == Type::F64)
{
match calling_convention {
CallingConvention::WindowsFastcall => {
let mut param_locations: Vec<Location> = vec![];
for i in 0..sig.params().len() {
let loc = match i {
@ -9058,26 +9073,16 @@ pub fn gen_import_call_trampoline(
let mut argalloc = ArgumentRegisterAllocator::default();
for (i, ty) in sig.params().iter().enumerate() {
let prev_loc = param_locations[i];
match argalloc.next(*ty) {
match argalloc.next(*ty, calling_convention) {
Some(X64Register::GPR(_gpr)) => continue,
Some(X64Register::XMM(xmm)) => a.emit_mov(Size::S64, prev_loc, Location::XMM(xmm)),
Some(X64Register::XMM(xmm)) => {
a.emit_mov(Size::S64, prev_loc, Location::XMM(xmm))
}
None => continue,
};
}
}
// Singlepass internally treats all arguments as integers, but the standard System V calling convention requires
// floating point arguments to be passed in XMM registers.
//
// FIXME: This is only a workaround. We should fix singlepass to use the standard CC.
// Translation is expensive, so only do it if needed.
#[cfg(not(target_os = "windows"))]
if sig
.params()
.iter()
.any(|&x| x == Type::F32 || x == Type::F64)
{
_ => {
let mut param_locations: Vec<Location> = vec![];
// Allocate stack space for arguments.
@ -9098,7 +9103,8 @@ pub fn gen_import_call_trampoline(
for i in 0..sig.params().len() {
let loc = match i {
0..=4 => {
static PARAM_REGS: &[GPR] = &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
static PARAM_REGS: &[GPR] =
&[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc);
loc
@ -9110,11 +9116,11 @@ pub fn gen_import_call_trampoline(
// Copy arguments.
let mut argalloc = ArgumentRegisterAllocator::default();
argalloc.next(Type::I64).unwrap(); // skip VMContext
argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
let mut caller_stack_offset: i32 = 0;
for (i, ty) in sig.params().iter().enumerate() {
let prev_loc = param_locations[i];
let target = match argalloc.next(*ty) {
let targ = match argalloc.next(*ty, calling_convention) {
Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
Some(X64Register::XMM(xmm)) => Location::XMM(xmm),
None => {
@ -9133,7 +9139,7 @@ pub fn gen_import_call_trampoline(
continue;
}
};
a.emit_mov(Size::S64, prev_loc, target);
a.emit_mov(Size::S64, prev_loc, targ);
}
// Restore stack pointer.
@ -9145,14 +9151,16 @@ pub fn gen_import_call_trampoline(
);
}
}
}
}
// Emits a tail call trampoline that loads the address of the target import function
// from Ctx and jumps to it.
let offset = vmoffsets.vmctx_vmfunction_import(index);
#[cfg(target_os = "windows")]
{
match calling_convention {
CallingConvention::WindowsFastcall => {
a.emit_mov(
Size::S64,
Location::Memory(GPR::RCX, offset as i32), // function pointer
@ -9164,8 +9172,7 @@ pub fn gen_import_call_trampoline(
Location::GPR(GPR::RCX),
);
}
#[cfg(not(target_os = "windows"))]
{
_ => {
a.emit_mov(
Size::S64,
Location::Memory(GPR::RDI, offset as i32), // function pointer
@ -9177,6 +9184,7 @@ pub fn gen_import_call_trampoline(
Location::GPR(GPR::RDI),
);
}
}
a.emit_host_redirection(GPR::RAX);
let section_body = SectionBody::new_with_vec(a.finalize().unwrap().to_vec());

View File

@ -12,10 +12,10 @@ use loupe::MemoryUsage;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use std::sync::Arc;
use wasmer_compiler::{
Architecture, Compilation, CompileError, CompileModuleInfo, CompiledFunction, Compiler,
CompilerConfig, FunctionBinaryReader, FunctionBody, FunctionBodyData, MiddlewareBinaryReader,
ModuleMiddleware, ModuleMiddlewareChain, ModuleTranslationState, OperatingSystem, SectionIndex,
Target, TrapInformation,
Architecture, CallingConvention, Compilation, CompileError, CompileModuleInfo,
CompiledFunction, Compiler, CompilerConfig, FunctionBinaryReader, FunctionBody,
FunctionBodyData, MiddlewareBinaryReader, ModuleMiddleware, ModuleMiddlewareChain,
ModuleTranslationState, OperatingSystem, SectionIndex, Target, TrapInformation,
};
use wasmer_types::entity::{EntityRef, PrimaryMap};
use wasmer_types::{
@ -68,6 +68,13 @@ impl Compiler for SinglepassCompiler {
if compile_info.features.multi_value {
return Err(CompileError::UnsupportedFeature("multivalue".to_string()));
}
let calling_convention = match target.triple().default_calling_convention() {
Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
//Ok(CallingConvention::AppleAarch64) => AppleAarch64,
_ => panic!("Unsupported Calling convention for Singlepass compiler"),
};
let memory_styles = &compile_info.memory_styles;
let table_styles = &compile_info.table_styles;
let vmoffsets = VMOffsets::new(8, &compile_info.module);
@ -77,7 +84,12 @@ impl Compiler for SinglepassCompiler {
.collect::<Vec<_>>()
.into_par_iter_if_rayon()
.map(|i| {
gen_import_call_trampoline(&vmoffsets, i, &module.signatures[module.functions[i]])
gen_import_call_trampoline(
&vmoffsets,
i,
&module.signatures[module.functions[i]],
calling_convention,
)
})
.collect::<Vec<_>>()
.into_iter()
@ -133,7 +145,7 @@ impl Compiler for SinglepassCompiler {
.values()
.collect::<Vec<_>>()
.into_par_iter_if_rayon()
.map(gen_std_trampoline)
.map(|func_type| gen_std_trampoline(&func_type, calling_convention))
.collect::<Vec<_>>()
.into_iter()
.collect::<PrimaryMap<_, _>>();
@ -142,7 +154,9 @@ impl Compiler for SinglepassCompiler {
.imported_function_types()
.collect::<Vec<_>>()
.into_par_iter_if_rayon()
.map(|func_type| gen_std_dynamic_import_trampoline(&vmoffsets, &func_type))
.map(|func_type| {
gen_std_dynamic_import_trampoline(&vmoffsets, &func_type, calling_convention)
})
.collect::<Vec<_>>()
.into_iter()
.collect::<PrimaryMap<FunctionIndex, FunctionBody>>();

View File

@ -4,7 +4,9 @@
use crate::compiler::SinglepassCompiler;
use loupe::MemoryUsage;
use std::sync::Arc;
use wasmer_compiler::{Compiler, CompilerConfig, CpuFeature, ModuleMiddleware, Target};
use wasmer_compiler::{
CallingConvention, Compiler, CompilerConfig, CpuFeature, ModuleMiddleware, Target,
};
use wasmer_types::Features;
#[derive(Debug, Clone, MemoryUsage)]
@ -13,6 +15,8 @@ pub struct Singlepass {
pub(crate) enable_stack_check: bool,
/// The middleware chain.
pub(crate) middlewares: Vec<Arc<dyn ModuleMiddleware>>,
#[loupe(skip)]
pub(crate) calling_convention: CallingConvention,
}
impl Singlepass {
@ -23,6 +27,12 @@ impl Singlepass {
enable_nan_canonicalization: true,
enable_stack_check: false,
middlewares: vec![],
calling_convention: match Target::default().triple().default_calling_convention() {
Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
//Ok(CallingConvention::AppleAarch64) => AppleAarch64,
_ => panic!("Unsupported Calling convention for Singlepass"),
},
}
}

View File

@ -6,6 +6,7 @@ use smallvec::SmallVec;
use std::cmp;
use std::collections::HashSet;
use wasmer_compiler::wasmparser::Type as WpType;
use wasmer_compiler::{CallingConvention, Target};
const NATIVE_PAGE_SIZE: usize = 4096;
@ -330,6 +331,7 @@ impl Machine {
a: &mut E,
n: usize,
n_params: usize,
calling_convention: CallingConvention,
) -> Vec<Location> {
// Determine whether a local should be allocated on the stack.
fn is_local_on_stack(idx: usize) -> bool {
@ -432,7 +434,7 @@ impl Machine {
// Locals are allocated on the stack from higher address to lower address,
// so we won't skip the stack guard page here.
for i in 0..n_params {
let loc = Self::get_param_location(i + 1);
let loc = Self::get_param_location(i + 1, calling_convention);
match loc {
Location::GPR(_) => {
a.emit_mov(Size::S64, loc, locations[i]);
@ -454,7 +456,7 @@ impl Machine {
// Load vmctx into R15.
a.emit_mov(
Size::S64,
Self::get_param_location(0),
Self::get_param_location(0, calling_convention),
Location::GPR(GPR::R15),
);
@ -521,19 +523,16 @@ impl Machine {
}
}
#[cfg(target_os = "windows")]
pub fn get_param_location(idx: usize) -> Location {
match idx {
pub fn get_param_location(idx: usize, calling_convention: CallingConvention) -> Location {
match calling_convention {
CallingConvention::WindowsFastcall => match idx {
0 => Location::GPR(GPR::RCX),
1 => Location::GPR(GPR::RDX),
2 => Location::GPR(GPR::R8),
3 => Location::GPR(GPR::R9),
_ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32),
}
}
#[cfg(not(target_os = "windows"))]
pub fn get_param_location(idx: usize) -> Location {
match idx {
},
_ => match idx {
0 => Location::GPR(GPR::RDI),
1 => Location::GPR(GPR::RSI),
2 => Location::GPR(GPR::RDX),
@ -541,6 +540,7 @@ impl Machine {
4 => Location::GPR(GPR::R8),
5 => Location::GPR(GPR::R9),
_ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
},
}
}
}

View File

@ -2,6 +2,7 @@
use crate::common_decl::{MachineState, MachineValue, RegisterIndex};
use std::collections::BTreeMap;
use wasmer_compiler::{CallingConvention, Target};
use wasmer_types::Type;
/// General-purpose registers.
@ -170,8 +171,9 @@ pub struct ArgumentRegisterAllocator {
impl ArgumentRegisterAllocator {
/// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type.
#[cfg(target_os = "windows")]
pub fn next(&mut self, ty: Type) -> Option<X64Register> {
pub fn next(&mut self, ty: Type, calling_convention: CallingConvention) -> Option<X64Register> {
match calling_convention {
CallingConvention::WindowsFastcall => {
static GPR_SEQ: &'static [GPR] = &[GPR::RCX, GPR::RDX, GPR::R8, GPR::R9];
static XMM_SEQ: &'static [XMM] = &[XMM::XMM0, XMM::XMM1, XMM::XMM2, XMM::XMM3];
let idx = self.n_gprs + self.n_xmms;
@ -200,8 +202,7 @@ impl ArgumentRegisterAllocator {
),
}
}
#[cfg(not(target_os = "windows"))]
pub fn next(&mut self, ty: Type) -> Option<X64Register> {
_ => {
static GPR_SEQ: &'static [GPR] =
&[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
static XMM_SEQ: &'static [XMM] = &[
@ -239,6 +240,8 @@ impl ArgumentRegisterAllocator {
),
}
}
}
}
}
/// Create a new `MachineState` with default values.