feat(compiler) Make CallingConvention a parameter in SinglePass

ptitSeb
2021-09-16 11:03:39 +02:00
parent 7b27a81221
commit 2e5dae0019
5 changed files with 306 additions and 271 deletions

View File

@@ -8,9 +8,9 @@ use wasmer_compiler::wasmparser::{
     MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType,
 };
 use wasmer_compiler::{
-    CompiledFunction, CompiledFunctionFrameInfo, CustomSection, CustomSectionProtection,
-    FunctionBody, FunctionBodyData, InstructionAddressMap, Relocation, RelocationKind,
-    RelocationTarget, SectionBody, SectionIndex, SourceLoc, TrapInformation,
+    CallingConvention, CompiledFunction, CompiledFunctionFrameInfo, CustomSection,
+    CustomSectionProtection, FunctionBody, FunctionBodyData, InstructionAddressMap, Relocation,
+    RelocationKind, RelocationTarget, SectionBody, SectionIndex, SourceLoc, TrapInformation,
 };
 use wasmer_types::{
     entity::{EntityRef, PrimaryMap, SecondaryMap},
@@ -1010,17 +1010,18 @@ impl<'a> FuncGen<'a> {
                 self.machine.state.stack_values.push(content);
             }
         }
+        let calling_convention = self.config.calling_convention;

-        #[cfg(target_os = "windows")]
-        let stack_padding: usize = 32;
-        #[cfg(not(target_os = "windows"))]
-        let stack_padding: usize = 0;
+        let stack_padding: usize = match calling_convention {
+            CallingConvention::WindowsFastcall => 32,
+            _ => 0,
+        };

         let mut stack_offset: usize = 0;

         // Calculate stack offset.
         for (i, _param) in params.iter().enumerate() {
-            if let Location::Memory(_, _) = Machine::get_param_location(1 + i) {
+            if let Location::Memory(_, _) = Machine::get_param_location(1 + i, calling_convention) {
                 stack_offset += 8;
             }
         }
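
The 32 bytes chosen for CallingConvention::WindowsFastcall above are the Win64 "shadow space" a caller must reserve for the four register arguments; System V reserves nothing. A minimal stand-alone sketch of that selection, using a local stand-in enum rather than the wasmer_compiler type:

    // Sketch only: `Cc` is a stand-in, not wasmer_compiler::CallingConvention.
    #[derive(Clone, Copy)]
    enum Cc {
        WindowsFastcall,
        SystemV,
    }

    // Win64 callers reserve 32 bytes of shadow space before a call; System V needs none.
    fn shadow_space(cc: Cc) -> usize {
        match cc {
            Cc::WindowsFastcall => 32,
            Cc::SystemV => 0,
        }
    }

    fn main() {
        assert_eq!(shadow_space(Cc::WindowsFastcall), 32);
        assert_eq!(shadow_space(Cc::SystemV), 0);
    }
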
@@ -1043,10 +1044,9 @@ impl<'a> FuncGen<'a> {
         }
         let mut call_movs: Vec<(Location, GPR)> = vec![];
         // Prepare register & stack parameters.
         for (i, param) in params.iter().enumerate().rev() {
-            let loc = Machine::get_param_location(1 + i);
+            let loc = Machine::get_param_location(1 + i, calling_convention);
             match loc {
                 Location::GPR(x) => {
                     call_movs.push((*param, x));
@@ -1144,7 +1144,7 @@ impl<'a> FuncGen<'a> {
         self.assembler.emit_mov(
             Size::S64,
             Location::GPR(Machine::get_vmctx_reg()),
-            Machine::get_param_location(0),
+            Machine::get_param_location(0, calling_convention),
         ); // vmctx

         if (self.machine.state.stack_values.len() % 2) != 1 {
@@ -1756,6 +1756,7 @@ impl<'a> FuncGen<'a> {
             &mut self.assembler,
             self.local_types.len(),
             self.signature.params().len(),
+            self.config.calling_convention,
         );

         // Mark vmctx register. The actual loading of the vmctx value is handled by init_local.
@@ -5403,6 +5404,7 @@ impl<'a> FuncGen<'a> {
                     self.vmoffsets.vmcaller_checked_anyfunc_func_ptr() as usize;
                 let vmcaller_checked_anyfunc_vmctx =
                     self.vmoffsets.vmcaller_checked_anyfunc_vmctx() as usize;
+                let calling_convention = self.config.calling_convention;

                 self.emit_call_native(
                     |this| {
@@ -5423,7 +5425,7 @@ impl<'a> FuncGen<'a> {
                         this.assembler.emit_mov(
                             Size::S64,
                             Location::Memory(GPR::RAX, vmcaller_checked_anyfunc_vmctx as i32),
-                            Machine::get_param_location(0),
+                            Machine::get_param_location(0, calling_convention),
                         );

                         this.assembler.emit_call_location(Location::Memory(
@@ -8802,20 +8804,23 @@ fn sort_call_movs(movs: &mut [(Location, GPR)]) {
 }

 // Standard entry trampoline.
-pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
+pub fn gen_std_trampoline(
+    sig: &FunctionType,
+    calling_convention: CallingConvention,
+) -> FunctionBody {
     let mut a = Assembler::new().unwrap();

     // Calculate stack offset.
     let mut stack_offset: u32 = 0;
     for (i, _param) in sig.params().iter().enumerate() {
-        if let Location::Memory(_, _) = Machine::get_param_location(1 + i) {
+        if let Location::Memory(_, _) = Machine::get_param_location(1 + i, calling_convention) {
             stack_offset += 8;
         }
     }
-    #[cfg(target_os = "windows")]
-    let stack_padding: u32 = 32;
-    #[cfg(not(target_os = "windows"))]
-    let stack_padding: u32 = 0;
+    let stack_padding: u32 = match calling_convention {
+        CallingConvention::WindowsFastcall => 32,
+        _ => 0,
+    };

     // Align to 16 bytes. We push two 8-byte registers below, so here we need to ensure stack_offset % 16 == 8.
     if stack_offset % 16 != 8 {
@@ -8836,12 +8841,12 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
     // Arguments
     a.emit_mov(
         Size::S64,
-        Machine::get_param_location(1),
+        Machine::get_param_location(1, calling_convention),
         Location::GPR(GPR::R15),
     ); // func_ptr
     a.emit_mov(
         Size::S64,
-        Machine::get_param_location(2),
+        Machine::get_param_location(2, calling_convention),
         Location::GPR(GPR::R14),
     ); // args_rets
@@ -8851,7 +8856,7 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
     let mut n_stack_args: usize = 0;
     for (i, _param) in sig.params().iter().enumerate() {
         let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
-        let dst_loc = Machine::get_param_location(1 + i);
+        let dst_loc = Machine::get_param_location(1 + i, calling_convention);

         match dst_loc {
             Location::GPR(_) => {
@@ -8911,15 +8916,16 @@ pub fn gen_std_trampoline(sig: &FunctionType) -> FunctionBody {
 pub fn gen_std_dynamic_import_trampoline(
     vmoffsets: &VMOffsets,
     sig: &FunctionType,
+    calling_convention: CallingConvention,
 ) -> FunctionBody {
     let mut a = Assembler::new().unwrap();

     // Allocate argument array.
     let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding
-    #[cfg(target_os = "windows")]
-    let stack_padding: usize = 32;
-    #[cfg(not(target_os = "windows"))]
-    let stack_padding: usize = 0;
+    let stack_padding: usize = match calling_convention {
+        CallingConvention::WindowsFastcall => 32,
+        _ => 0,
+    };
     a.emit_sub(
         Size::S64,
         Location::Imm32((stack_offset + stack_padding) as _),
@@ -8929,12 +8935,12 @@ pub fn gen_std_dynamic_import_trampoline(
     // Copy arguments.
     if !sig.params().is_empty() {
         let mut argalloc = ArgumentRegisterAllocator::default();
-        argalloc.next(Type::I64).unwrap(); // skip VMContext
+        argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext

         let mut stack_param_count: usize = 0;
         for (i, ty) in sig.params().iter().enumerate() {
-            let source_loc = match argalloc.next(*ty) {
+            let source_loc = match argalloc.next(*ty, calling_convention) {
                 Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
                 Some(X64Register::XMM(xmm)) => Location::XMM(xmm),
                 None => {
@@ -8965,35 +8971,38 @@ pub fn gen_std_dynamic_import_trampoline(
         }
     }

-    // Load target address.
-    #[cfg(target_os = "windows")]
-    a.emit_mov(
-        Size::S64,
-        Location::Memory(
-            GPR::RCX,
-            vmoffsets.vmdynamicfunction_import_context_address() as i32,
-        ),
-        Location::GPR(GPR::RAX),
-    );
-    #[cfg(target_os = "linux")]
-    a.emit_mov(
-        Size::S64,
-        Location::Memory(
-            GPR::RDI,
-            vmoffsets.vmdynamicfunction_import_context_address() as i32,
-        ),
-        Location::GPR(GPR::RAX),
-    );
-
-    // Load values array.
-    #[cfg(target_os = "windows")]
-    a.emit_lea(
-        Size::S64,
-        Location::Memory(GPR::RSP, stack_padding as i32),
-        Location::GPR(GPR::RDX),
-    );
-    #[cfg(target_os = "linux")]
-    a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI));
+    match calling_convention {
+        CallingConvention::WindowsFastcall => {
+            // Load target address.
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(
+                    GPR::RCX,
+                    vmoffsets.vmdynamicfunction_import_context_address() as i32,
+                ),
+                Location::GPR(GPR::RAX),
+            );
+            // Load values array.
+            a.emit_lea(
+                Size::S64,
+                Location::Memory(GPR::RSP, stack_padding as i32),
+                Location::GPR(GPR::RDX),
+            );
+        }
+        _ => {
+            // Load target address.
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(
+                    GPR::RDI,
+                    vmoffsets.vmdynamicfunction_import_context_address() as i32,
+                ),
+                Location::GPR(GPR::RAX),
+            );
+            // Load values array.
+            a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI));
+        }
+    };

     // Call target.
     a.emit_call_location(Location::GPR(GPR::RAX));
@@ -9029,120 +9038,119 @@ pub fn gen_import_call_trampoline(
     vmoffsets: &VMOffsets,
     index: FunctionIndex,
     sig: &FunctionType,
+    calling_convention: CallingConvention,
 ) -> CustomSection {
     let mut a = Assembler::new().unwrap();

     // TODO: ARM entry trampoline is not emitted.

-    // Singlepass internally treats all arguments as integers, but the standard Windows calling convention requires
-    // floating point arguments to be passed in XMM registers for the 4 first arguments only
-    // That's the only change to do, other arguments are not to be changed
-    #[cfg(target_os = "windows")]
+    // Singlepass internally treats all arguments as integers
+    // For the standard Windows calling convention requires
+    // floating point arguments to be passed in XMM registers for the 4 first arguments only
+    // That's the only change to do, other arguments are not to be changed
+    // For the standard System V calling convention requires
+    // floating point arguments to be passed in XMM registers.
+    // Translation is expensive, so only do it if needed.
     if sig
         .params()
         .iter()
         .any(|&x| x == Type::F32 || x == Type::F64)
     {
-        let mut param_locations: Vec<Location> = vec![];
-        for i in 0..sig.params().len() {
-            let loc = match i {
-                0..=2 => {
-                    static PARAM_REGS: &[GPR] = &[GPR::RDX, GPR::R8, GPR::R9];
-                    Location::GPR(PARAM_REGS[i])
-                }
-                _ => Location::Memory(GPR::RSP, 32 + 8 + ((i - 3) * 8) as i32), // will not be used anyway
-            };
-            param_locations.push(loc);
-        }
-        // Copy Float arguments to XMM from GPR.
-        let mut argalloc = ArgumentRegisterAllocator::default();
-        for (i, ty) in sig.params().iter().enumerate() {
-            let prev_loc = param_locations[i];
-            match argalloc.next(*ty) {
-                Some(X64Register::GPR(_gpr)) => continue,
-                Some(X64Register::XMM(xmm)) => a.emit_mov(Size::S64, prev_loc, Location::XMM(xmm)),
-                None => continue,
-            };
-        }
-    }
-
-    // Singlepass internally treats all arguments as integers, but the standard System V calling convention requires
-    // floating point arguments to be passed in XMM registers.
-    //
-    // FIXME: This is only a workaround. We should fix singlepass to use the standard CC.
-    // Translation is expensive, so only do it if needed.
-    #[cfg(not(target_os = "windows"))]
-    if sig
-        .params()
-        .iter()
-        .any(|&x| x == Type::F32 || x == Type::F64)
-    {
-        let mut param_locations: Vec<Location> = vec![];
-
-        // Allocate stack space for arguments.
-        let stack_offset: i32 = if sig.params().len() > 5 {
-            5 * 8
-        } else {
-            (sig.params().len() as i32) * 8
-        };
-        if stack_offset > 0 {
-            a.emit_sub(
-                Size::S64,
-                Location::Imm32(stack_offset as u32),
-                Location::GPR(GPR::RSP),
-            );
-        }
-
-        // Store all arguments to the stack to prevent overwrite.
-        for i in 0..sig.params().len() {
-            let loc = match i {
-                0..=4 => {
-                    static PARAM_REGS: &[GPR] = &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
-                    let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
-                    a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc);
-                    loc
-                }
-                _ => Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32),
-            };
-            param_locations.push(loc);
-        }
-
-        // Copy arguments.
-        let mut argalloc = ArgumentRegisterAllocator::default();
-        argalloc.next(Type::I64).unwrap(); // skip VMContext
-        let mut caller_stack_offset: i32 = 0;
-        for (i, ty) in sig.params().iter().enumerate() {
-            let prev_loc = param_locations[i];
-            let target = match argalloc.next(*ty) {
-                Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
-                Some(X64Register::XMM(xmm)) => Location::XMM(xmm),
-                None => {
-                    // No register can be allocated. Put this argument on the stack.
-                    //
-                    // Since here we never use fewer registers than by the original call, on the caller's frame
-                    // we always have enough space to store the rearranged arguments, and the copy "backward" between different
-                    // slots in the caller argument region will always work.
-                    a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX));
-                    a.emit_mov(
-                        Size::S64,
-                        Location::GPR(GPR::RAX),
-                        Location::Memory(GPR::RSP, stack_offset + 8 + caller_stack_offset),
-                    );
-                    caller_stack_offset += 8;
-                    continue;
-                }
-            };
-            a.emit_mov(Size::S64, prev_loc, target);
-        }
-
-        // Restore stack pointer.
-        if stack_offset > 0 {
-            a.emit_add(
-                Size::S64,
-                Location::Imm32(stack_offset as u32),
-                Location::GPR(GPR::RSP),
-            );
-        }
+        match calling_convention {
+            CallingConvention::WindowsFastcall => {
+                let mut param_locations: Vec<Location> = vec![];
+                for i in 0..sig.params().len() {
+                    let loc = match i {
+                        0..=2 => {
+                            static PARAM_REGS: &[GPR] = &[GPR::RDX, GPR::R8, GPR::R9];
+                            Location::GPR(PARAM_REGS[i])
+                        }
+                        _ => Location::Memory(GPR::RSP, 32 + 8 + ((i - 3) * 8) as i32), // will not be used anyway
+                    };
+                    param_locations.push(loc);
+                }
+                // Copy Float arguments to XMM from GPR.
+                let mut argalloc = ArgumentRegisterAllocator::default();
+                for (i, ty) in sig.params().iter().enumerate() {
+                    let prev_loc = param_locations[i];
+                    match argalloc.next(*ty, calling_convention) {
+                        Some(X64Register::GPR(_gpr)) => continue,
+                        Some(X64Register::XMM(xmm)) => {
+                            a.emit_mov(Size::S64, prev_loc, Location::XMM(xmm))
+                        }
+                        None => continue,
+                    };
+                }
+            }
+            _ => {
+                let mut param_locations: Vec<Location> = vec![];
+
+                // Allocate stack space for arguments.
+                let stack_offset: i32 = if sig.params().len() > 5 {
+                    5 * 8
+                } else {
+                    (sig.params().len() as i32) * 8
+                };
+                if stack_offset > 0 {
+                    a.emit_sub(
+                        Size::S64,
+                        Location::Imm32(stack_offset as u32),
+                        Location::GPR(GPR::RSP),
+                    );
+                }
+
+                // Store all arguments to the stack to prevent overwrite.
+                for i in 0..sig.params().len() {
+                    let loc = match i {
+                        0..=4 => {
+                            static PARAM_REGS: &[GPR] =
+                                &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
+                            let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
+                            a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc);
+                            loc
+                        }
+                        _ => Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32),
+                    };
+                    param_locations.push(loc);
+                }
+
+                // Copy arguments.
+                let mut argalloc = ArgumentRegisterAllocator::default();
+                argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
+                let mut caller_stack_offset: i32 = 0;
+                for (i, ty) in sig.params().iter().enumerate() {
+                    let prev_loc = param_locations[i];
+                    let targ = match argalloc.next(*ty, calling_convention) {
+                        Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
+                        Some(X64Register::XMM(xmm)) => Location::XMM(xmm),
+                        None => {
+                            // No register can be allocated. Put this argument on the stack.
+                            //
+                            // Since here we never use fewer registers than by the original call, on the caller's frame
+                            // we always have enough space to store the rearranged arguments, and the copy "backward" between different
+                            // slots in the caller argument region will always work.
+                            a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX));
+                            a.emit_mov(
+                                Size::S64,
+                                Location::GPR(GPR::RAX),
+                                Location::Memory(GPR::RSP, stack_offset + 8 + caller_stack_offset),
+                            );
+                            caller_stack_offset += 8;
+                            continue;
+                        }
+                    };
+                    a.emit_mov(Size::S64, prev_loc, targ);
+                }
+
+                // Restore stack pointer.
+                if stack_offset > 0 {
+                    a.emit_add(
+                        Size::S64,
+                        Location::Imm32(stack_offset as u32),
+                        Location::GPR(GPR::RSP),
+                    );
+                }
+            }
+        }
     }
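
The shuffling above exists because Singlepass passes every Wasm argument as an integer (GPR or stack slot), while a native import compiled for the real ABI expects F32/F64 parameters in XMM registers. The move the trampoline emits is a raw 64-bit bit copy, not a numeric conversion; conceptually it is the same as a round trip through f64::to_bits, as in this small sketch (not part of the diff):

    fn main() {
        let x: f64 = 1.5;
        // What the value looks like while it is "parked" in a general-purpose register.
        let bits: u64 = x.to_bits();
        // What the callee reads back once the trampoline has moved it into an XMM register.
        let back: f64 = f64::from_bits(bits);
        assert_eq!(x, back);
    }
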
@@ -9151,31 +9159,31 @@ pub fn gen_import_call_trampoline(
     let offset = vmoffsets.vmctx_vmfunction_import(index);

-    #[cfg(target_os = "windows")]
-    {
-        a.emit_mov(
-            Size::S64,
-            Location::Memory(GPR::RCX, offset as i32), // function pointer
-            Location::GPR(GPR::RAX),
-        );
-        a.emit_mov(
-            Size::S64,
-            Location::Memory(GPR::RCX, offset as i32 + 8), // target vmctx
-            Location::GPR(GPR::RCX),
-        );
-    }
-    #[cfg(not(target_os = "windows"))]
-    {
-        a.emit_mov(
-            Size::S64,
-            Location::Memory(GPR::RDI, offset as i32), // function pointer
-            Location::GPR(GPR::RAX),
-        );
-        a.emit_mov(
-            Size::S64,
-            Location::Memory(GPR::RDI, offset as i32 + 8), // target vmctx
-            Location::GPR(GPR::RDI),
-        );
-    }
+    match calling_convention {
+        CallingConvention::WindowsFastcall => {
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(GPR::RCX, offset as i32), // function pointer
+                Location::GPR(GPR::RAX),
+            );
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(GPR::RCX, offset as i32 + 8), // target vmctx
+                Location::GPR(GPR::RCX),
+            );
+        }
+        _ => {
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(GPR::RDI, offset as i32), // function pointer
+                Location::GPR(GPR::RAX),
+            );
+            a.emit_mov(
+                Size::S64,
+                Location::Memory(GPR::RDI, offset as i32 + 8), // target vmctx
+                Location::GPR(GPR::RDI),
+            );
+        }
+    }

     a.emit_host_redirection(GPR::RAX);

View File

@@ -12,10 +12,10 @@ use loupe::MemoryUsage;
 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use std::sync::Arc;
 use wasmer_compiler::{
-    Architecture, Compilation, CompileError, CompileModuleInfo, CompiledFunction, Compiler,
-    CompilerConfig, FunctionBinaryReader, FunctionBody, FunctionBodyData, MiddlewareBinaryReader,
-    ModuleMiddleware, ModuleMiddlewareChain, ModuleTranslationState, OperatingSystem, SectionIndex,
-    Target, TrapInformation,
+    Architecture, CallingConvention, Compilation, CompileError, CompileModuleInfo,
+    CompiledFunction, Compiler, CompilerConfig, FunctionBinaryReader, FunctionBody,
+    FunctionBodyData, MiddlewareBinaryReader, ModuleMiddleware, ModuleMiddlewareChain,
+    ModuleTranslationState, OperatingSystem, SectionIndex, Target, TrapInformation,
 };
 use wasmer_types::entity::{EntityRef, PrimaryMap};
 use wasmer_types::{
@@ -68,6 +68,13 @@ impl Compiler for SinglepassCompiler {
         if compile_info.features.multi_value {
             return Err(CompileError::UnsupportedFeature("multivalue".to_string()));
         }
+        let calling_convention = match target.triple().default_calling_convention() {
+            Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
+            Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
+            //Ok(CallingConvention::AppleAarch64) => AppleAarch64,
+            _ => panic!("Unsupported Calling convention for Singlepass compiler"),
+        };

         let memory_styles = &compile_info.memory_styles;
         let table_styles = &compile_info.table_styles;
         let vmoffsets = VMOffsets::new(8, &compile_info.module);
@@ -77,7 +84,12 @@ impl Compiler for SinglepassCompiler {
             .collect::<Vec<_>>()
             .into_par_iter_if_rayon()
             .map(|i| {
-                gen_import_call_trampoline(&vmoffsets, i, &module.signatures[module.functions[i]])
+                gen_import_call_trampoline(
+                    &vmoffsets,
+                    i,
+                    &module.signatures[module.functions[i]],
+                    calling_convention,
+                )
             })
             .collect::<Vec<_>>()
             .into_iter()
@@ -133,7 +145,7 @@ impl Compiler for SinglepassCompiler {
             .values()
             .collect::<Vec<_>>()
             .into_par_iter_if_rayon()
-            .map(gen_std_trampoline)
+            .map(|func_type| gen_std_trampoline(&func_type, calling_convention))
             .collect::<Vec<_>>()
             .into_iter()
             .collect::<PrimaryMap<_, _>>();
@@ -142,7 +154,9 @@ impl Compiler for SinglepassCompiler {
             .imported_function_types()
             .collect::<Vec<_>>()
             .into_par_iter_if_rayon()
-            .map(|func_type| gen_std_dynamic_import_trampoline(&vmoffsets, &func_type))
+            .map(|func_type| {
+                gen_std_dynamic_import_trampoline(&vmoffsets, &func_type, calling_convention)
+            })
             .collect::<Vec<_>>()
             .into_iter()
             .collect::<PrimaryMap<FunctionIndex, FunctionBody>>();
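
With these changes the convention is picked once per compilation from the target triple instead of being baked in with cfg(target_os) at build time. A rough sketch of that selection, assuming the target-lexicon crate (the crate from which wasmer_compiler re-exports Triple and CallingConvention):

    use std::str::FromStr;
    use target_lexicon::{CallingConvention, Triple};

    // Mirror of the match added in the compiler: accept the two conventions Singlepass
    // supports and reject everything else.
    fn pick_cc(triple: &Triple) -> CallingConvention {
        match triple.default_calling_convention() {
            Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
            Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
            _ => panic!("Unsupported calling convention for Singlepass"),
        }
    }

    fn main() {
        let windows = Triple::from_str("x86_64-pc-windows-msvc").unwrap();
        let linux = Triple::from_str("x86_64-unknown-linux-gnu").unwrap();
        assert!(matches!(pick_cc(&windows), CallingConvention::WindowsFastcall));
        assert!(matches!(pick_cc(&linux), CallingConvention::SystemV));
    }
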

View File

@@ -4,7 +4,9 @@
 use crate::compiler::SinglepassCompiler;
 use loupe::MemoryUsage;
 use std::sync::Arc;
-use wasmer_compiler::{Compiler, CompilerConfig, CpuFeature, ModuleMiddleware, Target};
+use wasmer_compiler::{
+    CallingConvention, Compiler, CompilerConfig, CpuFeature, ModuleMiddleware, Target,
+};
 use wasmer_types::Features;

 #[derive(Debug, Clone, MemoryUsage)]
@@ -13,6 +15,8 @@ pub struct Singlepass {
     pub(crate) enable_stack_check: bool,
     /// The middleware chain.
     pub(crate) middlewares: Vec<Arc<dyn ModuleMiddleware>>,
+    #[loupe(skip)]
+    pub(crate) calling_convention: CallingConvention,
 }

 impl Singlepass {
@@ -23,6 +27,12 @@ impl Singlepass {
             enable_nan_canonicalization: true,
             enable_stack_check: false,
             middlewares: vec![],
+            calling_convention: match Target::default().triple().default_calling_convention() {
+                Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall,
+                Ok(CallingConvention::SystemV) => CallingConvention::SystemV,
+                //Ok(CallingConvention::AppleAarch64) => AppleAarch64,
+                _ => panic!("Unsupported Calling convention for Singlepass"),
+            },
         }
     }
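
Note that calling_convention is a pub(crate) field, so embedders never set it directly; it is derived from the default target when the config is constructed. Typical usage therefore stays unchanged, roughly as in this sketch (wasmer 2.x-style API, shown only as an assumption about the surrounding crates):

    use wasmer::Store;
    use wasmer_compiler_singlepass::Singlepass;
    use wasmer_engine_universal::Universal;

    fn main() {
        // The config picks SystemV or WindowsFastcall from the host triple internally.
        let compiler = Singlepass::new();
        let store = Store::new(&Universal::new(compiler).engine());
        let _ = store;
    }
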

View File

@@ -6,6 +6,7 @@ use smallvec::SmallVec;
 use std::cmp;
 use std::collections::HashSet;
 use wasmer_compiler::wasmparser::Type as WpType;
+use wasmer_compiler::{CallingConvention, Target};

 const NATIVE_PAGE_SIZE: usize = 4096;
@@ -330,6 +331,7 @@ impl Machine {
         a: &mut E,
         n: usize,
         n_params: usize,
+        calling_convention: CallingConvention,
     ) -> Vec<Location> {
         // Determine whether a local should be allocated on the stack.
         fn is_local_on_stack(idx: usize) -> bool {
@@ -432,7 +434,7 @@ impl Machine {
         // Locals are allocated on the stack from higher address to lower address,
         // so we won't skip the stack guard page here.
         for i in 0..n_params {
-            let loc = Self::get_param_location(i + 1);
+            let loc = Self::get_param_location(i + 1, calling_convention);
             match loc {
                 Location::GPR(_) => {
                     a.emit_mov(Size::S64, loc, locations[i]);
// Load vmctx into R15. // Load vmctx into R15.
a.emit_mov( a.emit_mov(
Size::S64, Size::S64,
Self::get_param_location(0), Self::get_param_location(0, calling_convention),
Location::GPR(GPR::R15), Location::GPR(GPR::R15),
); );
@@ -521,26 +523,24 @@ impl Machine {
         }
     }

-    #[cfg(target_os = "windows")]
-    pub fn get_param_location(idx: usize) -> Location {
-        match idx {
-            0 => Location::GPR(GPR::RCX),
-            1 => Location::GPR(GPR::RDX),
-            2 => Location::GPR(GPR::R8),
-            3 => Location::GPR(GPR::R9),
-            _ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32),
-        }
-    }
-    #[cfg(not(target_os = "windows"))]
-    pub fn get_param_location(idx: usize) -> Location {
-        match idx {
-            0 => Location::GPR(GPR::RDI),
-            1 => Location::GPR(GPR::RSI),
-            2 => Location::GPR(GPR::RDX),
-            3 => Location::GPR(GPR::RCX),
-            4 => Location::GPR(GPR::R8),
-            5 => Location::GPR(GPR::R9),
-            _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
+    pub fn get_param_location(idx: usize, calling_convention: CallingConvention) -> Location {
+        match calling_convention {
+            CallingConvention::WindowsFastcall => match idx {
+                0 => Location::GPR(GPR::RCX),
+                1 => Location::GPR(GPR::RDX),
+                2 => Location::GPR(GPR::R8),
+                3 => Location::GPR(GPR::R9),
+                _ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32),
+            },
+            _ => match idx {
+                0 => Location::GPR(GPR::RDI),
+                1 => Location::GPR(GPR::RSI),
+                2 => Location::GPR(GPR::RDX),
+                3 => Location::GPR(GPR::RCX),
+                4 => Location::GPR(GPR::R8),
+                5 => Location::GPR(GPR::R9),
+                _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
+            },
         }
     }
 }
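
A quick way to read the mapping above: System V passes the first six integer arguments in RDI, RSI, RDX, RCX, R8, R9 and spills the rest at [RBP + 16 + ...], while Windows fastcall uses RCX, RDX, R8, R9 and then stack slots past the 32-byte shadow space at [RBP + 48 + ...]. A hypothetical check, assuming the crate's Machine, Location, GPR and CallingConvention are in scope:

    #[test]
    fn param_location_mapping() {
        // First argument (the vmctx pointer) lands in RDI under System V, RCX under fastcall.
        assert!(matches!(
            Machine::get_param_location(0, CallingConvention::SystemV),
            Location::GPR(GPR::RDI)
        ));
        assert!(matches!(
            Machine::get_param_location(0, CallingConvention::WindowsFastcall),
            Location::GPR(GPR::RCX)
        ));
        // Fifth argument: still a register under System V, already a stack slot under fastcall
        // (16 bytes for saved RBP plus return address, plus the 32-byte shadow space).
        assert!(matches!(
            Machine::get_param_location(4, CallingConvention::SystemV),
            Location::GPR(GPR::R8)
        ));
        assert!(matches!(
            Machine::get_param_location(4, CallingConvention::WindowsFastcall),
            Location::Memory(GPR::RBP, 48)
        ));
    }
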

View File

@@ -2,6 +2,7 @@
 use crate::common_decl::{MachineState, MachineValue, RegisterIndex};
 use std::collections::BTreeMap;
+use wasmer_compiler::{CallingConvention, Target};
 use wasmer_types::Type;

 /// General-purpose registers.
@@ -170,73 +171,75 @@ pub struct ArgumentRegisterAllocator {
 impl ArgumentRegisterAllocator {
     /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type.
-    #[cfg(target_os = "windows")]
-    pub fn next(&mut self, ty: Type) -> Option<X64Register> {
-        static GPR_SEQ: &'static [GPR] = &[GPR::RCX, GPR::RDX, GPR::R8, GPR::R9];
-        static XMM_SEQ: &'static [XMM] = &[XMM::XMM0, XMM::XMM1, XMM::XMM2, XMM::XMM3];
-        let idx = self.n_gprs + self.n_xmms;
-        match ty {
-            Type::I32 | Type::I64 => {
-                if idx < 4 {
-                    let gpr = GPR_SEQ[idx];
-                    self.n_gprs += 1;
-                    Some(X64Register::GPR(gpr))
-                } else {
-                    None
-                }
-            }
-            Type::F32 | Type::F64 => {
-                if idx < 4 {
-                    let xmm = XMM_SEQ[idx];
-                    self.n_xmms += 1;
-                    Some(X64Register::XMM(xmm))
-                } else {
-                    None
-                }
-            }
-            _ => todo!(
-                "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
-                ty
-            ),
-        }
-    }
-    #[cfg(not(target_os = "windows"))]
-    pub fn next(&mut self, ty: Type) -> Option<X64Register> {
-        static GPR_SEQ: &'static [GPR] =
-            &[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
-        static XMM_SEQ: &'static [XMM] = &[
-            XMM::XMM0,
-            XMM::XMM1,
-            XMM::XMM2,
-            XMM::XMM3,
-            XMM::XMM4,
-            XMM::XMM5,
-            XMM::XMM6,
-            XMM::XMM7,
-        ];
-        match ty {
-            Type::I32 | Type::I64 => {
-                if self.n_gprs < GPR_SEQ.len() {
-                    let gpr = GPR_SEQ[self.n_gprs];
-                    self.n_gprs += 1;
-                    Some(X64Register::GPR(gpr))
-                } else {
-                    None
-                }
-            }
-            Type::F32 | Type::F64 => {
-                if self.n_xmms < XMM_SEQ.len() {
-                    let xmm = XMM_SEQ[self.n_xmms];
-                    self.n_xmms += 1;
-                    Some(X64Register::XMM(xmm))
-                } else {
-                    None
-                }
-            }
-            _ => todo!(
-                "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
-                ty
-            ),
+    pub fn next(&mut self, ty: Type, calling_convention: CallingConvention) -> Option<X64Register> {
+        match calling_convention {
+            CallingConvention::WindowsFastcall => {
+                static GPR_SEQ: &'static [GPR] = &[GPR::RCX, GPR::RDX, GPR::R8, GPR::R9];
+                static XMM_SEQ: &'static [XMM] = &[XMM::XMM0, XMM::XMM1, XMM::XMM2, XMM::XMM3];
+                let idx = self.n_gprs + self.n_xmms;
+                match ty {
+                    Type::I32 | Type::I64 => {
+                        if idx < 4 {
+                            let gpr = GPR_SEQ[idx];
+                            self.n_gprs += 1;
+                            Some(X64Register::GPR(gpr))
+                        } else {
+                            None
+                        }
+                    }
+                    Type::F32 | Type::F64 => {
+                        if idx < 4 {
+                            let xmm = XMM_SEQ[idx];
+                            self.n_xmms += 1;
+                            Some(X64Register::XMM(xmm))
+                        } else {
+                            None
+                        }
+                    }
+                    _ => todo!(
+                        "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
+                        ty
+                    ),
+                }
+            }
+            _ => {
+                static GPR_SEQ: &'static [GPR] =
+                    &[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
+                static XMM_SEQ: &'static [XMM] = &[
+                    XMM::XMM0,
+                    XMM::XMM1,
+                    XMM::XMM2,
+                    XMM::XMM3,
+                    XMM::XMM4,
+                    XMM::XMM5,
+                    XMM::XMM6,
+                    XMM::XMM7,
+                ];
+                match ty {
+                    Type::I32 | Type::I64 => {
+                        if self.n_gprs < GPR_SEQ.len() {
+                            let gpr = GPR_SEQ[self.n_gprs];
+                            self.n_gprs += 1;
+                            Some(X64Register::GPR(gpr))
+                        } else {
+                            None
+                        }
+                    }
+                    Type::F32 | Type::F64 => {
+                        if self.n_xmms < XMM_SEQ.len() {
+                            let xmm = XMM_SEQ[self.n_xmms];
+                            self.n_xmms += 1;
+                            Some(X64Register::XMM(xmm))
+                        } else {
+                            None
+                        }
+                    }
+                    _ => todo!(
+                        "ArgumentRegisterAllocator::next: Unsupported type: {:?}",
+                        ty
+                    ),
+                }
+            }
         }
     }
 }
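
The behavioural difference kept by the two arms: under Windows fastcall, integer and float arguments share one positional sequence (argument n uses the n-th slot whichever register file it lands in, and only four slots exist), while System V walks the GPR and XMM sequences independently. A hypothetical walk-through for a signature like (vmctx: i64, f64, i32), assuming the crate's ArgumentRegisterAllocator, X64Register, GPR, XMM, CallingConvention and wasmer_types::Type are in scope:

    #[test]
    fn allocation_order_differs_by_convention() {
        // System V: GPRs and XMMs are counted separately.
        let mut sysv = ArgumentRegisterAllocator::default();
        assert!(matches!(
            sysv.next(Type::I64, CallingConvention::SystemV),
            Some(X64Register::GPR(GPR::RDI)) // vmctx
        ));
        assert!(matches!(
            sysv.next(Type::F64, CallingConvention::SystemV),
            Some(X64Register::XMM(XMM::XMM0)) // first float, first XMM
        ));
        assert!(matches!(
            sysv.next(Type::I32, CallingConvention::SystemV),
            Some(X64Register::GPR(GPR::RSI)) // second integer, second GPR
        ));

        // Windows fastcall: one shared position counter across both register files.
        let mut win = ArgumentRegisterAllocator::default();
        assert!(matches!(
            win.next(Type::I64, CallingConvention::WindowsFastcall),
            Some(X64Register::GPR(GPR::RCX)) // vmctx, position 0
        ));
        assert!(matches!(
            win.next(Type::F64, CallingConvention::WindowsFastcall),
            Some(X64Register::XMM(XMM::XMM1)) // float at position 1 -> XMM1, not XMM0
        ));
    }
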