diff --git a/lib/compiler-singlepass/Cargo.toml b/lib/compiler-singlepass/Cargo.toml index b04f114cd..8cc641eb9 100644 --- a/lib/compiler-singlepass/Cargo.toml +++ b/lib/compiler-singlepass/Cargo.toml @@ -19,6 +19,11 @@ wasm-common = { path = "../wasm-common", version = "0.16.2", default-features = rayon = "1.3.0" serde = { version = "1.0.106", features = ["derive"] } more-asserts = "0.2.1" +dynasm = "0.5" +dynasmrt = "0.5" +lazy_static = "1.4" +byteorder = "1.3" +smallvec = "1" [badges] maintenance = { status = "actively-developed" } diff --git a/lib/compiler-singlepass/src/codegen_x64.rs b/lib/compiler-singlepass/src/codegen_x64.rs new file mode 100644 index 000000000..e51db5b7c --- /dev/null +++ b/lib/compiler-singlepass/src/codegen_x64.rs @@ -0,0 +1,10931 @@ +#![allow(clippy::forget_copy)] // Used by dynasm. +#![warn(unused_imports)] + +use crate::emitter_x64::*; +use crate::machine::*; +#[cfg(target_arch = "aarch64")] +use dynasmrt::aarch64::Assembler; +#[cfg(target_arch = "x86_64")] +use dynasmrt::x64::Assembler; +use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; +use smallvec::SmallVec; +use std::{ + any::Any, + collections::{BTreeMap, HashMap}, + ffi::c_void, + iter, mem, + ptr::NonNull, + slice, + sync::{Arc, RwLock}, + usize, +}; +use wasmer_runtime_core::{ + backend::{ + sys::{Memory, Protect}, + Architecture, CacheGen, CompilerConfig, ExceptionCode, ExceptionTable, InlineBreakpoint, + InlineBreakpointType, MemoryBoundCheckMode, RunnableModule, Token, + }, + cache::{Artifact, Error as CacheError}, + codegen::*, + fault::{self, raw::register_preservation_trampoline}, + loader::CodeMemory, + memory::MemoryType, + module::{ModuleInfo, ModuleInner}, + state::{ + x64::new_machine_state, x64::X64Register, x64_decl::ArgumentRegisterAllocator, + FunctionStateMap, MachineState, MachineValue, ModuleStateMap, OffsetInfo, SuspendOffset, + WasmAbstractValue, + }, + structures::{Map, TypedIndex}, + typed_func::{Trampoline, Wasm}, + types::{ + FuncIndex, FuncSig, GlobalIndex, ImportedGlobalIndex, LocalFuncIndex, LocalGlobalIndex, + LocalOrImport, MemoryIndex, SigIndex, TableIndex, Type, + }, + vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE}, + wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}, +}; + +#[cfg(target_arch = "aarch64")] +#[allow(dead_code)] +static ARCH: Architecture = Architecture::Aarch64; +#[cfg(target_arch = "x86_64")] +#[allow(dead_code)] +static ARCH: Architecture = Architecture::X64; + +/// Inline breakpoint size for x86-64. +pub const INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS: usize = 7; + +/// Inline breakpoint size for aarch64. +pub const INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS: usize = 12; + +static BACKEND_ID: &str = "singlepass"; + +#[cfg(target_arch = "x86_64")] +lazy_static! { + /// Performs a System V call to `target` with [stack_top..stack_base] as the argument list, from right to left. 
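+    /// The incoming arguments (`stack_top`, `stack_base`, `ctx`, `target`) are saved into the
+    /// callee-saved registers r15, r14, r13 and r12; `ctx` is then placed in rdi, up to five
+    /// argument slots are loaded into rsi, rdx, rcx, r8 and r9, and any remaining slots are
+    /// copied onto a 16-byte-aligned native stack before `target` is called.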
+ static ref CONSTRUCT_STACK_AND_CALL_WASM: unsafe extern "C" fn (stack_top: *const u64, stack_base: *const u64, ctx: *mut vm::Ctx, target: *const vm::Func) -> u64 = { + let mut assembler = Assembler::new().unwrap(); + let offset = assembler.offset(); + dynasm!( + assembler + ; push r15 + ; push r14 + ; push r13 + ; push r12 + ; push r11 + ; push rbp + ; mov rbp, rsp + + ; mov r15, rdi + ; mov r14, rsi + ; mov r13, rdx + ; mov r12, rcx + + ; mov rdi, r13 // ctx + + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov rsi, [r14] + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov rdx, [r14] + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov rcx, [r14] + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov r8, [r14] + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov r9, [r14] + ; sub r14, 8 + ; cmp r14, r15 + ; jb >stack_ready + + ; mov rax, r14 + ; sub rax, r15 + ; sub rsp, rax + ; sub rsp, 8 + ; mov rax, QWORD 0xfffffffffffffff0u64 as i64 + ; and rsp, rax + ; mov rax, rsp + ; loop_begin: + ; mov r11, [r14] + ; mov [rax], r11 + ; sub r14, 8 + ; add rax, 8 + ; cmp r14, r15 + ; jb >stack_ready + ; jmp u64, userdata: *mut u8) -> u64 = { + let mut assembler = Assembler::new().unwrap(); + let offset = assembler.offset(); + dynasm!( + assembler + ; .arch aarch64 + ; sub x0, x0, 16 + ; mov x8, sp + ; str x8, [x0, 0] + ; str x30, [x0, 8] + ; adr x30, >done + ; mov sp, x0 + ; mov x0, x2 + ; br x1 + ; done: + ; ldr x30, [sp, 8] + ; ldr x8, [sp, 0] + ; mov sp, x8 + ; br x30 + ); + let buf = assembler.finalize().unwrap(); + let ret = unsafe { mem::transmute(buf.ptr(offset)) }; + mem::forget(buf); + ret + }; +} + +pub struct X64ModuleCodeGenerator { + functions: Vec, + signatures: Option>>, + function_signatures: Option>>, + function_labels: Option)>>, + assembler: Option, + func_import_count: usize, + + config: Option>, +} + +pub struct X64FunctionCode { + local_function_id: usize, + + signatures: Arc>, + function_signatures: Arc>, + signature: FuncSig, + fsm: FunctionStateMap, + offset: usize, + + assembler: Option, + function_labels: Option)>>, + breakpoints: Option< + HashMap< + AssemblyOffset, + Box Result<(), Box> + Send + Sync + 'static>, + >, + >, + returns: SmallVec<[WpType; 1]>, + locals: Vec, + num_params: usize, + local_types: Vec, + value_stack: Vec, + + /// Metadata about floating point values on the stack. + fp_stack: Vec, + + control_stack: Vec, + machine: Machine, + unreachable_depth: usize, + + config: Arc, + + exception_table: Option, +} + +/// Metadata about a floating-point value. +#[derive(Copy, Clone, Debug)] +struct FloatValue { + /// Do we need to canonicalize the value before its bit pattern is next observed? If so, how? + canonicalization: Option, + + /// Corresponding depth in the main value stack. 
+ depth: usize, +} + +impl FloatValue { + fn new(depth: usize) -> Self { + FloatValue { + canonicalization: None, + depth, + } + } + + fn cncl_f32(depth: usize) -> Self { + FloatValue { + canonicalization: Some(CanonicalizeType::F32), + depth, + } + } + + fn cncl_f64(depth: usize) -> Self { + FloatValue { + canonicalization: Some(CanonicalizeType::F64), + depth, + } + } + + fn promote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(CanonicalizeType::F32) => Some(CanonicalizeType::F64), + Some(CanonicalizeType::F64) => panic!("cannot promote F64"), + None => None, + }, + depth, + } + } + + fn demote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(CanonicalizeType::F64) => Some(CanonicalizeType::F32), + Some(CanonicalizeType::F32) => panic!("cannot demote F32"), + None => None, + }, + depth, + } + } +} + +/// Type of a pending canonicalization floating point value. +/// Sometimes we don't have the type information elsewhere and therefore we need to track it here. +#[derive(Copy, Clone, Debug)] +enum CanonicalizeType { + F32, + F64, +} + +impl CanonicalizeType { + fn to_size(&self) -> Size { + match self { + CanonicalizeType::F32 => Size::S32, + CanonicalizeType::F64 => Size::S64, + } + } +} + +trait PopMany { + fn peek1(&self) -> Result<&T, CodegenError>; + fn pop1(&mut self) -> Result; + fn pop2(&mut self) -> Result<(T, T), CodegenError>; +} + +impl PopMany for Vec { + fn peek1(&self) -> Result<&T, CodegenError> { + match self.last() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "peek1() expects at least 1 element".into(), + }), + } + } + fn pop1(&mut self) -> Result { + match self.pop() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "pop1() expects at least 1 element".into(), + }), + } + } + fn pop2(&mut self) -> Result<(T, T), CodegenError> { + if self.len() < 2 { + return Err(CodegenError { + message: "pop2() expects at least 2 elements".into(), + }); + } + + let right = self.pop().unwrap(); + let left = self.pop().unwrap(); + Ok((left, right)) + } +} + +trait WpTypeExt { + fn is_float(&self) -> bool; +} + +impl WpTypeExt for WpType { + fn is_float(&self) -> bool { + match self { + WpType::F32 | WpType::F64 => true, + _ => false, + } + } +} + +enum FuncPtrInner {} +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +struct FuncPtr(*const FuncPtrInner); +unsafe impl Send for FuncPtr {} +unsafe impl Sync for FuncPtr {} + +pub struct X64ExecutionContext { + #[allow(dead_code)] + code: CodeMemory, + function_pointers: Vec, + function_offsets: Vec, + signatures: Arc>, + breakpoints: BreakpointMap, + func_import_count: usize, + msm: ModuleStateMap, + exception_table: ExceptionTable, +} + +/// On-disk cache format. +/// Offsets are relative to the start of the executable image. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct CacheImage { + /// The executable image. + code: Vec, + + /// Offsets to the start of each function. Including trampoline, if any. + /// Trampolines are only present on AArch64. + /// On x86-64, `function_pointers` are identical to `function_offsets`. + function_pointers: Vec, + + /// Offsets to the start of each function after trampoline. + function_offsets: Vec, + + /// Number of imported functions. + func_import_count: usize, + + /// Module state map. + msm: ModuleStateMap, + + /// An exception table that maps instruction offsets to exception codes. 
+ exception_table: ExceptionTable, +} + +#[derive(Debug)] +pub struct ControlFrame { + pub label: DynamicLabel, + pub loop_like: bool, + pub if_else: IfElseState, + pub returns: SmallVec<[WpType; 1]>, + pub value_stack_depth: usize, + pub fp_stack_depth: usize, + pub state: MachineState, + pub state_diff_id: usize, +} + +#[derive(Debug, Copy, Clone)] +pub enum IfElseState { + None, + If(DynamicLabel), + Else, +} + +pub struct SinglepassCache { + buffer: Arc<[u8]>, +} + +impl CacheGen for SinglepassCache { + fn generate_cache(&self) -> Result<(Box<[u8]>, Memory), CacheError> { + let mut memory = Memory::with_size_protect(self.buffer.len(), Protect::ReadWrite) + .map_err(CacheError::SerializeError)?; + + let buffer = &*self.buffer; + + unsafe { + memory.as_slice_mut()[..buffer.len()].copy_from_slice(buffer); + } + + Ok(([].as_ref().into(), memory)) + } +} + +impl RunnableModule for X64ExecutionContext { + fn get_func( + &self, + _: &ModuleInfo, + local_func_index: LocalFuncIndex, + ) -> Option> { + self.function_pointers[self.func_import_count..] + .get(local_func_index.index()) + .and_then(|ptr| NonNull::new(ptr.0 as *mut vm::Func)) + } + + fn get_module_state_map(&self) -> Option { + Some(self.msm.clone()) + } + + fn get_breakpoints(&self) -> Option { + Some(self.breakpoints.clone()) + } + + fn get_exception_table(&self) -> Option<&ExceptionTable> { + Some(&self.exception_table) + } + + unsafe fn patch_local_function(&self, idx: usize, target_address: usize) -> bool { + /* + 0: 48 b8 42 42 42 42 42 42 42 42 movabsq $4774451407313060418, %rax + a: 49 bb 43 43 43 43 43 43 43 43 movabsq $4846791580151137091, %r11 + 14: 41 ff e3 jmpq *%r11 + */ + #[repr(packed)] + struct LocalTrampoline { + movabsq_rax: [u8; 2], + addr_rax: u64, + movabsq_r11: [u8; 2], + addr_r11: u64, + jmpq_r11: [u8; 3], + } + + self.code.make_writable(); + + let trampoline = &mut *(self.function_pointers[self.func_import_count + idx].0 + as *const LocalTrampoline as *mut LocalTrampoline); + trampoline.movabsq_rax[0] = 0x48; + trampoline.movabsq_rax[1] = 0xb8; + trampoline.addr_rax = target_address as u64; + trampoline.movabsq_r11[0] = 0x49; + trampoline.movabsq_r11[1] = 0xbb; + trampoline.addr_r11 = + register_preservation_trampoline as unsafe extern "C" fn() as usize as u64; + trampoline.jmpq_r11[0] = 0x41; + trampoline.jmpq_r11[1] = 0xff; + trampoline.jmpq_r11[2] = 0xe3; + + self.code.make_executable(); + true + } + + fn get_trampoline(&self, _: &ModuleInfo, sig_index: SigIndex) -> Option { + // Correctly unwinding from `catch_unsafe_unwind` on hardware exceptions depends + // on the signal handlers being installed. Here we call `ensure_sighandler` "statically" + // outside `invoke()`. + fault::ensure_sighandler(); + + unsafe extern "C" fn invoke( + _trampoline: Trampoline, + ctx: *mut vm::Ctx, + func: NonNull, + args: *const u64, + rets: *mut u64, + error_out: *mut Option>, + num_params_plus_one: Option>, + ) -> bool { + let rm: &Box = &(&*(*ctx).module).runnable_module; + + let args = + slice::from_raw_parts(args, num_params_plus_one.unwrap().as_ptr() as usize - 1); + + let ret = match fault::catch_unsafe_unwind( + || { + // Puts the arguments onto the stack and calls Wasm entry. 
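+                    // The argument slice is reversed because the trampoline walks from
+                    // `stack_base` downwards, so the first Wasm argument is the last slot
+                    // written and the first one loaded into an argument register.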
+ #[cfg(target_arch = "x86_64")] + { + let args_reverse: SmallVec<[u64; 8]> = args.iter().cloned().rev().collect(); + CONSTRUCT_STACK_AND_CALL_WASM( + args_reverse.as_ptr(), + args_reverse.as_ptr().offset(args_reverse.len() as isize), + ctx, + func.as_ptr(), + ) + } + + // FIXME: Currently we are doing a hack here to convert between native aarch64 and + // "emulated" x86 ABIs. Ideally, this should be done using handwritten assembly. + #[cfg(target_arch = "aarch64")] + { + struct CallCtx<'a> { + args: &'a [u64], + ctx: *mut vm::Ctx, + callable: NonNull, + } + extern "C" fn call_fn(f: *mut u8) -> u64 { + unsafe { + let f = &*(f as *const CallCtx); + let callable: extern "C" fn( + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + ) + -> u64 = std::mem::transmute(f.callable); + let mut args = f.args.iter(); + callable( + f.ctx as u64, + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + ) + } + } + let mut cctx = CallCtx { + args: &args, + ctx: ctx, + callable: func, + }; + use libc::{ + mmap, munmap, MAP_ANON, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, + PROT_WRITE, + }; + const STACK_SIZE: usize = 1048576 * 1024; // 1GB of virtual address space for stack. + let stack_ptr = mmap( + ::std::ptr::null_mut(), + STACK_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, + -1, + 0, + ); + if stack_ptr as isize == -1 { + panic!("unable to allocate stack"); + } + // TODO: Mark specific regions in the stack as PROT_NONE. + let ret = SWITCH_STACK( + (stack_ptr as *mut u8).offset(STACK_SIZE as isize) as *mut u64, + call_fn, + &mut cctx as *mut CallCtx as *mut u8, + ); + munmap(stack_ptr, STACK_SIZE); + ret + } + }, + rm.get_breakpoints(), + ) { + Ok(x) => { + if !rets.is_null() { + *rets = x; + } + true + } + Err(err) => { + *error_out = Some(err); + false + } + }; + ret + } + + unsafe extern "C" fn dummy_trampoline( + _: *mut vm::Ctx, + _: NonNull, + _: *const u64, + _: *mut u64, + ) { + unreachable!() + } + + Some(unsafe { + Wasm::from_raw_parts( + dummy_trampoline, + invoke, + NonNull::new((self.signatures.get(sig_index).unwrap().params().len() + 1) as _), // +1 to keep it non-zero + ) + }) + } + + unsafe fn do_early_trap(&self, data: Box) -> ! 
{ + fault::begin_unsafe_unwind(data); + } + + fn get_code(&self) -> Option<&[u8]> { + Some(&self.code) + } + + fn get_offsets(&self) -> Option> { + Some(self.function_offsets.iter().map(|x| x.0).collect()) + } + + fn get_local_function_offsets(&self) -> Option> { + Some( + self.function_offsets[self.func_import_count..] + .iter() + .map(|x| x.0) + .collect(), + ) + } + + /// Returns the inline breakpoint size corresponding to an Architecture. + fn get_inline_breakpoint_size(&self, arch: Architecture) -> Option { + match arch { + Architecture::X64 => Some(INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS), + Architecture::Aarch64 => Some(INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS), + } + } + + /// Attempts to read an inline breakpoint from the code. + /// + /// Inline breakpoints are detected by special instruction sequences that never + /// appear in valid code. + fn read_inline_breakpoint(&self, arch: Architecture, code: &[u8]) -> Option { + match arch { + Architecture::X64 => { + if code.len() < INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS { + None + } else if &code[..INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS - 1] + == &[ + 0x0f, 0x0b, // ud2 + 0x0f, 0xb9, // ud + 0xcd, 0xff, // int 0xff + ] + { + Some(InlineBreakpoint { + size: INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS, + ty: match code[INLINE_BREAKPOINT_SIZE_X86_SINGLEPASS - 1] { + 0 => InlineBreakpointType::Middleware, + _ => return None, + }, + }) + } else { + None + } + } + Architecture::Aarch64 => { + if code.len() < INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS { + None + } else if &code[..INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS - 4] + == &[ + 0, 0, 0, 0, // udf #0 + 0xff, 0xff, 0x00, 0x00, // udf #65535 + ] + { + Some(InlineBreakpoint { + size: INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS, + ty: match code[INLINE_BREAKPOINT_SIZE_AARCH64_SINGLEPASS - 4] { + 0 => InlineBreakpointType::Middleware, + _ => return None, + }, + }) + } else { + None + } + } + } + } +} + +#[derive(Debug)] +pub struct CodegenError { + pub message: String, +} + +#[derive(Copy, Clone, Debug)] +struct CodegenConfig { + memory_bound_check_mode: MemoryBoundCheckMode, + enforce_stack_check: bool, + track_state: bool, + full_preemption: bool, + nan_canonicalization: bool, +} + +impl ModuleCodeGenerator + for X64ModuleCodeGenerator +{ + fn new() -> X64ModuleCodeGenerator { + let a = Assembler::new().unwrap(); + + X64ModuleCodeGenerator { + functions: vec![], + signatures: None, + function_signatures: None, + function_labels: Some(HashMap::new()), + assembler: Some(a), + func_import_count: 0, + config: None, + } + } + + /// Singlepass does validation as it compiles + fn requires_pre_validation() -> bool { + false + } + + fn backend_id() -> &'static str { + BACKEND_ID + } + + fn new_with_target(_: Option, _: Option, _: Option) -> Self { + unimplemented!("cross compilation is not available for singlepass backend") + } + + fn check_precondition(&mut self, _module_info: &ModuleInfo) -> Result<(), CodegenError> { + Ok(()) + } + + fn next_function( + &mut self, + _module_info: Arc>, + _loc: WasmSpan, + ) -> Result<&mut X64FunctionCode, CodegenError> { + let (mut assembler, mut function_labels, breakpoints, exception_table) = + match self.functions.last_mut() { + Some(x) => ( + x.assembler.take().unwrap(), + x.function_labels.take().unwrap(), + x.breakpoints.take().unwrap(), + x.exception_table.take().unwrap(), + ), + None => ( + self.assembler.take().unwrap(), + self.function_labels.take().unwrap(), + HashMap::new(), + ExceptionTable::new(), + ), + }; + + let begin_offset = assembler.offset(); + let 
begin_label_info = function_labels + .entry(self.functions.len() + self.func_import_count) + .or_insert_with(|| (assembler.new_dynamic_label(), None)); + + begin_label_info.1 = Some(begin_offset); + assembler.arch_emit_entry_trampoline(); + let begin_label = begin_label_info.0; + let mut machine = Machine::new(); + machine.track_state = self.config.as_ref().unwrap().track_state; + + assembler.emit_label(begin_label); + + let signatures = self.signatures.as_ref().unwrap(); + let function_signatures = self.function_signatures.as_ref().unwrap(); + let sig_index = function_signatures + .get(FuncIndex::new( + self.functions.len() + self.func_import_count, + )) + .unwrap() + .clone(); + let sig = signatures.get(sig_index).unwrap().clone(); + let code = X64FunctionCode { + local_function_id: self.functions.len(), + + signatures: signatures.clone(), + function_signatures: function_signatures.clone(), + signature: sig, + fsm: FunctionStateMap::new(new_machine_state(), self.functions.len(), 32, vec![]), // only a placeholder; this is initialized later in `begin_body` + offset: begin_offset.0, + + assembler: Some(assembler), + function_labels: Some(function_labels), + breakpoints: Some(breakpoints), + returns: smallvec![], + locals: vec![], + local_types: vec![], + num_params: 0, + value_stack: vec![], + fp_stack: vec![], + control_stack: vec![], + machine, + unreachable_depth: 0, + config: self.config.as_ref().unwrap().clone(), + exception_table: Some(exception_table), + }; + self.functions.push(code); + Ok(self.functions.last_mut().unwrap()) + } + + fn finalize( + mut self, + _: &ModuleInfo, + ) -> Result< + ( + X64ExecutionContext, + Option, + Box, + ), + CodegenError, + > { + let (assembler, function_labels, breakpoints, exception_table) = + match self.functions.last_mut() { + Some(x) => ( + x.assembler.take().unwrap(), + x.function_labels.take().unwrap(), + x.breakpoints.take().unwrap(), + x.exception_table.take().unwrap(), + ), + None => ( + self.assembler.take().unwrap(), + self.function_labels.take().unwrap(), + HashMap::new(), + ExceptionTable::new(), + ), + }; + + let total_size = assembler.get_offset().0; + let _output = assembler.finalize().unwrap(); + let mut output = CodeMemory::new(_output.len()); + output[0.._output.len()].copy_from_slice(&_output); + output.make_executable(); + + let mut out_labels: Vec = vec![]; + let mut out_offsets: Vec = vec![]; + + for i in 0..function_labels.len() { + let (_, offset) = match function_labels.get(&i) { + Some(x) => x, + None => { + return Err(CodegenError { + message: format!("label not found"), + }); + } + }; + let offset = match offset { + Some(x) => x, + None => { + return Err(CodegenError { + message: format!("offset is none"), + }); + } + }; + out_labels.push(FuncPtr( + unsafe { output.as_ptr().offset(offset.0 as isize) } as _, + )); + out_offsets.push(*offset); + } + + let breakpoints: Arc> = Arc::new( + breakpoints + .into_iter() + .map(|(offset, f)| { + ( + unsafe { output.as_ptr().offset(offset.0 as isize) } as usize, + f, + ) + }) + .collect(), + ); + + let local_function_maps: BTreeMap = self + .functions + .iter() + .map(|x| (x.offset, x.fsm.clone())) + .collect(); + + let msm = ModuleStateMap { + local_functions: local_function_maps, + total_size, + }; + + let cache_image = CacheImage { + code: output.to_vec(), + function_pointers: out_labels + .iter() + .map(|x| { + (x.0 as usize) + .checked_sub(output.as_ptr() as usize) + .unwrap() + }) + .collect(), + function_offsets: out_offsets.iter().map(|x| x.0 as usize).collect(), + 
func_import_count: self.func_import_count, + msm: msm.clone(), + exception_table: exception_table.clone(), + }; + + let cache = SinglepassCache { + buffer: Arc::from(bincode::serialize(&cache_image).unwrap().into_boxed_slice()), + }; + + Ok(( + X64ExecutionContext { + code: output, + signatures: self.signatures.as_ref().unwrap().clone(), + breakpoints: breakpoints, + func_import_count: self.func_import_count, + function_pointers: out_labels, + function_offsets: out_offsets, + msm: msm, + exception_table, + }, + None, + Box::new(cache), + )) + } + + fn feed_signatures(&mut self, signatures: Map) -> Result<(), CodegenError> { + self.signatures = Some(Arc::new(signatures)); + Ok(()) + } + + fn feed_function_signatures( + &mut self, + assoc: Map, + ) -> Result<(), CodegenError> { + self.function_signatures = Some(Arc::new(assoc)); + Ok(()) + } + + fn feed_import_function(&mut self, sigindex: SigIndex) -> Result<(), CodegenError> { + let labels = self.function_labels.as_mut().unwrap(); + let id = labels.len(); + + let a = self.assembler.as_mut().unwrap(); + let offset = a.offset(); + a.arch_emit_entry_trampoline(); + let label = a.get_label(); + a.emit_label(label); + labels.insert(id, (label, Some(offset))); + + // Singlepass internally treats all arguments as integers, but the standard System V calling convention requires + // floating point arguments to be passed in XMM registers. + // + // FIXME: This is only a workaround. We should fix singlepass to use the standard CC. + let sig = self + .signatures + .as_ref() + .expect("signatures itself") + .get(sigindex) + .expect("signatures"); + // Translation is expensive, so only do it if needed. + if sig + .params() + .iter() + .find(|&&x| x == Type::F32 || x == Type::F64) + .is_some() + { + let mut param_locations: Vec = vec![]; + + // Allocate stack space for arguments. + let stack_offset: i32 = if sig.params().len() > 5 { + 5 * 8 + } else { + (sig.params().len() as i32) * 8 + }; + if stack_offset > 0 { + a.emit_sub( + Size::S64, + Location::Imm32(stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + + // Store all arguments to the stack to prevent overwrite. + for i in 0..sig.params().len() { + let loc = match i { + 0..=4 => { + static PARAM_REGS: &'static [GPR] = + &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9]; + let loc = Location::Memory(GPR::RSP, (i * 8) as i32); + a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc); + loc + } + _ => Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32), + }; + param_locations.push(loc); + } + + // Copy arguments. + let mut argalloc = ArgumentRegisterAllocator::default(); + argalloc.next(Type::I32).unwrap(); // skip vm::Ctx + let mut caller_stack_offset: i32 = 0; + for (i, ty) in sig.params().iter().enumerate() { + let prev_loc = param_locations[i]; + let target = match argalloc.next(*ty) { + Some(X64Register::GPR(gpr)) => Location::GPR(gpr), + Some(X64Register::XMM(xmm)) => Location::XMM(xmm), + None => { + // No register can be allocated. Put this argument on the stack. + // + // Since here we never use fewer registers than by the original call, on the caller's frame + // we always have enough space to store the rearranged arguments, and the copy "backward" between different + // slots in the caller argument region will always work. 
+ a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX)); + a.emit_mov( + Size::S64, + Location::GPR(GPR::RAX), + Location::Memory(GPR::RSP, stack_offset + 8 + caller_stack_offset), + ); + caller_stack_offset += 8; + continue; + } + }; + a.emit_mov(Size::S64, prev_loc, target); + } + + // Restore stack pointer. + if stack_offset > 0 { + a.emit_add( + Size::S64, + Location::Imm32(stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + } + + // Emits a tail call trampoline that loads the address of the target import function + // from Ctx and jumps to it. + + let imported_funcs_addr = vm::Ctx::offset_imported_funcs(); + let imported_func = vm::ImportedFunc::size() as usize * id; + let imported_func_addr = imported_func + vm::ImportedFunc::offset_func() as usize; + let imported_func_ctx_addr = imported_func + vm::ImportedFunc::offset_func_ctx() as usize; + let imported_func_ctx_vmctx_addr = vm::FuncCtx::offset_vmctx() as usize; + + a.emit_mov( + Size::S64, + Location::Memory(GPR::RDI, imported_funcs_addr as i32), + Location::GPR(GPR::RAX), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, imported_func_ctx_addr as i32), + Location::GPR(GPR::RDI), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RDI, imported_func_ctx_vmctx_addr as i32), + Location::GPR(GPR::RDI), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, imported_func_addr as i32), + Location::GPR(GPR::RAX), + ); + a.emit_host_redirection(GPR::RAX); + + self.func_import_count += 1; + + Ok(()) + } + + fn feed_compiler_config(&mut self, config: &CompilerConfig) -> Result<(), CodegenError> { + self.config = Some(Arc::new(CodegenConfig { + memory_bound_check_mode: config.memory_bound_check_mode, + enforce_stack_check: config.enforce_stack_check, + track_state: config.track_state, + full_preemption: config.full_preemption, + nan_canonicalization: config.nan_canonicalization, + })); + Ok(()) + } + unsafe fn from_cache(artifact: Artifact, _: Token) -> Result { + let (info, _, memory) = artifact.consume(); + + let cache_image: CacheImage = bincode::deserialize(memory.as_slice()) + .map_err(|x| CacheError::DeserializeError(format!("{:?}", x)))?; + + let mut code_mem = CodeMemory::new(cache_image.code.len()); + code_mem[0..cache_image.code.len()].copy_from_slice(&cache_image.code); + code_mem.make_executable(); + + let function_pointers: Vec = cache_image + .function_pointers + .iter() + .map(|&x| FuncPtr(code_mem.as_ptr().offset(x as isize) as *const FuncPtrInner)) + .collect(); + let function_offsets: Vec = cache_image + .function_offsets + .iter() + .cloned() + .map(AssemblyOffset) + .collect(); + + let ec = X64ExecutionContext { + code: code_mem, + function_pointers, + function_offsets, + signatures: Arc::new(info.signatures.clone()), + breakpoints: Arc::new(HashMap::new()), + func_import_count: cache_image.func_import_count, + msm: cache_image.msm, + exception_table: cache_image.exception_table, + }; + Ok(ModuleInner { + runnable_module: Arc::new(Box::new(ec)), + cache_gen: Box::new(SinglepassCache { + buffer: Arc::from(memory.as_slice().to_vec().into_boxed_slice()), + }), + info, + }) + } +} + +impl X64FunctionCode { + fn mark_trappable( + a: &mut Assembler, + m: &Machine, + fsm: &mut FunctionStateMap, + control_stack: &mut [ControlFrame], + ) { + let state_diff_id = Self::get_state_diff(m, fsm, control_stack); + let offset = a.get_offset().0; + fsm.trappable_offsets.insert( + offset, + OffsetInfo { + end_offset: offset + 1, + activate_offset: offset, + diff_id: state_diff_id, + }, + ); + 
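+        // Also map the current wasm instruction offset to this native offset in the
+        // function state map, so wasm offsets can be translated back to machine code.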
fsm.wasm_offset_to_target_offset + .insert(m.state.wasm_inst_offset, SuspendOffset::Trappable(offset)); + } + + /// Marks each address in the code range emitted by `f` with the exception code `code`. + fn mark_range_with_exception_code R, R>( + a: &mut Assembler, + etable: &mut ExceptionTable, + code: ExceptionCode, + f: F, + ) -> R { + let begin = a.get_offset().0; + let ret = f(a); + let end = a.get_offset().0; + for i in begin..end { + etable.offset_to_code.insert(i, code); + } + ret + } + + /// Canonicalizes the floating point value at `input` into `output`. + fn canonicalize_nan( + a: &mut Assembler, + m: &mut Machine, + sz: Size, + input: Location, + output: Location, + ) { + let tmp1 = m.acquire_temp_xmm().unwrap(); + let tmp2 = m.acquire_temp_xmm().unwrap(); + let tmp3 = m.acquire_temp_xmm().unwrap(); + let tmpg1 = m.acquire_temp_gpr().unwrap(); + + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, input, Location::XMM(tmp1)); + + match sz { + Size::S32 => { + a.emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S32, + Location::Imm32(0x7FC0_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + } + Size::S64 => { + a.emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S64, + Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + } + _ => unreachable!(), + } + + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, Location::XMM(tmp1), output); + + m.release_temp_gpr(tmpg1); + m.release_temp_xmm(tmp3); + m.release_temp_xmm(tmp2); + m.release_temp_xmm(tmp1); + } + + /// Moves `loc` to a valid location for `div`/`idiv`. + fn emit_relaxed_xdiv( + a: &mut Assembler, + m: &mut Machine, + etable: &mut ExceptionTable, + op: fn(&mut Assembler, Size, Location), + sz: Size, + loc: Location, + fsm: &mut FunctionStateMap, + control_stack: &mut [ControlFrame], + ) { + m.state.wasm_stack_private_depth += 1; + match loc { + Location::Imm64(_) | Location::Imm32(_) => { + a.emit_mov(sz, loc, Location::GPR(GPR::RCX)); // must not be used during div (rax, rdx) + Self::mark_trappable(a, m, fsm, control_stack); + etable + .offset_to_code + .insert(a.get_offset().0, ExceptionCode::IllegalArithmetic); + op(a, sz, Location::GPR(GPR::RCX)); + } + _ => { + Self::mark_trappable(a, m, fsm, control_stack); + etable + .offset_to_code + .insert(a.get_offset().0, ExceptionCode::IllegalArithmetic); + op(a, sz, loc); + } + } + m.state.wasm_stack_private_depth -= 1; + } + + /// Moves `src` and `dst` to valid locations for `movzx`/`movsx`. 
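+    /// Immediate sources are staged in a temporary GPR first, and a memory destination is
+    /// written back through a temporary GPR, since `movzx`/`movsx` cannot write to memory.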
+ fn emit_relaxed_zx_sx( + a: &mut Assembler, + m: &mut Machine, + op: fn(&mut Assembler, Size, Location, Size, Location), + sz_src: Size, + mut src: Location, + sz_dst: Size, + dst: Location, + ) -> Result<(), CodegenError> { + let inner = |m: &mut Machine, a: &mut Assembler, src: Location| match dst { + Location::Imm32(_) | Location::Imm64(_) => { + return Err(CodegenError { + message: format!("emit_relaxed_zx_sx dst Imm: unreachable code"), + }) + } + Location::Memory(_, _) => { + let tmp_dst = m.acquire_temp_gpr().unwrap(); + op(a, sz_src, src, sz_dst, Location::GPR(tmp_dst)); + a.emit_mov(Size::S64, Location::GPR(tmp_dst), dst); + + m.release_temp_gpr(tmp_dst); + Ok(()) + } + Location::GPR(_) => { + op(a, sz_src, src, sz_dst, dst); + Ok(()) + } + _ => { + return Err(CodegenError { + message: format!("emit_relaxed_zx_sx dst: unreachable code"), + }) + } + }; + + match src { + Location::Imm32(_) | Location::Imm64(_) => { + let tmp_src = m.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, src, Location::GPR(tmp_src)); + src = Location::GPR(tmp_src); + + inner(m, a, src)?; + + m.release_temp_gpr(tmp_src); + } + Location::GPR(_) | Location::Memory(_, _) => inner(m, a, src)?, + _ => { + return Err(CodegenError { + message: format!("emit_relaxed_zx_sx src: unreachable code"), + }) + } + } + Ok(()) + } + + /// Moves `src` and `dst` to valid locations for generic instructions. + fn emit_relaxed_binop( + a: &mut Assembler, + m: &mut Machine, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + ) { + enum RelaxMode { + Direct, + SrcToGPR, + DstToGPR, + BothToGPR, + } + let mode = match (src, dst) { + (Location::GPR(_), Location::GPR(_)) + if (op as *const u8 == Assembler::emit_imul as *const u8) => + { + RelaxMode::Direct + } + _ if (op as *const u8 == Assembler::emit_imul as *const u8) => RelaxMode::BothToGPR, + + (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR, + (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => { + RelaxMode::BothToGPR + } + (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR, + (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR, + (Location::Imm64(_), Location::GPR(_)) + if (op as *const u8 != Assembler::emit_mov as *const u8) => + { + RelaxMode::SrcToGPR + } + (_, Location::XMM(_)) => RelaxMode::SrcToGPR, + _ => RelaxMode::Direct, + }; + + match mode { + RelaxMode::SrcToGPR => { + let temp = m.acquire_temp_gpr().unwrap(); + a.emit_mov(sz, src, Location::GPR(temp)); + op(a, sz, Location::GPR(temp), dst); + m.release_temp_gpr(temp); + } + RelaxMode::DstToGPR => { + let temp = m.acquire_temp_gpr().unwrap(); + a.emit_mov(sz, dst, Location::GPR(temp)); + op(a, sz, src, Location::GPR(temp)); + m.release_temp_gpr(temp); + } + RelaxMode::BothToGPR => { + let temp_src = m.acquire_temp_gpr().unwrap(); + let temp_dst = m.acquire_temp_gpr().unwrap(); + a.emit_mov(sz, src, Location::GPR(temp_src)); + a.emit_mov(sz, dst, Location::GPR(temp_dst)); + op(a, sz, Location::GPR(temp_src), Location::GPR(temp_dst)); + match dst { + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(sz, Location::GPR(temp_dst), dst); + } + _ => {} + } + m.release_temp_gpr(temp_dst); + m.release_temp_gpr(temp_src); + } + RelaxMode::Direct => { + op(a, sz, src, dst); + } + } + } + + /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions. 
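+    /// GPR and immediate operands are staged in temporary XMM registers, and a GPR or memory
+    /// destination receives the result through a scratch XMM register.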
+ fn emit_relaxed_avx( + a: &mut Assembler, + m: &mut Machine, + op: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + src1: Location, + src2: Location, + dst: Location, + ) -> Result<(), CodegenError> { + Self::emit_relaxed_avx_base( + a, + m, + |a, _, src1, src2, dst| op(a, src1, src2, dst), + src1, + src2, + dst, + )?; + Ok(()) + } + + /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions. + fn emit_relaxed_avx_base( + a: &mut Assembler, + m: &mut Machine, + op: F, + src1: Location, + src2: Location, + dst: Location, + ) -> Result<(), CodegenError> { + let tmp1 = m.acquire_temp_xmm().unwrap(); + let tmp2 = m.acquire_temp_xmm().unwrap(); + let tmp3 = m.acquire_temp_xmm().unwrap(); + let tmpg = m.acquire_temp_gpr().unwrap(); + + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg)); + a.emit_mov(Size::S32, Location::GPR(tmpg), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg)); + a.emit_mov(Size::S64, Location::GPR(tmpg), Location::XMM(tmp1)); + tmp1 + } + _ => { + return Err(CodegenError { + message: format!("emit_relaxed_avx_base src1: unreachable code"), + }) + } + }; + + let src2 = match src2 { + Location::XMM(x) => XMMOrMemory::XMM(x), + Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp), + Location::GPR(_) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + XMMOrMemory::XMM(tmp2) + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg)); + a.emit_mov(Size::S32, Location::GPR(tmpg), Location::XMM(tmp2)); + XMMOrMemory::XMM(tmp2) + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg)); + a.emit_mov(Size::S64, Location::GPR(tmpg), Location::XMM(tmp2)); + XMMOrMemory::XMM(tmp2) + } + _ => { + return Err(CodegenError { + message: format!("emit_relaxed_avx_base src2: unreachable code"), + }) + } + }; + + match dst { + Location::XMM(x) => { + op(a, m, src1, src2, x); + } + Location::Memory(_, _) | Location::GPR(_) => { + op(a, m, src1, src2, tmp3); + a.emit_mov(Size::S64, Location::XMM(tmp3), dst); + } + _ => { + return Err(CodegenError { + message: format!("emit_relaxed_avx_base dst: unreachable code"), + }) + } + } + + m.release_temp_gpr(tmpg); + m.release_temp_xmm(tmp3); + m.release_temp_xmm(tmp2); + m.release_temp_xmm(tmp1); + Ok(()) + } + + /// I32 binary operation with both operands popped from the virtual stack. + fn emit_binop_i32( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) { + // Using Red Zone here. 
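+        // (The System V ABI guarantees a 128-byte red zone below RSP that can be used
+        // without adjusting the stack pointer.)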
+ let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + if loc_a != ret { + let tmp = m.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + loc_a, + Location::GPR(tmp), + ); + Self::emit_relaxed_binop(a, m, f, Size::S32, loc_b, Location::GPR(tmp)); + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp), + ret, + ); + m.release_temp_gpr(tmp); + } else { + Self::emit_relaxed_binop(a, m, f, Size::S32, loc_b, ret); + } + + value_stack.push(ret); + } + + /// I64 binary operation with both operands popped from the virtual stack. + fn emit_binop_i64( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) { + // Using Red Zone here. + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + if loc_a != ret { + let tmp = m.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + loc_a, + Location::GPR(tmp), + ); + Self::emit_relaxed_binop(a, m, f, Size::S64, loc_b, Location::GPR(tmp)); + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp), + ret, + ); + m.release_temp_gpr(tmp); + } else { + Self::emit_relaxed_binop(a, m, f, Size::S64, loc_b, ret); + } + + value_stack.push(ret); + } + + /// I32 comparison with `loc_b` from input. + fn emit_cmpop_i32_dynamic_b( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + c: Condition, + loc_b: Location, + ) -> Result<(), CodegenError> { + // Using Red Zone here. + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + match ret { + Location::GPR(x) => { + Self::emit_relaxed_binop(a, m, Assembler::emit_cmp, Size::S32, loc_b, loc_a); + a.emit_set(c, x); + a.emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x)); + } + Location::Memory(_, _) => { + let tmp = m.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop(a, m, Assembler::emit_cmp, Size::S32, loc_b, loc_a); + a.emit_set(c, tmp); + a.emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp)); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + m.release_temp_gpr(tmp); + } + _ => { + return Err(CodegenError { + message: format!("emit_cmpop_i32_dynamic_b ret: unreachable code"), + }) + } + } + value_stack.push(ret); + Ok(()) + } + + /// I32 comparison with both operands popped from the virtual stack. + fn emit_cmpop_i32( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + c: Condition, + ) -> Result<(), CodegenError> { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + Self::emit_cmpop_i32_dynamic_b(a, m, value_stack, c, loc_b)?; + Ok(()) + } + + /// I64 comparison with `loc_b` from input. + fn emit_cmpop_i64_dynamic_b( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + c: Condition, + loc_b: Location, + ) -> Result<(), CodegenError> { + // Using Red Zone here. 
+ let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + match ret { + Location::GPR(x) => { + Self::emit_relaxed_binop(a, m, Assembler::emit_cmp, Size::S64, loc_b, loc_a); + a.emit_set(c, x); + a.emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x)); + } + Location::Memory(_, _) => { + let tmp = m.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop(a, m, Assembler::emit_cmp, Size::S64, loc_b, loc_a); + a.emit_set(c, tmp); + a.emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp)); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + m.release_temp_gpr(tmp); + } + _ => { + return Err(CodegenError { + message: format!("emit_cmpop_i64_dynamic_b ret: unreachable code"), + }) + } + } + value_stack.push(ret); + Ok(()) + } + + /// I64 comparison with both operands popped from the virtual stack. + fn emit_cmpop_i64( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + c: Condition, + ) -> Result<(), CodegenError> { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + Self::emit_cmpop_i64_dynamic_b(a, m, value_stack, c, loc_b)?; + Ok(()) + } + + /// I32 `lzcnt`/`tzcnt`/`popcnt` with operand popped from the virtual stack. + fn emit_xcnt_i32( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) -> Result<(), CodegenError> { + let loc = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + match loc { + Location::Imm32(_) => { + let tmp = m.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + if let Location::Memory(_, _) = ret { + let out_tmp = m.acquire_temp_gpr().unwrap(); + f(a, Size::S32, Location::GPR(tmp), Location::GPR(out_tmp)); + a.emit_mov(Size::S32, Location::GPR(out_tmp), ret); + m.release_temp_gpr(out_tmp); + } else { + f(a, Size::S32, Location::GPR(tmp), ret); + } + m.release_temp_gpr(tmp); + } + Location::Memory(_, _) | Location::GPR(_) => { + if let Location::Memory(_, _) = ret { + let out_tmp = m.acquire_temp_gpr().unwrap(); + f(a, Size::S32, loc, Location::GPR(out_tmp)); + a.emit_mov(Size::S32, Location::GPR(out_tmp), ret); + m.release_temp_gpr(out_tmp); + } else { + f(a, Size::S32, loc, ret); + } + } + _ => { + return Err(CodegenError { + message: format!("emit_xcnt_i32 loc: unreachable code"), + }) + } + } + value_stack.push(ret); + Ok(()) + } + + /// I64 `lzcnt`/`tzcnt`/`popcnt` with operand popped from the virtual stack. 
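+    /// These instructions require a register destination, so a memory result is produced in
+    /// a scratch GPR and then stored; immediate operands are likewise moved into a register first.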
+ fn emit_xcnt_i64( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) -> Result<(), CodegenError> { + let loc = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + match loc { + Location::Imm64(_) | Location::Imm32(_) => { + let tmp = m.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + if let Location::Memory(_, _) = ret { + let out_tmp = m.acquire_temp_gpr().unwrap(); + f(a, Size::S64, Location::GPR(tmp), Location::GPR(out_tmp)); + a.emit_mov(Size::S64, Location::GPR(out_tmp), ret); + m.release_temp_gpr(out_tmp); + } else { + f(a, Size::S64, Location::GPR(tmp), ret); + } + m.release_temp_gpr(tmp); + } + Location::Memory(_, _) | Location::GPR(_) => { + if let Location::Memory(_, _) = ret { + let out_tmp = m.acquire_temp_gpr().unwrap(); + f(a, Size::S64, loc, Location::GPR(out_tmp)); + a.emit_mov(Size::S64, Location::GPR(out_tmp), ret); + m.release_temp_gpr(out_tmp); + } else { + f(a, Size::S64, loc, ret); + } + } + _ => { + return Err(CodegenError { + message: format!("emit_xcnt_i64 loc: unreachable code"), + }) + } + } + value_stack.push(ret); + Ok(()) + } + + /// I32 shift with both operands popped from the virtual stack. + fn emit_shift_i32( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + a.emit_mov(Size::S32, loc_b, Location::GPR(GPR::RCX)); + + if loc_a != ret { + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, Size::S32, loc_a, ret); + } + + f(a, Size::S32, Location::GPR(GPR::RCX), ret); + value_stack.push(ret); + } + + /// I64 shift with both operands popped from the virtual stack. + fn emit_shift_i64( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, Size, Location, Location), + ) { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + + a.emit_mov(Size::S64, loc_b, Location::GPR(GPR::RCX)); + + if loc_a != ret { + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, Size::S64, loc_a, ret); + } + + f(a, Size::S64, Location::GPR(GPR::RCX), ret); + value_stack.push(ret); + } + + /// Floating point (AVX) binary operation with both operands popped from the virtual stack. + fn emit_fp_binop_avx( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + ) -> Result<(), CodegenError> { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + value_stack.push(ret); + + Self::emit_relaxed_avx(a, m, f, loc_a, loc_b, ret)?; + Ok(()) + } + + /// Floating point (AVX) comparison with both operands popped from the virtual stack. 
+ fn emit_fp_cmpop_avx( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + ) -> Result<(), CodegenError> { + let loc_b = get_location_released(a, m, value_stack.pop().unwrap()); + let loc_a = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + value_stack.push(ret); + + Self::emit_relaxed_avx(a, m, f, loc_a, loc_b, ret)?; + + // Workaround for behavior inconsistency among different backing implementations. + // (all bits or only the least significant bit are set to one?) + a.emit_and(Size::S32, Location::Imm32(1), ret); + Ok(()) + } + + /// Floating point (AVX) binary operation with both operands popped from the virtual stack. + fn emit_fp_unop_avx( + a: &mut Assembler, + m: &mut Machine, + value_stack: &mut Vec, + f: fn(&mut Assembler, XMM, XMMOrMemory, XMM), + ) -> Result<(), CodegenError> { + let loc = get_location_released(a, m, value_stack.pop().unwrap()); + let ret = m.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(value_stack.len()))], + false, + )[0]; + value_stack.push(ret); + + Self::emit_relaxed_avx(a, m, f, loc, loc, ret)?; + Ok(()) + } + + /// Emits a System V call sequence. + /// + /// This function must not use RAX before `cb` is called. + fn emit_call_sysv, F: FnOnce(&mut Assembler)>( + a: &mut Assembler, + m: &mut Machine, + cb: F, + params: I, + state_context: Option<(&mut FunctionStateMap, &mut [ControlFrame])>, + ) -> Result<(), CodegenError> { + // Values pushed in this function are above the shadow region. + m.state.stack_values.push(MachineValue::ExplicitShadow); + + let params: Vec<_> = params.collect(); + + // Save used GPRs. + let used_gprs = m.get_used_gprs(); + for r in used_gprs.iter() { + a.emit_push(Size::S64, Location::GPR(*r)); + let content = m.state.register_values[X64Register::GPR(*r).to_index().0].clone(); + if content == MachineValue::Undefined { + return Err(CodegenError { + message: format!("emit_call_sysv: Undefined used_gprs content"), + }); + } + m.state.stack_values.push(content); + } + + // Save used XMM registers. + let used_xmms = m.get_used_xmms(); + if used_xmms.len() > 0 { + a.emit_sub( + Size::S64, + Location::Imm32((used_xmms.len() * 8) as u32), + Location::GPR(GPR::RSP), + ); + + for (i, r) in used_xmms.iter().enumerate() { + a.emit_mov( + Size::S64, + Location::XMM(*r), + Location::Memory(GPR::RSP, (i * 8) as i32), + ); + } + for r in used_xmms.iter().rev() { + let content = m.state.register_values[X64Register::XMM(*r).to_index().0].clone(); + if content == MachineValue::Undefined { + return Err(CodegenError { + message: format!("emit_call_sysv: Undefined used_xmms content"), + }); + } + m.state.stack_values.push(content); + } + } + + let mut stack_offset: usize = 0; + + // Calculate stack offset. + for (i, _param) in params.iter().enumerate() { + let loc = Machine::get_param_location(1 + i); + match loc { + Location::Memory(_, _) => { + stack_offset += 8; + } + _ => {} + } + } + + // Align stack to 16 bytes. + if (m.get_stack_offset() + used_gprs.len() * 8 + used_xmms.len() * 8 + stack_offset) % 16 + != 0 + { + a.emit_sub(Size::S64, Location::Imm32(8), Location::GPR(GPR::RSP)); + stack_offset += 8; + m.state.stack_values.push(MachineValue::Undefined); + } + + let mut call_movs: Vec<(Location, GPR)> = vec![]; + + // Prepare register & stack parameters. 
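+        // Parameters are visited in reverse order: stack-bound arguments are pushed
+        // immediately (right to left), while register-bound arguments are collected into
+        // `call_movs` and emitted after sorting so that no register is overwritten before
+        // it is read.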
+ for (i, param) in params.iter().enumerate().rev() { + let loc = Machine::get_param_location(1 + i); + match loc { + Location::GPR(x) => { + call_movs.push((*param, x)); + } + Location::Memory(_, _) => { + match *param { + Location::GPR(x) => { + let content = + m.state.register_values[X64Register::GPR(x).to_index().0].clone(); + // FIXME: There might be some corner cases (release -> emit_call_sysv -> acquire?) that cause this assertion to fail. + // Hopefully nothing would be incorrect at runtime. + + //assert!(content != MachineValue::Undefined); + m.state.stack_values.push(content); + } + Location::XMM(x) => { + let content = + m.state.register_values[X64Register::XMM(x).to_index().0].clone(); + //assert!(content != MachineValue::Undefined); + m.state.stack_values.push(content); + } + Location::Memory(reg, offset) => { + if reg != GPR::RBP { + return Err(CodegenError { + message: format!("emit_call_sysv loc param: unreachable code"), + }); + } + m.state + .stack_values + .push(MachineValue::CopyStackBPRelative(offset)); + // TODO: Read value at this offset + } + _ => { + m.state.stack_values.push(MachineValue::Undefined); + } + } + match *param { + Location::Imm64(_) => { + // Dummy value slot to be filled with `mov`. + a.emit_push(Size::S64, Location::GPR(GPR::RAX)); + + // Use R10 as the temporary register here, since it is callee-saved and not + // used by the callback `cb`. + a.emit_mov(Size::S64, *param, Location::GPR(GPR::R10)); + a.emit_mov( + Size::S64, + Location::GPR(GPR::R10), + Location::Memory(GPR::RSP, 0), + ); + } + Location::XMM(_) => { + // Dummy value slot to be filled with `mov`. + a.emit_push(Size::S64, Location::GPR(GPR::RAX)); + + // XMM registers can be directly stored to memory. + a.emit_mov(Size::S64, *param, Location::Memory(GPR::RSP, 0)); + } + _ => a.emit_push(Size::S64, *param), + } + } + _ => { + return Err(CodegenError { + message: format!("emit_call_sysv loc: unreachable code"), + }) + } + } + } + + // Sort register moves so that register are not overwritten before read. + sort_call_movs(&mut call_movs); + + // Emit register moves. + for (loc, gpr) in call_movs { + if loc != Location::GPR(gpr) { + a.emit_mov(Size::S64, loc, Location::GPR(gpr)); + } + } + + // Put vmctx as the first parameter. + a.emit_mov( + Size::S64, + Location::GPR(Machine::get_vmctx_reg()), + Machine::get_param_location(0), + ); // vmctx + + if (m.state.stack_values.len() % 2) != 1 { + return Err(CodegenError { + message: format!("emit_call_sysv: explicit shadow takes one slot"), + }); + } + + cb(a); + + // Offset needs to be after the 'call' instruction. + if let Some((fsm, control_stack)) = state_context { + let state_diff_id = Self::get_state_diff(m, fsm, control_stack); + let offset = a.get_offset().0; + fsm.call_offsets.insert( + offset, + OffsetInfo { + end_offset: offset + 1, + activate_offset: offset, + diff_id: state_diff_id, + }, + ); + fsm.wasm_offset_to_target_offset + .insert(m.state.wasm_inst_offset, SuspendOffset::Call(offset)); + } + + // Restore stack. + if stack_offset > 0 { + a.emit_add( + Size::S64, + Location::Imm32(stack_offset as u32), + Location::GPR(GPR::RSP), + ); + if (stack_offset % 8) != 0 { + return Err(CodegenError { + message: format!("emit_call_sysv: Bad restoring stack alignement"), + }); + } + for _ in 0..stack_offset / 8 { + m.state.stack_values.pop().unwrap(); + } + } + + // Restore XMMs. 
+ if used_xmms.len() > 0 { + for (i, r) in used_xmms.iter().enumerate() { + a.emit_mov( + Size::S64, + Location::Memory(GPR::RSP, (i * 8) as i32), + Location::XMM(*r), + ); + } + a.emit_add( + Size::S64, + Location::Imm32((used_xmms.len() * 8) as u32), + Location::GPR(GPR::RSP), + ); + for _ in 0..used_xmms.len() { + m.state.stack_values.pop().unwrap(); + } + } + + // Restore GPRs. + for r in used_gprs.iter().rev() { + a.emit_pop(Size::S64, Location::GPR(*r)); + m.state.stack_values.pop().unwrap(); + } + + if m.state.stack_values.pop().unwrap() != MachineValue::ExplicitShadow { + return Err(CodegenError { + message: format!("emit_call_sysv: Popped value is not ExplicitShadow"), + }); + } + Ok(()) + } + + /// Emits a System V call sequence, specialized for labels as the call target. + fn emit_call_sysv_label>( + a: &mut Assembler, + m: &mut Machine, + label: DynamicLabel, + params: I, + state_context: Option<(&mut FunctionStateMap, &mut [ControlFrame])>, + ) -> Result<(), CodegenError> { + Self::emit_call_sysv(a, m, |a| a.emit_call_label(label), params, state_context)?; + Ok(()) + } + + /// Emits a memory operation. + fn emit_memory_op Result<(), CodegenError>>( + module_info: &ModuleInfo, + config: &CodegenConfig, + a: &mut Assembler, + m: &mut Machine, + etable: &mut ExceptionTable, + addr: Location, + memarg: &MemoryImmediate, + check_alignment: bool, + value_size: usize, + cb: F, + ) -> Result<(), CodegenError> { + // If the memory is dynamic, we need to do bound checking at runtime. + let mem_desc = match MemoryIndex::new(0).local_or_import(module_info) { + LocalOrImport::Local(local_mem_index) => &module_info.memories[local_mem_index], + LocalOrImport::Import(import_mem_index) => { + &module_info.imported_memories[import_mem_index].1 + } + }; + let need_check = match config.memory_bound_check_mode { + MemoryBoundCheckMode::Default => match mem_desc.memory_type() { + MemoryType::Dynamic => true, + MemoryType::Static | MemoryType::SharedStatic => false, + }, + MemoryBoundCheckMode::Enable => true, + MemoryBoundCheckMode::Disable => false, + }; + + let tmp_addr = m.acquire_temp_gpr().unwrap(); + let tmp_base = m.acquire_temp_gpr().unwrap(); + + // Load base into temporary register. + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_memory_base() as i32, + ), + Location::GPR(tmp_base), + ); + + if need_check { + let tmp_bound = m.acquire_temp_gpr().unwrap(); + + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_memory_bound() as i32, + ), + Location::GPR(tmp_bound), + ); + // Adds base to bound so `tmp_bound` now holds the end of linear memory. + a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_bound)); + a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); + + // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. + match (memarg.offset as u32).checked_add(value_size as u32) { + Some(0) => {} + Some(x) => { + a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); + } + None => { + a.emit_add( + Size::S64, + Location::Imm32(memarg.offset as u32), + Location::GPR(tmp_addr), + ); + a.emit_add( + Size::S64, + Location::Imm32(value_size as u32), + Location::GPR(tmp_addr), + ); + } + } + + // Trap if the end address of the requested area is above that of the linear memory. 
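+            // `tmp_bound` now holds base + bound and `tmp_addr` holds the wasm address plus
+            // offset and access size; adding the base to `tmp_addr` reduces the check to a
+            // single unsigned comparison of end addresses.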
+ a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_addr)); + a.emit_cmp(Size::S64, Location::GPR(tmp_bound), Location::GPR(tmp_addr)); + + Self::mark_range_with_exception_code( + a, + etable, + ExceptionCode::MemoryOutOfBounds, + |a| a.emit_conditional_trap(Condition::Above), + ); + + m.release_temp_gpr(tmp_bound); + } + + // Calculates the real address, and loads from it. + a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); + if memarg.offset != 0 { + a.emit_add( + Size::S64, + Location::Imm32(memarg.offset as u32), + Location::GPR(tmp_addr), + ); + } + a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_addr)); + m.release_temp_gpr(tmp_base); + + let align = match memarg.flags & 3 { + 0 => 1, + 1 => 2, + 2 => 4, + 3 => 8, + _ => { + return Err(CodegenError { + message: format!("emit_memory_op align: unreachable value"), + }) + } + }; + if check_alignment && align != 1 { + let tmp_aligncheck = m.acquire_temp_gpr().unwrap(); + a.emit_mov( + Size::S32, + Location::GPR(tmp_addr), + Location::GPR(tmp_aligncheck), + ); + a.emit_and( + Size::S64, + Location::Imm32(align - 1), + Location::GPR(tmp_aligncheck), + ); + Self::mark_range_with_exception_code( + a, + etable, + ExceptionCode::MemoryOutOfBounds, + |a| a.emit_conditional_trap(Condition::NotEqual), + ); + m.release_temp_gpr(tmp_aligncheck); + } + + Self::mark_range_with_exception_code(a, etable, ExceptionCode::MemoryOutOfBounds, |a| { + cb(a, m, tmp_addr) + })?; + + m.release_temp_gpr(tmp_addr); + Ok(()) + } + + /// Emits a memory operation. + fn emit_compare_and_swap( + module_info: &ModuleInfo, + config: &CodegenConfig, + a: &mut Assembler, + m: &mut Machine, + etable: &mut ExceptionTable, + loc: Location, + target: Location, + ret: Location, + memarg: &MemoryImmediate, + value_size: usize, + memory_sz: Size, + stack_sz: Size, + cb: F, + ) -> Result<(), CodegenError> { + if memory_sz > stack_sz { + return Err(CodegenError { + message: format!("emit_compare_and_swap: memory size > stac size"), + }); + } + + let compare = m.reserve_unused_temp_gpr(GPR::RAX); + let value = if loc == Location::GPR(GPR::R14) { + GPR::R13 + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + + a.emit_mov(stack_sz, loc, Location::GPR(value)); + + let retry = a.get_label(); + a.emit_label(retry); + + Self::emit_memory_op( + module_info, + config, + a, + m, + etable, + target, + memarg, + true, + value_size, + |a, m, addr| { + // Memory moves with size < 32b do not zero upper bits. + if memory_sz < Size::S32 { + a.emit_xor(Size::S32, Location::GPR(compare), Location::GPR(compare)); + } + a.emit_mov(memory_sz, Location::Memory(addr, 0), Location::GPR(compare)); + a.emit_mov(stack_sz, Location::GPR(compare), ret); + cb(a, m, compare, value); + a.emit_lock_cmpxchg(memory_sz, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + + a.emit_jmp(Condition::NotEqual, retry); + + a.emit_pop(Size::S64, Location::GPR(value)); + m.release_temp_gpr(compare); + Ok(()) + } + + // Checks for underflow/overflow/nan. + fn emit_f32_int_conv_check( + a: &mut Assembler, + m: &mut Machine, + reg: XMM, + lower_bound: f32, + upper_bound: f32, + underflow_label: ::Label, + overflow_label: ::Label, + nan_label: ::Label, + succeed_label: ::Label, + ) { + let lower_bound = f32::to_bits(lower_bound); + let upper_bound = f32::to_bits(upper_bound); + + let tmp = m.acquire_temp_gpr().unwrap(); + let tmp_x = m.acquire_temp_xmm().unwrap(); + + // Underflow. 
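+ // vcmpless builds an all-ones mask in `tmp_x` when `reg <= lower_bound`; the
+ // mask is moved to a GPR, tested against zero, and branches to `underflow_label`
+ // when set. The overflow and NaN checks below use the same compare-mask-branch
+ // pattern (a NaN is detected because it never compares equal to itself).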
+ a.emit_mov(Size::S32, Location::Imm32(lower_bound), Location::GPR(tmp)); + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x)); + a.emit_vcmpless(reg, XMMOrMemory::XMM(tmp_x), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::NotEqual, underflow_label); + + // Overflow. + a.emit_mov(Size::S32, Location::Imm32(upper_bound), Location::GPR(tmp)); + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x)); + a.emit_vcmpgess(reg, XMMOrMemory::XMM(tmp_x), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::NotEqual, overflow_label); + + // NaN. + a.emit_vcmpeqss(reg, XMMOrMemory::XMM(reg), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::Equal, nan_label); + + a.emit_jmp(Condition::None, succeed_label); + + m.release_temp_xmm(tmp_x); + m.release_temp_gpr(tmp); + } + + // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32. + fn emit_f32_int_conv_check_trap( + a: &mut Assembler, + m: &mut Machine, + etable: &mut ExceptionTable, + reg: XMM, + lower_bound: f32, + upper_bound: f32, + ) { + let trap = a.get_label(); + let end = a.get_label(); + + Self::emit_f32_int_conv_check(a, m, reg, lower_bound, upper_bound, trap, trap, trap, end); + a.emit_label(trap); + etable + .offset_to_code + .insert(a.get_offset().0, ExceptionCode::IllegalArithmetic); + a.emit_ud2(); + a.emit_label(end); + } + + fn emit_f32_int_conv_check_sat< + F1: FnOnce(&mut Assembler, &mut Machine), + F2: FnOnce(&mut Assembler, &mut Machine), + F3: FnOnce(&mut Assembler, &mut Machine), + F4: FnOnce(&mut Assembler, &mut Machine), + >( + a: &mut Assembler, + m: &mut Machine, + reg: XMM, + lower_bound: f32, + upper_bound: f32, + underflow_cb: F1, + overflow_cb: F2, + nan_cb: Option, + convert_cb: F4, + ) { + // As an optimization nan_cb is optional, and when set to None we turn + // use 'underflow' as the 'nan' label. This is useful for callers who + // set the return value to zero for both underflow and nan. + + let underflow = a.get_label(); + let overflow = a.get_label(); + let nan = if nan_cb.is_some() { + a.get_label() + } else { + underflow + }; + let convert = a.get_label(); + let end = a.get_label(); + + Self::emit_f32_int_conv_check( + a, + m, + reg, + lower_bound, + upper_bound, + underflow, + overflow, + nan, + convert, + ); + + a.emit_label(underflow); + underflow_cb(a, m); + a.emit_jmp(Condition::None, end); + + a.emit_label(overflow); + overflow_cb(a, m); + a.emit_jmp(Condition::None, end); + + if let Some(cb) = nan_cb { + a.emit_label(nan); + cb(a, m); + a.emit_jmp(Condition::None, end); + } + + a.emit_label(convert); + convert_cb(a, m); + a.emit_label(end); + } + + // Checks for underflow/overflow/nan. + fn emit_f64_int_conv_check( + a: &mut Assembler, + m: &mut Machine, + reg: XMM, + lower_bound: f64, + upper_bound: f64, + underflow_label: ::Label, + overflow_label: ::Label, + nan_label: ::Label, + succeed_label: ::Label, + ) { + let lower_bound = f64::to_bits(lower_bound); + let upper_bound = f64::to_bits(upper_bound); + + let tmp = m.acquire_temp_gpr().unwrap(); + let tmp_x = m.acquire_temp_xmm().unwrap(); + + // Underflow. 
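+ // Same compare-mask-branch scheme as the f32 checks above, using the
+ // scalar-double (SD) compare forms and 64-bit immediates for the bounds.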
+ a.emit_mov(Size::S64, Location::Imm64(lower_bound), Location::GPR(tmp)); + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x)); + a.emit_vcmplesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::NotEqual, underflow_label); + + // Overflow. + a.emit_mov(Size::S64, Location::Imm64(upper_bound), Location::GPR(tmp)); + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x)); + a.emit_vcmpgesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::NotEqual, overflow_label); + + // NaN. + a.emit_vcmpeqsd(reg, XMMOrMemory::XMM(reg), tmp_x); + a.emit_mov(Size::S32, Location::XMM(tmp_x), Location::GPR(tmp)); + a.emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp)); + a.emit_jmp(Condition::Equal, nan_label); + + a.emit_jmp(Condition::None, succeed_label); + + m.release_temp_xmm(tmp_x); + m.release_temp_gpr(tmp); + } + + // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64. + fn emit_f64_int_conv_check_trap( + a: &mut Assembler, + m: &mut Machine, + etable: &mut ExceptionTable, + reg: XMM, + lower_bound: f64, + upper_bound: f64, + ) { + let trap = a.get_label(); + let end = a.get_label(); + + Self::emit_f64_int_conv_check(a, m, reg, lower_bound, upper_bound, trap, trap, trap, end); + a.emit_label(trap); + etable + .offset_to_code + .insert(a.get_offset().0, ExceptionCode::IllegalArithmetic); + a.emit_ud2(); + a.emit_label(end); + } + + fn emit_f64_int_conv_check_sat< + F1: FnOnce(&mut Assembler, &mut Machine), + F2: FnOnce(&mut Assembler, &mut Machine), + F3: FnOnce(&mut Assembler, &mut Machine), + F4: FnOnce(&mut Assembler, &mut Machine), + >( + a: &mut Assembler, + m: &mut Machine, + reg: XMM, + lower_bound: f64, + upper_bound: f64, + underflow_cb: F1, + overflow_cb: F2, + nan_cb: Option, + convert_cb: F4, + ) { + // As an optimization nan_cb is optional, and when set to None we turn + // use 'underflow' as the 'nan' label. This is useful for callers who + // set the return value to zero for both underflow and nan. 
+ + let underflow = a.get_label(); + let overflow = a.get_label(); + let nan = if nan_cb.is_some() { + a.get_label() + } else { + underflow + }; + let convert = a.get_label(); + let end = a.get_label(); + + Self::emit_f64_int_conv_check( + a, + m, + reg, + lower_bound, + upper_bound, + underflow, + overflow, + nan, + convert, + ); + + a.emit_label(underflow); + underflow_cb(a, m); + a.emit_jmp(Condition::None, end); + + a.emit_label(overflow); + overflow_cb(a, m); + a.emit_jmp(Condition::None, end); + + if let Some(cb) = nan_cb { + a.emit_label(nan); + cb(a, m); + a.emit_jmp(Condition::None, end); + } + + a.emit_label(convert); + convert_cb(a, m); + a.emit_label(end); + } + + pub fn get_state_diff( + m: &Machine, + fsm: &mut FunctionStateMap, + control_stack: &mut [ControlFrame], + ) -> usize { + if !m.track_state { + return usize::MAX; + } + let last_frame = control_stack.last_mut().unwrap(); + let mut diff = m.state.diff(&last_frame.state); + diff.last = Some(last_frame.state_diff_id); + let id = fsm.diffs.len(); + last_frame.state = m.state.clone(); + last_frame.state_diff_id = id; + fsm.diffs.push(diff); + id + } +} + +impl FunctionCodeGenerator for X64FunctionCode { + fn feed_return(&mut self, ty: WpType) -> Result<(), CodegenError> { + self.returns.push(ty); + Ok(()) + } + + fn feed_param(&mut self, ty: WpType) -> Result<(), CodegenError> { + self.num_params += 1; + self.local_types.push(ty); + Ok(()) + } + + fn feed_local(&mut self, ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { + self.local_types.extend(iter::repeat(ty).take(n)); + Ok(()) + } + + fn begin_body(&mut self, _module_info: &ModuleInfo) -> Result<(), CodegenError> { + let a = self.assembler.as_mut().unwrap(); + let start_label = a.get_label(); + // skip the patchpoint during normal execution + a.emit_jmp(Condition::None, start_label); + // patchpoint of 32 1-byte nops + for _ in 0..32 { + a.emit_nop(); + } + a.emit_label(start_label); + a.emit_push(Size::S64, Location::GPR(GPR::RBP)); + a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RBP)); + + // Stack check. 
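+ // When enabled, compare RSP against the stack lower bound recorded in vm::Ctx
+ // (still in RDI at this point) and trap with MemoryOutOfBounds rather than let
+ // the guest frame overflow the native stack.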
+ if self.config.enforce_stack_check { + a.emit_cmp( + Size::S64, + Location::Memory( + GPR::RDI, // first parameter is vmctx + vm::Ctx::offset_stack_lower_bound() as i32, + ), + Location::GPR(GPR::RSP), + ); + Self::mark_range_with_exception_code( + a, + self.exception_table.as_mut().unwrap(), + ExceptionCode::MemoryOutOfBounds, + |a| a.emit_conditional_trap(Condition::Below), + ); + } + + self.locals = self + .machine + .init_locals(a, self.local_types.len(), self.num_params); + + self.machine.state.register_values + [X64Register::GPR(Machine::get_vmctx_reg()).to_index().0] = MachineValue::Vmctx; + + self.fsm = FunctionStateMap::new( + new_machine_state(), + self.local_function_id, + 32, + (0..self.locals.len()) + .map(|_| WasmAbstractValue::Runtime) + .collect(), + ); + + let diff = self.machine.state.diff(&new_machine_state()); + let state_diff_id = self.fsm.diffs.len(); + self.fsm.diffs.push(diff); + + //println!("initial state = {:?}", self.machine.state); + + a.emit_sub(Size::S64, Location::Imm32(32), Location::GPR(GPR::RSP)); // simulate "red zone" if not supported by the platform + + self.control_stack.push(ControlFrame { + label: a.get_label(), + loop_like: false, + if_else: IfElseState::None, + returns: self.returns.clone(), + value_stack_depth: 0, + fp_stack_depth: 0, + state: self.machine.state.clone(), + state_diff_id, + }); + + // Check interrupt signal without branching + let activate_offset = a.get_offset().0; + + if self.config.full_preemption { + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_interrupt_signal_mem() as i32, + ), + Location::GPR(GPR::RAX), + ); + self.fsm.loop_offsets.insert( + a.get_offset().0, + OffsetInfo { + end_offset: a.get_offset().0 + 1, + activate_offset, + diff_id: state_diff_id, + }, + ); + self.fsm.wasm_function_header_target_offset = + Some(SuspendOffset::Loop(a.get_offset().0)); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, 0), + Location::GPR(GPR::RAX), + ); + } + + if self.machine.state.wasm_inst_offset != usize::MAX { + return Err(CodegenError { + message: format!("begin_body: wasm_inst_offset not usize::MAX"), + }); + } + Ok(()) + } + + fn finalize(&mut self) -> Result<(), CodegenError> { + let a = self.assembler.as_mut().unwrap(); + a.emit_ud2(); + Ok(()) + } + + fn feed_event( + &mut self, + ev: Event, + module_info: &ModuleInfo, + _source_loc: u32, + ) -> Result<(), CodegenError> { + assert!(self.fp_stack.len() <= self.value_stack.len()); + + let a = self.assembler.as_mut().unwrap(); + + match ev { + Event::Internal(InternalEvent::FunctionBegin(_)) + | Event::Internal(InternalEvent::FunctionEnd) => { + return Ok(()); + } + _ => {} + } + + self.machine.state.wasm_inst_offset = self.machine.state.wasm_inst_offset.wrapping_add(1); + + //println!("{:?} {}", op, self.value_stack.len()); + let was_unreachable; + + if self.unreachable_depth > 0 { + was_unreachable = true; + + if let Event::Wasm(op) = ev { + match *op { + Operator::Block { .. } | Operator::Loop { .. } | Operator::If { .. 
} => { + self.unreachable_depth += 1; + } + Operator::End => { + self.unreachable_depth -= 1; + } + Operator::Else => { + // We are in a reachable true branch + if self.unreachable_depth == 1 { + if let Some(IfElseState::If(_)) = + self.control_stack.last().map(|x| x.if_else) + { + self.unreachable_depth -= 1; + } + } + } + _ => {} + } + } + if self.unreachable_depth > 0 { + return Ok(()); + } + } else { + was_unreachable = false; + } + + let op = match ev { + Event::Wasm(x) => x, + Event::WasmOwned(ref x) => x, + Event::Internal(x) => { + match x { + InternalEvent::Breakpoint(callback) => { + self.breakpoints + .as_mut() + .unwrap() + .insert(a.get_offset(), callback); + Self::mark_trappable( + a, + &self.machine, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_inline_breakpoint(InlineBreakpointType::Middleware); + } + InternalEvent::FunctionBegin(_) | InternalEvent::FunctionEnd => {} + InternalEvent::GetInternal(idx) => { + let idx = idx as usize; + if idx >= INTERNALS_SIZE { + return Err(CodegenError { + message: format!("GetInternal: incorrect index value"), + }); + } + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + // Load `internals` pointer. + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_internals() as i32, + ), + Location::GPR(tmp), + ); + + let loc = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(loc); + + // Move internal into the result location. + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::Memory(tmp, (idx * 8) as i32), + loc, + ); + + self.machine.release_temp_gpr(tmp); + } + InternalEvent::SetInternal(idx) => { + let idx = idx as usize; + if idx >= INTERNALS_SIZE { + return Err(CodegenError { + message: format!("SetInternal: incorrect index value"), + }); + } + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + // Load `internals` pointer. + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_internals() as i32, + ), + Location::GPR(tmp), + ); + let loc = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + + // Move internal into storage. 
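+ // `internals` is a small per-instance array of INTERNALS_SIZE u64 slots reached
+ // through vm::Ctx; GetInternal and SetInternal both address slot `idx` as an
+ // 8-byte access at `internals + idx * 8` after the bounds check above.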
+ Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, (idx * 8) as i32), + ); + self.machine.release_temp_gpr(tmp); + } //_ => unimplemented!(), + } + return Ok(()); + } + }; + + match *op { + Operator::GlobalGet { global_index } => { + let global_index = global_index as usize; + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let loc = match GlobalIndex::new(global_index).local_or_import(module_info) { + LocalOrImport::Local(local_index) => { + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_globals() as i32, + ), + Location::GPR(tmp), + ); + a.emit_mov( + Size::S64, + Location::Memory(tmp, (local_index.index() as i32) * 8), + Location::GPR(tmp), + ); + let ty = type_to_wp_type(module_info.globals[local_index].desc.ty); + if ty.is_float() { + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + } + self.machine.acquire_locations( + a, + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0] + } + LocalOrImport::Import(import_index) => { + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_imported_globals() as i32, + ), + Location::GPR(tmp), + ); + a.emit_mov( + Size::S64, + Location::Memory(tmp, (import_index.index() as i32) * 8), + Location::GPR(tmp), + ); + let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty); + if ty.is_float() { + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + } + self.machine.acquire_locations( + a, + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0] + } + }; + self.value_stack.push(loc); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + loc, + ); + + self.machine.release_temp_gpr(tmp); + } + Operator::GlobalSet { global_index } => { + let mut global_index = global_index as usize; + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let ty = if global_index < module_info.imported_globals.len() { + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_imported_globals() as i32, + ), + Location::GPR(tmp), + ); + type_to_wp_type( + module_info.imported_globals[ImportedGlobalIndex::new(global_index)] + .1 + .ty, + ) + } else { + global_index -= module_info.imported_globals.len(); + if global_index >= module_info.globals.len() { + return Err(CodegenError { + message: format!("SetGlobal: incorrect global_index value"), + }); + } + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_globals() as i32, + ), + Location::GPR(tmp), + ); + type_to_wp_type( + module_info.globals[LocalGlobalIndex::new(global_index)] + .desc + .ty, + ) + }; + a.emit_mov( + Size::S64, + Location::Memory(tmp, (global_index as i32) * 8), + Location::GPR(tmp), + ); + if ty.is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, 
LocalGlobal::offset_data() as i32), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } + self.machine.release_temp_gpr(tmp); + } + Operator::LocalGet { local_index } => { + let local_index = local_index as usize; + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + self.locals[local_index], + ret, + ); + self.value_stack.push(ret); + if self.local_types[local_index].is_float() { + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } + } + Operator::LocalSet { local_index } => { + let local_index = local_index as usize; + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + if self.local_types[local_index].is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } + Operator::LocalTee { local_index } => { + let local_index = local_index as usize; + let loc = *self.value_stack.last().unwrap(); + + if self.local_types[local_index].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } + Operator::I32Const { value } => { + self.value_stack.push(Location::Imm32(value as u32)); + self.machine + .state + .wasm_stack + .push(WasmAbstractValue::Const(value as u32 as u64)); + } + Operator::I32Add => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_add, + ), + Operator::I32Sub => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_sub, + ), + Operator::I32Mul => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_imul, + ), + Operator::I32DivU => { + // We assume that RAX and RDX are temporary registers here. 
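+ // x86 DIV/IDIV take the dividend in EDX:EAX (RDX:RAX) and return the quotient
+ // in EAX and the remainder in EDX, so the upper half is zeroed here; the signed
+ // variants below use CDQ/CQO to sign-extend instead.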
+ let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); + a.emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_div, + Size::S32, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S32, Location::GPR(GPR::RAX), ret); + self.value_stack.push(ret); + } + Operator::I32DivS => { + // We assume that RAX and RDX are temporary registers here. + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); + a.emit_cdq(); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_idiv, + Size::S32, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S32, Location::GPR(GPR::RAX), ret); + self.value_stack.push(ret); + } + Operator::I32RemU => { + // We assume that RAX and RDX are temporary registers here. + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); + a.emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_div, + Size::S32, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S32, Location::GPR(GPR::RDX), ret); + self.value_stack.push(ret); + } + Operator::I32RemS => { + // We assume that RAX and RDX are temporary registers here. 
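+ // i32::MIN % -1 would overflow IDIV (raising #DE), but wasm defines the result
+ // of that pair as 0, so it is special-cased before the normal division path.
+ // Conceptually: if loc_a == i32::MIN as u32 && loc_b == u32::MAX { ret = 0 }.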
+ let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + + let normal_path = a.get_label(); + let end = a.get_label(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(0x80000000), + loc_a, + ); + a.emit_jmp(Condition::NotEqual, normal_path); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(0xffffffff), + loc_b, + ); + a.emit_jmp(Condition::NotEqual, normal_path); + a.emit_mov(Size::S32, Location::Imm32(0), ret); + a.emit_jmp(Condition::None, end); + + a.emit_label(normal_path); + a.emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX)); + a.emit_cdq(); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_idiv, + Size::S32, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S32, Location::GPR(GPR::RDX), ret); + self.value_stack.push(ret); + + a.emit_label(end); + } + Operator::I32And => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_and, + ), + Operator::I32Or => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_or, + ), + Operator::I32Xor => Self::emit_binop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_xor, + ), + Operator::I32Eq => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Equal, + )?, + Operator::I32Ne => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::NotEqual, + )?, + Operator::I32Eqz => Self::emit_cmpop_i32_dynamic_b( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Equal, + Location::Imm32(0), + )?, + Operator::I32Clz => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let src = match loc { + Location::Imm32(_) | Location::Memory(_, _) => { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + tmp + } + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I32Clz src: unreachable code"), + }) + } + }; + + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let dst = match ret { + Location::Memory(_, _) => self.machine.acquire_temp_gpr().unwrap(), + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I32Clz dst: unreachable code"), + }) + } + }; + + if a.arch_has_xzcnt() { + a.arch_emit_lzcnt(Size::S32, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst)); + a.emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); + a.emit_label(end); + } + + match loc { + Location::Imm32(_) | Location::Memory(_, _) => { + self.machine.release_temp_gpr(src); + } + _ => {} + }; + match ret { + Location::Memory(_, _) => { + 
a.emit_mov(Size::S32, Location::GPR(dst), ret); + self.machine.release_temp_gpr(dst); + } + _ => {} + }; + } + Operator::I32Ctz => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let src = match loc { + Location::Imm32(_) | Location::Memory(_, _) => { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + tmp + } + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I32Ctz src: unreachable code"), + }) + } + }; + + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let dst = match ret { + Location::Memory(_, _) => self.machine.acquire_temp_gpr().unwrap(), + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I32Ctz dst: unreachable code"), + }) + } + }; + + if a.arch_has_xzcnt() { + a.arch_emit_tzcnt(Size::S32, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); + a.emit_label(end); + } + + match loc { + Location::Imm32(_) | Location::Memory(_, _) => { + self.machine.release_temp_gpr(src); + } + _ => {} + }; + match ret { + Location::Memory(_, _) => { + a.emit_mov(Size::S32, Location::GPR(dst), ret); + self.machine.release_temp_gpr(dst); + } + _ => {} + }; + } + Operator::I32Popcnt => Self::emit_xcnt_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_popcnt, + )?, + Operator::I32Shl => Self::emit_shift_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_shl, + ), + Operator::I32ShrU => Self::emit_shift_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_shr, + ), + Operator::I32ShrS => Self::emit_shift_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_sar, + ), + Operator::I32Rotl => Self::emit_shift_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_rol, + ), + Operator::I32Rotr => Self::emit_shift_i32( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_ror, + ), + Operator::I32LtU => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Below, + )?, + Operator::I32LeU => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::BelowEqual, + )?, + Operator::I32GtU => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Above, + )?, + Operator::I32GeU => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::AboveEqual, + )?, + Operator::I32LtS => { + Self::emit_cmpop_i32(a, &mut self.machine, &mut self.value_stack, Condition::Less)?; + } + Operator::I32LeS => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::LessEqual, + )?, + Operator::I32GtS => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Greater, + )?, + Operator::I32GeS => Self::emit_cmpop_i32( + a, + &mut self.machine, + &mut self.value_stack, + Condition::GreaterEqual, + )?, + Operator::I64Const { value } => { + let value = value as u64; + self.value_stack.push(Location::Imm64(value)); + self.machine + .state + 
.wasm_stack + .push(WasmAbstractValue::Const(value)); + } + Operator::I64Add => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_add, + ), + Operator::I64Sub => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_sub, + ), + Operator::I64Mul => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_imul, + ), + Operator::I64DivU => { + // We assume that RAX and RDX are temporary registers here. + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); + a.emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_div, + Size::S64, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + self.value_stack.push(ret); + } + Operator::I64DivS => { + // We assume that RAX and RDX are temporary registers here. + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); + a.emit_cqo(); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_idiv, + Size::S64, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + self.value_stack.push(ret); + } + Operator::I64RemU => { + // We assume that RAX and RDX are temporary registers here. + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + a.emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); + a.emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX)); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_div, + Size::S64, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S64, Location::GPR(GPR::RDX), ret); + self.value_stack.push(ret); + } + Operator::I64RemS => { + // We assume that RAX and RDX are temporary registers here. 
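+ // As in the 32-bit path above, i64::MIN % -1 is special-cased to 0 because
+ // IDIV would overflow on it; only then does the regular CQO + IDIV sequence run.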
+ let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + + let normal_path = a.get_label(); + let end = a.get_label(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S64, + Location::Imm64(0x8000000000000000u64), + loc_a, + ); + a.emit_jmp(Condition::NotEqual, normal_path); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S64, + Location::Imm64(0xffffffffffffffffu64), + loc_b, + ); + a.emit_jmp(Condition::NotEqual, normal_path); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::Imm64(0), + ret, + ); + a.emit_jmp(Condition::None, end); + + a.emit_label(normal_path); + + a.emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX)); + a.emit_cqo(); + Self::emit_relaxed_xdiv( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + Assembler::emit_idiv, + Size::S64, + loc_b, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_mov(Size::S64, Location::GPR(GPR::RDX), ret); + self.value_stack.push(ret); + a.emit_label(end); + } + Operator::I64And => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_and, + ), + Operator::I64Or => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_or, + ), + Operator::I64Xor => Self::emit_binop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_xor, + ), + Operator::I64Eq => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Equal, + )?, + Operator::I64Ne => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::NotEqual, + )?, + Operator::I64Eqz => Self::emit_cmpop_i64_dynamic_b( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Equal, + Location::Imm64(0), + )?, + Operator::I64Clz => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let src = match loc { + Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + tmp + } + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I64Clz src: unreachable code"), + }) + } + }; + + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let dst = match ret { + Location::Memory(_, _) => self.machine.acquire_temp_gpr().unwrap(), + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I64Clz dst: unreachable code"), + }) + } + }; + + if a.arch_has_xzcnt() { + a.arch_emit_lzcnt(Size::S64, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst)); + a.emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); + a.emit_label(end); + } + + match loc { + Location::Imm64(_) | Location::Imm32(_) | 
Location::Memory(_, _) => { + self.machine.release_temp_gpr(src); + } + _ => {} + }; + match ret { + Location::Memory(_, _) => { + a.emit_mov(Size::S64, Location::GPR(dst), ret); + self.machine.release_temp_gpr(dst); + } + _ => {} + }; + } + Operator::I64Ctz => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let src = match loc { + Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + tmp + } + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I64Ctz src: unreachable code"), + }) + } + }; + + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let dst = match ret { + Location::Memory(_, _) => self.machine.acquire_temp_gpr().unwrap(), + Location::GPR(reg) => reg, + _ => { + return Err(CodegenError { + message: format!("I64Ctz dst: unreachable code"), + }) + } + }; + + if a.arch_has_xzcnt() { + a.arch_emit_tzcnt(Size::S64, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); + a.emit_label(end); + } + + match loc { + Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => { + self.machine.release_temp_gpr(src); + } + _ => {} + }; + match ret { + Location::Memory(_, _) => { + a.emit_mov(Size::S64, Location::GPR(dst), ret); + self.machine.release_temp_gpr(dst); + } + _ => {} + }; + } + Operator::I64Popcnt => Self::emit_xcnt_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_popcnt, + )?, + Operator::I64Shl => Self::emit_shift_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_shl, + ), + Operator::I64ShrU => Self::emit_shift_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_shr, + ), + Operator::I64ShrS => Self::emit_shift_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_sar, + ), + Operator::I64Rotl => Self::emit_shift_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_rol, + ), + Operator::I64Rotr => Self::emit_shift_i64( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_ror, + ), + Operator::I64LtU => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Below, + )?, + Operator::I64LeU => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::BelowEqual, + )?, + Operator::I64GtU => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Above, + )?, + Operator::I64GeU => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::AboveEqual, + )?, + Operator::I64LtS => { + Self::emit_cmpop_i64(a, &mut self.machine, &mut self.value_stack, Condition::Less)?; + } + Operator::I64LeS => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::LessEqual, + )?, + Operator::I64GtS => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, + Condition::Greater, + )?, + Operator::I64GeS => Self::emit_cmpop_i64( + a, + &mut self.machine, + &mut self.value_stack, 
+ Condition::GreaterEqual, + )?, + Operator::I64ExtendI32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + Operator::I64ExtendI32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S32, + loc, + Size::S64, + ret, + )?; + } + Operator::I32Extend8S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S8, + loc, + Size::S32, + ret, + )?; + } + Operator::I32Extend16S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S16, + loc, + Size::S32, + ret, + )?; + } + Operator::I64Extend8S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S8, + loc, + Size::S64, + ret, + )?; + } + Operator::I64Extend16S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S16, + loc, + Size::S64, + ret, + )?; + } + Operator::I64Extend32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_relaxed_zx_sx( + a, + &mut self.machine, + Assembler::emit_movsx, + Size::S32, + loc, + Size::S64, + ret, + )?; + } + Operator::I32WrapI64 => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + + Operator::F32Const { value } => { + self.value_stack.push(Location::Imm32(value.bits())); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + self.machine + .state + .wasm_stack + 
.push(WasmAbstractValue::Const(value.bits() as u64)); + } + Operator::F32Add => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddss, + )?; + } + Operator::F32Sub => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsubss, + )? + } + Operator::F32Mul => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulss, + )? + } + Operator::F32Div => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivss, + )? + } + Operator::F32Max => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmaxss, + )?; + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => { + return Err(CodegenError { + message: format!("F32Max src1: unreachable code"), + }) + } + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => { + return Err(CodegenError { + message: format!("F32Max src2: unreachable code"), + }) + } + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), 
XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + a.emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); + a.emit_label(label2); + a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm32(0x7FC0_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2)); + a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => { + return Err(CodegenError { + message: format!("F32Max ret: unreachable code"), + }) + } + } + + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); + } + } + Operator::F32Min => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vminss, + )?; + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => { + return Err(CodegenError { + message: format!("F32Min src1: unreachable code"), + }) + } + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => { + return Err(CodegenError { + message: format!("F32Min src2: unreachable code"), + }) + } + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let 
label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + // load float -0.0 + a.emit_mov( + Size::S64, + Location::Imm32(0x8000_0000), // Negative zero + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp_xmm2)); + a.emit_label(label2); + a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm32(0x7FC0_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2)); + a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => { + return Err(CodegenError { + message: format!("F32Min ret: unreachable code"), + }) + } + } + + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); + } + } + Operator::F32Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqss, + )? + } + Operator::F32Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqss, + )? + } + Operator::F32Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltss, + )? + } + Operator::F32Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpless, + )? + } + Operator::F32Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtss, + )? + } + Operator::F32Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgess, + )? + } + Operator::F32Nearest => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_nearest, + )? + } + Operator::F32Floor => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_floor, + )? + } + Operator::F32Ceil => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_ceil, + )? + } + Operator::F32Trunc => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_trunc, + )? 
+ } + Operator::F32Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtss, + )? + } + + Operator::F32Copysign => { + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); + let tmp2 = self.machine.acquire_temp_gpr().unwrap(); + + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S32, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S32, *loc, Location::GPR(*tmp)); + } + } + } + } else { + a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + } + a.emit_and( + Size::S32, + Location::Imm32(0x7fffffffu32), + Location::GPR(tmp1), + ); + a.emit_and( + Size::S32, + Location::Imm32(0x80000000u32), + Location::GPR(tmp2), + ); + a.emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1)); + a.emit_mov(Size::S32, Location::GPR(tmp1), ret); + self.machine.release_temp_gpr(tmp2); + self.machine.release_temp_gpr(tmp1); + } + + Operator::F32Abs => { + // Preserve canonicalization state. + + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + a.emit_and( + Size::S32, + Location::Imm32(0x7fffffffu32), + Location::GPR(tmp), + ); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + + Operator::F32Neg => { + // Preserve canonicalization state. 
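+ // (Same idea as F32Abs above: only the sign bit changes and the value-stack
+ // depth stays the same, so the operand's fp_stack entry, including any pending
+ // canonicalization, simply carries over to the result.)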
+ + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + if a.arch_has_fneg() { + let tmp = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp), + ); + a.arch_emit_f32_neg(tmp, tmp); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp), + ret, + ); + self.machine.release_temp_xmm(tmp); + } else { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_32(31, tmp); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + } + + Operator::F64Const { value } => { + self.value_stack.push(Location::Imm64(value.bits())); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + self.machine + .state + .wasm_stack + .push(WasmAbstractValue::Const(value.bits())); + } + Operator::F64Add => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddsd, + )? + } + Operator::F64Sub => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsubsd, + )? + } + Operator::F64Mul => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulsd, + )? + } + Operator::F64Div => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivsd, + )? 
+ } + Operator::F64Max => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmaxsd, + )?; + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => { + return Err(CodegenError { + message: format!("F64Max src1: unreachable code"), + }) + } + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => { + return Err(CodegenError { + message: format!("F64Max src2: unreachable code"), + }) + } + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + a.emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); + a.emit_label(label2); + a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2)); + a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => { + return Err(CodegenError { + message: format!("F64Max ret: unreachable code"), + }) + } + } + + 
self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); + } + } + Operator::F64Min => { + self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vminsd, + )?; + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => { + return Err(CodegenError { + message: format!("F64Min src1: unreachable code"), + }) + } + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => { + return Err(CodegenError { + message: format!("F64Min src2: unreachable code"), + }) + } + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + // load float -0.0 + a.emit_mov( + Size::S64, + Location::Imm64(0x8000_0000_0000_0000), // Negative zero + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp_xmm2)); + a.emit_label(label2); + a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2)); + a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + 
a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => { + return Err(CodegenError { + message: format!("F64Min ret: unreachable code"), + }) + } + } + + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); + } + } + Operator::F64Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqsd, + )? + } + Operator::F64Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqsd, + )? + } + Operator::F64Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltsd, + )? + } + Operator::F64Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmplesd, + )? + } + Operator::F64Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtsd, + )? + } + Operator::F64Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgesd, + )? + } + Operator::F64Nearest => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_nearest, + )? + } + Operator::F64Floor => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_floor, + )? + } + Operator::F64Ceil => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_ceil, + )? + } + Operator::F64Trunc => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_trunc, + )? + } + Operator::F64Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtsd, + )? 
+ } + + Operator::F64Copysign => { + let loc_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); + let tmp2 = self.machine.acquire_temp_gpr().unwrap(); + + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S64, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S64, *loc, Location::GPR(*tmp)); + } + } + } + } else { + a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); + } + + let c = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov( + Size::S64, + Location::Imm64(0x7fffffffffffffffu64), + Location::GPR(c), + ); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1)); + + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(c), + ); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2)); + + a.emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1)); + a.emit_mov(Size::S64, Location::GPR(tmp1), ret); + + self.machine.release_temp_gpr(c); + self.machine.release_temp_gpr(tmp2); + self.machine.release_temp_gpr(tmp1); + } + + Operator::F64Abs => { + // Preserve canonicalization state. + + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + let c = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + a.emit_mov( + Size::S64, + Location::Imm64(0x7fffffffffffffffu64), + Location::GPR(c), + ); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp)); + a.emit_mov(Size::S64, Location::GPR(tmp), ret); + + self.machine.release_temp_gpr(c); + self.machine.release_temp_gpr(tmp); + } + + Operator::F64Neg => { + // Preserve canonicalization state. 
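+ // As with F32Neg above, only the sign bit changes, so the existing fp_stack entry
+ // and its canonicalization state remain valid.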
+ + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + if a.arch_has_fneg() { + let tmp = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp), + ); + a.arch_emit_f64_neg(tmp, tmp); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp), + ret, + ); + self.machine.release_temp_xmm(tmp); + } else { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_64(63, tmp); + a.emit_mov(Size::S64, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + } + + Operator::F64PromoteF32 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.promote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtss2sd, + )? + } + Operator::F32DemoteF64 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.demote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtsd2ss, + )? + } + + Operator::I32ReinterpretF32 => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; + + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S32, loc, ret); + } + } + Operator::F32ReinterpretI32 => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + } + + Operator::I64ReinterpretF64 => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; + + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S64, loc, ret); + } + } + Operator::F64ReinterpretI64 => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + 
.push(FloatValue::new(self.value_stack.len() - 1)); + + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + ret, + ); + } + } + + Operator::I32TruncF32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF32_LT_U32_MIN, + LEF32_GT_U32_MAX, + ); + + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I32TruncSatF32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF32_LT_U32_MIN, + LEF32_GT_U32_MAX, + |a, _m| { + a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out)); + }, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::u32::MAX), + Location::GPR(tmp_out), + ); + }, + None::, + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out); + } else { + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I32TruncF32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + 
Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF32_LT_I32_MIN, + LEF32_GT_I32_MAX, + ); + + a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + Operator::I32TruncSatF32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF32_LT_I32_MIN, + LEF32_GT_I32_MAX, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::i32::MIN as u32), + Location::GPR(tmp_out), + ); + }, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::i32::MAX as u32), + Location::GPR(tmp_out), + ); + }, + Some(|a: &mut Assembler, _m: &mut Machine| { + a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out)); + }), + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i32_trunc_sf32(tmp_in, tmp_out); + } else { + a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I64TruncF32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF32_LT_I64_MIN, + LEF32_GT_I64_MAX, + ); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + 
self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I64TruncSatF32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF32_LT_I64_MIN, + LEF32_GT_I64_MAX, + |a, _m| { + a.emit_mov( + Size::S64, + Location::Imm64(std::i64::MIN as u64), + Location::GPR(tmp_out), + ); + }, + |a, _m| { + a.emit_mov( + Size::S64, + Location::Imm64(std::i64::MAX as u64), + Location::GPR(tmp_out), + ); + }, + Some(|a: &mut Assembler, _m: &mut Machine| { + a.emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out)); + }), + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i64_trunc_sf32(tmp_in, tmp_out); + } else { + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I64TruncF32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF32_LT_U64_MIN, + LEF32_GT_U64_MAX, + ); + + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + + a.emit_mov( + Size::S32, + Location::Imm32(1593835520u32), + Location::GPR(tmp), + ); //float 9.22337203E+18 + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + 
a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + Operator::I64TruncSatF32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF32_LT_U64_MIN, + LEF32_GT_U64_MAX, + |a, _m| { + a.emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out)); + }, + |a, _m| { + a.emit_mov( + Size::S64, + Location::Imm64(std::u64::MAX), + Location::GPR(tmp_out), + ); + }, + None::, + |a, m| { + if a.arch_has_itruncf() { + a.arch_emit_i64_trunc_uf32(tmp_in, tmp_out); + } else { + let tmp = m.acquire_temp_gpr().unwrap(); + let tmp_x1 = m.acquire_temp_xmm().unwrap(); + let tmp_x2 = m.acquire_temp_xmm().unwrap(); + + a.emit_mov( + Size::S32, + Location::Imm32(1593835520u32), + Location::GPR(tmp), + ); //float 9.22337203E+18 + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + + m.release_temp_xmm(tmp_x2); + m.release_temp_xmm(tmp_x1); + m.release_temp_gpr(tmp); + } + }, + ); + + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I32TruncF64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF64_LT_U32_MIN, + 
LEF64_GT_U32_MAX, + ); + + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I32TruncSatF64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF64_LT_U32_MIN, + LEF64_GT_U32_MAX, + |a, _m| { + a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out)); + }, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::u32::MAX), + Location::GPR(tmp_out), + ); + }, + None::, + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i32_trunc_uf64(tmp_in, tmp_out); + } else { + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I32TruncF64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + let real_in = match loc { + Location::Imm32(_) | Location::Imm64(_) => { + a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); + a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); + tmp_in + } + Location::XMM(x) => x, + _ => { + a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); + tmp_in + } + }; + + Self::emit_f64_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + real_in, + GEF64_LT_I32_MIN, + LEF64_GT_I32_MAX, + ); + + a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I32TruncSatF64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + let real_in = match loc { + 
Location::Imm32(_) | Location::Imm64(_) => { + a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); + a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); + tmp_in + } + Location::XMM(x) => x, + _ => { + a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); + tmp_in + } + }; + + Self::emit_f64_int_conv_check_sat( + a, + &mut self.machine, + real_in, + GEF64_LT_I32_MIN, + LEF64_GT_I32_MAX, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::i32::MIN as u32), + Location::GPR(tmp_out), + ); + }, + |a, _m| { + a.emit_mov( + Size::S32, + Location::Imm32(std::i32::MAX as u32), + Location::GPR(tmp_out), + ); + }, + Some(|a: &mut Assembler, _m: &mut Machine| { + a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out)); + }), + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i32_trunc_sf64(tmp_in, tmp_out); + } else { + a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I64TruncF64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF64_LT_I64_MIN, + LEF64_GT_I64_MAX, + ); + + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I64TruncSatF64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF64_LT_I64_MIN, + LEF64_GT_I64_MAX, + |a, _m| { + a.emit_mov( + Size::S64, + Location::Imm64(std::i64::MIN as u64), + Location::GPR(tmp_out), + ); + }, + |a, _m| { + a.emit_mov( + Size::S64, + Location::Imm64(std::i64::MAX as u64), + Location::GPR(tmp_out), + ); + }, + Some(|a: &mut Assembler, _m: &mut Machine| { + a.emit_mov(Size::S64, Location::Imm64(0), 
Location::GPR(tmp_out)); + }), + |a, _m| { + if a.arch_has_itruncf() { + a.arch_emit_i64_trunc_sf64(tmp_in, tmp_out); + } else { + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + } + }, + ); + + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::I64TruncF64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_trap( + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + tmp_in, + GEF64_LT_U64_MIN, + LEF64_GT_U64_MAX, + ); + + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + + a.emit_mov( + Size::S64, + Location::Imm64(4890909195324358656u64), + Location::GPR(tmp), + ); //double 9.2233720368547758E+18 + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + } + + Operator::I64TruncSatF64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack.pop1()?; + + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check_sat( + a, + &mut self.machine, + tmp_in, + GEF64_LT_U64_MIN, + LEF64_GT_U64_MAX, + |a, _m| { + a.emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out)); + }, + |a, _m| { + 
a.emit_mov( + Size::S64, + Location::Imm64(std::u64::MAX), + Location::GPR(tmp_out), + ); + }, + None::, + |a, m| { + if a.arch_has_itruncf() { + a.arch_emit_i64_trunc_uf64(tmp_in, tmp_out); + } else { + let tmp = m.acquire_temp_gpr().unwrap(); + let tmp_x1 = m.acquire_temp_xmm().unwrap(); + let tmp_x2 = m.acquire_temp_xmm().unwrap(); + + a.emit_mov( + Size::S64, + Location::Imm64(4890909195324358656u64), + Location::GPR(tmp), + ); //double 9.2233720368547758E+18 + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + + m.release_temp_xmm(tmp_x2); + m.release_temp_xmm(tmp_x1); + m.release_temp_gpr(tmp); + } + }, + ); + + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } + + Operator::F32ConvertI32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. + + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F32ConvertI32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. 
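+ // There is no unsigned 32-bit integer convert among the SSE/AVX instructions used here,
+ // so the fallback zero-extends the u32 into a 64-bit register (a 32-bit mov clears the
+ // upper bits) and converts it with the signed 64-bit vcvtsi2ss, which is exact for any u32.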
+ + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F32ConvertI64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F32ConvertI64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. 
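+ // A u64 with the top bit set does not fit in a signed convert, so the fallback halves
+ // the value while OR-ing the dropped low bit back in (round-to-odd), converts the
+ // result as a signed i64, and doubles it with vaddss to recover the magnitude.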
+ + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let do_convert = a.get_label(); + let end_convert = a.get_label(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + + Operator::F64ConvertI32S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. + + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F64ConvertI32U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. 
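+ // Same approach as F32ConvertI32U: zero-extend the u32 to 64 bits and use the signed
+ // 64-bit vcvtsi2sd, which represents every u32 value exactly.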
+ + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F64ConvertI64S => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. + + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + Operator::F64ConvertI64U => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. 
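+ // Same halving trick as F32ConvertI64U: when the sign bit is set, shift right by one
+ // with the dropped bit OR-ed back in, convert as a signed i64, then double with vaddsd.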
+ + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let do_convert = a.get_label(); + let end_convert = a.get_label(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } + } + + Operator::Call { function_index } => { + let function_index = function_index as usize; + let label = self + .function_labels + .as_mut() + .unwrap() + .entry(function_index) + .or_insert_with(|| (a.get_label(), None)) + .0; + let sig_index = *self + .function_signatures + .get(FuncIndex::new(function_index)) + .unwrap(); + let sig = self.signatures.get(sig_index).unwrap(); + let param_types: SmallVec<[WpType; 8]> = + sig.params().iter().cloned().map(type_to_wp_type).collect(); + let return_types: SmallVec<[WpType; 1]> = + sig.returns().iter().cloned().map(type_to_wp_type).collect(); + + let params: SmallVec<[_; 8]> = self + .value_stack + .drain(self.value_stack.len() - param_types.len()..) + .collect(); + self.machine.release_locations_only_regs(¶ms); + + self.machine.release_locations_only_osr_state(params.len()); + + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. 
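+ // fp_stack entries whose depth is at or above the shrunken value_stack length belong
+ // to the arguments just drained into `params`; canonicalize those entries and pop them.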
+ while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + + Self::emit_call_sysv_label( + a, + &mut self.machine, + label, + params.iter().map(|x| *x), + Some((&mut self.fsm, &mut self.control_stack)), + )?; + + self.machine.release_locations_only_stack(a, ¶ms); + + if return_types.len() > 0 { + let ret = self.machine.acquire_locations( + a, + &[( + return_types[0], + MachineValue::WasmStack(self.value_stack.len()), + )], + false, + )[0]; + self.value_stack.push(ret); + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else { + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + } + } + } + Operator::CallIndirect { index, table_index } => { + if table_index != 0 { + return Err(CodegenError { + message: format!("CallIndirect: table_index is not 0"), + }); + } + let sig = self.signatures.get(SigIndex::new(index as usize)).unwrap(); + let param_types: SmallVec<[WpType; 8]> = + sig.params().iter().cloned().map(type_to_wp_type).collect(); + let return_types: SmallVec<[WpType; 1]> = + sig.returns().iter().cloned().map(type_to_wp_type).collect(); + + let func_index = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + let params: SmallVec<[_; 8]> = self + .value_stack + .drain(self.value_stack.len() - param_types.len()..) + .collect(); + self.machine.release_locations_only_regs(¶ms); + + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. 
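+ //
+ // The code below then loads the table base and element count from `vm::Ctx`,
+ // bounds-checks the dynamic function index, and compares the callee's stored
+ // signature id against the expected one before performing the indirect call.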
+ while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + + let table_base = self.machine.acquire_temp_gpr().unwrap(); + let table_count = self.machine.acquire_temp_gpr().unwrap(); + let sigidx = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + match TableIndex::new(0).local_or_import(module_info) { + LocalOrImport::Local(_) => vm::Ctx::offset_tables(), + LocalOrImport::Import(_) => vm::Ctx::offset_imported_tables(), + } as i32, + ), + Location::GPR(table_base), + ); + a.emit_mov( + Size::S64, + Location::Memory(table_base, 0), + Location::GPR(table_base), + ); + a.emit_mov( + Size::S32, + Location::Memory(table_base, LocalTable::offset_count() as i32), + Location::GPR(table_count), + ); + a.emit_mov( + Size::S64, + Location::Memory(table_base, LocalTable::offset_base() as i32), + Location::GPR(table_base), + ); + a.emit_cmp(Size::S32, func_index, Location::GPR(table_count)); + Self::mark_range_with_exception_code( + a, + self.exception_table.as_mut().unwrap(), + ExceptionCode::CallIndirectOOB, + |a| a.emit_conditional_trap(Condition::BelowEqual), + ); + a.emit_mov(Size::S32, func_index, Location::GPR(table_count)); + a.emit_imul_imm32_gpr64(vm::Anyfunc::size() as u32, table_count); + a.emit_add( + Size::S64, + Location::GPR(table_base), + Location::GPR(table_count), + ); + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_signatures() as i32, + ), + Location::GPR(sigidx), + ); + a.emit_mov( + Size::S32, + Location::Memory(sigidx, (index * 4) as i32), + Location::GPR(sigidx), + ); + a.emit_cmp( + Size::S32, + Location::GPR(sigidx), + Location::Memory(table_count, (vm::Anyfunc::offset_sig_id() as usize) as i32), + ); + Self::mark_range_with_exception_code( + a, + self.exception_table.as_mut().unwrap(), + ExceptionCode::IncorrectCallIndirectSignature, + |a| a.emit_conditional_trap(Condition::NotEqual), + ); + + self.machine.release_temp_gpr(sigidx); + self.machine.release_temp_gpr(table_count); + self.machine.release_temp_gpr(table_base); + + if table_count != GPR::RAX { + a.emit_mov( + Size::S64, + Location::GPR(table_count), + Location::GPR(GPR::RAX), + ); + } + + self.machine.release_locations_only_osr_state(params.len()); + + Self::emit_call_sysv( + a, + &mut self.machine, + |a| { + if a.arch_requires_indirect_call_trampoline() { + a.arch_emit_indirect_call_with_trampoline(Location::Memory( + GPR::RAX, + (vm::Anyfunc::offset_func() as usize) as i32, + )); + } else { + a.emit_call_location(Location::Memory( + GPR::RAX, + (vm::Anyfunc::offset_func() as usize) as i32, + )); + } + }, + params.iter().map(|x| *x), + Some((&mut self.fsm, &mut self.control_stack)), + )?; + + self.machine.release_locations_only_stack(a, ¶ms); + + if return_types.len() > 0 { + let ret = self.machine.acquire_locations( + a, + &[( + return_types[0], + MachineValue::WasmStack(self.value_stack.len()), + )], + false, + )[0]; + self.value_stack.push(ret); + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else 
{ + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + } + } + } + Operator::If { ty } => { + let label_end = a.get_label(); + let label_else = a.get_label(); + + let cond = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + let frame = ControlFrame { + label: label_end, + loop_like: false, + if_else: IfElseState::If(label_else), + returns: match ty { + WpTypeOrFuncType::Type(WpType::EmptyBlockType) => smallvec![], + WpTypeOrFuncType::Type(inner_ty) => smallvec![inner_ty], + _ => { + return Err(CodegenError { + message: format!("If: multi-value returns not yet implemented"), + }) + } + }, + value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), + state: self.machine.state.clone(), + state_diff_id: Self::get_state_diff( + &self.machine, + &mut self.fsm, + &mut self.control_stack, + ), + }; + self.control_stack.push(frame); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(0), + cond, + ); + a.emit_jmp(Condition::Equal, label_else); + } + Operator::Else => { + let mut frame = self.control_stack.last_mut().unwrap(); + + if !was_unreachable && frame.returns.len() > 0 { + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + + let released: &[Location] = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations(a, released); + self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); + + match frame.if_else { + IfElseState::If(label) => { + a.emit_jmp(Condition::None, frame.label); + a.emit_label(label); + frame.if_else = IfElseState::Else; + } + _ => { + return Err(CodegenError { + message: format!("Else: frame.if_else unreachable code"), + }) + } + } + } + Operator::Select => { + let cond = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let v_b = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let v_a = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cncl: Option<(Option, Option)> = + if self.fp_stack.len() >= 2 + && self.fp_stack[self.fp_stack.len() - 2].depth == self.value_stack.len() + && self.fp_stack[self.fp_stack.len() - 1].depth + == self.value_stack.len() + 1 + { + let (left, right) = self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + Some((left.canonicalization, right.canonicalization)) + } else { + None + }; + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let end_label = a.get_label(); + let zero_label = a.get_label(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(0), + cond, + ); + a.emit_jmp(Condition::Equal, 
zero_label); + match cncl { + Some((Some(fp), _)) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_a, ret); + } + _ => { + if v_a != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_a, + ret, + ); + } + } + } + a.emit_jmp(Condition::None, end_label); + a.emit_label(zero_label); + match cncl { + Some((_, Some(fp))) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_b, ret); + } + _ => { + if v_b != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_b, + ret, + ); + } + } + } + a.emit_label(end_label); + } + Operator::Block { ty } => { + let frame = ControlFrame { + label: a.get_label(), + loop_like: false, + if_else: IfElseState::None, + returns: match ty { + WpTypeOrFuncType::Type(WpType::EmptyBlockType) => smallvec![], + WpTypeOrFuncType::Type(inner_ty) => smallvec![inner_ty], + _ => { + return Err(CodegenError { + message: format!("Block: multi-value returns not yet implemented"), + }) + } + }, + value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), + state: self.machine.state.clone(), + state_diff_id: Self::get_state_diff( + &self.machine, + &mut self.fsm, + &mut self.control_stack, + ), + }; + self.control_stack.push(frame); + } + Operator::Loop { ty } => { + let label = a.get_label(); + let state_diff_id = + Self::get_state_diff(&self.machine, &mut self.fsm, &mut self.control_stack); + let activate_offset = a.get_offset().0; + + self.control_stack.push(ControlFrame { + label: label, + loop_like: true, + if_else: IfElseState::None, + returns: match ty { + WpTypeOrFuncType::Type(WpType::EmptyBlockType) => smallvec![], + WpTypeOrFuncType::Type(inner_ty) => smallvec![inner_ty], + _ => { + return Err(CodegenError { + message: format!("Loop: multi-value returns not yet implemented"), + }) + } + }, + value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), + state: self.machine.state.clone(), + state_diff_id, + }); + a.emit_label(label); + + // Check interrupt signal without branching + if self.config.full_preemption { + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_interrupt_signal_mem() as i32, + ), + Location::GPR(GPR::RAX), + ); + self.fsm.loop_offsets.insert( + a.get_offset().0, + OffsetInfo { + end_offset: a.get_offset().0 + 1, + activate_offset, + diff_id: state_diff_id, + }, + ); + self.fsm.wasm_offset_to_target_offset.insert( + self.machine.state.wasm_inst_offset, + SuspendOffset::Loop(a.get_offset().0), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, 0), + Location::GPR(GPR::RAX), + ); + } + } + Operator::Nop => {} + Operator::MemorySize { reserved } => { + let memory_index = MemoryIndex::new(reserved as usize); + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_intrinsics() as i32, + ), + Location::GPR(GPR::RAX), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, vm::Intrinsics::offset_memory_size() as i32), + Location::GPR(GPR::RAX), + ); + Self::emit_call_sysv( + a, + &mut self.machine, + |a| { + let label = a.get_label(); + let after = a.get_label(); + a.emit_jmp(Condition::None, after); + a.emit_label(label); + a.emit_host_redirection(GPR::RAX); + a.emit_label(after); + a.emit_call_label(label); + }, + 
iter::once(Location::Imm32(memory_index.index() as u32)), + None, + )?; + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + } + Operator::MemoryGrow { reserved } => { + let memory_index = MemoryIndex::new(reserved as usize); + let param_pages = self.value_stack.pop().unwrap(); + + self.machine.release_locations_only_regs(&[param_pages]); + + a.emit_mov( + Size::S64, + Location::Memory( + Machine::get_vmctx_reg(), + vm::Ctx::offset_intrinsics() as i32, + ), + Location::GPR(GPR::RAX), + ); + a.emit_mov( + Size::S64, + Location::Memory(GPR::RAX, vm::Intrinsics::offset_memory_grow() as i32), + Location::GPR(GPR::RAX), + ); + + self.machine.release_locations_only_osr_state(1); + + Self::emit_call_sysv( + a, + &mut self.machine, + |a| { + let label = a.get_label(); + let after = a.get_label(); + a.emit_jmp(Condition::None, after); + a.emit_label(label); + a.emit_host_redirection(GPR::RAX); + a.emit_label(after); + a.emit_call_label(label); + }, + iter::once(Location::Imm32(memory_index.index() as u32)) + .chain(iter::once(param_pages)), + None, + )?; + + self.machine.release_locations_only_stack(a, &[param_pages]); + + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); + } + Operator::I32Load { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::F32Load { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I32Load8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32Load8S { ref memarg } => { + let target = + get_location_released(a, &mut 
self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movsx, + Size::S8, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32Load16U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32Load16S { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movsx, + Size::S16, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32Store { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::F32Store { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 4, + |a, m, addr| { + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } + + Ok(()) + }, + )?; + } + Operator::I32Store8 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + 
Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I32Store16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S16, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64Load { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::F64Load { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I64Load8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64Load8S { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movsx, + Size::S8, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64Load16U { ref memarg } => { + let target = + get_location_released(a, &mut 
self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64Load16S { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movsx, + Size::S16, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64Load32U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 4, + |a, m, addr| { + match ret { + Location::GPR(_) => {} + Location::Memory(base, offset) => { + a.emit_mov( + Size::S32, + Location::Imm32(0), + Location::Memory(base, offset + 4), + ); // clear upper bits + } + _ => { + return Err(CodegenError { + message: format!("I64Load32U ret: unreachable code"), + }) + } + } + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I64Load32S { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + false, + 4, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movsx, + Size::S32, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64Store { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::F64Store { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = 
self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 8, + |a, m, addr| { + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } + Ok(()) + }, + )?; + } + Operator::I64Store8 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64Store16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S16, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64Store32 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + false, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::Unreachable => { + Self::mark_trappable(a, &self.machine, &mut self.fsm, &mut self.control_stack); + self.exception_table + .as_mut() + .unwrap() + .offset_to_code + .insert(a.get_offset().0, ExceptionCode::Unreachable); + a.emit_ud2(); + self.unreachable_depth = 1; + } + Operator::Return => { + let frame = &self.control_stack[0]; + if frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!("Return: incorrect frame.returns"), + }); + } + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + 
loc, + Location::GPR(GPR::RAX), + ); + } + } + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + self.unreachable_depth = 1; + } + Operator::Br { relative_depth } => { + let frame = + &self.control_stack[self.control_stack.len() - 1 - (relative_depth as usize)]; + if !frame.loop_like && frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!("Br: incorrect frame.returns"), + }); + } + let loc = *self.value_stack.last().unwrap(); + + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } + + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + self.unreachable_depth = 1; + } + Operator::BrIf { relative_depth } => { + let after = a.get_label(); + let cond = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(0), + cond, + ); + a.emit_jmp(Condition::Equal, after); + + let frame = + &self.control_stack[self.control_stack.len() - 1 - (relative_depth as usize)]; + if !frame.loop_like && frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!("BrIf: incorrect frame.returns"), + }); + } + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + + a.emit_label(after); + } + Operator::BrTable { ref table } => { + let (targets, default_target) = table.read_table().map_err(|e| CodegenError { + message: format!("BrTable read_table: {:?}", e), + })?; + let cond = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let table_label = a.get_label(); + let mut table: Vec = vec![]; + let default_br = a.get_label(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_cmp, + Size::S32, + Location::Imm32(targets.len() as u32), + cond, + ); + a.emit_jmp(Condition::AboveEqual, default_br); + + a.emit_lea_label(table_label, Location::GPR(GPR::RCX)); + a.emit_mov(Size::S32, cond, Location::GPR(GPR::RDX)); + + let instr_size = 
a.get_jmp_instr_size(); + a.emit_imul_imm32_gpr64(instr_size as _, GPR::RDX); + a.emit_add(Size::S64, Location::GPR(GPR::RCX), Location::GPR(GPR::RDX)); + a.emit_jmp_location(Location::GPR(GPR::RDX)); + + for target in targets.iter() { + let label = a.get_label(); + a.emit_label(label); + table.push(label); + let frame = + &self.control_stack[self.control_stack.len() - 1 - (*target as usize)]; + if !frame.loop_like && frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!( + "BrTable: incorrect frame.returns for {:?}", + target + ), + }); + } + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + } + a.emit_label(default_br); + + { + let frame = &self.control_stack + [self.control_stack.len() - 1 - (default_target as usize)]; + if !frame.loop_like && frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!("BrTable: incorrect frame.returns"), + }); + } + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + } + + a.emit_label(table_label); + for x in table { + a.emit_jmp(Condition::None, x); + } + self.unreachable_depth = 1; + } + Operator::Drop => { + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + if let Some(x) = self.fp_stack.last() { + if x.depth == self.value_stack.len() { + self.fp_stack.pop1()?; + } + } + } + Operator::End => { + let frame = self.control_stack.pop().unwrap(); + + if !was_unreachable && frame.returns.len() > 0 { + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + 
Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + + if self.control_stack.len() == 0 { + a.emit_label(frame.label); + self.machine.finalize_locals(a, &self.locals); + a.emit_mov(Size::S64, Location::GPR(GPR::RBP), Location::GPR(GPR::RSP)); + a.emit_pop(Size::S64, Location::GPR(GPR::RBP)); + + // Make a copy of the return value in XMM0, as required by the SysV CC. + match self.signature.returns() { + [x] if *x == Type::F32 || *x == Type::F64 => { + a.emit_mov( + Size::S64, + Location::GPR(GPR::RAX), + Location::XMM(XMM::XMM0), + ); + } + _ => {} + } + a.emit_ret(); + } else { + let released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations(a, released); + self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); + + if !frame.loop_like { + a.emit_label(frame.label); + } + + if let IfElseState::If(label) = frame.if_else { + a.emit_label(label); + } + + if frame.returns.len() > 0 { + if frame.returns.len() != 1 { + return Err(CodegenError { + message: format!("End: incorrect frame.returns"), + }); + } + let loc = self.machine.acquire_locations( + a, + &[( + frame.returns[0], + MachineValue::WasmStack(self.value_stack.len()), + )], + false, + )[0]; + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc); + self.value_stack.push(loc); + if frame.returns[0].is_float() { + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + // we already canonicalized at the `Br*` instruction or here previously. + } + } + } + } + Operator::AtomicFence { flags: _ } => { + // Fence is a nop. + // + // Fence was added to preserve information about fences from + // source languages. If in the future Wasm extends the memory + // model, and if we hadn't recorded what fences used to be there, + // it would lead to data races that weren't present in the + // original source language. 
+ } + Operator::I32AtomicLoad { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I32AtomicLoad8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32AtomicLoad16U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S32, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I32AtomicStore { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I32AtomicStore8 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I32AtomicStore16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + 
Size::S16, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicLoad { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicLoad8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64AtomicLoad16U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S64, + ret, + )?; + Ok(()) + }, + )?; + } + Operator::I64AtomicLoad32U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, m, addr| { + match ret { + Location::GPR(_) => {} + Location::Memory(base, offset) => { + a.emit_mov( + Size::S32, + Location::Imm32(0), + Location::Memory(base, offset + 4), + ); // clear upper bits + } + _ => { + return Err(CodegenError { + message: format!("I64AtomicLoad32U ret: unreachable code"), + }) + } + } + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicStore { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicStore8 { ref memarg } => { + let target_value = 
+ get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicStore16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S16, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I64AtomicStore32 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target_addr, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + } + Operator::I32AtomicRmwAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmwAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S64, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw8AddU { ref memarg } => { + let loc = + get_location_released(a, &mut 
self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16AddU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw8AddU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16AddU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32AddU { ref memarg } => 
{ + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwSub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + a.emit_neg(Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmwSub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + a.emit_neg(Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S64, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw8SubU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); + a.emit_neg(Size::S8, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + 
}, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16SubU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); + a.emit_neg(Size::S16, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw8SubU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + a.emit_neg(Size::S8, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16SubU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + a.emit_neg(Size::S16, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32SubU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + a.emit_neg(Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + 
&self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 4, + Size::S32, + Size::S32, + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmwAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw8AndU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw16AndU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw8AndU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + 
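+ // The And/Or/Xor RMW arms go through `emit_compare_and_swap` rather than a
+ // single `lock`-prefixed instruction: `lock and/or/xor` only updates flags and
+ // does not return the previous memory value, which wasm's atomic.rmw
+ // operators must produce. The helper presumably emits a retry loop, roughly:
+ //
+ //     old = *addr;
+ //     loop {
+ //         new = op(old);                        // e.g. old & src
+ //         (swapped, current) = cmpxchg(addr, old, new);
+ //         if swapped { break; }
+ //         old = current;
+ //     }
+ //
+ // (Illustrative sketch only; the exact sequence lives in `emit_compare_and_swap`.)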
Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw16AndU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw32AndU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmwOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 4, + Size::S32, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmwOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw8OrU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + 
&self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw16OrU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw8OrU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw16OrU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw32OrU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmwXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + 
self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 4, + Size::S32, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmwXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw8XorU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmw16XorU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw8XorU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw16XorU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + 
target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I64AtomicRmw32XorU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + )?; + } + Operator::I32AtomicRmwXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmwXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_xchg(Size::S64, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw8XchgU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16XchgU { ref memarg } => { + 
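+ // `xchg reg, [mem]` is implicitly locked on x86, so the Xchg arms need no
+ // explicit `lock` prefix: the operand is zero-extended into a temporary
+ // register, swapped with memory, and the register (now holding the old
+ // memory contents) becomes the wasm result. Roughly:
+ //
+ //     value = zero_extend(loc);     // movzx
+ //     swap(value, *addr);           // xchg, atomic by definition
+ //     ret = value;                  // previous memory contents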
let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw8XchgU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16XchgU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32XchgU { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)); + Ok(()) + }, + )?; + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwCmpxchg { ref memarg } => { + 
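+ // `lock cmpxchg` requires the expected value in RAX: if [addr] equals RAX the
+ // new value is stored, otherwise RAX is reloaded from [addr]. Either way RAX
+ // ends up holding the previous memory contents, which is exactly what wasm's
+ // cmpxchg returns, so `compare` is pinned to RAX below and copied into `ret`
+ // afterwards. A scratch GPR that collides with neither `cmp` nor `new` is
+ // chosen for the replacement value and saved/restored around the operation.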
let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmwCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S64, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S64, Location::GPR(compare), ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I32AtomicRmw8CmpxchgU { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + 
target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S8, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S8, Location::GPR(compare), Size::S32, ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I32AtomicRmw16CmpxchgU { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S16, Location::GPR(compare), Size::S32, ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw8CmpxchgU { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S8, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S8, Location::GPR(compare), Size::S64, ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw16CmpxchgU { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == 
Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S16, Location::GPR(compare), Size::S64, ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw32CmpxchgU { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_unused_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + self.exception_table.as_mut().unwrap(), + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + Ok(()) + }, + )?; + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + _ => { + return Err(CodegenError { + message: format!("not yet implemented: {:?}", op), + }); + } + } + + Ok(()) + } +} + +fn type_to_wp_type(ty: Type) -> WpType { + match ty { + Type::I32 => WpType::I32, + Type::I64 => WpType::I64, + Type::F32 => WpType::F32, + Type::F64 => WpType::F64, + Type::V128 => WpType::V128, + } +} + +fn get_location_released(a: &mut Assembler, m: &mut Machine, loc: Location) -> Location { + m.release_locations(a, &[loc]); + loc +} + +fn sort_call_movs(movs: &mut [(Location, GPR)]) { + for i in 0..movs.len() { + for j in (i + 1)..movs.len() { + if let Location::GPR(src_gpr) = movs[j].0 { + if src_gpr == movs[i].1 { + movs.swap(i, j); + } + } + } + } + + /* + { + use std::collections::{HashMap, HashSet, VecDeque}; + let mut mov_map: HashMap> = HashMap::new(); + for mov in movs.iter() { + if let Location::GPR(src_gpr) = mov.0 { + if src_gpr != mov.1 { + mov_map.entry(src_gpr).or_insert_with(|| HashSet::new()).insert(mov.1); + } + } + } + + for (start, _) in mov_map.iter() { + let mut q: VecDeque = VecDeque::new(); + let mut black: HashSet = HashSet::new(); + + q.push_back(*start); + black.insert(*start); + + while q.len() > 0 { + let reg = q.pop_front().unwrap(); + let empty_set = HashSet::new(); + for x in mov_map.get(®).unwrap_or(&empty_set).iter() { + if black.contains(x) { + panic!("cycle detected"); + } + q.push_back(*x); + black.insert(*x); + } + } + } + } + */ +} + +// Constants for the bounds of truncation operations. 
These are the least or +// greatest exact floats in either f32 or f64 representation less-than (for +// least) or greater-than (for greatest) the i32 or i64 or u32 or u64 +// min (for least) or max (for greatest), when rounding towards zero. + +/// Greatest Exact Float (32 bits) less-than i32::MIN when rounding towards zero. +const GEF32_LT_I32_MIN: f32 = -2147483904.0; +/// Least Exact Float (32 bits) greater-than i32::MAX when rounding towards zero. +const LEF32_GT_I32_MAX: f32 = 2147483648.0; +/// Greatest Exact Float (32 bits) less-than i64::MIN when rounding towards zero. +const GEF32_LT_I64_MIN: f32 = -9223373136366403584.0; +/// Least Exact Float (32 bits) greater-than i64::MAX when rounding towards zero. +const LEF32_GT_I64_MAX: f32 = 9223372036854775808.0; +/// Greatest Exact Float (32 bits) less-than u32::MIN when rounding towards zero. +const GEF32_LT_U32_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u32::MAX when rounding towards zero. +const LEF32_GT_U32_MAX: f32 = 4294967296.0; +/// Greatest Exact Float (32 bits) less-than u64::MIN when rounding towards zero. +const GEF32_LT_U64_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u64::MAX when rounding towards zero. +const LEF32_GT_U64_MAX: f32 = 18446744073709551616.0; + +/// Greatest Exact Float (64 bits) less-than i32::MIN when rounding towards zero. +const GEF64_LT_I32_MIN: f64 = -2147483649.0; +/// Least Exact Float (64 bits) greater-than i32::MAX when rounding towards zero. +const LEF64_GT_I32_MAX: f64 = 2147483648.0; +/// Greatest Exact Float (64 bits) less-than i64::MIN when rounding towards zero. +const GEF64_LT_I64_MIN: f64 = -9223372036854777856.0; +/// Least Exact Float (64 bits) greater-than i64::MAX when rounding towards zero. +const LEF64_GT_I64_MAX: f64 = 9223372036854775808.0; +/// Greatest Exact Float (64 bits) less-than u32::MIN when rounding towards zero. +const GEF64_LT_U32_MIN: f64 = -1.0; +/// Least Exact Float (64 bits) greater-than u32::MAX when rounding towards zero. +const LEF64_GT_U32_MAX: f64 = 4294967296.0; +/// Greatest Exact Float (64 bits) less-than u64::MIN when rounding towards zero. +const GEF64_LT_U64_MIN: f64 = -1.0; +/// Least Exact Float (64 bits) greater-than u64::MAX when rounding towards zero. +const LEF64_GT_U64_MAX: f64 = 18446744073709551616.0; diff --git a/lib/compiler-singlepass/src/common_decl.rs b/lib/compiler-singlepass/src/common_decl.rs new file mode 100644 index 000000000..89ae92770 --- /dev/null +++ b/lib/compiler-singlepass/src/common_decl.rs @@ -0,0 +1,155 @@ +use std::collections::BTreeMap; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub struct RegisterIndex(pub usize); + +/// Information of an inline breakpoint. +/// +/// TODO: Move this into runtime. +#[derive(Clone, Debug)] +pub struct InlineBreakpoint { + /// Size in bytes taken by this breakpoint's instruction sequence. + pub size: usize, + + /// Type of the inline breakpoint. + pub ty: InlineBreakpointType, +} + +/// The type of an inline breakpoint. +#[repr(u8)] +#[derive(Copy, Clone, Debug)] +pub enum InlineBreakpointType { + /// A middleware invocation breakpoint. + Middleware, +} + +/// A kind of wasm or constant value +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum WasmAbstractValue { + /// A wasm runtime value + Runtime, + /// A wasm constant value + Const(u64), +} + +/// A container for the state of a running wasm instance. +#[derive(Clone, Debug)] +pub struct MachineState { + /// Stack values. + pub stack_values: Vec, + /// Register values. 
+ pub register_values: Vec, + /// Previous frame. + pub prev_frame: BTreeMap, + /// Wasm stack. + pub wasm_stack: Vec, + /// Private depth of the wasm stack. + pub wasm_stack_private_depth: usize, + /// Wasm instruction offset. + pub wasm_inst_offset: usize, +} + +/// A diff of two `MachineState`s. +#[derive(Clone, Debug, Default)] +pub struct MachineStateDiff { + /// Last. + pub last: Option, + /// Stack push. + pub stack_push: Vec, + /// Stack pop. + pub stack_pop: usize, + + /// Register diff. + pub reg_diff: Vec<(RegisterIndex, MachineValue)>, + + /// Previous frame diff. + pub prev_frame_diff: BTreeMap>, // None for removal + + /// Wasm stack push. + pub wasm_stack_push: Vec, + /// Wasm stack pop. + pub wasm_stack_pop: usize, + /// Private depth of the wasm stack. + pub wasm_stack_private_depth: usize, // absolute value; not a diff. + /// Wasm instruction offset. + pub wasm_inst_offset: usize, // absolute value; not a diff. +} + +/// A kind of machine value. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum MachineValue { + /// Undefined. + Undefined, + /// Vmctx. + Vmctx, + /// Vmctx Deref. + VmctxDeref(Vec), + /// Preserve Register. + PreserveRegister(RegisterIndex), + /// Copy Stack BP Relative. + CopyStackBPRelative(i32), // relative to Base Pointer, in byte offset + /// Explicit Shadow. + ExplicitShadow, // indicates that all values above this are above the shadow region + /// Wasm Stack. + WasmStack(usize), + /// Wasm Local. + WasmLocal(usize), + /// Two Halves. + TwoHalves(Box<(MachineValue, MachineValue)>), // 32-bit values. TODO: optimize: add another type for inner "half" value to avoid boxing? +} + +/// A map of function states. +#[derive(Clone, Debug)] +pub struct FunctionStateMap { + /// Initial. + pub initial: MachineState, + /// Local Function Id. + pub local_function_id: usize, + /// Locals. + pub locals: Vec, + /// Shadow size. + pub shadow_size: usize, // for single-pass backend, 32 bytes on x86-64 + /// Diffs. + pub diffs: Vec, + /// Wasm Function Header target offset. + pub wasm_function_header_target_offset: Option, + /// Wasm offset to target offset + pub wasm_offset_to_target_offset: BTreeMap, + /// Loop offsets. + pub loop_offsets: BTreeMap, /* suspend_offset -> info */ + /// Call offsets. + pub call_offsets: BTreeMap, /* suspend_offset -> info */ + /// Trappable offsets. + pub trappable_offsets: BTreeMap, /* suspend_offset -> info */ +} + +/// A kind of suspend offset. +#[derive(Clone, Copy, Debug)] +pub enum SuspendOffset { + /// A loop. + Loop(usize), + /// A call. + Call(usize), + /// A trappable. + Trappable(usize), +} + +/// Info for an offset. +#[derive(Clone, Debug)] +pub struct OffsetInfo { + /// End offset. + pub end_offset: usize, // excluded bound + /// Diff Id. + pub diff_id: usize, + /// Activate offset. + pub activate_offset: usize, +} + +/// A map of module state. +#[derive(Clone, Debug)] +pub struct ModuleStateMap { + /// Local functions. + pub local_functions: BTreeMap, + /// Total size. 
+ pub total_size: usize, +} diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs new file mode 100644 index 000000000..ed0efb8ae --- /dev/null +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -0,0 +1,1327 @@ +use crate::common_decl::InlineBreakpointType; +pub use crate::x64_decl::{GPR, XMM}; +use dynasm::dynasm; +use dynasmrt::{x64::Assembler, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; + +fn _dummy(_a: &Assembler) { + dynasm!( + _a + ; .arch x64 + ); +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Location { + Imm8(u8), + Imm32(u32), + Imm64(u64), + Imm128(u128), + GPR(GPR), + XMM(XMM), + Memory(GPR, i32), +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Condition { + None, + Above, + AboveEqual, + Below, + BelowEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + Equal, + NotEqual, + Signed, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub enum Size { + S8, + S16, + S32, + S64, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[allow(dead_code)] +pub enum XMMOrMemory { + XMM(XMM), + Memory(GPR, i32), +} + +#[derive(Copy, Clone, Debug)] +#[allow(dead_code)] +pub enum GPROrMemory { + GPR(GPR), + Memory(GPR, i32), +} + +pub trait Emitter { + type Label; + type Offset; + + fn get_label(&mut self) -> Self::Label; + fn get_offset(&self) -> Self::Offset; + fn get_jmp_instr_size(&self) -> u8; + + fn emit_u64(&mut self, x: u64); + + fn emit_label(&mut self, label: Self::Label); + + fn emit_nop(&mut self); + + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lea(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lea_label(&mut self, label: Self::Label, dst: Location); + fn emit_cdq(&mut self); + fn emit_cqo(&mut self); + fn emit_xor(&mut self, sz: Size, src: Location, dst: Location); + fn emit_jmp(&mut self, condition: Condition, label: Self::Label); + fn emit_jmp_location(&mut self, loc: Location); + fn emit_conditional_trap(&mut self, condition: Condition); + fn emit_set(&mut self, condition: Condition, dst: GPR); + fn emit_push(&mut self, sz: Size, src: Location); + fn emit_pop(&mut self, sz: Size, dst: Location); + fn emit_cmp(&mut self, sz: Size, left: Location, right: Location); + fn emit_add(&mut self, sz: Size, src: Location, dst: Location); + fn emit_sub(&mut self, sz: Size, src: Location, dst: Location); + fn emit_neg(&mut self, sz: Size, value: Location); + fn emit_imul(&mut self, sz: Size, src: Location, dst: Location); + fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR); + fn emit_div(&mut self, sz: Size, divisor: Location); + fn emit_idiv(&mut self, sz: Size, divisor: Location); + fn emit_shl(&mut self, sz: Size, src: Location, dst: Location); + fn emit_shr(&mut self, sz: Size, src: Location, dst: Location); + fn emit_sar(&mut self, sz: Size, src: Location, dst: Location); + fn emit_rol(&mut self, sz: Size, src: Location, dst: Location); + fn emit_ror(&mut self, sz: Size, src: Location, dst: Location); + fn emit_and(&mut self, sz: Size, src: Location, dst: Location); + fn emit_or(&mut self, sz: Size, src: Location, dst: Location); + fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location); + fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location); + fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location); + fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); + fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); + fn 
emit_xchg(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lock_cmpxchg(&mut self, sz: Size, src: Location, dst: Location); + + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR); + fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR); + + fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR); + fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR); + + fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory); + fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory); + fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpunordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpunordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcmpordss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcmpordsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vroundss_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundss_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundss_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundss_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundsd_nearest(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundsd_floor(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn 
emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + + fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM); + fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM); + + fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR); + fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR); + fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR); + fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR); + + fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM); + fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM); + fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM); + fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM); + + fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM); + fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM); + + fn emit_test_gpr_64(&mut self, reg: GPR); + + fn emit_ud2(&mut self); + fn emit_ret(&mut self); + fn emit_call_label(&mut self, label: Self::Label); + fn emit_call_location(&mut self, loc: Location); + + fn emit_bkpt(&mut self); + + fn emit_host_redirection(&mut self, target: GPR); + fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType); + + fn arch_has_itruncf(&self) -> bool { + false + } + fn arch_emit_i32_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_sf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_sf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + + fn arch_has_fconverti(&self) -> bool { + false + } + fn arch_emit_f32_convert_si32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_si64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_ui32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_ui64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_si32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_si64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_ui32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_ui64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + + fn arch_has_fneg(&self) -> bool { + false + } + fn arch_emit_f32_neg(&mut self, _src: XMM, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_neg(&mut self, _src: XMM, _dst: XMM) { + unimplemented!() + } + + fn arch_has_xzcnt(&self) -> bool { + false + } + fn arch_emit_lzcnt(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!() + } + fn arch_emit_tzcnt(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!() + } + + fn arch_supports_canonicalize_nan(&self) -> bool { + true + } + + fn arch_requires_indirect_call_trampoline(&self) -> bool { + false + } + + fn arch_emit_indirect_call_with_trampoline(&mut self, _loc: Location) { + unimplemented!() + } + + // Emits entry trampoline just before the real 
function. + fn arch_emit_entry_trampoline(&mut self) {} +} + +macro_rules! unop_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $loc:expr, $otherwise:block) => { + match ($sz, $loc) { + (Size::S32, Location::GPR(loc)) => { + dynasm!($assembler ; $ins Rd(loc as u8)); + }, + (Size::S64, Location::GPR(loc)) => { + dynasm!($assembler ; $ins Rq(loc as u8)); + }, + _ => $otherwise + } + }; +} + +macro_rules! unop_mem { + ($ins:ident, $assembler:tt, $sz:expr, $loc:expr, $otherwise:block) => { + match ($sz, $loc) { + (Size::S32, Location::Memory(loc, disp)) => { + dynasm!($assembler ; $ins DWORD [Rq(loc as u8) + disp] ); + }, + (Size::S64, Location::Memory(loc, disp)) => { + dynasm!($assembler ; $ins QWORD [Rq(loc as u8) + disp] ); + }, + _ => $otherwise + } + }; +} + +macro_rules! unop_gpr_or_mem { + ($ins:ident, $assembler:tt, $sz:expr, $loc:expr, $otherwise:block) => { + unop_gpr!($ins, $assembler, $sz, $loc, { + unop_mem!($ins, $assembler, $sz, $loc, $otherwise) + }) + }; +} + +macro_rules! binop_imm32_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Imm32(src), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rd(dst as u8), src as i32); // IMM32_2GPR + }, + (Size::S64, Location::Imm32(src), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), src as i32); // IMM32_2GPR + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_imm32_mem { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Imm32(src), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins DWORD [Rq(dst as u8) + disp], src as i32); + }, + (Size::S64, Location::Imm32(src), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins QWORD [Rq(dst as u8) + disp], src as i32); + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_imm64_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S64, Location::Imm64(src), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), QWORD src as i64); // IMM32_2GPR + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_gpr_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rd(dst as u8), Rd(src as u8)); // GPR2GPR + }, + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), Rq(src as u8)); // GPR2GPR + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_gpr_mem { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins [Rq(dst as u8) + disp], Rd(src as u8)); // GPR2MEM + }, + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins [Rq(dst as u8) + disp], Rq(src as u8)); // GPR2MEM + }, + _ => $otherwise + } + }; +} + +macro_rules! 
binop_mem_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rd(dst as u8), [Rq(src as u8) + disp]); // MEM2GPR + }, + (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), [Rq(src as u8) + disp]); // MEM2GPR + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_all_nofp { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + binop_imm32_gpr!($ins, $assembler, $sz, $src, $dst, { + binop_imm32_mem!($ins, $assembler, $sz, $src, $dst, { + binop_gpr_gpr!($ins, $assembler, $sz, $src, $dst, { + binop_gpr_mem!($ins, $assembler, $sz, $src, $dst, { + binop_mem_gpr!($ins, $assembler, $sz, $src, $dst, $otherwise) + }) + }) + }) + }) + }; +} + +macro_rules! binop_shift { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::GPR(GPR::RCX), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rd(dst as u8), cl); + }, + (Size::S32, Location::GPR(GPR::RCX), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins DWORD [Rq(dst as u8) + disp], cl); + }, + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rd(dst as u8), imm as i8); + }, + (Size::S32, Location::Imm8(imm), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins DWORD [Rq(dst as u8) + disp], imm as i8); + }, + (Size::S64, Location::GPR(GPR::RCX), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), cl); + }, + (Size::S64, Location::GPR(GPR::RCX), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins QWORD [Rq(dst as u8) + disp], cl); + }, + (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => { + dynasm!($assembler ; $ins Rq(dst as u8), imm as i8); + }, + (Size::S64, Location::Imm8(imm), Location::Memory(dst, disp)) => { + dynasm!($assembler ; $ins QWORD [Rq(dst as u8) + disp], imm as i8); + }, + _ => $otherwise + } + } +} + +macro_rules! jmp_op { + ($ins:ident, $assembler:tt, $label:ident) => { + dynasm!($assembler ; $ins =>$label); + } +} + +macro_rules! trap_op { + ($ins:ident, $assembler:tt) => { + dynasm!($assembler + ; $ins >trap + ; jmp >after + ; trap: + ; ud2 + ; after: + ); + } +} + +macro_rules! avx_fn { + ($ins:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + // Dynasm bug: AVX instructions are not encoded correctly. 
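+ // Workaround for the encoding issue noted above: the first source operand is
+ // spelled as a literal xmm register name via a 16-way match, so only the
+ // destination and the second operand are encoded from dynamic register
+ // numbers.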
+ match src2 { + XMMOrMemory::XMM(x) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rx((x as u8))), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rx((x as u8))), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rx((x as u8))), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rx((x as u8))), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rx((x as u8))), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rx((x as u8))), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rx((x as u8))), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rx((x as u8))), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rx((x as u8))), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rx((x as u8))), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rx((x as u8))), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rx((x as u8))), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rx((x as u8))), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rx((x as u8))), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rx((x as u8))), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rx((x as u8))), + }, + XMMOrMemory::Memory(base, disp) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, [Rq((base as u8)) + disp]), + }, + } + } + } +} + +macro_rules! 
avx_i2f_64_fn { + ($ins:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match src2 { + GPROrMemory::GPR(x) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rq((x as u8))), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rq((x as u8))), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rq((x as u8))), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rq((x as u8))), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rq((x as u8))), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rq((x as u8))), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rq((x as u8))), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rq((x as u8))), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rq((x as u8))), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rq((x as u8))), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rq((x as u8))), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rq((x as u8))), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rq((x as u8))), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rq((x as u8))), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rq((x as u8))), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rq((x as u8))), + }, + GPROrMemory::Memory(base, disp) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, QWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, QWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, QWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, QWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, QWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, QWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, QWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, QWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, QWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, QWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, QWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, QWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, QWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, QWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, QWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, QWORD [Rq((base as u8)) + disp]), + }, + } + } + } +} + +macro_rules! 
avx_i2f_32_fn { + ($ins:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { + match src2 { + GPROrMemory::GPR(x) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, Rd((x as u8))), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, Rd((x as u8))), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, Rd((x as u8))), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, Rd((x as u8))), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, Rd((x as u8))), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, Rd((x as u8))), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, Rd((x as u8))), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, Rd((x as u8))), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, Rd((x as u8))), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, Rd((x as u8))), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, Rd((x as u8))), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, Rd((x as u8))), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, Rd((x as u8))), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, Rd((x as u8))), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, Rd((x as u8))), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, Rd((x as u8))), + }, + GPROrMemory::Memory(base, disp) => match src1 { + XMM::XMM0 => dynasm!(self ; $ins Rx((dst as u8)), xmm0, DWORD [Rq((base as u8)) + disp]), + XMM::XMM1 => dynasm!(self ; $ins Rx((dst as u8)), xmm1, DWORD [Rq((base as u8)) + disp]), + XMM::XMM2 => dynasm!(self ; $ins Rx((dst as u8)), xmm2, DWORD [Rq((base as u8)) + disp]), + XMM::XMM3 => dynasm!(self ; $ins Rx((dst as u8)), xmm3, DWORD [Rq((base as u8)) + disp]), + XMM::XMM4 => dynasm!(self ; $ins Rx((dst as u8)), xmm4, DWORD [Rq((base as u8)) + disp]), + XMM::XMM5 => dynasm!(self ; $ins Rx((dst as u8)), xmm5, DWORD [Rq((base as u8)) + disp]), + XMM::XMM6 => dynasm!(self ; $ins Rx((dst as u8)), xmm6, DWORD [Rq((base as u8)) + disp]), + XMM::XMM7 => dynasm!(self ; $ins Rx((dst as u8)), xmm7, DWORD [Rq((base as u8)) + disp]), + XMM::XMM8 => dynasm!(self ; $ins Rx((dst as u8)), xmm8, DWORD [Rq((base as u8)) + disp]), + XMM::XMM9 => dynasm!(self ; $ins Rx((dst as u8)), xmm9, DWORD [Rq((base as u8)) + disp]), + XMM::XMM10 => dynasm!(self ; $ins Rx((dst as u8)), xmm10, DWORD [Rq((base as u8)) + disp]), + XMM::XMM11 => dynasm!(self ; $ins Rx((dst as u8)), xmm11, DWORD [Rq((base as u8)) + disp]), + XMM::XMM12 => dynasm!(self ; $ins Rx((dst as u8)), xmm12, DWORD [Rq((base as u8)) + disp]), + XMM::XMM13 => dynasm!(self ; $ins Rx((dst as u8)), xmm13, DWORD [Rq((base as u8)) + disp]), + XMM::XMM14 => dynasm!(self ; $ins Rx((dst as u8)), xmm14, DWORD [Rq((base as u8)) + disp]), + XMM::XMM15 => dynasm!(self ; $ins Rx((dst as u8)), xmm15, DWORD [Rq((base as u8)) + disp]), + }, + } + } + } +} + +macro_rules! 
avx_round_fn { + ($ins:ident, $name:ident, $mode:expr) => { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match src2 { + XMMOrMemory::XMM(x) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), Rx((x as u8)), $mode), + XMMOrMemory::Memory(base, disp) => dynasm!(self ; $ins Rx((dst as u8)), Rx((src1 as u8)), [Rq((base as u8)) + disp], $mode), + } + } + } +} + +impl Emitter for Assembler { + type Label = DynamicLabel; + type Offset = AssemblyOffset; + + fn get_label(&mut self) -> DynamicLabel { + self.new_dynamic_label() + } + + fn get_offset(&self) -> AssemblyOffset { + self.offset() + } + + fn get_jmp_instr_size(&self) -> u8 { + 5 + } + + fn emit_u64(&mut self, x: u64) { + self.push_u64(x); + } + + fn emit_label(&mut self, label: Self::Label) { + dynasm!(self ; => label); + } + + fn emit_nop(&mut self) { + dynasm!(self ; nop); + } + + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(mov, self, sz, src, dst, { + binop_imm64_gpr!(mov, self, sz, src, dst, { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; mov Rb(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S8, Location::Imm32(src), Location::GPR(dst)) => { + dynasm!(self ; mov Rb(dst as u8), src as i8); + } + (Size::S8, Location::Imm64(src), Location::GPR(dst)) => { + dynasm!(self ; mov Rb(dst as u8), src as i8); + } + (Size::S8, Location::Imm32(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov BYTE [Rq(dst as u8) + disp], src as i8); + } + (Size::S8, Location::Imm64(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov BYTE [Rq(dst as u8) + disp], src as i8); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; mov Rw(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S16, Location::Imm32(src), Location::GPR(dst)) => { + dynasm!(self ; mov Rw(dst as u8), src as i16); + } + (Size::S16, Location::Imm64(src), Location::GPR(dst)) => { + dynasm!(self ; mov Rw(dst as u8), src as i16); + } + (Size::S16, Location::Imm32(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov WORD [Rq(dst as u8) + disp], src as i16); + } + (Size::S16, Location::Imm64(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov WORD [Rq(dst as u8) + disp], src as i16); + } + (Size::S32, Location::Imm64(src), Location::GPR(dst)) => { + dynasm!(self ; mov Rd(dst as u8), src as i32); + } + (Size::S32, Location::Imm64(src), Location::Memory(dst, disp)) => { + dynasm!(self ; mov DWORD [Rq(dst as u8) + disp], src as i32); + } + (Size::S32, Location::GPR(src), Location::XMM(dst)) => { + dynasm!(self ; movd Rx(dst as u8), Rd(src as u8)); + } + (Size::S32, Location::XMM(src), Location::GPR(dst)) => { + dynasm!(self ; movd Rd(dst as u8), Rx(src as u8)); + } + (Size::S32, Location::Memory(src, disp), Location::XMM(dst)) => { + dynasm!(self ; movd Rx(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S32, Location::XMM(src), Location::Memory(dst, disp)) => { + dynasm!(self ; movd [Rq(dst as u8) + disp], Rx(src as u8)); + } + + (Size::S64, Location::GPR(src), Location::XMM(dst)) => { + dynasm!(self ; movq Rx(dst as u8), Rq(src as u8)); + } + (Size::S64, Location::XMM(src), Location::GPR(dst)) => { + dynasm!(self ; movq Rq(dst as u8), Rx(src as 
u8)); + } + (Size::S64, Location::Memory(src, disp), Location::XMM(dst)) => { + dynasm!(self ; movq Rx(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S64, Location::XMM(src), Location::Memory(dst, disp)) => { + dynasm!(self ; movq [Rq(dst as u8) + disp], Rx(src as u8)); + } + (_, Location::XMM(src), Location::XMM(dst)) => { + dynasm!(self ; movq Rx(dst as u8), Rx(src as u8)); + } + + _ => panic!("singlepass can't emit MOV {:?} {:?} {:?}", sz, src, dst), + } + }) + }); + } + fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; lea Rd(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; lea Rq(dst as u8), [Rq(src as u8) + disp]); + } + _ => panic!("singlepass can't emit LEA {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_lea_label(&mut self, label: Self::Label, dst: Location) { + match dst { + Location::GPR(x) => { + dynasm!(self ; lea Rq(x as u8), [=>label]); + } + _ => panic!("singlepass can't emit LEA label={:?} {:?}", label, dst), + } + } + fn emit_cdq(&mut self) { + dynasm!(self ; cdq); + } + fn emit_cqo(&mut self) { + dynasm!(self ; cqo); + } + fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(xor, self, sz, src, dst, { + panic!("singlepass can't emit XOR {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_jmp(&mut self, condition: Condition, label: Self::Label) { + match condition { + Condition::None => jmp_op!(jmp, self, label), + Condition::Above => jmp_op!(ja, self, label), + Condition::AboveEqual => jmp_op!(jae, self, label), + Condition::Below => jmp_op!(jb, self, label), + Condition::BelowEqual => jmp_op!(jbe, self, label), + Condition::Greater => jmp_op!(jg, self, label), + Condition::GreaterEqual => jmp_op!(jge, self, label), + Condition::Less => jmp_op!(jl, self, label), + Condition::LessEqual => jmp_op!(jle, self, label), + Condition::Equal => jmp_op!(je, self, label), + Condition::NotEqual => jmp_op!(jne, self, label), + Condition::Signed => jmp_op!(js, self, label), + } + } + fn emit_jmp_location(&mut self, loc: Location) { + match loc { + Location::GPR(x) => dynasm!(self ; jmp Rq(x as u8)), + Location::Memory(base, disp) => dynasm!(self ; jmp QWORD [Rq(base as u8) + disp]), + _ => panic!("singlepass can't emit JMP {:?}", loc), + } + } + fn emit_conditional_trap(&mut self, condition: Condition) { + match condition { + Condition::None => trap_op!(jmp, self), + Condition::Above => trap_op!(ja, self), + Condition::AboveEqual => trap_op!(jae, self), + Condition::Below => trap_op!(jb, self), + Condition::BelowEqual => trap_op!(jbe, self), + Condition::Greater => trap_op!(jg, self), + Condition::GreaterEqual => trap_op!(jge, self), + Condition::Less => trap_op!(jl, self), + Condition::LessEqual => trap_op!(jle, self), + Condition::Equal => trap_op!(je, self), + Condition::NotEqual => trap_op!(jne, self), + Condition::Signed => trap_op!(js, self), + } + } + fn emit_set(&mut self, condition: Condition, dst: GPR) { + match condition { + Condition::Above => dynasm!(self ; seta Rb(dst as u8)), + Condition::AboveEqual => dynasm!(self ; setae Rb(dst as u8)), + Condition::Below => dynasm!(self ; setb Rb(dst as u8)), + Condition::BelowEqual => dynasm!(self ; setbe Rb(dst as u8)), + Condition::Greater => dynasm!(self ; setg Rb(dst as u8)), + Condition::GreaterEqual => dynasm!(self ; setge Rb(dst as u8)), + Condition::Less => dynasm!(self ; setl Rb(dst as u8)), 
+ Condition::LessEqual => dynasm!(self ; setle Rb(dst as u8)), + Condition::Equal => dynasm!(self ; sete Rb(dst as u8)), + Condition::NotEqual => dynasm!(self ; setne Rb(dst as u8)), + Condition::Signed => dynasm!(self ; sets Rb(dst as u8)), + _ => panic!("singlepass can't emit SET {:?} {:?}", condition, dst), + } + } + fn emit_push(&mut self, sz: Size, src: Location) { + match (sz, src) { + (Size::S64, Location::Imm32(src)) => dynasm!(self ; push src as i32), + (Size::S64, Location::GPR(src)) => dynasm!(self ; push Rq(src as u8)), + (Size::S64, Location::Memory(src, disp)) => { + dynasm!(self ; push QWORD [Rq(src as u8) + disp]) + } + _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src), + } + } + fn emit_pop(&mut self, sz: Size, dst: Location) { + match (sz, dst) { + (Size::S64, Location::GPR(dst)) => dynasm!(self ; pop Rq(dst as u8)), + (Size::S64, Location::Memory(dst, disp)) => { + dynasm!(self ; pop QWORD [Rq(dst as u8) + disp]) + } + _ => panic!("singlepass can't emit POP {:?} {:?}", sz, dst), + } + } + fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) { + binop_all_nofp!(cmp, self, sz, left, right, { + panic!("singlepass can't emit CMP {:?} {:?} {:?}", sz, left, right); + }); + } + fn emit_add(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(add, self, sz, src, dst, { + panic!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(sub, self, sz, src, dst, { + panic!("singlepass can't emit SUB {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_neg(&mut self, sz: Size, value: Location) { + match (sz, value) { + (Size::S8, Location::GPR(value)) => dynasm!(self ; neg Rb(value as u8)), + (Size::S8, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S16, Location::GPR(value)) => dynasm!(self ; neg Rw(value as u8)), + (Size::S16, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S32, Location::GPR(value)) => dynasm!(self ; neg Rd(value as u8)), + (Size::S32, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S64, Location::GPR(value)) => dynasm!(self ; neg Rq(value as u8)), + (Size::S64, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + _ => panic!("singlepass can't emit NEG {:?} {:?}", sz, value), + } + } + fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) { + binop_gpr_gpr!(imul, self, sz, src, dst, { + binop_mem_gpr!(imul, self, sz, src, dst, { + panic!("singlepass can't emit IMUL {:?} {:?} {:?}", sz, src, dst) + }) + }); + } + fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) { + dynasm!(self ; imul Rq(dst as u8), Rq(dst as u8), src as i32); + } + fn emit_div(&mut self, sz: Size, divisor: Location) { + unop_gpr_or_mem!(div, self, sz, divisor, { + panic!("singlepass can't emit DIV {:?} {:?}", sz, divisor) + }); + } + fn emit_idiv(&mut self, sz: Size, divisor: Location) { + unop_gpr_or_mem!(idiv, self, sz, divisor, { + panic!("singlepass can't emit IDIV {:?} {:?}", sz, divisor) + }); + } + fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(shl, self, sz, src, dst, { + panic!("singlepass can't emit SHL {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(shr, self, sz, src, dst, { + panic!("singlepass can't emit SHR {:?} {:?} {:?}", sz, src, dst) + }); + } + 
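+ // Note on the shift/rotate emitters (emit_shl/emit_shr above, emit_sar/emit_rol/emit_ror below):
+ // `binop_shift` only accepts a variable count in RCX because the x86-64 shl/shr/sar/rol/ror
+ // instructions take a register-based count in CL; the only other encodable count operand is an
+ // 8-bit immediate, hence the Imm8 arms.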
fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(sar, self, sz, src, dst, { + panic!("singlepass can't emit SAR {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(rol, self, sz, src, dst, { + panic!("singlepass can't emit ROL {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(ror, self, sz, src, dst, { + panic!("singlepass can't emit ROR {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_and(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(and, self, sz, src, dst, { + panic!("singlepass can't emit AND {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_or(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(or, self, sz, src, dst, { + panic!("singlepass can't emit OR {:?} {:?} {:?}", sz, src, dst) + }); + } + fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location) { + binop_gpr_gpr!(bsr, self, sz, src, dst, { + binop_mem_gpr!(bsr, self, sz, src, dst, { + panic!("singlepass can't emit BSR {:?} {:?} {:?}", sz, src, dst) + }) + }); + } + fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location) { + binop_gpr_gpr!(bsf, self, sz, src, dst, { + binop_mem_gpr!(bsf, self, sz, src, dst, { + panic!("singlepass can't emit BSF {:?} {:?} {:?}", sz, src, dst) + }) + }); + } + fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) { + binop_gpr_gpr!(popcnt, self, sz, src, dst, { + binop_mem_gpr!(popcnt, self, sz, src, dst, { + panic!("singlepass can't emit POPCNT {:?} {:?} {:?}", sz, src, dst) + }) + }); + } + fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location) { + match (sz_src, src, sz_dst, dst) { + (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movzx Rd(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movzx Rd(dst as u8), Rw(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movzx Rd(dst as u8), BYTE [Rq(src as u8) + disp]); + } + (Size::S16, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movzx Rd(dst as u8), WORD [Rq(src as u8) + disp]); + } + (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movzx Rq(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movzx Rq(dst as u8), Rw(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movzx Rq(dst as u8), BYTE [Rq(src as u8) + disp]); + } + (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movzx Rq(dst as u8), WORD [Rq(src as u8) + disp]); + } + _ => panic!( + "singlepass can't emit MOVZX {:?} {:?} {:?} {:?}", + sz_src, src, sz_dst, dst + ), + } + } + fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location) { + match (sz_src, src, sz_dst, dst) { + (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movsx Rd(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movsx Rd(dst as u8), Rw(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movsx Rd(dst as u8), BYTE [Rq(src as u8) + disp]); + } + (Size::S16, 
Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; movsx Rd(dst as u8), WORD [Rq(src as u8) + disp]); + } + (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), Rd(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), BYTE [Rq(src as u8) + disp]); + } + (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), WORD [Rq(src as u8) + disp]); + } + (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; movsx Rq(dst as u8), DWORD [Rq(src as u8) + disp]); + } + _ => panic!( + "singlepass can't emit MOVSX {:?} {:?} {:?} {:?}", + sz_src, src, sz_dst, dst + ), + } + } + + fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rb(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rw(dst as u8), Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rd(dst as u8), Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rq(dst as u8), Rq(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rb(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rw(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rd(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rq(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!("singlepass can't emit XCHG {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!( + "singlepass can't emit LOCK XADD {:?} {:?} {:?}", + sz, src, dst + ), + } + } + + fn 
emit_lock_cmpxchg(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!( + "singlepass can't emit LOCK CMPXCHG {:?} {:?} {:?}", + sz, src, dst + ), + } + } + + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { + dynasm!(self ; btc Rd(dst as u8), BYTE src as i8); + } + + fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) { + dynasm!(self ; btc Rq(dst as u8), BYTE src as i8); + } + + fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) { + dynasm!(self ; cmovae Rd(dst as u8), Rd(src as u8)); + } + + fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) { + dynasm!(self ; cmovae Rq(dst as u8), Rq(src as u8)); + } + + fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory) { + match (src, dst) { + (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => { + dynasm!(self ; movaps Rx(dst as u8), Rx(src as u8)) + } + (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => { + dynasm!(self ; movaps Rx(dst as u8), [Rq(base as u8) + disp]) + } + (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => { + dynasm!(self ; movaps [Rq(base as u8) + disp], Rx(src as u8)) + } + _ => panic!("singlepass can't emit VMOVAPS {:?} {:?}", src, dst), + }; + } + + fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) { + match (src, dst) { + (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => { + dynasm!(self ; movapd Rx(dst as u8), Rx(src as u8)) + } + (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => { + dynasm!(self ; movapd Rx(dst as u8), [Rq(base as u8) + disp]) + } + (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => { + dynasm!(self ; movapd [Rq(base as u8) + disp], Rx(src as u8)) + } + _ => panic!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst), + }; + } + + avx_fn!(vxorps, emit_vxorps); + avx_fn!(vxorpd, emit_vxorpd); + + avx_fn!(vaddss, emit_vaddss); + avx_fn!(vaddsd, emit_vaddsd); + + avx_fn!(vsubss, emit_vsubss); + avx_fn!(vsubsd, emit_vsubsd); + + avx_fn!(vmulss, emit_vmulss); + avx_fn!(vmulsd, emit_vmulsd); + + avx_fn!(vdivss, emit_vdivss); + avx_fn!(vdivsd, emit_vdivsd); + + avx_fn!(vmaxss, emit_vmaxss); + avx_fn!(vmaxsd, emit_vmaxsd); + + avx_fn!(vminss, emit_vminss); + avx_fn!(vminsd, emit_vminsd); + + avx_fn!(vcmpeqss, emit_vcmpeqss); + avx_fn!(vcmpeqsd, emit_vcmpeqsd); + + avx_fn!(vcmpneqss, emit_vcmpneqss); + avx_fn!(vcmpneqsd, emit_vcmpneqsd); + + avx_fn!(vcmpltss, emit_vcmpltss); + avx_fn!(vcmpltsd, emit_vcmpltsd); + + avx_fn!(vcmpless, emit_vcmpless); + avx_fn!(vcmplesd, emit_vcmplesd); + + avx_fn!(vcmpgtss, emit_vcmpgtss); + avx_fn!(vcmpgtsd, emit_vcmpgtsd); + + avx_fn!(vcmpgess, emit_vcmpgess); + avx_fn!(vcmpgesd, emit_vcmpgesd); + + avx_fn!(vcmpunordss, emit_vcmpunordss); + avx_fn!(vcmpunordsd, emit_vcmpunordsd); + + avx_fn!(vcmpordss, emit_vcmpordss); + avx_fn!(vcmpordsd, emit_vcmpordsd); + + avx_fn!(vsqrtss, emit_vsqrtss); + avx_fn!(vsqrtsd, emit_vsqrtsd); + + avx_fn!(vcvtss2sd, emit_vcvtss2sd); + avx_fn!(vcvtsd2ss, emit_vcvtsd2ss); + + 
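+ // The immediates passed to `avx_round_fn!` below select the SSE4.1/AVX rounding control:
+ // 0 = round to nearest (even), 1 = round down (floor), 2 = round up (ceil), 3 = round toward zero (trunc).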
avx_round_fn!(vroundss, emit_vroundss_nearest, 0); + avx_round_fn!(vroundss, emit_vroundss_floor, 1); + avx_round_fn!(vroundss, emit_vroundss_ceil, 2); + avx_round_fn!(vroundss, emit_vroundss_trunc, 3); + avx_round_fn!(vroundsd, emit_vroundsd_nearest, 0); + avx_round_fn!(vroundsd, emit_vroundsd_floor, 1); + avx_round_fn!(vroundsd, emit_vroundsd_ceil, 2); + avx_round_fn!(vroundsd, emit_vroundsd_trunc, 3); + + avx_i2f_32_fn!(vcvtsi2ss, emit_vcvtsi2ss_32); + avx_i2f_32_fn!(vcvtsi2sd, emit_vcvtsi2sd_32); + avx_i2f_64_fn!(vcvtsi2ss, emit_vcvtsi2ss_64); + avx_i2f_64_fn!(vcvtsi2sd, emit_vcvtsi2sd_64); + + fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => { + dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; vblendvps Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } + } + } + + fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => { + dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8)) + } + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8)) + } + } + } + + fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; ucomiss Rx(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; ucomiss Rx(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; ucomisd Rx(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; ucomisd Rx(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rd(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; cvttss2si Rd(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rq(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; cvttss2si Rq(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rd(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; cvttsd2si Rd(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) { + match src { + XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rq(dst as u8), Rx(x as u8)), + XMMOrMemory::Memory(base, disp) => { + dynasm!(self ; cvttsd2si Rq(dst as u8), [Rq(base as u8) + disp]) + } + } + } + + fn emit_test_gpr_64(&mut self, reg: GPR) { + dynasm!(self ; test Rq(reg as u8), Rq(reg as u8)); + } + + fn emit_ud2(&mut self) { + dynasm!(self ; ud2); + } + fn emit_ret(&mut self) { + dynasm!(self ; ret); + } + + fn emit_call_label(&mut self, label: Self::Label) { + dynasm!(self ; call =>label); + } + fn emit_call_location(&mut self, loc: Location) { + match loc { + Location::GPR(x) => dynasm!(self ; call Rq(x as u8)), + Location::Memory(base, disp) => dynasm!(self ; call QWORD [Rq(base as u8) + disp]), + _ => panic!("singlepass can't emit CALL 
{:?}", loc), + } + } + + fn emit_bkpt(&mut self) { + dynasm!(self ; int 0x3); + } + + fn emit_host_redirection(&mut self, target: GPR) { + self.emit_jmp_location(Location::GPR(target)); + } + + fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType) { + dynasm!(self + ; ud2 + ; .byte 0x0f ; .byte 0xb9u8 as i8 // ud + ; int -1 + ; .byte ty as u8 as i8 + ); + } +} diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs index 19da0d0f8..b4f4e3087 100644 --- a/lib/compiler-singlepass/src/lib.rs +++ b/lib/compiler-singlepass/src/lib.rs @@ -5,11 +5,20 @@ //! including Blockchains and Edge computing where quick compilation //! times are a must, and JIT bombs should never happen. //! -//! Compared to Cranelift and LLVM, Singlepass is much faster to compile. +//! Compared to Cranelift and LLVM, Singlepass compiles much faster but has worse +//! runtime performance. +//! //! > Note: Singlepass currently depends on Rust nightly features. +#![feature(proc_macro_hygiene)] + mod compiler; mod config; +//mod codegen_x64; +mod common_decl; +mod emitter_x64; +mod machine; +mod x64_decl; pub use crate::compiler::SinglepassCompiler; pub use crate::config::SinglepassConfig; diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs new file mode 100644 index 000000000..cd2ff719a --- /dev/null +++ b/lib/compiler-singlepass/src/machine.rs @@ -0,0 +1,513 @@ +use crate::common_decl::*; +use crate::emitter_x64::*; +use crate::x64_decl::{new_machine_state, X64Register}; +use smallvec::smallvec; +use smallvec::SmallVec; +use std::collections::HashSet; +use wasmparser::Type as WpType; + +struct MachineStackOffset(usize); + +pub struct Machine { + used_gprs: HashSet, + used_xmms: HashSet, + stack_offset: MachineStackOffset, + save_area_offset: Option, + pub state: MachineState, + pub(crate) track_state: bool, +} + +impl Machine { + pub fn new() -> Self { + Machine { + used_gprs: HashSet::new(), + used_xmms: HashSet::new(), + stack_offset: MachineStackOffset(0), + save_area_offset: None, + state: new_machine_state(), + track_state: true, + } + } + + pub fn get_stack_offset(&self) -> usize { + self.stack_offset.0 + } + + pub fn get_used_gprs(&self) -> Vec { + self.used_gprs.iter().cloned().collect() + } + + pub fn get_used_xmms(&self) -> Vec { + self.used_xmms.iter().cloned().collect() + } + + pub fn get_vmctx_reg() -> GPR { + GPR::R15 + } + + /// Picks an unused general purpose register for local/stack/argument use. + /// + /// This method does not mark the register as used. + pub fn pick_gpr(&self) -> Option { + use GPR::*; + static REGS: &'static [GPR] = &[RSI, RDI, R8, R9, R10, R11]; + for r in REGS { + if !self.used_gprs.contains(r) { + return Some(*r); + } + } + None + } + + /// Picks an unused general purpose register for internal temporary use. + /// + /// This method does not mark the register as used. + pub fn pick_temp_gpr(&self) -> Option { + use GPR::*; + static REGS: &'static [GPR] = &[RAX, RCX, RDX]; + for r in REGS { + if !self.used_gprs.contains(r) { + return Some(*r); + } + } + None + } + + /// Acquires a temporary GPR. + pub fn acquire_temp_gpr(&mut self) -> Option { + let gpr = self.pick_temp_gpr(); + if let Some(x) = gpr { + self.used_gprs.insert(x); + } + gpr + } + + /// Releases a temporary GPR. + pub fn release_temp_gpr(&mut self, gpr: GPR) { + assert!(self.used_gprs.remove(&gpr)); + } + + /// Specify that a given register is in use. 
+ pub fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR { + assert!(!self.used_gprs.contains(&gpr)); + self.used_gprs.insert(gpr); + gpr + } + + /// Picks an unused XMM register. + /// + /// This method does not mark the register as used. + pub fn pick_xmm(&self) -> Option { + use XMM::*; + static REGS: &'static [XMM] = &[XMM3, XMM4, XMM5, XMM6, XMM7]; + for r in REGS { + if !self.used_xmms.contains(r) { + return Some(*r); + } + } + None + } + + /// Picks an unused XMM register for internal temporary use. + /// + /// This method does not mark the register as used. + pub fn pick_temp_xmm(&self) -> Option { + use XMM::*; + static REGS: &'static [XMM] = &[XMM0, XMM1, XMM2]; + for r in REGS { + if !self.used_xmms.contains(r) { + return Some(*r); + } + } + None + } + + /// Acquires a temporary XMM register. + pub fn acquire_temp_xmm(&mut self) -> Option { + let xmm = self.pick_temp_xmm(); + if let Some(x) = xmm { + self.used_xmms.insert(x); + } + xmm + } + + /// Releases a temporary XMM register. + pub fn release_temp_xmm(&mut self, xmm: XMM) { + assert_eq!(self.used_xmms.remove(&xmm), true); + } + + /// Acquires locations from the machine state. + /// + /// If the returned locations are used for stack value, `release_location` needs to be called on them; + /// Otherwise, if the returned locations are used for locals, `release_location` does not need to be called on them. + pub fn acquire_locations( + &mut self, + assembler: &mut E, + tys: &[(WpType, MachineValue)], + zeroed: bool, + ) -> SmallVec<[Location; 1]> { + let mut ret = smallvec![]; + let mut delta_stack_offset: usize = 0; + + for (ty, mv) in tys { + let loc = match *ty { + WpType::F32 | WpType::F64 => self.pick_xmm().map(Location::XMM), + WpType::I32 | WpType::I64 => self.pick_gpr().map(Location::GPR), + _ => unreachable!(), + }; + + let loc = if let Some(x) = loc { + x + } else { + self.stack_offset.0 += 8; + delta_stack_offset += 8; + Location::Memory(GPR::RBP, -(self.stack_offset.0 as i32)) + }; + if let Location::GPR(x) = loc { + self.used_gprs.insert(x); + self.state.register_values[X64Register::GPR(x).to_index().0] = mv.clone(); + } else if let Location::XMM(x) = loc { + self.used_xmms.insert(x); + self.state.register_values[X64Register::XMM(x).to_index().0] = mv.clone(); + } else { + self.state.stack_values.push(mv.clone()); + } + self.state.wasm_stack.push(WasmAbstractValue::Runtime); + ret.push(loc); + } + + if delta_stack_offset != 0 { + assembler.emit_sub( + Size::S64, + Location::Imm32(delta_stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + if zeroed { + for i in 0..tys.len() { + assembler.emit_mov(Size::S64, Location::Imm32(0), ret[i]); + } + } + ret + } + + /// Releases locations used for stack value. 
+ pub fn release_locations(&mut self, assembler: &mut E, locs: &[Location]) { + let mut delta_stack_offset: usize = 0; + + for loc in locs.iter().rev() { + match *loc { + Location::GPR(ref x) => { + assert_eq!(self.used_gprs.remove(x), true); + self.state.register_values[X64Register::GPR(*x).to_index().0] = + MachineValue::Undefined; + } + Location::XMM(ref x) => { + assert_eq!(self.used_xmms.remove(x), true); + self.state.register_values[X64Register::XMM(*x).to_index().0] = + MachineValue::Undefined; + } + Location::Memory(GPR::RBP, x) => { + if x >= 0 { + unreachable!(); + } + let offset = (-x) as usize; + if offset != self.stack_offset.0 { + unreachable!(); + } + self.stack_offset.0 -= 8; + delta_stack_offset += 8; + self.state.stack_values.pop().unwrap(); + } + _ => {} + } + self.state.wasm_stack.pop().unwrap(); + } + + if delta_stack_offset != 0 { + assembler.emit_add( + Size::S64, + Location::Imm32(delta_stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + } + + pub fn release_locations_only_regs(&mut self, locs: &[Location]) { + for loc in locs.iter().rev() { + match *loc { + Location::GPR(ref x) => { + assert_eq!(self.used_gprs.remove(x), true); + self.state.register_values[X64Register::GPR(*x).to_index().0] = + MachineValue::Undefined; + } + Location::XMM(ref x) => { + assert_eq!(self.used_xmms.remove(x), true); + self.state.register_values[X64Register::XMM(*x).to_index().0] = + MachineValue::Undefined; + } + _ => {} + } + // Wasm state popping is deferred to `release_locations_only_osr_state`. + } + } + + pub fn release_locations_only_stack( + &mut self, + assembler: &mut E, + locs: &[Location], + ) { + let mut delta_stack_offset: usize = 0; + + for loc in locs.iter().rev() { + match *loc { + Location::Memory(GPR::RBP, x) => { + if x >= 0 { + unreachable!(); + } + let offset = (-x) as usize; + if offset != self.stack_offset.0 { + unreachable!(); + } + self.stack_offset.0 -= 8; + delta_stack_offset += 8; + self.state.stack_values.pop().unwrap(); + } + _ => {} + } + // Wasm state popping is deferred to `release_locations_only_osr_state`. + } + + if delta_stack_offset != 0 { + assembler.emit_add( + Size::S64, + Location::Imm32(delta_stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + } + + pub fn release_locations_only_osr_state(&mut self, n: usize) { + for _ in 0..n { + self.state.wasm_stack.pop().unwrap(); + } + } + + pub fn release_locations_keep_state(&self, assembler: &mut E, locs: &[Location]) { + let mut delta_stack_offset: usize = 0; + let mut stack_offset = self.stack_offset.0; + + for loc in locs.iter().rev() { + match *loc { + Location::Memory(GPR::RBP, x) => { + if x >= 0 { + unreachable!(); + } + let offset = (-x) as usize; + if offset != stack_offset { + unreachable!(); + } + stack_offset -= 8; + delta_stack_offset += 8; + } + _ => {} + } + } + + if delta_stack_offset != 0 { + assembler.emit_add( + Size::S64, + Location::Imm32(delta_stack_offset as u32), + Location::GPR(GPR::RSP), + ); + } + } + + pub fn init_locals( + &mut self, + a: &mut E, + n: usize, + n_params: usize, + ) -> Vec { + // Use callee-saved registers for locals. + fn get_local_location(idx: usize) -> Location { + match idx { + 0 => Location::GPR(GPR::R12), + 1 => Location::GPR(GPR::R13), + 2 => Location::GPR(GPR::R14), + 3 => Location::GPR(GPR::RBX), + _ => Location::Memory(GPR::RBP, -(((idx - 3) * 8) as i32)), + } + } + + let mut locations: Vec = vec![]; + let mut allocated: usize = 0; + + // Determine locations for parameters. 
+ for i in 0..n_params { + let loc = Self::get_param_location(i + 1); + locations.push(match loc { + Location::GPR(_) => { + let old_idx = allocated; + allocated += 1; + get_local_location(old_idx) + } + Location::Memory(_, _) => { + let old_idx = allocated; + allocated += 1; + get_local_location(old_idx) + } + _ => unreachable!(), + }); + } + + // Determine locations for normal locals. + for _ in n_params..n { + locations.push(get_local_location(allocated)); + allocated += 1; + } + + for (i, loc) in locations.iter().enumerate() { + match *loc { + Location::GPR(x) => { + self.state.register_values[X64Register::GPR(x).to_index().0] = + MachineValue::WasmLocal(i); + } + Location::Memory(_, _) => { + self.state.stack_values.push(MachineValue::WasmLocal(i)); + } + _ => unreachable!(), + } + } + + // How many machine stack slots did all the locals use? + let num_mem_slots = locations + .iter() + .filter(|&&loc| match loc { + Location::Memory(_, _) => true, + _ => false, + }) + .count(); + + // Move RSP down to reserve space for machine stack slots. + if num_mem_slots > 0 { + a.emit_sub( + Size::S64, + Location::Imm32((num_mem_slots * 8) as u32), + Location::GPR(GPR::RSP), + ); + self.stack_offset.0 += num_mem_slots * 8; + } + + // Save callee-saved registers. + for loc in locations.iter() { + if let Location::GPR(x) = *loc { + a.emit_push(Size::S64, *loc); + self.stack_offset.0 += 8; + self.state.stack_values.push(MachineValue::PreserveRegister( + X64Register::GPR(x).to_index(), + )); + } + } + + // Save R15 for vmctx use. + a.emit_push(Size::S64, Location::GPR(GPR::R15)); + self.stack_offset.0 += 8; + self.state.stack_values.push(MachineValue::PreserveRegister( + X64Register::GPR(GPR::R15).to_index(), + )); + + // Save the offset of static area. + self.save_area_offset = Some(MachineStackOffset(self.stack_offset.0)); + + // Load in-register parameters into the allocated locations. + for i in 0..n_params { + let loc = Self::get_param_location(i + 1); + match loc { + Location::GPR(_) => { + a.emit_mov(Size::S64, loc, locations[i]); + } + Location::Memory(_, _) => match locations[i] { + Location::GPR(_) => { + a.emit_mov(Size::S64, loc, locations[i]); + } + Location::Memory(_, _) => { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), locations[i]); + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + + // Load vmctx. + a.emit_mov( + Size::S64, + Self::get_param_location(0), + Location::GPR(GPR::R15), + ); + + // Initialize all normal locals to zero. + for i in n_params..n { + a.emit_mov(Size::S64, Location::Imm32(0), locations[i]); + } + + locations + } + + pub fn finalize_locals(&mut self, a: &mut E, locations: &[Location]) { + // Unwind stack to the "save area". + a.emit_lea( + Size::S64, + Location::Memory( + GPR::RBP, + -(self.save_area_offset.as_ref().unwrap().0 as i32), + ), + Location::GPR(GPR::RSP), + ); + + // Restore R15 used by vmctx. + a.emit_pop(Size::S64, Location::GPR(GPR::R15)); + + // Restore callee-saved registers. 
+ for loc in locations.iter().rev() { + if let Location::GPR(_) = *loc { + a.emit_pop(Size::S64, *loc); + } + } + } + + pub fn get_param_location(idx: usize) -> Location { + match idx { + 0 => Location::GPR(GPR::RDI), + 1 => Location::GPR(GPR::RSI), + 2 => Location::GPR(GPR::RDX), + 3 => Location::GPR(GPR::RCX), + 4 => Location::GPR(GPR::R8), + 5 => Location::GPR(GPR::R9), + _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use dynasmrt::x64::Assembler; + + #[test] + fn test_release_locations_keep_state_nopanic() { + let mut machine = Machine::new(); + let mut assembler = Assembler::new().unwrap(); + let locs = machine.acquire_locations( + &mut assembler, + &(0..10) + .map(|_| (WpType::I32, MachineValue::Undefined)) + .collect::>(), + false, + ); + + machine.release_locations_keep_state(&mut assembler, &locs); + } +} diff --git a/lib/compiler-singlepass/src/x64_decl.rs b/lib/compiler-singlepass/src/x64_decl.rs new file mode 100644 index 000000000..1181500e0 --- /dev/null +++ b/lib/compiler-singlepass/src/x64_decl.rs @@ -0,0 +1,222 @@ +//! X64 structures. + +use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; +use std::collections::BTreeMap; +use wasm_common::Type; + +/// General-purpose registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum GPR { + /// RAX register + RAX, + /// RCX register + RCX, + /// RDX register + RDX, + /// RBX register + RBX, + /// RSP register + RSP, + /// RBP register + RBP, + /// RSI register + RSI, + /// RDI register + RDI, + /// R8 register + R8, + /// R9 register + R9, + /// R10 register + R10, + /// R11 register + R11, + /// R12 register + R12, + /// R13 register + R13, + /// R14 register + R14, + /// R15 register + R15, +} + +/// XMM registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum XMM { + /// XMM register 0 + XMM0, + /// XMM register 1 + XMM1, + /// XMM register 2 + XMM2, + /// XMM register 3 + XMM3, + /// XMM register 4 + XMM4, + /// XMM register 5 + XMM5, + /// XMM register 6 + XMM6, + /// XMM register 7 + XMM7, + /// XMM register 8 + XMM8, + /// XMM register 9 + XMM9, + /// XMM register 10 + XMM10, + /// XMM register 11 + XMM11, + /// XMM register 12 + XMM12, + /// XMM register 13 + XMM13, + /// XMM register 14 + XMM14, + /// XMM register 15 + XMM15, +} + +/// A machine register under the x86-64 architecture. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum X64Register { + /// General-purpose registers. + GPR(GPR), + /// XMM (floating point/SIMD) registers. + XMM(XMM), +} + +impl X64Register { + /// Returns the index of the register. + pub fn to_index(&self) -> RegisterIndex { + match *self { + X64Register::GPR(x) => RegisterIndex(x as usize), + X64Register::XMM(x) => RegisterIndex(x as usize + 16), + } + } + + /// Converts a DWARD regnum to X64Register. 
+ pub fn from_dwarf_regnum(x: u16) -> Option { + Some(match x { + 0 => X64Register::GPR(GPR::RAX), + 1 => X64Register::GPR(GPR::RDX), + 2 => X64Register::GPR(GPR::RCX), + 3 => X64Register::GPR(GPR::RBX), + 4 => X64Register::GPR(GPR::RSI), + 5 => X64Register::GPR(GPR::RDI), + 6 => X64Register::GPR(GPR::RBP), + 7 => X64Register::GPR(GPR::RSP), + 8 => X64Register::GPR(GPR::R8), + 9 => X64Register::GPR(GPR::R9), + 10 => X64Register::GPR(GPR::R10), + 11 => X64Register::GPR(GPR::R11), + 12 => X64Register::GPR(GPR::R12), + 13 => X64Register::GPR(GPR::R13), + 14 => X64Register::GPR(GPR::R14), + 15 => X64Register::GPR(GPR::R15), + + 17 => X64Register::XMM(XMM::XMM0), + 18 => X64Register::XMM(XMM::XMM1), + 19 => X64Register::XMM(XMM::XMM2), + 20 => X64Register::XMM(XMM::XMM3), + 21 => X64Register::XMM(XMM::XMM4), + 22 => X64Register::XMM(XMM::XMM5), + 23 => X64Register::XMM(XMM::XMM6), + 24 => X64Register::XMM(XMM::XMM7), + _ => return None, + }) + } + + /// Returns the instruction prefix for `movq %this_reg, ?(%rsp)`. + /// + /// To build an instruction, append the memory location as a 32-bit + /// offset to the stack pointer to this prefix. + pub fn prefix_mov_to_stack(&self) -> Option<&'static [u8]> { + Some(match *self { + X64Register::GPR(gpr) => match gpr { + GPR::RDI => &[0x48, 0x89, 0xbc, 0x24], + GPR::RSI => &[0x48, 0x89, 0xb4, 0x24], + GPR::RDX => &[0x48, 0x89, 0x94, 0x24], + GPR::RCX => &[0x48, 0x89, 0x8c, 0x24], + GPR::R8 => &[0x4c, 0x89, 0x84, 0x24], + GPR::R9 => &[0x4c, 0x89, 0x8c, 0x24], + _ => return None, + }, + X64Register::XMM(xmm) => match xmm { + XMM::XMM0 => &[0x66, 0x0f, 0xd6, 0x84, 0x24], + XMM::XMM1 => &[0x66, 0x0f, 0xd6, 0x8c, 0x24], + XMM::XMM2 => &[0x66, 0x0f, 0xd6, 0x94, 0x24], + XMM::XMM3 => &[0x66, 0x0f, 0xd6, 0x9c, 0x24], + XMM::XMM4 => &[0x66, 0x0f, 0xd6, 0xa4, 0x24], + XMM::XMM5 => &[0x66, 0x0f, 0xd6, 0xac, 0x24], + XMM::XMM6 => &[0x66, 0x0f, 0xd6, 0xb4, 0x24], + XMM::XMM7 => &[0x66, 0x0f, 0xd6, 0xbc, 0x24], + _ => return None, + }, + }) + } +} + +/// An allocator that allocates registers for function arguments according to the System V ABI. +#[derive(Default)] +pub struct ArgumentRegisterAllocator { + n_gprs: usize, + n_xmms: usize, +} + +impl ArgumentRegisterAllocator { + /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type. + pub fn next(&mut self, ty: Type) -> Option { + static GPR_SEQ: &'static [GPR] = + &[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9]; + static XMM_SEQ: &'static [XMM] = &[ + XMM::XMM0, + XMM::XMM1, + XMM::XMM2, + XMM::XMM3, + XMM::XMM4, + XMM::XMM5, + XMM::XMM6, + XMM::XMM7, + ]; + match ty { + Type::I32 | Type::I64 => { + if self.n_gprs < GPR_SEQ.len() { + let gpr = GPR_SEQ[self.n_gprs]; + self.n_gprs += 1; + Some(X64Register::GPR(gpr)) + } else { + None + } + } + Type::F32 | Type::F64 => { + if self.n_xmms < XMM_SEQ.len() { + let xmm = XMM_SEQ[self.n_xmms]; + self.n_xmms += 1; + Some(X64Register::XMM(xmm)) + } else { + None + } + } + _ => todo!( + "ArgumentRegisterAllocator::next: Unsupported type: {:?}", + ty + ), + } + } +} + +/// Create a new `MachineState` with default values. +pub fn new_machine_state() -> MachineState { + MachineState { + stack_values: vec![], + register_values: vec![MachineValue::Undefined; 16 + 8], + prev_frame: BTreeMap::new(), + wasm_stack: vec![], + wasm_stack_private_depth: 0, + wasm_inst_offset: std::usize::MAX, + } +}
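Usage illustration (not part of the patch): a minimal sketch, assuming the `Type`, `ArgumentRegisterAllocator`, and `X64Register` items defined above are in scope, of how System V argument registers are handed out — integer arguments consume RDI, RSI, RDX, RCX, R8, R9 in order, floats consume XMM0 through XMM7, and anything beyond those limits is reported as stack-passed.

fn report_abi_assignment(sig: &[Type]) {
    // Walk a signature and print the register each argument lands in, mirroring
    // the GPR_SEQ / XMM_SEQ order used by `ArgumentRegisterAllocator::next`.
    let mut alloc = ArgumentRegisterAllocator::default();
    for (i, ty) in sig.iter().enumerate() {
        match alloc.next(*ty) {
            Some(reg) => println!("arg {} -> {:?}", i, reg),
            None => println!("arg {} -> stack", i),
        }
    }
}

// For `[Type::I32, Type::F64, Type::I64, Type::F32]` this prints:
//   arg 0 -> GPR(RDI), arg 1 -> XMM(XMM0), arg 2 -> GPR(RSI), arg 3 -> XMM(XMM1)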