diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 4c7b4ed98..f172ea482 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -170,6 +170,7 @@ pub trait EmitterARM64 {
     fn emit_udf(&mut self);
     fn emit_dmb(&mut self);
+    fn emit_brk(&mut self);
 
     fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location);
     fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location);
@@ -1839,6 +1840,9 @@ impl EmitterARM64 for Assembler {
     fn emit_dmb(&mut self) {
         dynasm!(self ; dmb ish);
     }
+    fn emit_brk(&mut self) {
+        dynasm!(self ; brk 0);
+    }
 
     fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location) {
         match (sz, src1, src2) {
@@ -2091,11 +2095,10 @@ pub fn gen_std_trampoline_arm64(
 ) -> FunctionBody {
     let mut a = Assembler::new(0);
 
-    let fptr = GPR::X26;
-    let args = GPR::X25;
+    let fptr = GPR::X27;
+    let args = GPR::X28;
 
     dynasm!(a
-        ; .arch aarch64
         ; sub sp, sp, 32
         ; stp x29, x30, [sp]
         ; stp X(fptr as u32), X(args as u32), [sp, 16]
@@ -2111,7 +2114,7 @@ pub fn gen_std_trampoline_arm64(
             stack_offset += 8;
             assert!(stack_offset % 16 == 0);
         }
-        dynasm!(a ; .arch aarch64 ; sub sp, sp, stack_offset);
+        dynasm!(a ; sub sp, sp, stack_offset);
     }
 
     // Move arguments to their locations.
@@ -2151,7 +2154,7 @@
         }
     }
 
-    dynasm!(a ; .arch aarch64 ; blr X(fptr as u32));
+    dynasm!(a ; blr X(fptr as u32));
 
     // Write return value.
     if !sig.results().is_empty() {
@@ -2160,7 +2163,6 @@
     }
     // Restore stack.
     dynasm!(a
-        ; .arch aarch64
         ; ldp X(fptr as u32), X(args as u32), [x29, 16]
         ; ldp x29, x30, [x29]
         ; add sp, sp, 32 + stack_offset as u32
@@ -2180,7 +2182,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
 ) -> FunctionBody {
     let mut a = Assembler::new(0);
     // Allocate argument array.
-    let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 16;
+    let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len());
     // Save LR and X20, as scratch register
     a.emit_stpdb(
         Size::S64,
         Location::GPR(GPR::X30),
         Location::GPR(GPR::X20),
         GPR::XzrSp,
         16,
     );
@@ -2190,21 +2192,23 @@
 
-    if stack_offset < 0x1000 + 16 {
-        a.emit_sub(
-            Size::S64,
-            Location::GPR(GPR::XzrSp),
-            Location::Imm32((stack_offset - 16) as _),
-            Location::GPR(GPR::XzrSp),
-        );
-    } else {
-        a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64);
-        a.emit_sub(
-            Size::S64,
-            Location::GPR(GPR::XzrSp),
-            Location::GPR(GPR::X20),
-            Location::GPR(GPR::XzrSp),
-        );
+    if stack_offset != 0 {
+        if stack_offset < 0x1000 {
+            a.emit_sub(
+                Size::S64,
+                Location::GPR(GPR::XzrSp),
+                Location::Imm32(stack_offset as _),
+                Location::GPR(GPR::XzrSp),
+            );
+        } else {
+            a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64);
+            a.emit_sub(
+                Size::S64,
+                Location::GPR(GPR::XzrSp),
+                Location::GPR(GPR::X20),
+                Location::GPR(GPR::XzrSp),
+            );
+        }
     }
 
     // Copy arguments.
@@ -2222,7 +2226,10 @@
                 a.emit_ldr(
                     Size::S64,
                     Location::GPR(GPR::X20),
-                    Location::Memory(GPR::XzrSp, (stack_offset + stack_param_count * 8) as _),
+                    Location::Memory(
+                        GPR::XzrSp,
+                        (stack_offset + 16 + stack_param_count * 8) as _,
+                    ),
                 );
                 stack_param_count += 1;
                 Location::GPR(GPR::X20)
@@ -2246,14 +2253,8 @@
     match calling_convention {
         _ => {
             // Load target address.
-            a.emit_ldr(
-                Size::S64,
-                Location::GPR(GPR::X20),
-                Location::Memory(
-                    GPR::X0,
-                    vmoffsets.vmdynamicfunction_import_context_address() as i32,
-                ),
-            );
+            let offset = vmoffsets.vmdynamicfunction_import_context_address();
+            a.emit_ldur(Size::S64, Location::GPR(GPR::X20), GPR::X0, offset as i32);
             // Load values array.
             a.emit_add(
                 Size::S64,
@@ -2278,21 +2279,23 @@
     }
 
     // Release values array.
-    if stack_offset < 0x1000 + 16 {
-        a.emit_add(
-            Size::S64,
-            Location::GPR(GPR::XzrSp),
-            Location::Imm32((stack_offset - 16) as _),
-            Location::GPR(GPR::XzrSp),
-        );
-    } else {
-        a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64);
-        a.emit_add(
-            Size::S64,
-            Location::GPR(GPR::XzrSp),
-            Location::GPR(GPR::X20),
-            Location::GPR(GPR::XzrSp),
-        );
+    if stack_offset != 0 {
+        if stack_offset < 0x1000 {
+            a.emit_add(
+                Size::S64,
+                Location::GPR(GPR::XzrSp),
+                Location::Imm32(stack_offset as _),
+                Location::GPR(GPR::XzrSp),
+            );
+        } else {
+            a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64);
+            a.emit_add(
+                Size::S64,
+                Location::GPR(GPR::XzrSp),
+                Location::GPR(GPR::X20),
+                Location::GPR(GPR::XzrSp),
+            );
+        }
     }
 
     a.emit_ldpia(
         Size::S64,
@@ -2437,32 +2440,46 @@ pub fn gen_import_call_trampoline_arm64(
     let offset = vmoffsets.vmctx_vmfunction_import(index);
     // for ldr, offset needs to be a multiple of 8, wich often is not
     // so use ldur, but then offset is limited to -255 .. +255. It will be positive here
-    let offset = if offset > 0 && offset < 0x1000 {
-        offset
-    } else {
-        a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
-        a.emit_add(
-            Size::S64,
-            Location::GPR(GPR::X0),
-            Location::GPR(GPR::X16),
-            Location::GPR(GPR::X0),
-        );
-        0
-    };
-    match calling_convention {
-        _ => {
-            a.emit_ldur(
-                Size::S64,
-                Location::GPR(GPR::X16),
-                GPR::X0,
-                offset as i32, // function pointer
-            );
-            a.emit_ldur(
+    let offset =
+        if (offset > 0 && offset < 0xF8) || (offset > 0 && offset < 0x7FF8 && (offset & 7) == 0) {
+            offset
+        } else {
+            a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
+            a.emit_add(
                 Size::S64,
                 Location::GPR(GPR::X0),
-                GPR::X0,
-                offset as i32 + 8, // target vmctx
+                Location::GPR(GPR::X16),
+                Location::GPR(GPR::X0),
             );
+            0
+        };
+    match calling_convention {
+        _ => {
+            if (offset & 7) == 0 {
+                a.emit_ldr(
+                    Size::S64,
+                    Location::GPR(GPR::X16),
+                    Location::Memory(GPR::X0, offset as i32), // function pointer
+                );
+                a.emit_ldr(
+                    Size::S64,
+                    Location::GPR(GPR::X0),
+                    Location::Memory(GPR::X0, offset as i32 + 8), // target vmctx
+                );
+            } else {
+                a.emit_ldur(
+                    Size::S64,
+                    Location::GPR(GPR::X16),
+                    GPR::X0,
+                    offset as i32, // function pointer
+                );
+                a.emit_ldur(
+                    Size::S64,
+                    Location::GPR(GPR::X0),
+                    GPR::X0,
+                    offset as i32 + 8, // target vmctx
+                );
+            }
         }
     }
     a.emit_b_register(GPR::X16);
diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs
index ac38693b0..e61e03e41 100644
--- a/lib/compiler-singlepass/src/machine.rs
+++ b/lib/compiler-singlepass/src/machine.rs
@@ -132,10 +132,6 @@ pub trait Machine {
     /// restore stack
     /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP))
     fn restore_stack(&mut self, delta_stack_offset: u32);
-    /// push callee saved register to the stack
-    fn push_callee_saved(&mut self);
-    /// pop callee saved register from the stack
-    fn pop_callee_saved(&mut self);
     /// Pop stack of locals
     /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP))
     fn pop_stack_locals(&mut self, delta_stack_offset: u32);
@@ -262,6 +258,10 @@ pub trait Machine {
     fn get_gpr_for_ret(&self) -> Self::GPR;
     /// get the simd for the return of float/double values
     fn get_simd_for_ret(&self) -> Self::SIMD;
+
+    /// Emit a debug breakpoint
+    fn emit_debug_breakpoint(&mut self);
+
     /// load the address of a memory location (will panic if src is not a memory)
     /// like LEA opcode on x86_64
     fn location_address(
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index a51c18c75..f23f46e0d 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -1039,7 +1039,7 @@ impl Machine for MachineARM64 {
     // Picks an unused general purpose register for internal temporary use.
     fn pick_temp_gpr(&self) -> Option<GPR> {
         use GPR::*;
-        static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X5, X7, X8];
+        static REGS: &[GPR] = &[X8, X7, X6, X5, X4, X3, X2, X1];
         for r in REGS {
             if !self.used_gprs.contains(r) {
                 return Some(*r);
@@ -1288,15 +1288,13 @@ impl Machine for MachineARM64 {
             Location::GPR(GPR::XzrSp),
         );
     }
-    fn push_callee_saved(&mut self) {}
-    fn pop_callee_saved(&mut self) {}
     fn pop_stack_locals(&mut self, delta_stack_offset: u32) {
         let real_delta = if delta_stack_offset & 15 != 0 {
             delta_stack_offset + 8
         } else {
             delta_stack_offset
         };
-        let delta = if real_delta < 256 {
+        let delta = if self.compatible_imm(real_delta as i64, ImmType::Bits12) {
             Location::Imm8(real_delta as u8)
         } else {
             let tmp = self.pick_temp_gpr().unwrap();
@@ -1312,9 +1310,7 @@ impl Machine for MachineARM64 {
         );
     }
     // push a value on the stack for a native call
-    fn push_location_for_native(&mut self, _loc: Location) {
-        unimplemented!();
-        /*
+    fn push_location_for_native(&mut self, loc: Location) {
         match loc {
             Location::Imm64(_) => {
                 self.reserve_unused_temp_gpr(GPR::X8);
@@ -1324,7 +1320,6 @@ impl Machine for MachineARM64 {
             }
             _ => self.emit_push(Size::S64, loc),
         }
-        */
     }
 
     // Zero a location that is 32bits
@@ -1349,14 +1344,14 @@
     fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
         // Use callee-saved registers for the first locals.
         match idx {
-            0 => Location::GPR(GPR::X18),
-            1 => Location::GPR(GPR::X19),
-            2 => Location::GPR(GPR::X20),
-            3 => Location::GPR(GPR::X21),
-            4 => Location::GPR(GPR::X22),
-            5 => Location::GPR(GPR::X23),
-            6 => Location::GPR(GPR::X24),
-            7 => Location::GPR(GPR::X25),
+            0 => Location::GPR(GPR::X19),
+            1 => Location::GPR(GPR::X20),
+            2 => Location::GPR(GPR::X21),
+            3 => Location::GPR(GPR::X22),
+            4 => Location::GPR(GPR::X23),
+            5 => Location::GPR(GPR::X24),
+            6 => Location::GPR(GPR::X25),
+            7 => Location::GPR(GPR::X26),
             _ => Location::Memory(GPR::X29, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
         }
     }
@@ -1649,7 +1644,7 @@
     fn emit_function_prolog(&mut self) {
         self.emit_double_push(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); // save LR too
-        self.emit_double_push(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
+        self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28));
         // cannot use mov, because XSP is XZR there. Need to use ADD with #0
         self.assembler.emit_add(
             Size::S64,
@@ -1668,7 +1663,7 @@
             Location::GPR(GPR::XzrSp),
         );
         self.pushed = false; // SP is restored, concider it aligned
-        self.emit_double_pop(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
+        self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28));
         self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30));
     }
@@ -1709,7 +1704,7 @@
         self.assembler.emit_label(label);
     }
     fn get_grp_for_call(&self) -> GPR {
-        GPR::X26
+        GPR::X27
     }
     fn emit_call_register(&mut self, reg: GPR) {
         self.assembler.emit_call_register(reg);
@@ -1733,6 +1728,10 @@
             .arch_emit_indirect_call_with_trampoline(location);
     }
 
+    fn emit_debug_breakpoint(&mut self) {
+        self.assembler.emit_brk();
+    }
+
     fn emit_call_location(&mut self, location: Location) {
         let mut temps = vec![];
         let loc = self.location_to_reg(
@@ -1741,7 +1740,7 @@
             &mut temps,
             ImmType::None,
             true,
-            Some(GPR::X26),
+            Some(GPR::X27),
         );
         match loc {
             Location::GPR(reg) => self.assembler.emit_call_register(reg),
@@ -2870,7 +2869,7 @@
             offset: reloc_at as u32,
             addend: 0,
         });
-        self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 0);
+        self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 0);
         let reloc_at = self.assembler.get_offset().0;
         relocations.push(Relocation {
             kind: RelocationKind::Arm64Movw1,
@@ -2878,7 +2877,7 @@
             offset: reloc_at as u32,
             addend: 0,
         });
-        self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 16);
+        self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 16);
         let reloc_at = self.assembler.get_offset().0;
         relocations.push(Relocation {
             kind: RelocationKind::Arm64Movw2,
@@ -2886,7 +2885,7 @@
             offset: reloc_at as u32,
             addend: 0,
         });
-        self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 32);
+        self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 32);
         let reloc_at = self.assembler.get_offset().0;
         relocations.push(Relocation {
             kind: RelocationKind::Arm64Movw3,
@@ -2894,7 +2893,7 @@
             offset: reloc_at as u32,
             addend: 0,
         });
-        self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48);
+        self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 48);
     }
 
     fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs
index 22ab07070..627fb1b67 100644
--- a/lib/compiler-singlepass/src/machine_x64.rs
+++ b/lib/compiler-singlepass/src/machine_x64.rs
@@ -1798,11 +1798,6 @@ impl Machine for MachineX86_64 {
             Location::GPR(GPR::RSP),
         );
     }
-    fn push_callee_saved(&mut self) {}
-    fn pop_callee_saved(&mut self) {
-        self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R14));
-        self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R15));
-    }
     fn pop_stack_locals(&mut self, delta_stack_offset: u32) {
         self.assembler.emit_add(
             Size::S64,
@@ -2148,6 +2143,10 @@ impl Machine for MachineX86_64 {
             .arch_emit_indirect_call_with_trampoline(location);
     }
 
+    fn emit_debug_breakpoint(&mut self) {
+        self.assembler.emit_bkpt();
+    }
+
     fn emit_call_location(&mut self, location: Location) {
         self.assembler.emit_call_location(location);
     }