improv(compiler) More native call work and fixes (166 tests pass now)

This commit is contained in:
ptitSeb
2022-01-03 18:39:27 +01:00
parent 0bbc81a5b8
commit 4f938643d1
4 changed files with 117 additions and 102 deletions

View File

@ -170,6 +170,7 @@ pub trait EmitterARM64 {
fn emit_udf(&mut self);
fn emit_dmb(&mut self);
fn emit_brk(&mut self);
fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location);
fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location);
@ -1839,6 +1840,9 @@ impl EmitterARM64 for Assembler {
fn emit_dmb(&mut self) {
dynasm!(self ; dmb ish);
}
fn emit_brk(&mut self) {
dynasm!(self ; brk 0);
}
fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location) {
match (sz, src1, src2) {
@ -2091,11 +2095,10 @@ pub fn gen_std_trampoline_arm64(
) -> FunctionBody {
let mut a = Assembler::new(0);
let fptr = GPR::X26;
let args = GPR::X25;
let fptr = GPR::X27;
let args = GPR::X28;
dynasm!(a
; .arch aarch64
; sub sp, sp, 32
; stp x29, x30, [sp]
; stp X(fptr as u32), X(args as u32), [sp, 16]
@ -2111,7 +2114,7 @@ pub fn gen_std_trampoline_arm64(
stack_offset += 8;
assert!(stack_offset % 16 == 0);
}
dynasm!(a ; .arch aarch64 ; sub sp, sp, stack_offset);
dynasm!(a ; sub sp, sp, stack_offset);
}
// Move arguments to their locations.
@ -2151,7 +2154,7 @@ pub fn gen_std_trampoline_arm64(
}
}
dynasm!(a ; .arch aarch64 ; blr X(fptr as u32));
dynasm!(a ; blr X(fptr as u32));
// Write return value.
if !sig.results().is_empty() {
@ -2160,7 +2163,6 @@ pub fn gen_std_trampoline_arm64(
// Restore stack.
dynasm!(a
; .arch aarch64
; ldp X(fptr as u32), X(args as u32), [x29, 16]
; ldp x29, x30, [x29]
; add sp, sp, 32 + stack_offset as u32
@ -2180,7 +2182,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
) -> FunctionBody {
let mut a = Assembler::new(0);
// Allocate argument array.
let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 16;
let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len());
// Save LR and X20, as scratch register
a.emit_stpdb(
Size::S64,
@ -2190,21 +2192,23 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
16,
);
if stack_offset < 0x1000 + 16 {
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm32((stack_offset - 16) as _),
Location::GPR(GPR::XzrSp),
);
} else {
a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64);
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::GPR(GPR::X20),
Location::GPR(GPR::XzrSp),
);
if stack_offset != 0 {
if stack_offset < 0x1000 {
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm32(stack_offset as _),
Location::GPR(GPR::XzrSp),
);
} else {
a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64);
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::GPR(GPR::X20),
Location::GPR(GPR::XzrSp),
);
}
}
// Copy arguments.
@ -2222,7 +2226,10 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
a.emit_ldr(
Size::S64,
Location::GPR(GPR::X20),
Location::Memory(GPR::XzrSp, (stack_offset + stack_param_count * 8) as _),
Location::Memory(
GPR::XzrSp,
(stack_offset + 16 + stack_param_count * 8) as _,
),
);
stack_param_count += 1;
Location::GPR(GPR::X20)
@ -2246,14 +2253,8 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
match calling_convention {
_ => {
// Load target address.
a.emit_ldr(
Size::S64,
Location::GPR(GPR::X20),
Location::Memory(
GPR::X0,
vmoffsets.vmdynamicfunction_import_context_address() as i32,
),
);
let offset = vmoffsets.vmdynamicfunction_import_context_address();
a.emit_ldur(Size::S64, Location::GPR(GPR::X20), GPR::X0, offset as i32);
// Load values array.
a.emit_add(
Size::S64,
@ -2278,21 +2279,23 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
}
// Release values array.
if stack_offset < 0x1000 + 16 {
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm32((stack_offset - 16) as _),
Location::GPR(GPR::XzrSp),
);
} else {
a.emit_mov_imm(Location::GPR(GPR::X20), (stack_offset - 16) as u64);
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::GPR(GPR::X20),
Location::GPR(GPR::XzrSp),
);
if stack_offset != 0 {
if stack_offset < 0x1000 {
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm32(stack_offset as _),
Location::GPR(GPR::XzrSp),
);
} else {
a.emit_mov_imm(Location::GPR(GPR::X20), stack_offset as u64);
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::GPR(GPR::X20),
Location::GPR(GPR::XzrSp),
);
}
}
a.emit_ldpia(
Size::S64,
@ -2437,32 +2440,46 @@ pub fn gen_import_call_trampoline_arm64(
let offset = vmoffsets.vmctx_vmfunction_import(index);
// for ldr, offset needs to be a multiple of 8, wich often is not
// so use ldur, but then offset is limited to -255 .. +255. It will be positive here
let offset = if offset > 0 && offset < 0x1000 {
offset
} else {
a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
a.emit_add(
Size::S64,
Location::GPR(GPR::X0),
Location::GPR(GPR::X16),
Location::GPR(GPR::X0),
);
0
};
match calling_convention {
_ => {
a.emit_ldur(
Size::S64,
Location::GPR(GPR::X16),
GPR::X0,
offset as i32, // function pointer
);
a.emit_ldur(
let offset =
if (offset > 0 && offset < 0xF8) || (offset > 0 && offset < 0x7FF8 && (offset & 7) == 0) {
offset
} else {
a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
a.emit_add(
Size::S64,
Location::GPR(GPR::X0),
GPR::X0,
offset as i32 + 8, // target vmctx
Location::GPR(GPR::X16),
Location::GPR(GPR::X0),
);
0
};
match calling_convention {
_ => {
if (offset & 7) == 0 {
a.emit_ldr(
Size::S64,
Location::GPR(GPR::X16),
Location::Memory(GPR::X0, offset as i32), // function pointer
);
a.emit_ldr(
Size::S64,
Location::GPR(GPR::X0),
Location::Memory(GPR::X0, offset as i32 + 8), // target vmctx
);
} else {
a.emit_ldur(
Size::S64,
Location::GPR(GPR::X16),
GPR::X0,
offset as i32, // function pointer
);
a.emit_ldur(
Size::S64,
Location::GPR(GPR::X0),
GPR::X0,
offset as i32 + 8, // target vmctx
);
}
}
}
a.emit_b_register(GPR::X16);

View File

@ -132,10 +132,6 @@ pub trait Machine {
/// restore stack
/// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP))
fn restore_stack(&mut self, delta_stack_offset: u32);
/// push callee saved register to the stack
fn push_callee_saved(&mut self);
/// pop callee saved register from the stack
fn pop_callee_saved(&mut self);
/// Pop stack of locals
/// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP))
fn pop_stack_locals(&mut self, delta_stack_offset: u32);
@ -262,6 +258,10 @@ pub trait Machine {
fn get_gpr_for_ret(&self) -> Self::GPR;
/// get the simd for the return of float/double values
fn get_simd_for_ret(&self) -> Self::SIMD;
/// Emit a debug breakpoint
fn emit_debug_breakpoint(&mut self);
/// load the address of a memory location (will panic if src is not a memory)
/// like LEA opcode on x86_64
fn location_address(

View File

@ -1039,7 +1039,7 @@ impl Machine for MachineARM64 {
// Picks an unused general purpose register for internal temporary use.
fn pick_temp_gpr(&self) -> Option<GPR> {
use GPR::*;
static REGS: &[GPR] = &[X1, X2, X3, X4, X5, X5, X7, X8];
static REGS: &[GPR] = &[X8, X7, X6, X5, X4, X3, X2, X1];
for r in REGS {
if !self.used_gprs.contains(r) {
return Some(*r);
@ -1288,15 +1288,13 @@ impl Machine for MachineARM64 {
Location::GPR(GPR::XzrSp),
);
}
fn push_callee_saved(&mut self) {}
fn pop_callee_saved(&mut self) {}
fn pop_stack_locals(&mut self, delta_stack_offset: u32) {
let real_delta = if delta_stack_offset & 15 != 0 {
delta_stack_offset + 8
} else {
delta_stack_offset
};
let delta = if real_delta < 256 {
let delta = if self.compatible_imm(real_delta as i64, ImmType::Bits12) {
Location::Imm8(real_delta as u8)
} else {
let tmp = self.pick_temp_gpr().unwrap();
@ -1312,9 +1310,7 @@ impl Machine for MachineARM64 {
);
}
// push a value on the stack for a native call
fn push_location_for_native(&mut self, _loc: Location) {
unimplemented!();
/*
fn push_location_for_native(&mut self, loc: Location) {
match loc {
Location::Imm64(_) => {
self.reserve_unused_temp_gpr(GPR::X8);
@ -1324,7 +1320,6 @@ impl Machine for MachineARM64 {
}
_ => self.emit_push(Size::S64, loc),
}
*/
}
// Zero a location that is 32bits
@ -1349,14 +1344,14 @@ impl Machine for MachineARM64 {
fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
// Use callee-saved registers for the first locals.
match idx {
0 => Location::GPR(GPR::X18),
1 => Location::GPR(GPR::X19),
2 => Location::GPR(GPR::X20),
3 => Location::GPR(GPR::X21),
4 => Location::GPR(GPR::X22),
5 => Location::GPR(GPR::X23),
6 => Location::GPR(GPR::X24),
7 => Location::GPR(GPR::X25),
0 => Location::GPR(GPR::X19),
1 => Location::GPR(GPR::X20),
2 => Location::GPR(GPR::X21),
3 => Location::GPR(GPR::X22),
4 => Location::GPR(GPR::X23),
5 => Location::GPR(GPR::X24),
6 => Location::GPR(GPR::X25),
7 => Location::GPR(GPR::X26),
_ => Location::Memory(GPR::X29, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
}
}
@ -1649,7 +1644,7 @@ impl Machine for MachineARM64 {
fn emit_function_prolog(&mut self) {
self.emit_double_push(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); // save LR too
self.emit_double_push(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28));
// cannot use mov, because XSP is XZR there. Need to use ADD with #0
self.assembler.emit_add(
Size::S64,
@ -1668,7 +1663,7 @@ impl Machine for MachineARM64 {
Location::GPR(GPR::XzrSp),
);
self.pushed = false; // SP is restored, concider it aligned
self.emit_double_pop(Size::S64, Location::GPR(GPR::X26), Location::GPR(GPR::X8));
self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28));
self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30));
}
@ -1709,7 +1704,7 @@ impl Machine for MachineARM64 {
self.assembler.emit_label(label);
}
fn get_grp_for_call(&self) -> GPR {
GPR::X26
GPR::X27
}
fn emit_call_register(&mut self, reg: GPR) {
self.assembler.emit_call_register(reg);
@ -1733,6 +1728,10 @@ impl Machine for MachineARM64 {
.arch_emit_indirect_call_with_trampoline(location);
}
fn emit_debug_breakpoint(&mut self) {
self.assembler.emit_brk();
}
fn emit_call_location(&mut self, location: Location) {
let mut temps = vec![];
let loc = self.location_to_reg(
@ -1741,7 +1740,7 @@ impl Machine for MachineARM64 {
&mut temps,
ImmType::None,
true,
Some(GPR::X26),
Some(GPR::X27),
);
match loc {
Location::GPR(reg) => self.assembler.emit_call_register(reg),
@ -2870,7 +2869,7 @@ impl Machine for MachineARM64 {
offset: reloc_at as u32,
addend: 0,
});
self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 0);
self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 0);
let reloc_at = self.assembler.get_offset().0;
relocations.push(Relocation {
kind: RelocationKind::Arm64Movw1,
@ -2878,7 +2877,7 @@ impl Machine for MachineARM64 {
offset: reloc_at as u32,
addend: 0,
});
self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 16);
self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 16);
let reloc_at = self.assembler.get_offset().0;
relocations.push(Relocation {
kind: RelocationKind::Arm64Movw2,
@ -2886,7 +2885,7 @@ impl Machine for MachineARM64 {
offset: reloc_at as u32,
addend: 0,
});
self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 32);
self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 32);
let reloc_at = self.assembler.get_offset().0;
relocations.push(Relocation {
kind: RelocationKind::Arm64Movw3,
@ -2894,7 +2893,7 @@ impl Machine for MachineARM64 {
offset: reloc_at as u32,
addend: 0,
});
self.assembler.emit_movk(Location::GPR(GPR::X26), 0, 48);
self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 48);
}
fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {

View File

@ -1798,11 +1798,6 @@ impl Machine for MachineX86_64 {
Location::GPR(GPR::RSP),
);
}
fn push_callee_saved(&mut self) {}
fn pop_callee_saved(&mut self) {
self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R14));
self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R15));
}
fn pop_stack_locals(&mut self, delta_stack_offset: u32) {
self.assembler.emit_add(
Size::S64,
@ -2148,6 +2143,10 @@ impl Machine for MachineX86_64 {
.arch_emit_indirect_call_with_trampoline(location);
}
fn emit_debug_breakpoint(&mut self) {
self.assembler.emit_bkpt();
}
fn emit_call_location(&mut self, location: Location) {
self.assembler.emit_call_location(location);
}