singlepass: Align loop headers to 16 bytes.

This commit is contained in:
losfair
2020-06-02 01:58:05 +08:00
parent fcfa378ace
commit fe2b979f0e
2 changed files with 49 additions and 0 deletions

View File

@@ -5586,6 +5586,15 @@ impl<'a> FuncGen<'a> {
self.control_stack.push(frame); self.control_stack.push(frame);
} }
Operator::Loop { ty } => { Operator::Loop { ty } => {
// Pad with NOPs to the next 16-byte boundary.
match self.assembler.get_offset().0 % 16 {
0 => {}
x => {
self.assembler.emit_nop_n(16 - x);
}
}
assert_eq!(self.assembler.get_offset().0 % 16, 0);
let label = self.assembler.get_label(); let label = self.assembler.get_label();
let state_diff_id = self.get_state_diff(); let state_diff_id = self.get_state_diff();
let _activate_offset = self.assembler.get_offset().0; let _activate_offset = self.assembler.get_offset().0;

View File

@@ -72,10 +72,12 @@ pub trait Emitter {
fn finalize_function(&mut self) {} fn finalize_function(&mut self) {}
fn emit_u64(&mut self, x: u64); fn emit_u64(&mut self, x: u64);
fn emit_bytes(&mut self, bytes: &[u8]);
fn emit_label(&mut self, label: Self::Label); fn emit_label(&mut self, label: Self::Label);
fn emit_nop(&mut self); fn emit_nop(&mut self);
fn emit_nop_n(&mut self, n: usize);
fn emit_mov(&mut self, sz: Size, src: Location, dst: Location); fn emit_mov(&mut self, sz: Size, src: Location, dst: Location);
fn emit_lea(&mut self, sz: Size, src: Location, dst: Location); fn emit_lea(&mut self, sz: Size, src: Location, dst: Location);
@@ -648,6 +650,12 @@ impl Emitter for Assembler {
self.push_u64(x); self.push_u64(x);
} }
fn emit_bytes(&mut self, bytes: &[u8]) {
for &b in bytes {
self.push(b);
}
}
fn emit_label(&mut self, label: Self::Label) { fn emit_label(&mut self, label: Self::Label) {
dynasm!(self ; => label); dynasm!(self ; => label);
} }
@@ -656,6 +664,38 @@ impl Emitter for Assembler {
dynasm!(self ; nop); dynasm!(self ; nop);
} }
fn emit_nop_n(&mut self, mut n: usize) {
/*
1 90H NOP
2 66 90H 66 NOP
3 0F 1F 00H NOP DWORD ptr [EAX]
4 0F 1F 40 00H NOP DWORD ptr [EAX + 00H]
5 0F 1F 44 00 00H NOP DWORD ptr [EAX + EAX*1 + 00H]
6 66 0F 1F 44 00 00H NOP DWORD ptr [AX + AX*1 + 00H]
7 0F 1F 80 00 00 00 00H NOP DWORD ptr [EAX + 00000000H]
8 0F 1F 84 00 00 00 00 00H NOP DWORD ptr [AX + AX*1 + 00000000H]
9 66 0F 1F 84 00 00 00 00 00H NOP DWORD ptr [AX + AX*1 + 00000000H]
*/
while n >= 9 {
n -= 9;
self.emit_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]);
// 9-byte nop
}
let seq: &[u8] = match n {
0 => &[],
1 => &[0x90],
2 => &[0x66, 0x90],
3 => &[0x0f, 0x1f, 0x00],
4 => &[0x0f, 0x1f, 0x40, 0x00],
5 => &[0x0f, 0x1f, 0x44, 0x00, 0x00],
6 => &[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00],
7 => &[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00],
8 => &[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00],
_ => unreachable!(),
};
self.emit_bytes(seq);
}
fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) { fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) {
// fast path // fast path
match (src, dst) { match (src, dst) {