improvement(compiler) abstraction of F64/F32 add/sub/mul/div operators

ptitSeb
2021-12-01 12:00:18 +01:00
parent c247bf3533
commit e430851b05
3 changed files with 166 additions and 600 deletions

View File

@@ -418,142 +418,6 @@ impl<'a> FuncGen<'a> {
}
}
/// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
fn emit_relaxed_avx(
&mut self,
op: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
src1: Location,
src2: Location,
dst: Location,
) -> Result<(), CodegenError> {
self.emit_relaxed_avx_base(
|this, src1, src2, dst| op(&mut this.machine.specific.assembler, src1, src2, dst),
src1,
src2,
dst,
)?;
Ok(())
}
/// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
fn emit_relaxed_avx_base<F: FnOnce(&mut Self, XMM, XMMOrMemory, XMM)>(
&mut self,
op: F,
src1: Location,
src2: Location,
dst: Location,
) -> Result<(), CodegenError> {
let tmp1 = self.machine.acquire_temp_simd().unwrap();
let tmp2 = self.machine.acquire_temp_simd().unwrap();
let tmp3 = self.machine.acquire_temp_simd().unwrap();
let tmpg = self.machine.acquire_temp_gpr().unwrap();
let src1 = match src1 {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.machine
.specific
.assembler
.emit_mov(Size::S64, src1, Location::SIMD(tmp1));
tmp1
}
Location::Imm32(_) => {
self.machine
.specific
.assembler
.emit_mov(Size::S32, src1, Location::GPR(tmpg));
self.machine.specific.move_location(
Size::S32,
Location::GPR(tmpg),
Location::SIMD(tmp1),
);
tmp1
}
Location::Imm64(_) => {
self.machine
.specific
.assembler
.emit_mov(Size::S64, src1, Location::GPR(tmpg));
self.machine.specific.move_location(
Size::S64,
Location::GPR(tmpg),
Location::SIMD(tmp1),
);
tmp1
}
_ => {
return Err(CodegenError {
message: "emit_relaxed_avx_base src1: unreachable code".to_string(),
})
}
};
let src2 = match src2 {
Location::SIMD(x) => XMMOrMemory::XMM(x),
Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
Location::GPR(_) => {
self.machine
.specific
.assembler
.emit_mov(Size::S64, src2, Location::SIMD(tmp2));
XMMOrMemory::XMM(tmp2)
}
Location::Imm32(_) => {
self.machine
.specific
.assembler
.emit_mov(Size::S32, src2, Location::GPR(tmpg));
self.machine.specific.move_location(
Size::S32,
Location::GPR(tmpg),
Location::SIMD(tmp2),
);
XMMOrMemory::XMM(tmp2)
}
Location::Imm64(_) => {
self.machine
.specific
.assembler
.emit_mov(Size::S64, src2, Location::GPR(tmpg));
self.machine.specific.move_location(
Size::S64,
Location::GPR(tmpg),
Location::SIMD(tmp2),
);
XMMOrMemory::XMM(tmp2)
}
_ => {
return Err(CodegenError {
message: "emit_relaxed_avx_base src2: unreachable code".to_string(),
})
}
};
match dst {
Location::SIMD(x) => {
op(self, src1, src2, x);
}
Location::Memory(_, _) | Location::GPR(_) => {
op(self, src1, src2, tmp3);
self.machine
.specific
.assembler
.emit_mov(Size::S64, Location::SIMD(tmp3), dst);
}
_ => {
return Err(CodegenError {
message: "emit_relaxed_avx_base dst: unreachable code".to_string(),
})
}
}
self.machine.release_temp_gpr(tmpg);
self.machine.release_temp_simd(tmp3);
self.machine.release_temp_simd(tmp2);
self.machine.release_temp_simd(tmp1);
Ok(())
}
/// I32 binary operation with both operands popped from the virtual stack.
fn emit_binop_i32(&mut self, f: fn(&mut Assembler, Size, Location, Location)) {
// Using Red Zone here.
@@ -2369,7 +2233,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f32(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret)?;
self.machine.specific.f32_add(loc_a, loc_b, ret);
}
Operator::F32Sub => {
self.fp_stack.pop2()?;
@@ -2377,7 +2241,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f32(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret)?;
self.machine.specific.f32_sub(loc_a, loc_b, ret);
}
Operator::F32Mul => {
self.fp_stack.pop2()?;
@@ -2385,7 +2249,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f32(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret)?;
self.machine.specific.f32_mul(loc_a, loc_b, ret);
}
Operator::F32Div => {
self.fp_stack.pop2()?;
@@ -2393,7 +2257,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f32(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret)?;
self.machine.specific.f32_div(loc_a, loc_b, ret);
}
Operator::F32Max => {
self.fp_stack.pop2()?;
@@ -2585,7 +2449,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f64(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret)?;
self.machine.specific.f64_add(loc_a, loc_b, ret);
}
Operator::F64Sub => {
self.fp_stack.pop2()?;
@@ -2593,7 +2457,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f64(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret)?;
self.machine.specific.f64_sub(loc_a, loc_b, ret);
}
Operator::F64Mul => {
self.fp_stack.pop2()?;
@@ -2601,7 +2465,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f64(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret)?;
self.machine.specific.f64_mul(loc_a, loc_b, ret);
}
Operator::F64Div => {
self.fp_stack.pop2()?;
@@ -2609,7 +2473,7 @@ impl<'a> FuncGen<'a> {
.push(FloatValue::cncl_f64(self.value_stack.len() - 2));
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret)?;
self.machine.specific.f64_div(loc_a, loc_b, ret);
}
Operator::F64Max => {
self.fp_stack.pop2()?;

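The 142 lines removed in the first hunk above are the AVX operand-normalization helpers (`emit_relaxed_avx` / `emit_relaxed_avx_base`), which now live inside the machine-specific backend instead of the generic `FuncGen`. A minimal standalone sketch of that normalization pattern, assuming simplified placeholder types and hard-coded scratch registers rather than the real wasmer `Location`, `Assembler`, and register-allocator APIs:

#[derive(Clone, Copy, Debug)]
enum Location {
    Simd(u8),
    Gpr(u8),
    Imm64(u64),
    Memory(u8, i32),
}

struct Emitter {
    asm: Vec<String>,
}

impl Emitter {
    fn mov(&mut self, from: Location, to: Location) {
        self.asm.push(format!("mov {:?} -> {:?}", from, to));
    }

    // Stage `src` into a SIMD register when it is not one already, mirroring
    // the GPR / immediate / memory arms of the removed helper (simplified:
    // the real code keeps memory operands as-is for the second source).
    fn to_simd(&mut self, src: Location, scratch: u8) -> u8 {
        match src {
            Location::Simd(x) => x,
            other => {
                self.mov(other, Location::Simd(scratch));
                scratch
            }
        }
    }

    // Binary float op with "relaxed" operands: run the op on SIMD registers,
    // then spill the result if the destination is not a SIMD register.
    fn relaxed_binop(&mut self, name: &str, src1: Location, src2: Location, dst: Location) {
        let a = self.to_simd(src1, 13); // scratch registers are hard-coded here;
        let b = self.to_simd(src2, 14); // the real code acquires temporaries.
        match dst {
            Location::Simd(d) => {
                self.asm.push(format!("{} xmm{}, xmm{} -> xmm{}", name, a, b, d));
            }
            other => {
                self.asm.push(format!("{} xmm{}, xmm{} -> xmm15", name, a, b));
                self.mov(Location::Simd(15), other);
            }
        }
    }
}

fn main() {
    let mut e = Emitter { asm: Vec::new() };
    // f64.add with a memory operand, an immediate operand, and a GPR destination.
    e.relaxed_binop(
        "vaddsd",
        Location::Memory(5, 16),
        Location::Imm64(0x4045_0000_0000_0000),
        Location::Gpr(0),
    );
    for line in &e.asm {
        println!("{}", line);
    }
}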
View File

@@ -448,7 +448,14 @@ pub trait MachineSpecific<R: Reg, S: Reg> {
fn f64_min(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// get Max for 2 F64 values
fn f64_max(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Add 2 F64 values
fn f64_add(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Sub 2 F64 values
fn f64_sub(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Multiply 2 F64 values
fn f64_mul(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Divide 2 F64 values
fn f64_div(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Negate an F32
fn f32_neg(&mut self, loc: Location<R, S>, ret: Location<R, S>);
/// Get the Absolute Value of an F32
@@ -481,6 +488,14 @@ pub trait MachineSpecific<R: Reg, S: Reg> {
fn f32_min(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// get Max for 2 F32 values
fn f32_max(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Add 2 F32 values
fn f32_add(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Sub 2 F32 values
fn f32_sub(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Multiply 2 F32 values
fn f32_mul(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
/// Divide 2 F32 values
fn f32_div(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
}
pub struct Machine<R: Reg, S: Reg, M: MachineSpecific<R, S>, C: CombinedRegister> {
@@ -559,15 +574,6 @@ impl<R: Reg, S: Reg, M: MachineSpecific<R, S>, C: CombinedRegister> Machine<R, S
self.specific.release_cmpxchg_temp_gpr();
}
/// Acquires a temporary XMM register.
pub fn acquire_temp_simd(&mut self) -> Option<S> {
self.specific.acquire_temp_simd()
}
/// Releases a temporary XMM register.
pub fn release_temp_simd(&mut self, simd: S) {
self.specific.release_simd(simd);
}
/// Releases a XMM register.
pub fn release_simd(&mut self, simd: S) {
self.specific.release_simd(simd);

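With the add/sub/mul/div operators now declared on `MachineSpecific`, the generic code generator never names an AVX instruction; only the backend chooses what to emit. A minimal sketch of that dispatch pattern, assuming simplified placeholder types rather than the real wasmer `Location` and trait signatures:

#[derive(Clone, Copy, Debug)]
enum Location {
    Simd(u8),
    Memory(u8, i32),
}

trait MachineSpecific {
    fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location);
    fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location);
}

// An x86-64 style backend routes the calls to its AVX emitter; here the
// "emitter" just records textual instructions so the example stays runnable.
struct X86Backend {
    emitted: Vec<String>,
}

impl MachineSpecific for X86Backend {
    fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
        self.emitted
            .push(format!("vaddss {:?}, {:?} -> {:?}", loc_a, loc_b, ret));
    }
    fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
        self.emitted
            .push(format!("vaddsd {:?}, {:?} -> {:?}", loc_a, loc_b, ret));
    }
}

// The operator lowering only sees the trait, so the same path can drive a
// non-x86 backend that implements these methods differently.
fn lower_f64_add(backend: &mut dyn MachineSpecific, a: Location, b: Location, ret: Location) {
    backend.f64_add(a, b, ret);
}

fn main() {
    let mut backend = X86Backend { emitted: Vec::new() };
    lower_f64_add(&mut backend, Location::Simd(0), Location::Memory(5, 16), Location::Simd(0));
    backend.f32_add(Location::Simd(1), Location::Simd(2), Location::Simd(1));
    println!("{:#?}", backend.emitted);
}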
View File

@@ -2448,37 +2448,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src1 = match loc_a {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_a,
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
tmp1
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
_ => {
@@ -2488,37 +2468,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src2 = match loc_b {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_b,
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
tmp2
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
_ => {
@@ -2530,34 +2490,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
self.move_location(
Size::S64,
Location::SIMD(src1),
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::SIMD(src2),
Location::GPR(tmpg2),
);
self.assembler.emit_cmp(
Size::S64,
Location::GPR(tmpg2),
Location::GPR(tmpg1),
);
self.assembler.emit_vminsd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
);
self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1));
self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2));
self.assembler
.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
self.assembler
.emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
let label1 = self.assembler.get_label();
let label2 = self.assembler.get_label();
self.assembler
.emit_jmp(Condition::NotEqual, label1);
self.assembler.emit_jmp(Condition::NotEqual, label1);
self.assembler
.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
self.assembler
.emit_jmp(Condition::None, label2);
self.assembler.emit_jmp(Condition::None, label2);
self.emit_label(label1);
// load float -0.0
self.move_location(
@@ -2565,56 +2509,30 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
Location::Imm64(0x8000_0000_0000_0000), // Negative zero
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp_xmm2),
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2));
self.emit_label(label2);
self.assembler.emit_vcmpeqsd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm3,
);
self.assembler.emit_vblendvpd(
tmp_xmm3,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm1,
tmp_xmm1,
);
self.assembler.emit_vcmpunordsd(
src1,
XMMOrMemory::XMM(src2),
src1,
);
self.assembler
.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
self.assembler
.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
self.assembler
.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
// load float canonical nan
self.move_location(
Size::S64,
Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(src2),
);
self.assembler.emit_vblendvpd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
src1,
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
self.assembler
.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::SIMD(x) => {
self.assembler
.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
}
Location::Memory(_, _) | Location::GPR(_) => {
self.move_location(
Size::S64,
Location::SIMD(src1),
ret,
);
self.move_location(Size::S64, Location::SIMD(src1), ret);
}
_ => {
unreachable!();
@@ -2639,37 +2557,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src1 = match loc_a {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_a,
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
tmp1
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
_ => {
@@ -2679,37 +2577,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src2 = match loc_b {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_b,
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
tmp2
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
_ => {
@@ -2721,85 +2599,44 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
self.move_location(
Size::S64,
Location::SIMD(src1),
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::SIMD(src2),
Location::GPR(tmpg2),
);
self.assembler.emit_cmp(
Size::S64,
Location::GPR(tmpg2),
Location::GPR(tmpg1),
);
self.assembler.emit_vmaxsd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
);
self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1));
self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2));
self.assembler
.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
self.assembler
.emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
let label1 = self.assembler.get_label();
let label2 = self.assembler.get_label();
self.assembler
.emit_jmp(Condition::NotEqual, label1);
self.assembler.emit_jmp(Condition::NotEqual, label1);
self.assembler
.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
self.assembler
.emit_jmp(Condition::None, label2);
self.assembler.emit_jmp(Condition::None, label2);
self.emit_label(label1);
self.assembler.emit_vxorpd(
tmp_xmm2,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm2,
);
self.assembler
.emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2);
self.emit_label(label2);
self.assembler.emit_vcmpeqsd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm3,
);
self.assembler.emit_vblendvpd(
tmp_xmm3,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm1,
tmp_xmm1,
);
self.assembler.emit_vcmpunordsd(
src1,
XMMOrMemory::XMM(src2),
src1,
);
self.assembler
.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
self.assembler
.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
self.assembler
.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
// load float canonical nan
self.move_location(
Size::S64,
Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(src2),
);
self.assembler.emit_vblendvpd(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
src1,
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
self.assembler
.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::SIMD(x) => {
self.assembler
.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
}
Location::Memory(_, _) | Location::GPR(_) => {
self.move_location(
Size::S64,
Location::SIMD(src1),
ret,
);
self.move_location(Size::S64, Location::SIMD(src1), ret);
}
_ => {
unreachable!();
@@ -2812,7 +2649,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
self.release_simd(tmp1);
}
}
fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret);
}
fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret);
}
fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret);
}
fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret);
}
fn f32_neg(&mut self, loc: Location, ret: Location) {
if self.assembler.arch_has_fneg() {
let tmp = self.acquire_temp_simd().unwrap();
@@ -2904,37 +2752,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src1 = match loc_a {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_a,
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
tmp1
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
_ => {
@@ -2944,37 +2772,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src2 = match loc_b {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_b,
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
tmp2
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
_ => {
@@ -2986,34 +2794,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
self.move_location(
Size::S32,
Location::SIMD(src1),
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::SIMD(src2),
Location::GPR(tmpg2),
);
self.assembler.emit_cmp(
Size::S32,
Location::GPR(tmpg2),
Location::GPR(tmpg1),
);
self.assembler.emit_vminss(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
);
self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1));
self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2));
self.assembler
.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
self.assembler
.emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
let label1 = self.assembler.get_label();
let label2 = self.assembler.get_label();
self.assembler
.emit_jmp(Condition::NotEqual, label1);
self.assembler.emit_jmp(Condition::NotEqual, label1);
self.assembler
.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
self.assembler
.emit_jmp(Condition::None, label2);
self.assembler.emit_jmp(Condition::None, label2);
self.emit_label(label1);
// load float -0.0
self.move_location(
@@ -3021,56 +2813,30 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
Location::Imm32(0x8000_0000), // Negative zero
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp_xmm2),
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2));
self.emit_label(label2);
self.assembler.emit_vcmpeqss(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm3,
);
self.assembler.emit_vblendvps(
tmp_xmm3,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm1,
tmp_xmm1,
);
self.assembler.emit_vcmpunordss(
src1,
XMMOrMemory::XMM(src2),
src1,
);
self.assembler
.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
self.assembler
.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
self.assembler
.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1);
// load float canonical nan
self.move_location(
Size::S64,
Location::Imm32(0x7FC0_0000), // Canonical NaN
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(src2),
);
self.assembler.emit_vblendvps(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
src1,
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
self.assembler
.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::SIMD(x) => {
self.assembler
.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
}
Location::Memory(_, _) | Location::GPR(_) => {
self.move_location(
Size::S64,
Location::SIMD(src1),
ret,
);
self.move_location(Size::S64, Location::SIMD(src1), ret);
}
_ => {
unreachable!();
@@ -3087,7 +2853,6 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
if !self.arch_supports_canonicalize_nan() {
self.emit_relaxed_avx(Assembler::emit_vmaxss, loc_a, loc_b, ret);
} else {
let tmp1 = self.acquire_temp_simd().unwrap();
let tmp2 = self.acquire_temp_simd().unwrap();
let tmpg1 = self.acquire_temp_gpr().unwrap();
@@ -3096,37 +2861,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src1 = match loc_a {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_a,
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
tmp1
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_a,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp1),
);
self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
tmp1
}
_ => {
@@ -3136,37 +2881,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let src2 = match loc_b {
Location::SIMD(x) => x,
Location::GPR(_) | Location::Memory(_, _) => {
self.move_location(
Size::S64,
loc_b,
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
tmp2
}
Location::Imm32(_) => {
self.move_location(
Size::S32,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
Location::Imm64(_) => {
self.move_location(
Size::S64,
loc_b,
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(tmp2),
);
self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
tmp2
}
_ => {
@@ -3178,85 +2903,44 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
let tmp_xmm2 = XMM::XMM9;
let tmp_xmm3 = XMM::XMM10;
self.move_location(
Size::S32,
Location::SIMD(src1),
Location::GPR(tmpg1),
);
self.move_location(
Size::S32,
Location::SIMD(src2),
Location::GPR(tmpg2),
);
self.assembler.emit_cmp(
Size::S32,
Location::GPR(tmpg2),
Location::GPR(tmpg1),
);
self.assembler.emit_vmaxss(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
);
self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1));
self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2));
self.assembler
.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
self.assembler
.emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
let label1 = self.assembler.get_label();
let label2 = self.assembler.get_label();
self.assembler
.emit_jmp(Condition::NotEqual, label1);
self.assembler.emit_jmp(Condition::NotEqual, label1);
self.assembler
.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
self.assembler
.emit_jmp(Condition::None, label2);
self.assembler.emit_jmp(Condition::None, label2);
self.emit_label(label1);
self.assembler.emit_vxorps(
tmp_xmm2,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm2,
);
self.assembler
.emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2);
self.emit_label(label2);
self.assembler.emit_vcmpeqss(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm3,
);
self.assembler.emit_vblendvps(
tmp_xmm3,
XMMOrMemory::XMM(tmp_xmm2),
tmp_xmm1,
tmp_xmm1,
);
self.assembler.emit_vcmpunordss(
src1,
XMMOrMemory::XMM(src2),
src1,
);
self.assembler
.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
self.assembler
.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
self.assembler
.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1);
// load float canonical nan
self.move_location(
Size::S64,
Location::Imm32(0x7FC0_0000), // Canonical NaN
Location::GPR(tmpg1),
);
self.move_location(
Size::S64,
Location::GPR(tmpg1),
Location::SIMD(src2),
);
self.assembler.emit_vblendvps(
src1,
XMMOrMemory::XMM(src2),
tmp_xmm1,
src1,
);
self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
self.assembler
.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
match ret {
Location::SIMD(x) => {
self.assembler
.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
}
Location::Memory(_, _) | Location::GPR(_) => {
self.move_location(
Size::S64,
Location::SIMD(src1),
ret,
);
self.move_location(Size::S64, Location::SIMD(src1), ret);
}
_ => {
unreachable!();
@@ -3269,6 +2953,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
self.release_simd(tmp1);
}
}
fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret);
}
fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret);
}
fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret);
}
fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret);
}
}
pub type Machine = AbstractMachine<GPR, XMM, MachineX86_64, X64Register>;
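The f32/f64 min/max sequences above load two magic immediates: negative zero (`0x8000_0000` / `0x8000_0000_0000_0000`) and the canonical quiet NaN (`0x7FC0_0000` / `0x7FF8_0000_0000_0000`), which is blended in on the unordered path when NaN canonicalization is enabled. A quick standalone check of those bit patterns:

fn main() {
    // Negative zero: only the sign bit set.
    assert_eq!((-0.0f32).to_bits(), 0x8000_0000);
    assert_eq!((-0.0f64).to_bits(), 0x8000_0000_0000_0000);

    // Canonical quiet NaN: all exponent bits set plus the quiet bit, zero payload.
    assert!(f32::from_bits(0x7FC0_0000).is_nan());
    assert!(f64::from_bits(0x7FF8_0000_0000_0000).is_nan());

    println!("negative-zero and canonical-NaN bit patterns verified");
}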