improvement(compiler) abstraction of F64/F32 add/sub/mul/div operators
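
In outline: the four scalar FP arithmetic ops per width stop being open-coded AVX emissions in the generic x86-64 code generator and become methods on the architecture-neutral `MachineSpecific` trait, with `MachineX86_64` supplying the AVX lowering. A minimal, self-contained sketch of that shape (the trait and method names mirror the diff; `Location` and the backend struct here are simplified stand-ins, not the real wasmer types):

    // Toy model of the abstraction introduced by this commit: the generic
    // code generator calls arithmetic hooks on a `MachineSpecific` trait,
    // and each backend lowers them to its own instructions.

    #[derive(Clone, Copy, Debug)]
    enum Location {
        Simd(u8), // stand-in for an XMM register index
        Gpr(u8),  // stand-in for a general-purpose register
    }

    trait MachineSpecific {
        /// Add 2 F32 values (mirrors the new trait hook in this diff).
        fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location);
    }

    struct MachineX86_64 {
        emitted: Vec<String>, // pretend instruction stream
    }

    impl MachineSpecific for MachineX86_64 {
        fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
            // The real backend relaxes operands into XMM registers and emits
            // `vaddss`; here we just record what would be emitted.
            self.emitted
                .push(format!("vaddss {:?}, {:?} -> {:?}", loc_a, loc_b, ret));
        }
    }

    fn main() {
        let mut specific = MachineX86_64 { emitted: Vec::new() };
        // Before: self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret)?
        // After:  self.machine.specific.f32_add(loc_a, loc_b, ret)
        specific.f32_add(Location::Simd(0), Location::Simd(1), Location::Simd(0));
        println!("{:?}", specific.emitted);
    }

This keeps the generic operator translation free of x86 details, which is the point of the singlepass machine abstraction.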
@@ -418,142 +418,6 @@ impl<'a> FuncGen<'a> {
         }
     }
 
-    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
-    fn emit_relaxed_avx(
-        &mut self,
-        op: fn(&mut Assembler, XMM, XMMOrMemory, XMM),
-        src1: Location,
-        src2: Location,
-        dst: Location,
-    ) -> Result<(), CodegenError> {
-        self.emit_relaxed_avx_base(
-            |this, src1, src2, dst| op(&mut this.machine.specific.assembler, src1, src2, dst),
-            src1,
-            src2,
-            dst,
-        )?;
-        Ok(())
-    }
-
-    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
-    fn emit_relaxed_avx_base<F: FnOnce(&mut Self, XMM, XMMOrMemory, XMM)>(
-        &mut self,
-        op: F,
-        src1: Location,
-        src2: Location,
-        dst: Location,
-    ) -> Result<(), CodegenError> {
-        let tmp1 = self.machine.acquire_temp_simd().unwrap();
-        let tmp2 = self.machine.acquire_temp_simd().unwrap();
-        let tmp3 = self.machine.acquire_temp_simd().unwrap();
-        let tmpg = self.machine.acquire_temp_gpr().unwrap();
-
-        let src1 = match src1 {
-            Location::SIMD(x) => x,
-            Location::GPR(_) | Location::Memory(_, _) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S64, src1, Location::SIMD(tmp1));
-                tmp1
-            }
-            Location::Imm32(_) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S32, src1, Location::GPR(tmpg));
-                self.machine.specific.move_location(
-                    Size::S32,
-                    Location::GPR(tmpg),
-                    Location::SIMD(tmp1),
-                );
-                tmp1
-            }
-            Location::Imm64(_) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S64, src1, Location::GPR(tmpg));
-                self.machine.specific.move_location(
-                    Size::S64,
-                    Location::GPR(tmpg),
-                    Location::SIMD(tmp1),
-                );
-                tmp1
-            }
-            _ => {
-                return Err(CodegenError {
-                    message: "emit_relaxed_avx_base src1: unreachable code".to_string(),
-                })
-            }
-        };
-
-        let src2 = match src2 {
-            Location::SIMD(x) => XMMOrMemory::XMM(x),
-            Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
-            Location::GPR(_) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S64, src2, Location::SIMD(tmp2));
-                XMMOrMemory::XMM(tmp2)
-            }
-            Location::Imm32(_) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S32, src2, Location::GPR(tmpg));
-                self.machine.specific.move_location(
-                    Size::S32,
-                    Location::GPR(tmpg),
-                    Location::SIMD(tmp2),
-                );
-                XMMOrMemory::XMM(tmp2)
-            }
-            Location::Imm64(_) => {
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S64, src2, Location::GPR(tmpg));
-                self.machine.specific.move_location(
-                    Size::S64,
-                    Location::GPR(tmpg),
-                    Location::SIMD(tmp2),
-                );
-                XMMOrMemory::XMM(tmp2)
-            }
-            _ => {
-                return Err(CodegenError {
-                    message: "emit_relaxed_avx_base src2: unreachable code".to_string(),
-                })
-            }
-        };
-
-        match dst {
-            Location::SIMD(x) => {
-                op(self, src1, src2, x);
-            }
-            Location::Memory(_, _) | Location::GPR(_) => {
-                op(self, src1, src2, tmp3);
-                self.machine
-                    .specific
-                    .assembler
-                    .emit_mov(Size::S64, Location::SIMD(tmp3), dst);
-            }
-            _ => {
-                return Err(CodegenError {
-                    message: "emit_relaxed_avx_base dst: unreachable code".to_string(),
-                })
-            }
-        }
-
-        self.machine.release_temp_gpr(tmpg);
-        self.machine.release_temp_simd(tmp3);
-        self.machine.release_temp_simd(tmp2);
-        self.machine.release_temp_simd(tmp1);
-        Ok(())
-    }
-
     /// I32 binary operation with both operands popped from the virtual stack.
     fn emit_binop_i32(&mut self, f: fn(&mut Assembler, Size, Location, Location)) {
         // Using Red Zone here.
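
The deleted helpers carried the operand "relaxation" logic their doc comment describes: AVX scalar instructions want XMM (or memory) operands, so GPR-resident values and immediates are staged through temporaries first. A toy sketch of that normalization under made-up types (`Loc`, `relax_source` are illustrative, not the repo's API):

    // Sketch of the "relaxed" operand pattern from emit_relaxed_avx_base:
    // every source is coerced into something an AVX scalar instruction can
    // consume, spilling through temporaries when needed.

    #[derive(Clone, Copy, Debug)]
    enum Loc {
        Simd(u8),
        Gpr(u8),
        Imm64(u64),
    }

    #[derive(Debug)]
    enum XmmOrMem {
        Xmm(u8),
    }

    fn relax_source(src: Loc, tmp: u8, trace: &mut Vec<String>) -> XmmOrMem {
        match src {
            // Already in a SIMD register: usable as-is.
            Loc::Simd(x) => XmmOrMem::Xmm(x),
            // A GPR bit pattern is moved into a temporary XMM register.
            Loc::Gpr(r) => {
                trace.push(format!("movq xmm{tmp}, r{r}"));
                XmmOrMem::Xmm(tmp)
            }
            // An immediate goes immediate -> GPR -> XMM, as in the diff.
            Loc::Imm64(v) => {
                trace.push(format!("mov rax, {v:#x}"));
                trace.push(format!("movq xmm{tmp}, rax"));
                XmmOrMem::Xmm(tmp)
            }
        }
    }

    fn main() {
        let mut trace = Vec::new();
        let s = relax_source(Loc::Imm64(0x3FF0_0000_0000_0000), 1, &mut trace);
        println!("{s:?}\n{}", trace.join("\n"));
    }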
@@ -2369,7 +2233,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f32(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret)?;
+                self.machine.specific.f32_add(loc_a, loc_b, ret);
             }
             Operator::F32Sub => {
                 self.fp_stack.pop2()?;
@@ -2377,7 +2241,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f32(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret)?;
+                self.machine.specific.f32_sub(loc_a, loc_b, ret);
             }
             Operator::F32Mul => {
                 self.fp_stack.pop2()?;
@@ -2385,7 +2249,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f32(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret)?;
+                self.machine.specific.f32_mul(loc_a, loc_b, ret);
             }
             Operator::F32Div => {
                 self.fp_stack.pop2()?;
@@ -2393,7 +2257,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f32(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret)?;
+                self.machine.specific.f32_div(loc_a, loc_b, ret);
             }
             Operator::F32Max => {
                 self.fp_stack.pop2()?;
@@ -2585,7 +2449,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f64(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret)?;
+                self.machine.specific.f64_add(loc_a, loc_b, ret);
             }
             Operator::F64Sub => {
                 self.fp_stack.pop2()?;
@@ -2593,7 +2457,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f64(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret)?;
+                self.machine.specific.f64_sub(loc_a, loc_b, ret);
             }
             Operator::F64Mul => {
                 self.fp_stack.pop2()?;
@@ -2601,7 +2465,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f64(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret)?;
+                self.machine.specific.f64_mul(loc_a, loc_b, ret);
            }
             Operator::F64Div => {
                 self.fp_stack.pop2()?;
@@ -2609,7 +2473,7 @@ impl<'a> FuncGen<'a> {
                     .push(FloatValue::cncl_f64(self.value_stack.len() - 2));
                 let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::F64);
 
-                self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret)?;
+                self.machine.specific.f64_div(loc_a, loc_b, ret);
             }
             Operator::F64Max => {
                 self.fp_stack.pop2()?;
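
After the change each arm is one architecture-neutral call; the `?` disappears because the new trait hooks return `()` rather than `Result`. The `i2o1_prepare` call visible in every arm is the "2 inputs, 1 output" stack discipline. A toy sketch of that preparation step (the struct name matches the diff, but the allocation policy here is invented):

    // Toy sketch of I2O1 ("2 inputs, 1 output") preparation: the translator
    // pops the operand locations off its virtual value stack and allocates a
    // location for the result.

    #[derive(Clone, Copy, Debug)]
    struct Location(u8);

    struct I2O1 {
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    }

    struct Translator {
        value_stack: Vec<Location>,
    }

    impl Translator {
        fn i2o1_prepare(&mut self) -> I2O1 {
            // Operands come off the stack in reverse push order.
            let loc_b = self.value_stack.pop().expect("stack underflow");
            let loc_a = self.value_stack.pop().expect("stack underflow");
            // Reuse loc_a's slot for the result, as a register allocator
            // might; the real implementation consults its allocation state.
            let ret = loc_a;
            self.value_stack.push(ret);
            I2O1 { loc_a, loc_b, ret }
        }
    }

    fn main() {
        let mut t = Translator { value_stack: vec![Location(0), Location(1)] };
        let I2O1 { loc_a, loc_b, ret } = t.i2o1_prepare();
        println!("a={loc_a:?} b={loc_b:?} ret={ret:?}");
    }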
@@ -448,7 +448,14 @@ pub trait MachineSpecific<R: Reg, S: Reg> {
     fn f64_min(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
     /// get Max for 2 F64 values
     fn f64_max(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Add 2 F64 values
+    fn f64_add(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Sub 2 F64 values
+    fn f64_sub(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Multiply 2 F64 values
+    fn f64_mul(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Divide 2 F64 values
+    fn f64_div(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
     /// Negate an F32
     fn f32_neg(&mut self, loc: Location<R, S>, ret: Location<R, S>);
     /// Get the Absolute Value of an F32
@@ -481,6 +488,14 @@ pub trait MachineSpecific<R: Reg, S: Reg> {
     fn f32_min(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
     /// get Max for 2 F32 values
     fn f32_max(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Add 2 F32 values
+    fn f32_add(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Sub 2 F32 values
+    fn f32_sub(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Multiply 2 F32 values
+    fn f32_mul(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
+    /// Divide 2 F32 values
+    fn f32_div(&mut self, loc_a: Location<R, S>, loc_b: Location<R, S>, ret: Location<R, S>);
 }
 
 pub struct Machine<R: Reg, S: Reg, M: MachineSpecific<R, S>, C: CombinedRegister> {
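
Hoisting these eight methods onto `MachineSpecific` is what lets a second architecture plug in later without touching the operator translation. A hypothetical sketch, with a made-up AArch64-flavored backend beside an x86-flavored one (neither is code from this repo):

    #[derive(Clone, Copy, Debug)]
    struct Location(u8);

    trait MachineSpecific {
        fn f64_add(&mut self, a: Location, b: Location, ret: Location);
    }

    struct MachineX86_64 {
        out: Vec<String>,
    }
    impl MachineSpecific for MachineX86_64 {
        fn f64_add(&mut self, a: Location, b: Location, ret: Location) {
            self.out.push(format!("vaddsd xmm{}, xmm{}, xmm{}", ret.0, a.0, b.0));
        }
    }

    struct MachineAarch64 {
        out: Vec<String>,
    }
    impl MachineSpecific for MachineAarch64 {
        fn f64_add(&mut self, a: Location, b: Location, ret: Location) {
            self.out.push(format!("fadd d{}, d{}, d{}", ret.0, a.0, b.0));
        }
    }

    fn main() {
        let (mut x86, mut arm) = (MachineX86_64 { out: vec![] }, MachineAarch64 { out: vec![] });
        // The generic translator only ever sees the trait.
        for m in [&mut x86 as &mut dyn MachineSpecific, &mut arm] {
            m.f64_add(Location(0), Location(1), Location(0));
        }
        println!("{:?} {:?}", x86.out, arm.out);
    }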
@@ -559,15 +574,6 @@ impl<R: Reg, S: Reg, M: MachineSpecific<R, S>, C: CombinedRegister> Machine<R, S
         self.specific.release_cmpxchg_temp_gpr();
     }
 
-    /// Acquires a temporary XMM register.
-    pub fn acquire_temp_simd(&mut self) -> Option<S> {
-        self.specific.acquire_temp_simd()
-    }
-
-    /// Releases a temporary XMM register.
-    pub fn release_temp_simd(&mut self, simd: S) {
-        self.specific.release_simd(simd);
-    }
     /// Releases a XMM register.
     pub fn release_simd(&mut self, simd: S) {
         self.specific.release_simd(simd);
@@ -2448,37 +2448,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src1 = match loc_a {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 _ => {
@@ -2488,37 +2468,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src2 = match loc_b {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 _ => {
@@ -2530,34 +2490,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let tmp_xmm2 = XMM::XMM9;
             let tmp_xmm3 = XMM::XMM10;
 
-            self.move_location(
-                Size::S64,
-                Location::SIMD(src1),
-                Location::GPR(tmpg1),
-            );
-            self.move_location(
-                Size::S64,
-                Location::SIMD(src2),
-                Location::GPR(tmpg2),
-            );
-            self.assembler.emit_cmp(
-                Size::S64,
-                Location::GPR(tmpg2),
-                Location::GPR(tmpg1),
-            );
-            self.assembler.emit_vminsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-            );
+            self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1));
+            self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2));
+            self.assembler
+                .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
+            self.assembler
+                .emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
             let label1 = self.assembler.get_label();
             let label2 = self.assembler.get_label();
-            self.assembler
-                .emit_jmp(Condition::NotEqual, label1);
+            self.assembler.emit_jmp(Condition::NotEqual, label1);
             self.assembler
                 .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
-            self.assembler
-                .emit_jmp(Condition::None, label2);
+            self.assembler.emit_jmp(Condition::None, label2);
             self.emit_label(label1);
             // load float -0.0
             self.move_location(
@@ -2565,56 +2509,30 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
                 Location::Imm64(0x8000_0000_0000_0000), // Negative zero
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(tmp_xmm2),
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2));
             self.emit_label(label2);
-            self.assembler.emit_vcmpeqsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm3,
-            );
-            self.assembler.emit_vblendvpd(
-                tmp_xmm3,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm1,
-                tmp_xmm1,
-            );
-            self.assembler.emit_vcmpunordsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                src1,
-            );
+            self.assembler
+                .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
+            self.assembler
+                .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
+            self.assembler
+                .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
             // load float canonical nan
             self.move_location(
                 Size::S64,
                 Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(src2),
-            );
-            self.assembler.emit_vblendvpd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-                src1,
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
+            self.assembler
+                .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
             match ret {
                 Location::SIMD(x) => {
                     self.assembler
                         .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
                 }
                 Location::Memory(_, _) | Location::GPR(_) => {
-                    self.move_location(
-                        Size::S64,
-                        Location::SIMD(src1),
-                        ret,
-                    );
+                    self.move_location(Size::S64, Location::SIMD(src1), ret);
                 }
                 _ => {
                     unreachable!();
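
The label-and-blend sequence above is not a bare `vminsd`: WebAssembly's `f64.min` requires `min(-0.0, +0.0) == -0.0` (hence the negative-zero constant) and a canonical NaN whenever either input is NaN (hence the `vcmpunordsd`/`vblendvpd` tail). A scalar Rust rendering of the semantics being reproduced, for reference only (this is not the emitted code):

    // Reference semantics for WebAssembly f64.min with canonical-NaN and
    // signed-zero handling, mirroring the constants in the diff.
    fn wasm_f64_min(a: f64, b: f64) -> f64 {
        if a.is_nan() || b.is_nan() {
            return f64::from_bits(0x7FF8_0000_0000_0000); // canonical NaN
        }
        if a == b {
            // +0.0 == -0.0 under IEEE comparison; OR-ing the sign bits picks
            // the negative zero, which is what the -0.0 constant and the
            // blend in the emitted sequence accomplish.
            return f64::from_bits(a.to_bits() | b.to_bits());
        }
        if a < b { a } else { b }
    }

    fn main() {
        assert!(wasm_f64_min(-0.0, 0.0).is_sign_negative());
        assert!(wasm_f64_min(f64::NAN, 1.0).is_nan());
        assert_eq!(wasm_f64_min(1.0, 2.0), 1.0);
    }

The `f64.max` hunks below use the same skeleton with +0.0 (built via `vxorpd`) in place of the -0.0 constant.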
@@ -2639,37 +2557,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src1 = match loc_a {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 _ => {
@@ -2679,37 +2577,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src2 = match loc_b {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 _ => {
@@ -2721,85 +2599,44 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let tmp_xmm2 = XMM::XMM9;
             let tmp_xmm3 = XMM::XMM10;
 
-            self.move_location(
-                Size::S64,
-                Location::SIMD(src1),
-                Location::GPR(tmpg1),
-            );
-            self.move_location(
-                Size::S64,
-                Location::SIMD(src2),
-                Location::GPR(tmpg2),
-            );
-            self.assembler.emit_cmp(
-                Size::S64,
-                Location::GPR(tmpg2),
-                Location::GPR(tmpg1),
-            );
-            self.assembler.emit_vmaxsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-            );
+            self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1));
+            self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2));
+            self.assembler
+                .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
+            self.assembler
+                .emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
             let label1 = self.assembler.get_label();
             let label2 = self.assembler.get_label();
-            self.assembler
-                .emit_jmp(Condition::NotEqual, label1);
+            self.assembler.emit_jmp(Condition::NotEqual, label1);
             self.assembler
                 .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
-            self.assembler
-                .emit_jmp(Condition::None, label2);
+            self.assembler.emit_jmp(Condition::None, label2);
             self.emit_label(label1);
-            self.assembler.emit_vxorpd(
-                tmp_xmm2,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm2,
-            );
+            self.assembler
+                .emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2);
             self.emit_label(label2);
-            self.assembler.emit_vcmpeqsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm3,
-            );
-            self.assembler.emit_vblendvpd(
-                tmp_xmm3,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm1,
-                tmp_xmm1,
-            );
-            self.assembler.emit_vcmpunordsd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                src1,
-            );
+            self.assembler
+                .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
+            self.assembler
+                .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
+            self.assembler
+                .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
             // load float canonical nan
             self.move_location(
                 Size::S64,
                 Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(src2),
-            );
-            self.assembler.emit_vblendvpd(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-                src1,
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
+            self.assembler
+                .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
             match ret {
                 Location::SIMD(x) => {
                     self.assembler
                         .emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
                 }
                 Location::Memory(_, _) | Location::GPR(_) => {
-                    self.move_location(
-                        Size::S64,
-                        Location::SIMD(src1),
-                        ret,
-                    );
+                    self.move_location(Size::S64, Location::SIMD(src1), ret);
                }
                 _ => {
                     unreachable!();
@@ -2812,7 +2649,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             self.release_simd(tmp1);
         }
     }
 
+    fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vaddsd, loc_a, loc_b, ret);
+    }
+    fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vsubsd, loc_a, loc_b, ret);
+    }
+    fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vmulsd, loc_a, loc_b, ret);
+    }
+    fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vdivsd, loc_a, loc_b, ret);
+    }
     fn f32_neg(&mut self, loc: Location, ret: Location) {
         if self.assembler.arch_has_fneg() {
             let tmp = self.acquire_temp_simd().unwrap();
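
The new `f64_*` methods are thin wrappers: each passes an `Assembler` method as a plain `fn` item to the shared relaxed-AVX helper, which now lives on `MachineX86_64` itself. A toy sketch of that function-pointer dispatch (types simplified; the real helper also normalizes operands and takes an `XMMOrMemory` source):

    struct Assembler {
        out: Vec<String>,
    }

    #[derive(Clone, Copy, Debug)]
    struct Xmm(u8);

    impl Assembler {
        fn emit_vaddsd(&mut self, a: Xmm, b: Xmm, dst: Xmm) {
            self.out.push(format!("vaddsd xmm{}, xmm{}, xmm{}", dst.0, a.0, b.0));
        }
        fn emit_vmulsd(&mut self, a: Xmm, b: Xmm, dst: Xmm) {
            self.out.push(format!("vmulsd xmm{}, xmm{}, xmm{}", dst.0, a.0, b.0));
        }
    }

    // Shared helper: relaxes operands (elided here) and calls the
    // opcode-specific emitter it was handed.
    fn emit_relaxed_avx(
        asm: &mut Assembler,
        op: fn(&mut Assembler, Xmm, Xmm, Xmm),
        a: Xmm,
        b: Xmm,
        dst: Xmm,
    ) {
        op(asm, a, b, dst);
    }

    fn main() {
        let mut asm = Assembler { out: Vec::new() };
        emit_relaxed_avx(&mut asm, Assembler::emit_vaddsd, Xmm(0), Xmm(1), Xmm(0));
        emit_relaxed_avx(&mut asm, Assembler::emit_vmulsd, Xmm(0), Xmm(2), Xmm(0));
        println!("{}", asm.out.join("\n"));
    }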
@@ -2904,37 +2752,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src1 = match loc_a {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 _ => {
@@ -2944,37 +2772,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src2 = match loc_b {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 _ => {
@@ -2986,34 +2794,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let tmp_xmm2 = XMM::XMM9;
             let tmp_xmm3 = XMM::XMM10;
 
-            self.move_location(
-                Size::S32,
-                Location::SIMD(src1),
-                Location::GPR(tmpg1),
-            );
-            self.move_location(
-                Size::S32,
-                Location::SIMD(src2),
-                Location::GPR(tmpg2),
-            );
-            self.assembler.emit_cmp(
-                Size::S32,
-                Location::GPR(tmpg2),
-                Location::GPR(tmpg1),
-            );
-            self.assembler.emit_vminss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-            );
+            self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1));
+            self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2));
+            self.assembler
+                .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
+            self.assembler
+                .emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
             let label1 = self.assembler.get_label();
             let label2 = self.assembler.get_label();
-            self.assembler
-                .emit_jmp(Condition::NotEqual, label1);
+            self.assembler.emit_jmp(Condition::NotEqual, label1);
             self.assembler
                 .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
-            self.assembler
-                .emit_jmp(Condition::None, label2);
+            self.assembler.emit_jmp(Condition::None, label2);
             self.emit_label(label1);
             // load float -0.0
             self.move_location(
@@ -3021,56 +2813,30 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
                 Location::Imm32(0x8000_0000), // Negative zero
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(tmp_xmm2),
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2));
             self.emit_label(label2);
-            self.assembler.emit_vcmpeqss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm3,
-            );
-            self.assembler.emit_vblendvps(
-                tmp_xmm3,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm1,
-                tmp_xmm1,
-            );
-            self.assembler.emit_vcmpunordss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                src1,
-            );
+            self.assembler
+                .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
+            self.assembler
+                .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
+            self.assembler
+                .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1);
             // load float canonical nan
             self.move_location(
                 Size::S64,
                 Location::Imm32(0x7FC0_0000), // Canonical NaN
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(src2),
-            );
-            self.assembler.emit_vblendvps(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-                src1,
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
+            self.assembler
+                .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
             match ret {
                 Location::SIMD(x) => {
                     self.assembler
                         .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
                 }
                 Location::Memory(_, _) | Location::GPR(_) => {
-                    self.move_location(
-                        Size::S64,
-                        Location::SIMD(src1),
-                        ret,
-                    );
+                    self.move_location(Size::S64, Location::SIMD(src1), ret);
                 }
                 _ => {
                     unreachable!();
@@ -3087,7 +2853,6 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
         if !self.arch_supports_canonicalize_nan() {
             self.emit_relaxed_avx(Assembler::emit_vmaxss, loc_a, loc_b, ret);
         } else {
-
             let tmp1 = self.acquire_temp_simd().unwrap();
             let tmp2 = self.acquire_temp_simd().unwrap();
             let tmpg1 = self.acquire_temp_gpr().unwrap();
@@ -3096,37 +2861,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src1 = match loc_a {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_a,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp1),
-                    );
+                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1));
                     tmp1
                 }
                 _ => {
@@ -3136,37 +2881,17 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let src2 = match loc_b {
                 Location::SIMD(x) => x,
                 Location::GPR(_) | Location::Memory(_, _) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm32(_) => {
-                    self.move_location(
-                        Size::S32,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S32,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 Location::Imm64(_) => {
-                    self.move_location(
-                        Size::S64,
-                        loc_b,
-                        Location::GPR(tmpg1),
-                    );
-                    self.move_location(
-                        Size::S64,
-                        Location::GPR(tmpg1),
-                        Location::SIMD(tmp2),
-                    );
+                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1));
+                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2));
                     tmp2
                 }
                 _ => {
@@ -3178,85 +2903,44 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             let tmp_xmm2 = XMM::XMM9;
             let tmp_xmm3 = XMM::XMM10;
 
-            self.move_location(
-                Size::S32,
-                Location::SIMD(src1),
-                Location::GPR(tmpg1),
-            );
-            self.move_location(
-                Size::S32,
-                Location::SIMD(src2),
-                Location::GPR(tmpg2),
-            );
-            self.assembler.emit_cmp(
-                Size::S32,
-                Location::GPR(tmpg2),
-                Location::GPR(tmpg1),
-            );
-            self.assembler.emit_vmaxss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-            );
+            self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1));
+            self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2));
+            self.assembler
+                .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1));
+            self.assembler
+                .emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
             let label1 = self.assembler.get_label();
             let label2 = self.assembler.get_label();
-            self.assembler
-                .emit_jmp(Condition::NotEqual, label1);
+            self.assembler.emit_jmp(Condition::NotEqual, label1);
             self.assembler
                 .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
-            self.assembler
-                .emit_jmp(Condition::None, label2);
+            self.assembler.emit_jmp(Condition::None, label2);
             self.emit_label(label1);
-            self.assembler.emit_vxorps(
-                tmp_xmm2,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm2,
-            );
+            self.assembler
+                .emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2);
             self.emit_label(label2);
-            self.assembler.emit_vcmpeqss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm3,
-            );
-            self.assembler.emit_vblendvps(
-                tmp_xmm3,
-                XMMOrMemory::XMM(tmp_xmm2),
-                tmp_xmm1,
-                tmp_xmm1,
-            );
-            self.assembler.emit_vcmpunordss(
-                src1,
-                XMMOrMemory::XMM(src2),
-                src1,
-            );
+            self.assembler
+                .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
+            self.assembler
+                .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
+            self.assembler
+                .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1);
             // load float canonical nan
             self.move_location(
                 Size::S64,
                 Location::Imm32(0x7FC0_0000), // Canonical NaN
                 Location::GPR(tmpg1),
             );
-            self.move_location(
-                Size::S64,
-                Location::GPR(tmpg1),
-                Location::SIMD(src2),
-            );
-            self.assembler.emit_vblendvps(
-                src1,
-                XMMOrMemory::XMM(src2),
-                tmp_xmm1,
-                src1,
-            );
+            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2));
+            self.assembler
+                .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
             match ret {
                 Location::SIMD(x) => {
                     self.assembler
                         .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
                 }
                 Location::Memory(_, _) | Location::GPR(_) => {
-                    self.move_location(
-                        Size::S64,
-                        Location::SIMD(src1),
-                        ret,
-                    );
+                    self.move_location(Size::S64, Location::SIMD(src1), ret);
                 }
                 _ => {
                     unreachable!();
@@ -3269,6 +2953,18 @@ impl MachineSpecific<GPR, XMM> for MachineX86_64 {
             self.release_simd(tmp1);
         }
     }
+    fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vaddss, loc_a, loc_b, ret);
+    }
+    fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vsubss, loc_a, loc_b, ret);
+    }
+    fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vmulss, loc_a, loc_b, ret);
+    }
+    fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_avx(Assembler::emit_vdivss, loc_a, loc_b, ret);
+    }
 }
 
 pub type Machine = AbstractMachine<GPR, XMM, MachineX86_64, X64Register>;
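
The closing alias pins the generic machine to concrete x86-64 register and backend types. A compilable sketch of the same pattern with stand-in types (`AbstractMachine` here is a toy; the real one also carries a `CombinedRegister` parameter):

    use std::marker::PhantomData;

    trait Reg {}

    struct GPR;
    struct XMM;
    impl Reg for GPR {}
    impl Reg for XMM {}

    trait MachineSpecific<R: Reg, S: Reg> {
        fn f32_add(&mut self);
    }

    struct MachineX86_64;

    impl MachineSpecific<GPR, XMM> for MachineX86_64 {
        fn f32_add(&mut self) {
            // Would emit `vaddss` through the relaxed-AVX helper.
        }
    }

    struct AbstractMachine<R: Reg, S: Reg, M: MachineSpecific<R, S>> {
        specific: M,
        _regs: PhantomData<(R, S)>,
    }

    // The concrete machine used by the rest of the singlepass compiler.
    type Machine = AbstractMachine<GPR, XMM, MachineX86_64>;

    fn main() {
        let mut m: Machine = AbstractMachine {
            specific: MachineX86_64,
            _regs: PhantomData,
        };
        m.specific.f32_add();
    }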