diff --git a/gen/x86_table.js b/gen/x86_table.js index c540d517..cbfb51fe 100644 --- a/gen/x86_table.js +++ b/gen/x86_table.js @@ -670,10 +670,10 @@ const encodings = [ { sse: 1, opcode: 0x0F50, mem_ud: 1, e: 1 }, { sse: 1, opcode: 0x660F50, mem_ud: 1, e: 1 }, - { sse: 1, opcode: 0x0F51, e: 1 }, - { sse: 1, opcode: 0x660F51, e: 1 }, - { sse: 1, opcode: 0xF20F51, e: 1 }, - { sse: 1, opcode: 0xF30F51, e: 1 }, + { sse: 1, opcode: 0x0F51, e: 1, custom: 1 }, + { sse: 1, opcode: 0x660F51, e: 1, custom: 1 }, + { sse: 1, opcode: 0xF20F51, e: 1, custom: 1 }, + { sse: 1, opcode: 0xF30F51, e: 1, custom: 1 }, // approximation of 1/sqrt(x). Skipped because our approximation doesn't match intel's { sse: 1, opcode: 0x0F52, e: 1, skip: 1, }, diff --git a/src/rust/cpu/instructions_0f.rs b/src/rust/cpu/instructions_0f.rs index db890f03..bef530af 100644 --- a/src/rust/cpu/instructions_0f.rs +++ b/src/rust/cpu/instructions_0f.rs @@ -1577,6 +1577,7 @@ pub unsafe fn instr_660F50_reg(r1: i32, r2: i32) { #[no_mangle] pub unsafe fn instr_660F50_mem(_addr: i32, _r1: i32) { trigger_ud(); } +#[no_mangle] pub unsafe fn instr_0F51(source: reg128, r: i32) { // sqrtps xmm, xmm/mem128 // XXX: Should round according to round control @@ -1590,12 +1591,11 @@ pub unsafe fn instr_0F51(source: reg128, r: i32) { }; write_xmm_reg128(r, result); } -#[no_mangle] pub unsafe fn instr_0F51_reg(r1: i32, r2: i32) { instr_0F51(read_xmm128s(r1), r2); } -#[no_mangle] pub unsafe fn instr_0F51_mem(addr: i32, r: i32) { instr_0F51(return_on_pagefault!(safe_read128s(addr)), r); } +#[no_mangle] pub unsafe fn instr_660F51(source: reg128, r: i32) { // sqrtpd xmm, xmm/mem128 // XXX: Should round according to round control @@ -1604,31 +1604,27 @@ pub unsafe fn instr_660F51(source: reg128, r: i32) { }; write_xmm_reg128(r, result); } -#[no_mangle] pub unsafe fn instr_660F51_reg(r1: i32, r2: i32) { instr_660F51(read_xmm128s(r1), r2); } -#[no_mangle] pub unsafe fn instr_660F51_mem(addr: i32, r: i32) { instr_660F51(return_on_pagefault!(safe_read128s(addr)), r); } +#[no_mangle] pub unsafe fn instr_F20F51(source: u64, r: i32) { // sqrtsd xmm, xmm/mem64 // XXX: Should round according to round control write_xmm_f64(r, f64::from_bits(source).sqrt()); } -#[no_mangle] pub unsafe fn instr_F20F51_reg(r1: i32, r2: i32) { instr_F20F51(read_xmm64s(r1), r2); } -#[no_mangle] pub unsafe fn instr_F20F51_mem(addr: i32, r: i32) { instr_F20F51(return_on_pagefault!(safe_read64s(addr)), r); } +#[no_mangle] pub unsafe fn instr_F30F51(source: f32, r: i32) { // sqrtss xmm, xmm/mem32 // XXX: Should round according to round control write_xmm_f32(r, source.sqrt()); } -#[no_mangle] pub unsafe fn instr_F30F51_reg(r1: i32, r2: i32) { instr_F30F51(read_xmm_f32(r1), r2); } -#[no_mangle] pub unsafe fn instr_F30F51_mem(addr: i32, r: i32) { instr_F30F51(return_on_pagefault!(safe_read_f32(addr)), r); } diff --git a/src/rust/jit_instructions.rs b/src/rust/jit_instructions.rs index 1e77f0ea..718fa17b 100644 --- a/src/rust/jit_instructions.rs +++ b/src/rust/jit_instructions.rs @@ -114,6 +114,20 @@ pub fn instr_F3_jit(ctx: &mut JitContext, instr_flags: &mut u32) { jit_handle_prefix(ctx, instr_flags) } +fn sse_read_f32_xmm_mem(ctx: &mut JitContext, name: &str, modrm_byte: ModrmByte, r: u32) { + codegen::gen_modrm_resolve_safe_read32(ctx, modrm_byte); + ctx.builder.reinterpret_i32_as_f32(); + ctx.builder.const_i32(r as i32); + ctx.builder.call_fn2_f32_i32(name); +} +fn sse_read_f32_xmm_xmm(ctx: &mut JitContext, name: &str, r1: u32, r2: u32) { + ctx.builder + .const_i32(global_pointers::get_reg_xmm_offset(r1) as i32); + ctx.builder.load_aligned_f32(0); + ctx.builder.const_i32(r2 as i32); + ctx.builder.call_fn2_f32_i32(name); +} + fn sse_read64_xmm_mem(ctx: &mut JitContext, name: &str, modrm_byte: ModrmByte, r: u32) { codegen::gen_modrm_resolve_safe_read64(ctx, modrm_byte); ctx.builder.const_i32(r as i32); @@ -5131,6 +5145,31 @@ pub fn instr_F30F2D_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) { codegen::gen_set_reg32(ctx, r2); } +pub fn instr_0F51_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) { + sse_read128_xmm_mem(ctx, "instr_0F51", modrm_byte, r); +} +pub fn instr_0F51_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) { + sse_read128_xmm_xmm(ctx, "instr_0F51", r1, r2); +} +pub fn instr_660F51_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) { + sse_read128_xmm_mem(ctx, "instr_660F51", modrm_byte, r); +} +pub fn instr_660F51_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) { + sse_read128_xmm_xmm(ctx, "instr_660F51", r1, r2); +} +pub fn instr_F20F51_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) { + sse_read64_xmm_mem(ctx, "instr_F20F51", modrm_byte, r); +} +pub fn instr_F20F51_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) { + sse_read64_xmm_xmm(ctx, "instr_F20F51", r1, r2); +} +pub fn instr_F30F51_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) { + sse_read_f32_xmm_mem(ctx, "instr_F30F51", modrm_byte, r); +} +pub fn instr_F30F51_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) { + sse_read_f32_xmm_xmm(ctx, "instr_F30F51", r1, r2); +} + pub fn instr_0F60_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) { mmx_read64_mm_mem32(ctx, "instr_0F60", modrm_byte, r); } diff --git a/src/rust/wasmgen/wasm_builder.rs b/src/rust/wasmgen/wasm_builder.rs index a9f225b5..26cfce4d 100644 --- a/src/rust/wasmgen/wasm_builder.rs +++ b/src/rust/wasmgen/wasm_builder.rs @@ -28,6 +28,7 @@ enum FunctionType { FN2_I64_I32_TYPE_INDEX, FN2_I64_I32_RET_TYPE_INDEX, FN2_I64_I32_RET_I64_TYPE_INDEX, + FN2_F32_I32_TYPE_INDEX, FN3_RET_TYPE_INDEX, @@ -347,6 +348,13 @@ impl WasmBuilder { self.output.push(1); self.output.push(op::TYPE_I64); }, + FunctionType::FN2_F32_I32_TYPE_INDEX => { + self.output.push(op::TYPE_FUNC); + self.output.push(2); + self.output.push(op::TYPE_F32); + self.output.push(op::TYPE_I32); + self.output.push(0); + }, FunctionType::FN3_RET_TYPE_INDEX => { self.output.push(op::TYPE_FUNC); self.output.push(3); @@ -942,6 +950,9 @@ impl WasmBuilder { pub fn call_fn2_i64_i32_ret_i64(&mut self, name: &str) { self.call_fn(name, FunctionType::FN2_I64_I32_RET_I64_TYPE_INDEX) } + pub fn call_fn2_f32_i32(&mut self, name: &str) { + self.call_fn(name, FunctionType::FN2_F32_I32_TYPE_INDEX) + } pub fn call_fn2_ret(&mut self, name: &str) { self.call_fn(name, FunctionType::FN2_RET_TYPE_INDEX) }