From 6fa702c8aa2e0ad477df98ce0bf7bcce679aedee Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 30 Jul 2018 18:00:34 -0500 Subject: [PATCH] Implement {min,max,div}{p,s}{s,d} sse instructions (#57) --- gen/x86_table.js | 27 +++++--- src/native/instructions_0f.c | 131 ++++++++++++++++++++++++++++++++++- src/native/sse_instr.c | 12 ++++ src/native/sse_instr.h | 2 + tests/qemu/test-i386.c | 16 ++--- 5 files changed, 168 insertions(+), 20 deletions(-) diff --git a/gen/x86_table.js b/gen/x86_table.js index 46a2b403..ae4b412c 100644 --- a/gen/x86_table.js +++ b/gen/x86_table.js @@ -531,25 +531,34 @@ const encodings = [ { sse: 1, opcode: 0x660F57, e: 1 }, { sse: 1, opcode: 0x0F58, e: 1, }, - { sse: 1, opcode: 0x660F58, e: 1, }, + { sse: 1, opcode: 0x660F58, e: 1, }, { sse: 1, opcode: 0xF20F58, e: 1, }, { sse: 1, opcode: 0xF30F58, e: 1, }, - { sse: 1, opcode: 0x0F59, e: 1, }, - { sse: 1, opcode: 0x660F59, e: 1, }, + { sse: 1, opcode: 0x660F59, e: 1, }, { sse: 1, opcode: 0xF20F59, e: 1, }, { sse: 1, opcode: 0xF30F59, e: 1, }, - { sse: 1, opcode: 0x0F5A, skip: 1 }, - { sse: 1, opcode: 0x0F5B, skip: 1 }, + { sse: 1, opcode: 0x0F5A, skip: 1, }, + { sse: 1, opcode: 0x0F5B, skip: 1, }, { sse: 1, opcode: 0x0F5C, e: 1, }, - { sse: 1, opcode: 0x660F5C, e: 1, }, + { sse: 1, opcode: 0x660F5C, e: 1, }, { sse: 1, opcode: 0xF20F5C, e: 1, }, { sse: 1, opcode: 0xF30F5C, e: 1, }, - { sse: 1, opcode: 0x0F5D, skip: 1, }, - { sse: 1, opcode: 0x0F5E, skip: 1, }, - { sse: 1, opcode: 0x0F5F, skip: 1, }, + { sse: 1, opcode: 0x0F5D, e: 1, }, + { sse: 1, opcode: 0x660F5D, e: 1, }, + { sse: 1, opcode: 0xF20F5D, e: 1, }, + { sse: 1, opcode: 0xF30F5D, e: 1, }, + + { sse: 1, opcode: 0x0F5E, e: 1, }, + { sse: 1, opcode: 0x660F5E, e: 1, }, + { sse: 1, opcode: 0xF20F5E, e: 1, }, + { sse: 1, opcode: 0xF30F5E, e: 1, }, + { sse: 1, opcode: 0x0F5F, e: 1, }, + { sse: 1, opcode: 0x660F5F, e: 1, }, + { sse: 1, opcode: 0xF20F5F, e: 1, }, + { sse: 1, opcode: 0xF30F5F, e: 1, }, { sse: 1, opcode: 0x660F60, e: 1 }, { sse: 1, opcode: 0x0F60, e: 1 }, diff --git a/src/native/instructions_0f.c b/src/native/instructions_0f.c index da5e1079..b2efafc1 100644 --- a/src/native/instructions_0f.c +++ b/src/native/instructions_0f.c @@ -1294,9 +1294,134 @@ void instr_F30F5C(float_t source, int32_t r) { } DEFINE_SSE_SPLIT(instr_F30F5C, fpu_load_m32, read_xmm_f32) -void instr_0F5D() { unimplemented_sse(); } -void instr_0F5E() { unimplemented_sse(); } -void instr_0F5F() { unimplemented_sse(); } +void instr_0F5D(union reg128 source, int32_t r) { + // minps xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f32 = { + sse_min(destination.f32[0], source.f32[0]), + sse_min(destination.f32[1], source.f32[1]), + sse_min(destination.f32[2], source.f32[2]), + sse_min(destination.f32[3], source.f32[3]), + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_0F5D, safe_read128s, read_xmm128s) +void instr_660F5D(union reg128 source, int32_t r) { + // minpd xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f64 = { + sse_min(destination.f64[0], source.f64[0]), + sse_min(destination.f64[1], source.f64[1]), + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_660F5D, safe_read128s, read_xmm128s) +void instr_F20F5D(union reg64 source, int32_t r) { + // minsd xmm, xmm/mem64 + union reg64 destination = read_xmm64s(r); + union reg64 result = { + .f64 = { sse_min(destination.f64[0], source.f64[0]), } + }; + write_xmm64(r, result); +} +DEFINE_SSE_SPLIT(instr_F20F5D, safe_read64s, read_xmm64s) +void instr_F30F5D(float_t source, int32_t r) { + // minss xmm, xmm/mem32 + float_t destination = read_xmm_f32(r); + float result = sse_min(destination, source); + write_xmm_f32(r, result); +} +DEFINE_SSE_SPLIT(instr_F30F5D, fpu_load_m32, read_xmm_f32) + +void instr_0F5E(union reg128 source, int32_t r) { + // divps xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f32 = { + destination.f32[0] / source.f32[0], + destination.f32[1] / source.f32[1], + destination.f32[2] / source.f32[2], + destination.f32[3] / source.f32[3], + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_0F5E, safe_read128s, read_xmm128s) +void instr_660F5E(union reg128 source, int32_t r) { + // divpd xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f64 = { + destination.f64[0] / source.f64[0], + destination.f64[1] / source.f64[1], + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_660F5E, safe_read128s, read_xmm128s) +void instr_F20F5E(union reg64 source, int32_t r) { + // divsd xmm, xmm/mem64 + union reg64 destination = read_xmm64s(r); + union reg64 result = { + .f64 = { destination.f64[0] / source.f64[0], } + }; + write_xmm64(r, result); +} +DEFINE_SSE_SPLIT(instr_F20F5E, safe_read64s, read_xmm64s) +void instr_F30F5E(float_t source, int32_t r) { + // divss xmm, xmm/mem32 + float_t destination = read_xmm_f32(r); + float result = destination / source; + write_xmm_f32(r, result); +} +DEFINE_SSE_SPLIT(instr_F30F5E, fpu_load_m32, read_xmm_f32) + +void instr_0F5F(union reg128 source, int32_t r) { + // maxps xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f32 = { + sse_max(destination.f32[0], source.f32[0]), + sse_max(destination.f32[1], source.f32[1]), + sse_max(destination.f32[2], source.f32[2]), + sse_max(destination.f32[3], source.f32[3]), + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_0F5F, safe_read128s, read_xmm128s) +void instr_660F5F(union reg128 source, int32_t r) { + // maxpd xmm, xmm/mem128 + union reg128 destination = read_xmm128s(r); + union reg128 result = { + .f64 = { + sse_max(destination.f64[0], source.f64[0]), + sse_max(destination.f64[1], source.f64[1]), + } + }; + write_xmm_reg128(r, result); +} +DEFINE_SSE_SPLIT(instr_660F5F, safe_read128s, read_xmm128s) +void instr_F20F5F(union reg64 source, int32_t r) { + // maxsd xmm, xmm/mem64 + union reg64 destination = read_xmm64s(r); + union reg64 result = { + .f64 = { sse_max(destination.f64[0], source.f64[0]), } + }; + write_xmm64(r, result); +} +DEFINE_SSE_SPLIT(instr_F20F5F, safe_read64s, read_xmm64s) +void instr_F30F5F(float_t source, int32_t r) { + // maxss xmm, xmm/mem32 + float_t destination = read_xmm_f32(r); + float result = sse_max(destination, source); + write_xmm_f32(r, result); +} +DEFINE_SSE_SPLIT(instr_F30F5F, fpu_load_m32, read_xmm_f32) void instr_0F60(int32_t source, int32_t r) { diff --git a/src/native/sse_instr.c b/src/native/sse_instr.c index aa2bba5c..82255ae9 100644 --- a/src/native/sse_instr.c +++ b/src/native/sse_instr.c @@ -417,3 +417,15 @@ bool sse_comparison(int32_t op, double_t x, double_t y) assert(false); } + +double_t sse_min(double_t x, double_t y) +{ + // if both x and y are 0 or x is nan, y is returned + return x < y ? x : y; +} + +double_t sse_max(double_t x, double_t y) +{ + // if both x and y are 0 or x is nan, y is returned + return x > y ? x : y; +} diff --git a/src/native/sse_instr.h b/src/native/sse_instr.h index 5ab65359..9c974caf 100644 --- a/src/native/sse_instr.h +++ b/src/native/sse_instr.h @@ -33,3 +33,5 @@ void psrlq_r128(int32_t r, uint32_t shift); void psllq_r128(int32_t r, uint32_t shift); bool sse_comparison(int32_t op, double_t x, double_t y); +double_t sse_min(double_t x, double_t y); +double_t sse_max(double_t x, double_t y); diff --git a/tests/qemu/test-i386.c b/tests/qemu/test-i386.c index 2e08a0ed..feb4c02e 100644 --- a/tests/qemu/test-i386.c +++ b/tests/qemu/test-i386.c @@ -2821,10 +2821,10 @@ void test_sse(void) SSE_OPS(add); SSE_OPS(mul); SSE_OPS(sub); - //SSE_OPS(min); - //SSE_OPS(div); - //SSE_OPS(max); - //SSE_OPS(sqrt); + SSE_OPS(min); + SSE_OPS(div); + SSE_OPS(max); + SSE_OPS(sqrt); SSE_OPS(cmpeq); SSE_OPS(cmplt); SSE_OPS(cmple); @@ -2846,10 +2846,10 @@ void test_sse(void) SSE_OPD(add); SSE_OPD(mul); SSE_OPD(sub); - //SSE_OPD(min); - //SSE_OPD(div); - //SSE_OPD(max); - //SSE_OPD(sqrt); + SSE_OPD(min); + SSE_OPD(div); + SSE_OPD(max); + SSE_OPD(sqrt); SSE_OPD(cmpeq); SSE_OPD(cmplt); SSE_OPD(cmple);