From 5c6562c204699616766b0d50c9d8a84d4a029b77 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 16 Sep 2011 17:29:04 +0200 Subject: [PATCH] target-i386: Fix several SSE3 instructions. haddp[sd], hsubp[sd] and addsubp[sd] operate on floats, thus it is necessary to use the appropriate floating point calculation functions. If this is not done, those functions operate merely on integers, which is not correct. Signed-off-by: Max Reitz Signed-off-by: Blue Swirl --- target-i386/ops_sse.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 703be99cd..aa41d2596 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -859,51 +859,51 @@ void helper_insertq_i(XMMReg *d, int index, int length) void helper_haddps(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); - r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); - r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1); - r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3); + r.XMM_S(0) = float32_add(d->XMM_S(0), d->XMM_S(1), &env->sse_status); + r.XMM_S(1) = float32_add(d->XMM_S(2), d->XMM_S(3), &env->sse_status); + r.XMM_S(2) = float32_add(s->XMM_S(0), s->XMM_S(1), &env->sse_status); + r.XMM_S(3) = float32_add(s->XMM_S(2), s->XMM_S(3), &env->sse_status); *d = r; } void helper_haddpd(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); - r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); + r.XMM_D(0) = float64_add(d->XMM_D(0), d->XMM_D(1), &env->sse_status); + r.XMM_D(1) = float64_add(s->XMM_D(0), s->XMM_D(1), &env->sse_status); *d = r; } void helper_hsubps(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); - r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); - r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1); - r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3); + r.XMM_S(0) = float32_sub(d->XMM_S(0), d->XMM_S(1), &env->sse_status); + r.XMM_S(1) = float32_sub(d->XMM_S(2), d->XMM_S(3), &env->sse_status); + r.XMM_S(2) = float32_sub(s->XMM_S(0), s->XMM_S(1), &env->sse_status); + r.XMM_S(3) = float32_sub(s->XMM_S(2), s->XMM_S(3), &env->sse_status); *d = r; } void helper_hsubpd(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); - r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); + r.XMM_D(0) = float64_sub(d->XMM_D(0), d->XMM_D(1), &env->sse_status); + r.XMM_D(1) = float64_sub(s->XMM_D(0), s->XMM_D(1), &env->sse_status); *d = r; } void helper_addsubps(XMMReg *d, XMMReg *s) { - d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); - d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); - d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); - d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3); + d->XMM_S(0) = float32_sub(d->XMM_S(0), s->XMM_S(0), &env->sse_status); + d->XMM_S(1) = float32_add(d->XMM_S(1), s->XMM_S(1), &env->sse_status); + d->XMM_S(2) = float32_sub(d->XMM_S(2), s->XMM_S(2), &env->sse_status); + d->XMM_S(3) = float32_add(d->XMM_S(3), s->XMM_S(3), &env->sse_status); } void helper_addsubpd(XMMReg *d, XMMReg *s) { - d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); - d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); + d->XMM_D(0) = float64_sub(d->XMM_D(0), s->XMM_D(0), &env->sse_status); + d->XMM_D(1) = float64_add(d->XMM_D(1), s->XMM_D(1), &env->sse_status); } /* XXX: unordered */ -- 2.39.2