/*
 * VIS op helpers
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "cpu.h"
#include "helper.h"

/* This macro uses non-native bit order */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This macro uses the order in the manuals, i.e. bit 0 is 2^0 */
#define GET_FIELD_SP(X, FROM, TO)                               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
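
/* Example: GET_FIELD_SP follows the bit numbering of the SPARC
   manuals, so GET_FIELD_SP(x, 4, 7) extracts bits <7:4>; for
   x == 0x1234 it yields (0x1234 >> 4) & 0xf == 3.  */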
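/* ARRAY8 converts the (x, y, z) fixed-point coordinates packed in
   pixel_addr into the byte address of the corresponding element of a
   blocked (tiled) 3-D array; cubesize sets the width of the variable
   x and y bit-fields below.  */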
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}

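/* Concatenate src1:src2 and extract the 8 bytes starting GSR.align
   bytes in.  Example: with GSR.align == 3 the result is
   (src1 << 24) | (src2 >> 40).  */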
uint64_t helper_faligndata(CPUState *env, uint64_t src1, uint64_t src2)
{
    uint64_t tmp;

    tmp = src1 << ((env->gsr & 7) * 8);
    /* on many architectures a shift of 64 does nothing */
    if ((env->gsr & 7) != 0) {
        tmp |= src2 >> (64 - (env->gsr & 7) * 8);
    }
    return tmp;
}

#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;
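
/* With the accessors above, element 0 is always the least significant
   element of the value regardless of host byte order: after
   d.ll = 0x0123456789abcdefULL, d.VIS_B64(0) is 0xef on both
   little- and big-endian hosts.  */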
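/* Interleave the low four bytes of src1 and src2, with src1 bytes
   going to the more significant positions: e.g. low words 0x11223344
   and 0xaabbccdd merge to 0x11aa22bb33cc44dd.  */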
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    /* Reverse calculation order to handle overlap */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */

    return d.ll;
}

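/* Multiply each unsigned byte of src1 by the corresponding signed
   16-bit element of src2 and keep the upper 16 bits of the 24-bit
   product, rounding half-up in the discarded byte: e.g. the lane
   product 0x01 * 0x0180 (1 * 1.5 in 8.8 fixed point) rounds to
   0x0002.  */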
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

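/* The "al"/"au" variants multiply all four bytes of src1 by a single
   16-bit element of src2: word 1 as indexed below for "al", word 0
   for "au".  */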
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

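/* fmul8sux16 multiplies each 16-bit element of src2 by the signed
   upper byte of the corresponding element of src1; fmul8ulx16 below
   uses the unsigned lower byte instead.  */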
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

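/* The fmuld variants widen: the two 16-bit elements in the low half
   of the sources produce two full 32-bit results instead of four
   16-bit ones.  */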
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

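/* Expand each byte of the low word of src1 to a 16-bit fixed-point
   value with four fraction bits: e.g. the byte 0xff becomes 0x0ff0.  */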
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = (uint32_t)src1;
    d.ll = src2;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}

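/* Expands to the lane-wise add/subtract helpers (helper_fpadd16,
   helper_fpadd16s, helper_fpadd32, helper_fpadd32s and the fpsub
   equivalents).  Lanes wrap around rather than saturate: a 16-bit
   lane computing 0x7fff + 0x0001 yields 0x8000.  */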
#define VIS_HELPER(name, F)                                       \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));             \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));             \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));             \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));             \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint32_t name##16s(uint32_t src1, uint32_t src2)              \
    {                                                             \
        VIS32 s, d;                                               \
                                                                  \
        s.l = src1;                                               \
        d.l = src2;                                               \
                                                                  \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));             \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));             \
                                                                  \
        return d.l;                                               \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));             \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));             \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint32_t name##32s(uint32_t src1, uint32_t src2)              \
    {                                                             \
        VIS32 s, d;                                               \
                                                                  \
        s.l = src1;                                               \
        d.l = src2;                                               \
                                                                  \
        d.l = F(d.l, s.l);                                        \
                                                                  \
        return d.l;                                               \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

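/* Partitioned compares: bit i of the result is set when lane i of
   src1 satisfies F against lane i of src2, so e.g.
   helper_fcmpeq32(x, x) returns 3.  The lanes are compared as signed
   values.  */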
#define VIS_CMPHELPER(name, F)                                        \
    uint64_t name##16(uint64_t src1, uint64_t src2)                   \
    {                                                                 \
        VIS64 s, d;                                                   \
                                                                      \
        s.ll = src1;                                                  \
        d.ll = src2;                                                  \
                                                                      \
        d.VIS_W64(0) = F(s.VIS_SW64(0), d.VIS_SW64(0)) ? 1 : 0;       \
        d.VIS_W64(0) |= F(s.VIS_SW64(1), d.VIS_SW64(1)) ? 2 : 0;      \
        d.VIS_W64(0) |= F(s.VIS_SW64(2), d.VIS_SW64(2)) ? 4 : 0;      \
        d.VIS_W64(0) |= F(s.VIS_SW64(3), d.VIS_SW64(3)) ? 8 : 0;      \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;               \
                                                                      \
        return d.ll;                                                  \
    }                                                                 \
                                                                      \
    uint64_t name##32(uint64_t src1, uint64_t src2)                   \
    {                                                                 \
        VIS64 s, d;                                                   \
                                                                      \
        s.ll = src1;                                                  \
        d.ll = src2;                                                  \
                                                                      \
        d.VIS_L64(0) = F((int32_t)s.VIS_L64(0),                       \
                         (int32_t)d.VIS_L64(0)) ? 1 : 0;              \
        d.VIS_L64(0) |= F((int32_t)s.VIS_L64(1),                      \
                          (int32_t)d.VIS_L64(1)) ? 2 : 0;             \
        d.VIS_L64(1) = 0;                                             \
                                                                      \
        return d.ll;                                                  \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

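/* Sum of absolute byte differences, accumulated into sum: e.g. for
   src1 == 0x0a00000000000000 and src2 == 0x0200000000000003 the lane
   differences are 8 and 3, so 11 is added to sum.  */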
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;
    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

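/* Pack four 16-bit fixed-point values into unsigned bytes with
   saturation.  With scale == 0 the inputs have 7 fraction bits, so
   0x3f80 (127.0) packs to 0x7f; negative inputs clamp to 0x00 and
   values above 255 clamp to 0xff.  */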
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}

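/* Pack the two 32-bit elements of rs2 (23 fraction bits here) into
   bytes the same way, shifting each word of rs1 left one byte and
   inserting the packed byte into the freed low byte.  */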
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

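/* Narrow the two 32-bit elements of rs2 to 16 bits with signed
   saturation: results clamp to the range [-32768, 32767].  */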
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        /* Widen before shifting: scale can be up to 31, which would
           overflow a 32-bit shift of a 32-bit value.  */
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}

uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes.  */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    mask = gsr >> 32;

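    /* Each 4-bit field of the mask names a source byte, with byte 0
       being the most significant byte of src1; XOR with host turns
       that number into an index into the host-order array above.  */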
    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i*4)) & 0xf;
        r.VIS_B64(i) = s.b[e ^ host];
    }

    return r.ll;
}
501 }