arch/ppc64/kernel/vecemu.c

   1 /*
   2  * Routines to emulate some Altivec/VMX instructions, specifically
   3  * those that can trap when given denormalized operands in Java mode.
   4  */
   5 #include <linux/kernel.h>
   6 #include <linux/errno.h>
   7 #include <linux/sched.h>
   8 #include <asm/ptrace.h>
   9 #include <asm/processor.h>
  10 #include <asm/uaccess.h>
  11
  12 /* Functions in vector.S */
  13 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  14 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  15 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  16 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  17 extern void vrefp(vector128 *dst, vector128 *src);
  18 extern void vrsqrtefp(vector128 *dst, vector128 *src);
  19 extern void vexptep(vector128 *dst, vector128 *src);
  20
  21 static unsigned int exp2s[8] = {
  22         0x800000,
  23         0x8b95c2,
  24         0x9837f0,
  25         0xa5fed7,
  26         0xb504f3,
  27         0xc5672a,
  28         0xd744fd,
  29         0xeac0c7
  30 };
  31
  32 /*
  33  * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  34  * single-precision floating-point representation of x.
  35  */
  36 static unsigned int eexp2(unsigned int s)
  37 {
  38         int exp, pwr;
  39         unsigned int mant, frac;
  40
  41         /* extract exponent field from input */
  42         exp = ((s >> 23) & 0xff) - 127;
  43         if (exp > 7) {
  44                 /* check for NaN input */
  45                 if (exp == 128 && (s & 0x7fffff) != 0)
  46                         return s | 0x400000;    /* return QNaN */
  47                 /* 2^-big = 0, 2^+big = +Inf */
  48                 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  49         }
  50         if (exp < -23)
  51                 return 0x3f800000;      /* 1.0 */
  52
  53         /* convert to fixed point integer in 9.23 representation */
  54         pwr = (s & 0x7fffff) | 0x800000;
  55         if (exp > 0)
  56                 pwr <<= exp;
  57         else
  58                 pwr >>= -exp;
  59         if (s & 0x80000000)
  60                 pwr = -pwr;
  61
  62         /* extract integer part, which becomes exponent part of result */
  63         exp = (pwr >> 23) + 126;
  64         if (exp >= 254)
  65                 return 0x7f800000;
  66         if (exp < -23)
  67                 return 0;
  68
  69         /* table lookup on top 3 bits of fraction to get mantissa */
  70         mant = exp2s[(pwr >> 20) & 7];
  71
  72         /* linear interpolation using remaining 20 bits of fraction */
  73         asm("mulhwu %0,%1,%2" : "=r" (frac)
  74             : "r" (pwr << 12), "r" (0x172b83ff));
  75         asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  76         mant += frac;
  77
  78         if (exp >= 0)
  79                 return mant + (exp << 23);
  80
  81         /* denormalized result */
  82         exp = -exp;
  83         mant += 1 << (exp - 1);
  84         return mant >> exp;
  85 }
  86
  87 /*
  88  * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  89  * single-precision floating-point representation of x.
  90  */
  91 static unsigned int elog2(unsigned int s)
  92 {
  93         int exp, mant, lz, frac;
  94
  95         exp = s & 0x7f800000;
  96         mant = s & 0x7fffff;
  97         if (exp == 0x7f800000) {        /* Inf or NaN */
  98                 if (mant != 0)
  99                         s |= 0x400000;  /* turn NaN into QNaN */
 100                 return s;
 101         }
 102         if ((exp | mant) == 0)          /* +0 or -0 */
 103                 return 0xff800000;      /* return -Inf */
 104
 105         if (exp == 0) {
 106                 /* denormalized */
 107                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 108                 mant <<= lz - 8;
 109                 exp = (-118 - lz) << 23;
 110         } else {
 111                 mant |= 0x800000;
 112                 exp -= 127 << 23;
 113         }
 114
 115         if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 116                 exp |= 0x400000;                        /* 0.5 * 2^23 */
 117                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 118                     : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 119         }
 120         if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 121                 exp |= 0x200000;                        /* 0.25 * 2^23 */
 122                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 123                     : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 124         }
 125         if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 126                 exp |= 0x100000;                        /* 0.125 * 2^23 */
 127                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 128                     : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 129         }
 130         if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 131                 /* calculate (mant - 1) * 1.381097463 */
 132                 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 133                 asm("mulhwu %0,%1,%2" : "=r" (frac)
 134                     : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 135                 exp += frac;
 136         }
 137         s = exp & 0x80000000;
 138         if (exp != 0) {
 139                 if (s)
 140                         exp = -exp;
 141                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 142                 lz = 8 - lz;
 143                 if (lz > 0)
 144                         exp >>= lz;
 145                 else if (lz < 0)
 146                         exp <<= -lz;
 147                 s += ((lz + 126) << 23) + exp;
 148         }
 149         return s;
 150 }
 151
 152 #define VSCR_SAT        1
 153
 154 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 155 {
 156         int exp, mant;
 157
 158         exp = (x >> 23) & 0xff;
 159         mant = x & 0x7fffff;
 160         if (exp == 255 && mant != 0)
 161                 return 0;               /* NaN -> 0 */
 162         exp = exp - 127 + scale;
 163         if (exp < 0)
 164                 return 0;               /* round towards zero */
 165         if (exp >= 31) {
 166                 /* saturate, unless the result would be -2^31 */
 167                 if (x + (scale << 23) != 0xcf000000)
 168                         *vscrp |= VSCR_SAT;
 169                 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 170         }
 171         mant |= 0x800000;
 172         mant = (mant << 7) >> (30 - exp);
 173         return (x & 0x80000000)? -mant: mant;
 174 }
 175
 176 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 177 {
 178         int exp;
 179         unsigned int mant;
 180
 181         exp = (x >> 23) & 0xff;
 182         mant = x & 0x7fffff;
 183         if (exp == 255 && mant != 0)
 184                 return 0;               /* NaN -> 0 */
 185         exp = exp - 127 + scale;
 186         if (exp < 0)
 187                 return 0;               /* round towards zero */
 188         if (x & 0x80000000) {
 189                 /* negative => saturate to 0 */
 190                 *vscrp |= VSCR_SAT;
 191                 return 0;
 192         }
 193         if (exp >= 32) {
 194                 /* saturate */
 195                 *vscrp |= VSCR_SAT;
 196                 return 0xffffffff;
 197         }
 198         mant |= 0x800000;
 199         mant = (mant << 8) >> (31 - exp);
 200         return mant;
 201 }
 202
 203 /* Round to floating integer, towards 0 */
 204 static unsigned int rfiz(unsigned int x)
 205 {
 206         int exp;
 207
 208         exp = ((x >> 23) & 0xff) - 127;
 209         if (exp == 128 && (x & 0x7fffff) != 0)
 210                 return x | 0x400000;    /* NaN -> make it a QNaN */
 211         if (exp >= 23)
 212                 return x;               /* it's an integer already (or Inf) */
 213         if (exp < 0)
 214                 return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 215         return x & ~(0x7fffff >> exp);
 216 }
 217
 218 /* Round to floating integer, towards +/- Inf */
 219 static unsigned int rfii(unsigned int x)
 220 {
 221         int exp, mask;
 222
 223         exp = ((x >> 23) & 0xff) - 127;
 224         if (exp == 128 && (x & 0x7fffff) != 0)
 225                 return x | 0x400000;    /* NaN -> make it a QNaN */
 226         if (exp >= 23)
 227                 return x;               /* it's an integer already (or Inf) */
 228         if ((x & 0x7fffffff) == 0)
 229                 return x;               /* +/-0 -> +/-0 */
 230         if (exp < 0)
 231                 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 232                 return (x & 0x80000000) | 0x3f800000;
 233         mask = 0x7fffff >> exp;
 234         /* mantissa overflows into exponent - that's OK,
 235            it can't overflow into the sign bit */
 236         return (x + mask) & ~mask;
 237 }
 238
 239 /* Round to floating integer, to nearest */
 240 static unsigned int rfin(unsigned int x)
 241 {
 242         int exp, half;
 243
 244         exp = ((x >> 23) & 0xff) - 127;
 245         if (exp == 128 && (x & 0x7fffff) != 0)
 246                 return x | 0x400000;    /* NaN -> make it a QNaN */
 247         if (exp >= 23)
 248                 return x;               /* it's an integer already (or Inf) */
 249         if (exp < -1)
 250                 return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 251         if (exp == -1)
 252                 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 253                 return (x & 0x80000000) | 0x3f800000;
 254         half = 0x400000 >> exp;
 255         /* add 0.5 to the magnitude and chop off the fraction bits */
 256         return (x + half) & ~(0x7fffff >> exp);
 257 }
 258
 259 int
 260 emulate_altivec(struct pt_regs *regs)
 261 {
 262         unsigned int instr, i;
 263         unsigned int va, vb, vc, vd;
 264         vector128 *vrs;
 265
 266         if (get_user(instr, (unsigned int __user *) regs->nip))
 267                 return -EFAULT;
 268         if ((instr >> 26) != 4)
 269                 return -EINVAL;         /* not an altivec instruction */
 270         vd = (instr >> 21) & 0x1f;
 271         va = (instr >> 16) & 0x1f;
 272         vb = (instr >> 11) & 0x1f;
 273         vc = (instr >> 6) & 0x1f;
 274
 275         vrs = current->thread.vr;
 276         switch (instr & 0x3f) {
 277         case 10:
 278                 switch (vc) {
 279                 case 0: /* vaddfp */
 280                         vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 281                         break;
 282                 case 1: /* vsubfp */
 283                         vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 284                         break;
 285                 case 4: /* vrefp */
 286                         vrefp(&vrs[vd], &vrs[vb]);
 287                         break;
 288                 case 5: /* vrsqrtefp */
 289                         vrsqrtefp(&vrs[vd], &vrs[vb]);
 290                         break;
 291                 case 6: /* vexptefp */
 292                         for (i = 0; i < 4; ++i)
 293                                 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 294                         break;
 295                 case 7: /* vlogefp */
 296                         for (i = 0; i < 4; ++i)
 297                                 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 298                         break;
 299                 case 8:         /* vrfin */
 300                         for (i = 0; i < 4; ++i)
 301                                 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 302                         break;
 303                 case 9:         /* vrfiz */
 304                         for (i = 0; i < 4; ++i)
 305                                 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 306                         break;
 307                 case 10:        /* vrfip */
 308                         for (i = 0; i < 4; ++i) {
 309                                 u32 x = vrs[vb].u[i];
 310                                 x = (x & 0x80000000)? rfiz(x): rfii(x);
 311                                 vrs[vd].u[i] = x;
 312                         }
 313                         break;
 314                 case 11:        /* vrfim */
 315                         for (i = 0; i < 4; ++i) {
 316                                 u32 x = vrs[vb].u[i];
 317                                 x = (x & 0x80000000)? rfii(x): rfiz(x);
 318                                 vrs[vd].u[i] = x;
 319                         }
 320                         break;
 321                 case 14:        /* vctuxs */
 322                         for (i = 0; i < 4; ++i)
 323                                 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 324                                                 &current->thread.vscr.u[3]);
 325                         break;
 326                 case 15:        /* vctsxs */
 327                         for (i = 0; i < 4; ++i)
 328                                 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 329                                                 &current->thread.vscr.u[3]);
 330                         break;
 331                 default:
 332                         return -EINVAL;
 333                 }
 334                 break;
 335         case 46:        /* vmaddfp */
 336                 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 337                 break;
 338         case 47:        /* vnmsubfp */
 339                 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 340                 break;
 341         default:
 342                 return -EINVAL;
 343         }
 344
 345         return 0;
 346 }