/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

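/* A worked example of the tsz:imm3 shift-immediate encoding decoded by
 * the helpers above.  The 7-bit field is tsz(4):imm3(3); the position of
 * the most significant set bit of tsz selects the element size:
 *
 *   tsz = 0001 -> esz = 0 (bytes)        tsz = 001x -> esz = 1 (halfwords)
 *   tsz = 01xx -> esz = 2 (words)        tsz = 1xxx -> esz = 3 (doublewords)
 *   tsz = 0000 -> clz32(0) = 32, so tszimm_esz returns -1 (unallocated).
 *
 * A right-shift amount is 2 * esize - x: for esz = 0, x runs from 8 to 15
 * and tszimm_shr yields shifts 8 down to 1.  A left-shift amount is
 * x - esize: the same x range yields shifts 0 to 7.
 */
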
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

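/* For example, a 4-byte predicate (VL = 256 bits) is padded to the
 * minimum 8-byte gvec operand, while a 10-byte predicate (VL = 640 bits)
 * rounds up to 16 bytes; only the padding above the true predicate size
 * is required to remain zero.
 */
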
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

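/* The predtest helpers are assumed to return NZCV packed into a single
 * word in the layout consumed above: N in bit 31, C in bit 0, and bit 1
 * holding the inverse of Z (cpu_ZF reads as "Z is clear when nonzero").
 * V is always zero for PredTest.
 */
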
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

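/* One predicate bit corresponds to one byte of the vector, so an element
 * of size 1 << esz bytes is governed by every (1 << esz)-th bit: e.g. for
 * halfwords (esz = 1) the active bits are 0, 2, 4, ..., giving the
 * 0x5555... mask above.
 */
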
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

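/* A NULL entry in the fns[] tables below marks an element size for which
 * the instruction has no allocated encoding; do_zpzz_ool turns that into
 * "return false", which the decoder treats as an unallocated encoding.
 */
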
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3 * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_zpz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

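/* write_fp_dreg is expected to store the 64-bit reduction result into
 * the low doubleword of Vd and, per the architecture's rules for scalar
 * writes, zero the remaining bits of the full vector register.
 */
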
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_reduc * const fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_vpz_ool(s, a, fns[a->esz]);                         \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

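/* The "wide" shift variants below shift b/h/s elements by the amount in
 * the corresponding 64-bit element of Zm; esz == 3 is excluded because a
 * doubleword shift by a doubleword is simply the ordinary zpzz form.
 */
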
DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

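/* For example, with byte elements (esz = 0) a decoded shift of 8 is
 * possible for the right shifts: ASR clamps it to 7 (identical result,
 * since the sign fills every bit), while LSR replaces the destination
 * with zero via do_dupi_z.  An LSL immediate decodes to 0..esize-1 and
 * never takes the zeroing path.
 */
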
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_5 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpzzz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

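/* In do_pppp_flags, psz == 8 means the (padded) predicate fits in one
 * 64-bit word, i.e. VL <= 512 bits, so the operation and the PredTest
 * flags can be computed inline in i64 temporaries without a helper.
 */
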
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

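/* A worked example: with a 256-bit vector (fullsz = 32) and word
 * elements (esz = 2) there are 8 elements.  POW2 yields 8, VL7 yields 7,
 * VL16 yields 0 (only 8 elements are available), MUL3 yields 6, and ALL
 * yields 8.
 */
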
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

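/* Continuing the example above: PTRUE with POW2 on a 256-bit vector and
 * esz = 2 sets numelem = 8 word elements, so setsz = 32 predicate bits;
 * word = pred_esz_masks[2] = 0x1111..., and since setsz % 64 != 0,
 * lastword keeps only the low 32 bits of that mask.  fullsz (32) <= 64,
 * so the single-store fast path applies.
 */
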
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

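/* Predicate registers are a multiple of 2 bytes (VL is a multiple of
 * 128 bits), so len_remain can be 2, 4 or 6 for LDR (predicate); the
 * 6-byte tail is assembled from a 4-byte and a 2-byte little-endian
 * load deposited into one 64-bit store.  Vector registers are a
 * multiple of 16 bytes, so for LDR (vector) len_remain is always 0.
 */
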
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}