/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 * Helpers for extracting complex instruction fields.
40 /* See e.g. ASR (immediate, predicated).
41 * Returns -1 for unallocated encoding; diagnose later.
/*
 * Helpers for extracting complex instruction fields.
 *
 * See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

/* Decode the shift amount for a right shift from the tsz:imm3 field. */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
61 * Include the generated decoder.
64 #include "decode-sve.inc.c"
67 * Implement all of the translator functions referenced by the decoder.
70 /* Return the offset info CPUARMState of the predicate vector register Pn.
71 * Note for this purpose, FFR is P16.
73 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
75 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
78 /* Return the byte size of the whole predicate register, VL / 64. */
79 static inline int pred_full_reg_size(DisasContext
*s
)
81 return s
->sve_len
>> 3;
84 /* Round up the size of a register to a size allowed by
85 * the tcg vector infrastructure. Any operation which uses this
86 * size may assume that the bits above pred_full_reg_size are zero,
87 * and must leave them the same way.
89 * Note that this is not needed for the vector registers as they
90 * are always properly sized for tcg vectors.
92 static int size_for_gvec(int size
)
97 return QEMU_ALIGN_UP(size
, 16);
101 static int pred_gvec_reg_size(DisasContext
*s
)
103 return size_for_gvec(pred_full_reg_size(s
));
106 /* Invoke a vector expander on two Zregs. */
107 static bool do_vector2_z(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
108 int esz
, int rd
, int rn
)
110 if (sve_access_check(s
)) {
111 unsigned vsz
= vec_full_reg_size(s
);
112 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
113 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
118 /* Invoke a vector expander on three Zregs. */
119 static bool do_vector3_z(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
120 int esz
, int rd
, int rn
, int rm
)
122 if (sve_access_check(s
)) {
123 unsigned vsz
= vec_full_reg_size(s
);
124 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
125 vec_full_reg_offset(s
, rn
),
126 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
131 /* Invoke a vector move on two Zregs. */
132 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
134 return do_vector2_z(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
137 /* Invoke a vector expander on two Pregs. */
138 static bool do_vector2_p(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
139 int esz
, int rd
, int rn
)
141 if (sve_access_check(s
)) {
142 unsigned psz
= pred_gvec_reg_size(s
);
143 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
144 pred_full_reg_offset(s
, rn
), psz
, psz
);
149 /* Invoke a vector expander on three Pregs. */
150 static bool do_vector3_p(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
151 int esz
, int rd
, int rn
, int rm
)
153 if (sve_access_check(s
)) {
154 unsigned psz
= pred_gvec_reg_size(s
);
155 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
156 pred_full_reg_offset(s
, rn
),
157 pred_full_reg_offset(s
, rm
), psz
, psz
);
162 /* Invoke a vector operation on four Pregs. */
163 static bool do_vecop4_p(DisasContext
*s
, const GVecGen4
*gvec_op
,
164 int rd
, int rn
, int rm
, int rg
)
166 if (sve_access_check(s
)) {
167 unsigned psz
= pred_gvec_reg_size(s
);
168 tcg_gen_gvec_4(pred_full_reg_offset(s
, rd
),
169 pred_full_reg_offset(s
, rn
),
170 pred_full_reg_offset(s
, rm
),
171 pred_full_reg_offset(s
, rg
),
177 /* Invoke a vector move on two Pregs. */
178 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
180 return do_vector2_p(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
183 /* Set the cpu flags as per a return from an SVE helper. */
184 static void do_pred_flags(TCGv_i32 t
)
186 tcg_gen_mov_i32(cpu_NF
, t
);
187 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
188 tcg_gen_andi_i32(cpu_CF
, t
, 1);
189 tcg_gen_movi_i32(cpu_VF
, 0);
192 /* Subroutines computing the ARM PredTest psuedofunction. */
193 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
195 TCGv_i32 t
= tcg_temp_new_i32();
197 gen_helper_sve_predtest1(t
, d
, g
);
199 tcg_temp_free_i32(t
);
202 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
204 TCGv_ptr dptr
= tcg_temp_new_ptr();
205 TCGv_ptr gptr
= tcg_temp_new_ptr();
208 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
209 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
210 t
= tcg_const_i32(words
);
212 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
213 tcg_temp_free_ptr(dptr
);
214 tcg_temp_free_ptr(gptr
);
217 tcg_temp_free_i32(t
);
220 /* For each element size, the bits within a predicate word that are active. */
221 const uint64_t pred_esz_masks
[4] = {
222 0xffffffffffffffffull
, 0x5555555555555555ull
,
223 0x1111111111111111ull
, 0x0101010101010101ull
227 *** SVE Logical - Unpredicated Group
230 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
232 return do_vector3_z(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
235 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
237 if (a
->rn
== a
->rm
) { /* MOV */
238 return do_mov_z(s
, a
->rd
, a
->rn
);
240 return do_vector3_z(s
, tcg_gen_gvec_or
, 0, a
->rd
, a
->rn
, a
->rm
);
244 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
246 return do_vector3_z(s
, tcg_gen_gvec_xor
, 0, a
->rd
, a
->rn
, a
->rm
);
249 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
251 return do_vector3_z(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
255 *** SVE Integer Arithmetic - Unpredicated Group
258 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
260 return do_vector3_z(s
, tcg_gen_gvec_add
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
263 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
265 return do_vector3_z(s
, tcg_gen_gvec_sub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
268 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
270 return do_vector3_z(s
, tcg_gen_gvec_ssadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
273 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
275 return do_vector3_z(s
, tcg_gen_gvec_sssub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
278 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
280 return do_vector3_z(s
, tcg_gen_gvec_usadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
283 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
285 return do_vector3_z(s
, tcg_gen_gvec_ussub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
289 *** SVE Integer Arithmetic - Binary Predicated Group
292 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
294 unsigned vsz
= vec_full_reg_size(s
);
298 if (sve_access_check(s
)) {
299 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
300 vec_full_reg_offset(s
, a
->rn
),
301 vec_full_reg_offset(s
, a
->rm
),
302 pred_full_reg_offset(s
, a
->pg
),
308 #define DO_ZPZZ(NAME, name) \
309 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
312 static gen_helper_gvec_4 * const fns[4] = { \
313 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
314 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
316 return do_zpzz_ool(s, a, fns[a->esz]); \
335 DO_ZPZZ(SMULH
, smulh
)
336 DO_ZPZZ(UMULH
, umulh
)
342 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
344 static gen_helper_gvec_4
* const fns
[4] = {
345 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
347 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
350 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
352 static gen_helper_gvec_4
* const fns
[4] = {
353 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
355 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
361 *** SVE Integer Arithmetic - Unary Predicated Group
364 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
369 if (sve_access_check(s
)) {
370 unsigned vsz
= vec_full_reg_size(s
);
371 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
372 vec_full_reg_offset(s
, a
->rn
),
373 pred_full_reg_offset(s
, a
->pg
),
379 #define DO_ZPZ(NAME, name) \
380 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
382 static gen_helper_gvec_3 * const fns[4] = { \
383 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
384 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
386 return do_zpz_ool(s, a, fns[a->esz]); \
391 DO_ZPZ(CNT_zpz
, cnt_zpz
)
393 DO_ZPZ(NOT_zpz
, not_zpz
)
397 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
399 static gen_helper_gvec_3
* const fns
[4] = {
401 gen_helper_sve_fabs_h
,
402 gen_helper_sve_fabs_s
,
403 gen_helper_sve_fabs_d
405 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
408 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
410 static gen_helper_gvec_3
* const fns
[4] = {
412 gen_helper_sve_fneg_h
,
413 gen_helper_sve_fneg_s
,
414 gen_helper_sve_fneg_d
416 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
419 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
421 static gen_helper_gvec_3
* const fns
[4] = {
423 gen_helper_sve_sxtb_h
,
424 gen_helper_sve_sxtb_s
,
425 gen_helper_sve_sxtb_d
427 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
430 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
432 static gen_helper_gvec_3
* const fns
[4] = {
434 gen_helper_sve_uxtb_h
,
435 gen_helper_sve_uxtb_s
,
436 gen_helper_sve_uxtb_d
438 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
441 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
443 static gen_helper_gvec_3
* const fns
[4] = {
445 gen_helper_sve_sxth_s
,
446 gen_helper_sve_sxth_d
448 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
451 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
453 static gen_helper_gvec_3
* const fns
[4] = {
455 gen_helper_sve_uxth_s
,
456 gen_helper_sve_uxth_d
458 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
461 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
463 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
466 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
468 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
474 *** SVE Integer Reduction Group
477 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
478 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
479 gen_helper_gvec_reduc
*fn
)
481 unsigned vsz
= vec_full_reg_size(s
);
489 if (!sve_access_check(s
)) {
493 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
494 temp
= tcg_temp_new_i64();
495 t_zn
= tcg_temp_new_ptr();
496 t_pg
= tcg_temp_new_ptr();
498 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
499 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
500 fn(temp
, t_zn
, t_pg
, desc
);
501 tcg_temp_free_ptr(t_zn
);
502 tcg_temp_free_ptr(t_pg
);
503 tcg_temp_free_i32(desc
);
505 write_fp_dreg(s
, a
->rd
, temp
);
506 tcg_temp_free_i64(temp
);
/* Expand a trans_* function for an SVE reduction with helpers
 * for all four element sizes.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}
530 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
532 static gen_helper_gvec_reduc
* const fns
[4] = {
533 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
534 gen_helper_sve_saddv_s
, NULL
536 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
542 *** SVE Shift by Immediate - Predicated Group
545 /* Store zero into every active element of Zd. We will use this for two
546 * and three-operand predicated instructions for which logic dictates a
549 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
551 static gen_helper_gvec_2
* const fns
[4] = {
552 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
553 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
555 if (sve_access_check(s
)) {
556 unsigned vsz
= vec_full_reg_size(s
);
557 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
558 pred_full_reg_offset(s
, pg
),
559 vsz
, vsz
, 0, fns
[esz
]);
564 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
565 gen_helper_gvec_3
*fn
)
567 if (sve_access_check(s
)) {
568 unsigned vsz
= vec_full_reg_size(s
);
569 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
570 vec_full_reg_offset(s
, a
->rn
),
571 pred_full_reg_offset(s
, a
->pg
),
572 vsz
, vsz
, a
->imm
, fn
);
577 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
579 static gen_helper_gvec_3
* const fns
[4] = {
580 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
581 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
584 /* Invalid tsz encoding -- see tszimm_esz. */
587 /* Shift by element size is architecturally valid. For
588 arithmetic right-shift, it's the same as by one less. */
589 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
590 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
593 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
595 static gen_helper_gvec_3
* const fns
[4] = {
596 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
597 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
602 /* Shift by element size is architecturally valid.
603 For logical shifts, it is a zeroing operation. */
604 if (a
->imm
>= (8 << a
->esz
)) {
605 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
607 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
611 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
613 static gen_helper_gvec_3
* const fns
[4] = {
614 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
615 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
620 /* Shift by element size is architecturally valid.
621 For logical shifts, it is a zeroing operation. */
622 if (a
->imm
>= (8 << a
->esz
)) {
623 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
625 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
629 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
631 static gen_helper_gvec_3
* const fns
[4] = {
632 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
633 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
638 /* Shift by element size is architecturally valid. For arithmetic
639 right shift for division, it is a zeroing operation. */
640 if (a
->imm
>= (8 << a
->esz
)) {
641 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
643 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
648 *** SVE Bitwise Shift - Predicated Group
/* Expand a trans_* function for a predicated shift by a wide (64-bit)
 * shift-count vector; only b/h/s element sizes are allocated.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
672 *** SVE Integer Multiply-Add Group
675 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
676 gen_helper_gvec_5
*fn
)
678 if (sve_access_check(s
)) {
679 unsigned vsz
= vec_full_reg_size(s
);
680 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
681 vec_full_reg_offset(s
, a
->ra
),
682 vec_full_reg_offset(s
, a
->rn
),
683 vec_full_reg_offset(s
, a
->rm
),
684 pred_full_reg_offset(s
, a
->pg
),
/* Expand a trans_* function for a predicated multiply-accumulate insn
 * with helpers for all four element sizes.
 */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
706 *** SVE Predicate Logical Operations Group
709 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
710 const GVecGen4
*gvec_op
)
712 if (!sve_access_check(s
)) {
716 unsigned psz
= pred_gvec_reg_size(s
);
717 int dofs
= pred_full_reg_offset(s
, a
->rd
);
718 int nofs
= pred_full_reg_offset(s
, a
->rn
);
719 int mofs
= pred_full_reg_offset(s
, a
->rm
);
720 int gofs
= pred_full_reg_offset(s
, a
->pg
);
723 /* Do the operation and the flags generation in temps. */
724 TCGv_i64 pd
= tcg_temp_new_i64();
725 TCGv_i64 pn
= tcg_temp_new_i64();
726 TCGv_i64 pm
= tcg_temp_new_i64();
727 TCGv_i64 pg
= tcg_temp_new_i64();
729 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
730 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
731 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
733 gvec_op
->fni8(pd
, pn
, pm
, pg
);
734 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
736 do_predtest1(pd
, pg
);
738 tcg_temp_free_i64(pd
);
739 tcg_temp_free_i64(pn
);
740 tcg_temp_free_i64(pm
);
741 tcg_temp_free_i64(pg
);
743 /* The operation and flags generation is large. The computation
744 * of the flags depends on the original contents of the guarding
745 * predicate. If the destination overwrites the guarding predicate,
746 * then the easiest way to get this right is to save a copy.
749 if (a
->rd
== a
->pg
) {
750 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
751 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
754 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
755 do_predtest(s
, dofs
, tofs
, psz
/ 8);
760 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
762 tcg_gen_and_i64(pd
, pn
, pm
);
763 tcg_gen_and_i64(pd
, pd
, pg
);
766 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
767 TCGv_vec pm
, TCGv_vec pg
)
769 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
770 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
773 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
775 static const GVecGen4 op
= {
776 .fni8
= gen_and_pg_i64
,
777 .fniv
= gen_and_pg_vec
,
778 .fno
= gen_helper_sve_and_pppp
,
779 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
782 return do_pppp_flags(s
, a
, &op
);
783 } else if (a
->rn
== a
->rm
) {
784 if (a
->pg
== a
->rn
) {
785 return do_mov_p(s
, a
->rd
, a
->rn
);
787 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->pg
);
789 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
790 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
792 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
796 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
798 tcg_gen_andc_i64(pd
, pn
, pm
);
799 tcg_gen_and_i64(pd
, pd
, pg
);
802 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
803 TCGv_vec pm
, TCGv_vec pg
)
805 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
806 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
809 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
811 static const GVecGen4 op
= {
812 .fni8
= gen_bic_pg_i64
,
813 .fniv
= gen_bic_pg_vec
,
814 .fno
= gen_helper_sve_bic_pppp
,
815 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
818 return do_pppp_flags(s
, a
, &op
);
819 } else if (a
->pg
== a
->rn
) {
820 return do_vector3_p(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
822 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
826 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
828 tcg_gen_xor_i64(pd
, pn
, pm
);
829 tcg_gen_and_i64(pd
, pd
, pg
);
832 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
833 TCGv_vec pm
, TCGv_vec pg
)
835 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
836 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
839 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
841 static const GVecGen4 op
= {
842 .fni8
= gen_eor_pg_i64
,
843 .fniv
= gen_eor_pg_vec
,
844 .fno
= gen_helper_sve_eor_pppp
,
845 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
848 return do_pppp_flags(s
, a
, &op
);
850 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
854 static void gen_sel_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
856 tcg_gen_and_i64(pn
, pn
, pg
);
857 tcg_gen_andc_i64(pm
, pm
, pg
);
858 tcg_gen_or_i64(pd
, pn
, pm
);
861 static void gen_sel_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
862 TCGv_vec pm
, TCGv_vec pg
)
864 tcg_gen_and_vec(vece
, pn
, pn
, pg
);
865 tcg_gen_andc_vec(vece
, pm
, pm
, pg
);
866 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
869 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
871 static const GVecGen4 op
= {
872 .fni8
= gen_sel_pg_i64
,
873 .fniv
= gen_sel_pg_vec
,
874 .fno
= gen_helper_sve_sel_pppp
,
875 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
880 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
884 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
886 tcg_gen_or_i64(pd
, pn
, pm
);
887 tcg_gen_and_i64(pd
, pd
, pg
);
890 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
891 TCGv_vec pm
, TCGv_vec pg
)
893 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
894 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
897 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
899 static const GVecGen4 op
= {
900 .fni8
= gen_orr_pg_i64
,
901 .fniv
= gen_orr_pg_vec
,
902 .fno
= gen_helper_sve_orr_pppp
,
903 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
906 return do_pppp_flags(s
, a
, &op
);
907 } else if (a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
908 return do_mov_p(s
, a
->rd
, a
->rn
);
910 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
914 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
916 tcg_gen_orc_i64(pd
, pn
, pm
);
917 tcg_gen_and_i64(pd
, pd
, pg
);
920 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
921 TCGv_vec pm
, TCGv_vec pg
)
923 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
924 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
927 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
929 static const GVecGen4 op
= {
930 .fni8
= gen_orn_pg_i64
,
931 .fniv
= gen_orn_pg_vec
,
932 .fno
= gen_helper_sve_orn_pppp
,
933 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
936 return do_pppp_flags(s
, a
, &op
);
938 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
942 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
944 tcg_gen_or_i64(pd
, pn
, pm
);
945 tcg_gen_andc_i64(pd
, pg
, pd
);
948 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
949 TCGv_vec pm
, TCGv_vec pg
)
951 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
952 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
955 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
957 static const GVecGen4 op
= {
958 .fni8
= gen_nor_pg_i64
,
959 .fniv
= gen_nor_pg_vec
,
960 .fno
= gen_helper_sve_nor_pppp
,
961 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
964 return do_pppp_flags(s
, a
, &op
);
966 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
970 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
972 tcg_gen_and_i64(pd
, pn
, pm
);
973 tcg_gen_andc_i64(pd
, pg
, pd
);
976 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
977 TCGv_vec pm
, TCGv_vec pg
)
979 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
980 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
983 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
985 static const GVecGen4 op
= {
986 .fni8
= gen_nand_pg_i64
,
987 .fniv
= gen_nand_pg_vec
,
988 .fno
= gen_helper_sve_nand_pppp
,
989 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
992 return do_pppp_flags(s
, a
, &op
);
994 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
999 *** SVE Predicate Misc Group
1002 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
, uint32_t insn
)
1004 if (sve_access_check(s
)) {
1005 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1006 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1007 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1010 TCGv_i64 pn
= tcg_temp_new_i64();
1011 TCGv_i64 pg
= tcg_temp_new_i64();
1013 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1014 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1015 do_predtest1(pn
, pg
);
1017 tcg_temp_free_i64(pn
);
1018 tcg_temp_free_i64(pg
);
1020 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
1064 /* This handles all of the predicate initialization instructions,
1065 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1066 * so that decode_pred_count returns 0. For SETFFR, we will have
1067 * set RD == 16 == FFR.
1069 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1071 if (!sve_access_check(s
)) {
1075 unsigned fullsz
= vec_full_reg_size(s
);
1076 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1077 unsigned numelem
, setsz
, i
;
1078 uint64_t word
, lastword
;
1081 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1083 /* Determine what we must store into each bit, and how many. */
1085 lastword
= word
= 0;
1088 setsz
= numelem
<< esz
;
1089 lastword
= word
= pred_esz_masks
[esz
];
1091 lastword
&= ~(-1ull << (setsz
% 64));
1095 t
= tcg_temp_new_i64();
1097 tcg_gen_movi_i64(t
, lastword
);
1098 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1102 if (word
== lastword
) {
1103 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1104 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1106 if (oprsz
* 8 == setsz
) {
1107 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1110 if (oprsz
* 8 == setsz
+ 8) {
1111 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1112 tcg_gen_movi_i64(t
, 0);
1113 tcg_gen_st_i64(t
, cpu_env
, ofs
+ oprsz
- 8);
1121 tcg_gen_movi_i64(t
, word
);
1122 for (i
= 0; i
< setsz
; i
+= 8) {
1123 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1125 if (lastword
!= word
) {
1126 tcg_gen_movi_i64(t
, lastword
);
1127 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1131 tcg_gen_movi_i64(t
, 0);
1132 for (; i
< fullsz
; i
+= 8) {
1133 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1138 tcg_temp_free_i64(t
);
1142 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1143 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1144 tcg_gen_movi_i32(cpu_VF
, 0);
1145 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1150 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
, uint32_t insn
)
1152 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1155 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
, uint32_t insn
)
1157 /* Note pat == 31 is #all, to set all elements. */
1158 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1161 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
, uint32_t insn
)
1163 /* Note pat == 32 is #unimp, to set no elements. */
1164 return do_predset(s
, 0, a
->rd
, 32, false);
1167 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
, uint32_t insn
)
1169 /* The path through do_pppp_flags is complicated enough to want to avoid
1170 * duplication. Frob the arguments into the form of a predicated AND.
1172 arg_rprr_s alt_a
= {
1173 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1174 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1176 return trans_AND_pppp(s
, &alt_a
, insn
);
1179 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
, uint32_t insn
)
1181 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1184 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
, uint32_t insn
)
1186 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1189 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1190 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1191 TCGv_ptr
, TCGv_i32
))
1193 if (!sve_access_check(s
)) {
1197 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1198 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1202 desc
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1203 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
1205 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1206 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1207 t
= tcg_const_i32(desc
);
1209 gen_fn(t
, t_pd
, t_pg
, t
);
1210 tcg_temp_free_ptr(t_pd
);
1211 tcg_temp_free_ptr(t_pg
);
1214 tcg_temp_free_i32(t
);
1218 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1220 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1223 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1225 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
1229 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
1232 /* Subroutine loading a vector register at VOFS of LEN bytes.
1233 * The load should begin at the address Rn + IMM.
1236 static void do_ldr(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
1239 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
1240 uint32_t len_remain
= len
% 8;
1241 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
1242 int midx
= get_mem_index(s
);
1243 TCGv_i64 addr
, t0
, t1
;
1245 addr
= tcg_temp_new_i64();
1246 t0
= tcg_temp_new_i64();
1248 /* Note that unpredicated load/store of vector/predicate registers
1249 * are defined as a stream of bytes, which equates to little-endian
1250 * operations on larger quantities. There is no nice way to force
1251 * a little-endian load for aarch64_be-linux-user out of line.
1253 * Attempt to keep code expansion to a minimum by limiting the
1254 * amount of unrolling done.
1259 for (i
= 0; i
< len_align
; i
+= 8) {
1260 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
1261 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
1262 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ i
);
1265 TCGLabel
*loop
= gen_new_label();
1266 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
1268 gen_set_label(loop
);
1270 /* Minimize the number of local temps that must be re-read from
1271 * the stack each iteration. Instead, re-compute values other
1272 * than the loop counter.
1274 tp
= tcg_temp_new_ptr();
1275 tcg_gen_addi_ptr(tp
, i
, imm
);
1276 tcg_gen_extu_ptr_i64(addr
, tp
);
1277 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
1279 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
1281 tcg_gen_add_ptr(tp
, cpu_env
, i
);
1282 tcg_gen_addi_ptr(i
, i
, 8);
1283 tcg_gen_st_i64(t0
, tp
, vofs
);
1284 tcg_temp_free_ptr(tp
);
1286 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
1287 tcg_temp_free_ptr(i
);
1290 /* Predicate register loads can be any multiple of 2.
1291 * Note that we still store the entire 64-bit unit into cpu_env.
1294 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
1296 switch (len_remain
) {
1300 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
1304 t1
= tcg_temp_new_i64();
1305 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEUL
);
1306 tcg_gen_addi_i64(addr
, addr
, 4);
1307 tcg_gen_qemu_ld_i64(t1
, addr
, midx
, MO_LEUW
);
1308 tcg_gen_deposit_i64(t0
, t0
, t1
, 32, 32);
1309 tcg_temp_free_i64(t1
);
1313 g_assert_not_reached();
1315 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ len_align
);
1317 tcg_temp_free_i64(addr
);
1318 tcg_temp_free_i64(t0
);
1321 static bool trans_LDR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
1323 if (sve_access_check(s
)) {
1324 int size
= vec_full_reg_size(s
);
1325 int off
= vec_full_reg_offset(s
, a
->rd
);
1326 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
1331 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
1333 if (sve_access_check(s
)) {
1334 int size
= pred_full_reg_size(s
);
1335 int off
= pred_full_reg_offset(s
, a
->rd
);
1336 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);