target/arm/translate-sve.c (mirror_qemu.git)
target/arm: Implement SVE Floating Point Multiply Indexed Group
1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
36
37
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
49
50 /*
51 * Helpers for extracting complex instruction fields.
52 */
53
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
57 static int tszimm_esz(int x)
58 {
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61 }
62
63 static int tszimm_shr(int x)
64 {
65 return (16 << tszimm_esz(x)) - x;
66 }
67
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(int x)
70 {
71 return x - (8 << tszimm_esz(x));
72 }
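/* Worked example of the tsz:imm3 encoding handled by the three helpers
 * above: for 16-bit elements tsz is 001x, so the combined field x lies in
 * [16, 31].  tszimm_esz drops imm3 and takes the highest set bit of tsz,
 * giving esz = 1; tszimm_shr maps x to a right-shift count of (32 - x),
 * i.e. 16 down to 1; tszimm_shl maps x to a left-shift count of (x - 16),
 * i.e. 0 to 15.  tsz == 0 has no set bit, hence tszimm_esz returns -1.
 */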
73
74 static inline int plus1(int x)
75 {
76 return x + 1;
77 }
78
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(int x)
81 {
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83 }
84
85 static inline int expand_imm_sh8u(int x)
86 {
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88 }
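/* For example, an encoded immediate of 0x101 expands to 0x100 in both
 * cases, while 0x0ff expands to 0xff unsigned but -1 signed.
 */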
89
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
93 static inline int msz_dtype(int msz)
94 {
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97 }
98
99 /*
100 * Include the generated decoder.
101 */
102
103 #include "decode-sve.inc.c"
104
105 /*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
109 /* Return the offset into CPUARMState of the predicate vector register Pn.
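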
110 * Note for this purpose, FFR is P16.
111 */
112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
113 {
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115 }
116
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext *s)
119 {
120 return s->sve_len >> 3;
121 }
122
123 /* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
127 *
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
130 */
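/* For example, a 6-byte predicate (384-bit VL) is rounded up to 8 bytes,
 * and an 18-byte predicate (1152-bit VL) is rounded up to 32 bytes.
 */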
131 static int size_for_gvec(int size)
132 {
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
137 }
138 }
139
140 static int pred_gvec_reg_size(DisasContext *s)
141 {
142 return size_for_gvec(pred_full_reg_size(s));
143 }
144
145 /* Invoke a vector expander on two Zregs. */
146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
147 int esz, int rd, int rn)
148 {
149 if (sve_access_check(s)) {
150 unsigned vsz = vec_full_reg_size(s);
151 gvec_fn(esz, vec_full_reg_offset(s, rd),
152 vec_full_reg_offset(s, rn), vsz, vsz);
153 }
154 return true;
155 }
156
157 /* Invoke a vector expander on three Zregs. */
158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
159 int esz, int rd, int rn, int rm)
160 {
161 if (sve_access_check(s)) {
162 unsigned vsz = vec_full_reg_size(s);
163 gvec_fn(esz, vec_full_reg_offset(s, rd),
164 vec_full_reg_offset(s, rn),
165 vec_full_reg_offset(s, rm), vsz, vsz);
166 }
167 return true;
168 }
169
170 /* Invoke a vector move on two Zregs. */
171 static bool do_mov_z(DisasContext *s, int rd, int rn)
172 {
173 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
174 }
175
176 /* Initialize a Zreg with replications of a 64-bit immediate. */
177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
178 {
179 unsigned vsz = vec_full_reg_size(s);
180 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
181 }
182
183 /* Invoke a vector expander on two Pregs. */
184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
185 int esz, int rd, int rn)
186 {
187 if (sve_access_check(s)) {
188 unsigned psz = pred_gvec_reg_size(s);
189 gvec_fn(esz, pred_full_reg_offset(s, rd),
190 pred_full_reg_offset(s, rn), psz, psz);
191 }
192 return true;
193 }
194
195 /* Invoke a vector expander on three Pregs. */
196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
197 int esz, int rd, int rn, int rm)
198 {
199 if (sve_access_check(s)) {
200 unsigned psz = pred_gvec_reg_size(s);
201 gvec_fn(esz, pred_full_reg_offset(s, rd),
202 pred_full_reg_offset(s, rn),
203 pred_full_reg_offset(s, rm), psz, psz);
204 }
205 return true;
206 }
207
208 /* Invoke a vector operation on four Pregs. */
209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
210 int rd, int rn, int rm, int rg)
211 {
212 if (sve_access_check(s)) {
213 unsigned psz = pred_gvec_reg_size(s);
214 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
215 pred_full_reg_offset(s, rn),
216 pred_full_reg_offset(s, rm),
217 pred_full_reg_offset(s, rg),
218 psz, psz, gvec_op);
219 }
220 return true;
221 }
222
223 /* Invoke a vector move on two Pregs. */
224 static bool do_mov_p(DisasContext *s, int rd, int rn)
225 {
226 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
227 }
228
229 /* Set the cpu flags as per a return from an SVE helper. */
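/* Concretely, the helper result is interpreted as: N in bit 31, Z set
 * when bit 1 is clear, C in bit 0, and V always clear.
 */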
230 static void do_pred_flags(TCGv_i32 t)
231 {
232 tcg_gen_mov_i32(cpu_NF, t);
233 tcg_gen_andi_i32(cpu_ZF, t, 2);
234 tcg_gen_andi_i32(cpu_CF, t, 1);
235 tcg_gen_movi_i32(cpu_VF, 0);
236 }
237
238 /* Subroutines computing the ARM PredTest pseudofunction. */
239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240 {
241 TCGv_i32 t = tcg_temp_new_i32();
242
243 gen_helper_sve_predtest1(t, d, g);
244 do_pred_flags(t);
245 tcg_temp_free_i32(t);
246 }
247
248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
249 {
250 TCGv_ptr dptr = tcg_temp_new_ptr();
251 TCGv_ptr gptr = tcg_temp_new_ptr();
252 TCGv_i32 t;
253
254 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
255 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
256 t = tcg_const_i32(words);
257
258 gen_helper_sve_predtest(t, dptr, gptr, t);
259 tcg_temp_free_ptr(dptr);
260 tcg_temp_free_ptr(gptr);
261
262 do_pred_flags(t);
263 tcg_temp_free_i32(t);
264 }
265
266 /* For each element size, the bits within a predicate word that are active. */
267 const uint64_t pred_esz_masks[4] = {
268 0xffffffffffffffffull, 0x5555555555555555ull,
269 0x1111111111111111ull, 0x0101010101010101ull
270 };
271
272 /*
273 *** SVE Logical - Unpredicated Group
274 */
275
276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
277 {
278 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
279 }
280
281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
282 {
283 if (a->rn == a->rm) { /* MOV */
284 return do_mov_z(s, a->rd, a->rn);
285 } else {
286 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
287 }
288 }
289
290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
291 {
292 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
293 }
294
295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
296 {
297 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
298 }
299
300 /*
301 *** SVE Integer Arithmetic - Unpredicated Group
302 */
303
304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
305 {
306 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
307 }
308
309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
310 {
311 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
312 }
313
314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
315 {
316 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
317 }
318
319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
320 {
321 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
322 }
323
324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
325 {
326 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
327 }
328
329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
330 {
331 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
332 }
333
334 /*
335 *** SVE Integer Arithmetic - Binary Predicated Group
336 */
337
338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
339 {
340 unsigned vsz = vec_full_reg_size(s);
341 if (fn == NULL) {
342 return false;
343 }
344 if (sve_access_check(s)) {
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
346 vec_full_reg_offset(s, a->rn),
347 vec_full_reg_offset(s, a->rm),
348 pred_full_reg_offset(s, a->pg),
349 vsz, vsz, 0, fn);
350 }
351 return true;
352 }
353
354 #define DO_ZPZZ(NAME, name) \
355 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
356 uint32_t insn) \
357 { \
358 static gen_helper_gvec_4 * const fns[4] = { \
359 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
360 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
361 }; \
362 return do_zpzz_ool(s, a, fns[a->esz]); \
363 }
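/* For example, DO_ZPZZ(ADD, add) defines trans_ADD_zpzz, which selects
 * gen_helper_sve_add_zpzz_{b,h,s,d} according to the element size.
 */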
364
365 DO_ZPZZ(AND, and)
366 DO_ZPZZ(EOR, eor)
367 DO_ZPZZ(ORR, orr)
368 DO_ZPZZ(BIC, bic)
369
370 DO_ZPZZ(ADD, add)
371 DO_ZPZZ(SUB, sub)
372
373 DO_ZPZZ(SMAX, smax)
374 DO_ZPZZ(UMAX, umax)
375 DO_ZPZZ(SMIN, smin)
376 DO_ZPZZ(UMIN, umin)
377 DO_ZPZZ(SABD, sabd)
378 DO_ZPZZ(UABD, uabd)
379
380 DO_ZPZZ(MUL, mul)
381 DO_ZPZZ(SMULH, smulh)
382 DO_ZPZZ(UMULH, umulh)
383
384 DO_ZPZZ(ASR, asr)
385 DO_ZPZZ(LSR, lsr)
386 DO_ZPZZ(LSL, lsl)
387
388 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
389 {
390 static gen_helper_gvec_4 * const fns[4] = {
391 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
392 };
393 return do_zpzz_ool(s, a, fns[a->esz]);
394 }
395
396 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
397 {
398 static gen_helper_gvec_4 * const fns[4] = {
399 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
400 };
401 return do_zpzz_ool(s, a, fns[a->esz]);
402 }
403
404 DO_ZPZZ(SEL, sel)
405
406 #undef DO_ZPZZ
407
408 /*
409 *** SVE Integer Arithmetic - Unary Predicated Group
410 */
411
412 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
413 {
414 if (fn == NULL) {
415 return false;
416 }
417 if (sve_access_check(s)) {
418 unsigned vsz = vec_full_reg_size(s);
419 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
420 vec_full_reg_offset(s, a->rn),
421 pred_full_reg_offset(s, a->pg),
422 vsz, vsz, 0, fn);
423 }
424 return true;
425 }
426
427 #define DO_ZPZ(NAME, name) \
428 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
429 { \
430 static gen_helper_gvec_3 * const fns[4] = { \
431 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
432 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
433 }; \
434 return do_zpz_ool(s, a, fns[a->esz]); \
435 }
436
437 DO_ZPZ(CLS, cls)
438 DO_ZPZ(CLZ, clz)
439 DO_ZPZ(CNT_zpz, cnt_zpz)
440 DO_ZPZ(CNOT, cnot)
441 DO_ZPZ(NOT_zpz, not_zpz)
442 DO_ZPZ(ABS, abs)
443 DO_ZPZ(NEG, neg)
444
445 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
446 {
447 static gen_helper_gvec_3 * const fns[4] = {
448 NULL,
449 gen_helper_sve_fabs_h,
450 gen_helper_sve_fabs_s,
451 gen_helper_sve_fabs_d
452 };
453 return do_zpz_ool(s, a, fns[a->esz]);
454 }
455
456 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
457 {
458 static gen_helper_gvec_3 * const fns[4] = {
459 NULL,
460 gen_helper_sve_fneg_h,
461 gen_helper_sve_fneg_s,
462 gen_helper_sve_fneg_d
463 };
464 return do_zpz_ool(s, a, fns[a->esz]);
465 }
466
467 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
468 {
469 static gen_helper_gvec_3 * const fns[4] = {
470 NULL,
471 gen_helper_sve_sxtb_h,
472 gen_helper_sve_sxtb_s,
473 gen_helper_sve_sxtb_d
474 };
475 return do_zpz_ool(s, a, fns[a->esz]);
476 }
477
478 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
479 {
480 static gen_helper_gvec_3 * const fns[4] = {
481 NULL,
482 gen_helper_sve_uxtb_h,
483 gen_helper_sve_uxtb_s,
484 gen_helper_sve_uxtb_d
485 };
486 return do_zpz_ool(s, a, fns[a->esz]);
487 }
488
489 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
490 {
491 static gen_helper_gvec_3 * const fns[4] = {
492 NULL, NULL,
493 gen_helper_sve_sxth_s,
494 gen_helper_sve_sxth_d
495 };
496 return do_zpz_ool(s, a, fns[a->esz]);
497 }
498
499 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
500 {
501 static gen_helper_gvec_3 * const fns[4] = {
502 NULL, NULL,
503 gen_helper_sve_uxth_s,
504 gen_helper_sve_uxth_d
505 };
506 return do_zpz_ool(s, a, fns[a->esz]);
507 }
508
509 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
510 {
511 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
512 }
513
514 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
515 {
516 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
517 }
518
519 #undef DO_ZPZ
520
521 /*
522 *** SVE Integer Reduction Group
523 */
524
525 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
526 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
527 gen_helper_gvec_reduc *fn)
528 {
529 unsigned vsz = vec_full_reg_size(s);
530 TCGv_ptr t_zn, t_pg;
531 TCGv_i32 desc;
532 TCGv_i64 temp;
533
534 if (fn == NULL) {
535 return false;
536 }
537 if (!sve_access_check(s)) {
538 return true;
539 }
540
541 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
542 temp = tcg_temp_new_i64();
543 t_zn = tcg_temp_new_ptr();
544 t_pg = tcg_temp_new_ptr();
545
546 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
547 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
548 fn(temp, t_zn, t_pg, desc);
549 tcg_temp_free_ptr(t_zn);
550 tcg_temp_free_ptr(t_pg);
551 tcg_temp_free_i32(desc);
552
553 write_fp_dreg(s, a->rd, temp);
554 tcg_temp_free_i64(temp);
555 return true;
556 }
557
558 #define DO_VPZ(NAME, name) \
559 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
560 { \
561 static gen_helper_gvec_reduc * const fns[4] = { \
562 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
563 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
564 }; \
565 return do_vpz_ool(s, a, fns[a->esz]); \
566 }
567
568 DO_VPZ(ORV, orv)
569 DO_VPZ(ANDV, andv)
570 DO_VPZ(EORV, eorv)
571
572 DO_VPZ(UADDV, uaddv)
573 DO_VPZ(SMAXV, smaxv)
574 DO_VPZ(UMAXV, umaxv)
575 DO_VPZ(SMINV, sminv)
576 DO_VPZ(UMINV, uminv)
577
578 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
579 {
580 static gen_helper_gvec_reduc * const fns[4] = {
581 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
582 gen_helper_sve_saddv_s, NULL
583 };
584 return do_vpz_ool(s, a, fns[a->esz]);
585 }
586
587 #undef DO_VPZ
588
589 /*
590 *** SVE Shift by Immediate - Predicated Group
591 */
592
593 /* Store zero into every active element of Zd. We will use this for two
594 * and three-operand predicated instructions for which logic dictates a
595 * zero result.
596 */
597 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
598 {
599 static gen_helper_gvec_2 * const fns[4] = {
600 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
601 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
602 };
603 if (sve_access_check(s)) {
604 unsigned vsz = vec_full_reg_size(s);
605 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
606 pred_full_reg_offset(s, pg),
607 vsz, vsz, 0, fns[esz]);
608 }
609 return true;
610 }
611
612 /* Copy Zn into Zd, storing zeros into inactive elements. */
613 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
614 {
615 static gen_helper_gvec_3 * const fns[4] = {
616 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
617 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
618 };
619 unsigned vsz = vec_full_reg_size(s);
620 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
621 vec_full_reg_offset(s, rn),
622 pred_full_reg_offset(s, pg),
623 vsz, vsz, 0, fns[esz]);
624 }
625
626 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
627 gen_helper_gvec_3 *fn)
628 {
629 if (sve_access_check(s)) {
630 unsigned vsz = vec_full_reg_size(s);
631 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
632 vec_full_reg_offset(s, a->rn),
633 pred_full_reg_offset(s, a->pg),
634 vsz, vsz, a->imm, fn);
635 }
636 return true;
637 }
638
639 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
640 {
641 static gen_helper_gvec_3 * const fns[4] = {
642 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
643 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
644 };
645 if (a->esz < 0) {
646 /* Invalid tsz encoding -- see tszimm_esz. */
647 return false;
648 }
649 /* Shift by element size is architecturally valid. For
650 arithmetic right-shift, it's the same as by one less. */
651 a->imm = MIN(a->imm, (8 << a->esz) - 1);
652 return do_zpzi_ool(s, a, fns[a->esz]);
653 }
654
655 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
656 {
657 static gen_helper_gvec_3 * const fns[4] = {
658 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
659 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
660 };
661 if (a->esz < 0) {
662 return false;
663 }
664 /* Shift by element size is architecturally valid.
665 For logical shifts, it is a zeroing operation. */
666 if (a->imm >= (8 << a->esz)) {
667 return do_clr_zp(s, a->rd, a->pg, a->esz);
668 } else {
669 return do_zpzi_ool(s, a, fns[a->esz]);
670 }
671 }
672
673 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
674 {
675 static gen_helper_gvec_3 * const fns[4] = {
676 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
677 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
678 };
679 if (a->esz < 0) {
680 return false;
681 }
682 /* Shift by element size is architecturally valid.
683 For logical shifts, it is a zeroing operation. */
684 if (a->imm >= (8 << a->esz)) {
685 return do_clr_zp(s, a->rd, a->pg, a->esz);
686 } else {
687 return do_zpzi_ool(s, a, fns[a->esz]);
688 }
689 }
690
691 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
692 {
693 static gen_helper_gvec_3 * const fns[4] = {
694 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
695 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
696 };
697 if (a->esz < 0) {
698 return false;
699 }
700 /* Shift by element size is architecturally valid. For arithmetic
701 right shift for division, it is a zeroing operation. */
702 if (a->imm >= (8 << a->esz)) {
703 return do_clr_zp(s, a->rd, a->pg, a->esz);
704 } else {
705 return do_zpzi_ool(s, a, fns[a->esz]);
706 }
707 }
708
709 /*
710 *** SVE Bitwise Shift - Predicated Group
711 */
712
713 #define DO_ZPZW(NAME, name) \
714 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
715 uint32_t insn) \
716 { \
717 static gen_helper_gvec_4 * const fns[3] = { \
718 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
719 gen_helper_sve_##name##_zpzw_s, \
720 }; \
721 if (a->esz < 0 || a->esz >= 3) { \
722 return false; \
723 } \
724 return do_zpzz_ool(s, a, fns[a->esz]); \
725 }
726
727 DO_ZPZW(ASR, asr)
728 DO_ZPZW(LSR, lsr)
729 DO_ZPZW(LSL, lsl)
730
731 #undef DO_ZPZW
732
733 /*
734 *** SVE Bitwise Shift - Unpredicated Group
735 */
736
737 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
738 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
739 int64_t, uint32_t, uint32_t))
740 {
741 if (a->esz < 0) {
742 /* Invalid tsz encoding -- see tszimm_esz. */
743 return false;
744 }
745 if (sve_access_check(s)) {
746 unsigned vsz = vec_full_reg_size(s);
747 /* Shift by element size is architecturally valid. For
748 arithmetic right-shift, it's the same as by one less.
749 Otherwise it is a zeroing operation. */
750 if (a->imm >= 8 << a->esz) {
751 if (asr) {
752 a->imm = (8 << a->esz) - 1;
753 } else {
754 do_dupi_z(s, a->rd, 0);
755 return true;
756 }
757 }
758 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
759 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
760 }
761 return true;
762 }
763
764 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
765 {
766 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
767 }
768
769 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
770 {
771 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
772 }
773
774 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
775 {
776 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
777 }
778
779 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
780 {
781 if (fn == NULL) {
782 return false;
783 }
784 if (sve_access_check(s)) {
785 unsigned vsz = vec_full_reg_size(s);
786 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
787 vec_full_reg_offset(s, a->rn),
788 vec_full_reg_offset(s, a->rm),
789 vsz, vsz, 0, fn);
790 }
791 return true;
792 }
793
794 #define DO_ZZW(NAME, name) \
795 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
796 uint32_t insn) \
797 { \
798 static gen_helper_gvec_3 * const fns[4] = { \
799 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
800 gen_helper_sve_##name##_zzw_s, NULL \
801 }; \
802 return do_zzw_ool(s, a, fns[a->esz]); \
803 }
804
805 DO_ZZW(ASR, asr)
806 DO_ZZW(LSR, lsr)
807 DO_ZZW(LSL, lsl)
808
809 #undef DO_ZZW
810
811 /*
812 *** SVE Integer Multiply-Add Group
813 */
814
815 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
816 gen_helper_gvec_5 *fn)
817 {
818 if (sve_access_check(s)) {
819 unsigned vsz = vec_full_reg_size(s);
820 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
821 vec_full_reg_offset(s, a->ra),
822 vec_full_reg_offset(s, a->rn),
823 vec_full_reg_offset(s, a->rm),
824 pred_full_reg_offset(s, a->pg),
825 vsz, vsz, 0, fn);
826 }
827 return true;
828 }
829
830 #define DO_ZPZZZ(NAME, name) \
831 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
832 { \
833 static gen_helper_gvec_5 * const fns[4] = { \
834 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
835 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
836 }; \
837 return do_zpzzz_ool(s, a, fns[a->esz]); \
838 }
839
840 DO_ZPZZZ(MLA, mla)
841 DO_ZPZZZ(MLS, mls)
842
843 #undef DO_ZPZZZ
844
845 /*
846 *** SVE Index Generation Group
847 */
848
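/* INDEX writes start + i * incr into element i of Zd; for example
 * INDEX Z0.S, #1, #2 fills Z0 with 1, 3, 5, ...
 */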
849 static void do_index(DisasContext *s, int esz, int rd,
850 TCGv_i64 start, TCGv_i64 incr)
851 {
852 unsigned vsz = vec_full_reg_size(s);
853 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
854 TCGv_ptr t_zd = tcg_temp_new_ptr();
855
856 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
857 if (esz == 3) {
858 gen_helper_sve_index_d(t_zd, start, incr, desc);
859 } else {
860 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
861 static index_fn * const fns[3] = {
862 gen_helper_sve_index_b,
863 gen_helper_sve_index_h,
864 gen_helper_sve_index_s,
865 };
866 TCGv_i32 s32 = tcg_temp_new_i32();
867 TCGv_i32 i32 = tcg_temp_new_i32();
868
869 tcg_gen_extrl_i64_i32(s32, start);
870 tcg_gen_extrl_i64_i32(i32, incr);
871 fns[esz](t_zd, s32, i32, desc);
872
873 tcg_temp_free_i32(s32);
874 tcg_temp_free_i32(i32);
875 }
876 tcg_temp_free_ptr(t_zd);
877 tcg_temp_free_i32(desc);
878 }
879
880 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
881 {
882 if (sve_access_check(s)) {
883 TCGv_i64 start = tcg_const_i64(a->imm1);
884 TCGv_i64 incr = tcg_const_i64(a->imm2);
885 do_index(s, a->esz, a->rd, start, incr);
886 tcg_temp_free_i64(start);
887 tcg_temp_free_i64(incr);
888 }
889 return true;
890 }
891
892 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
893 {
894 if (sve_access_check(s)) {
895 TCGv_i64 start = tcg_const_i64(a->imm);
896 TCGv_i64 incr = cpu_reg(s, a->rm);
897 do_index(s, a->esz, a->rd, start, incr);
898 tcg_temp_free_i64(start);
899 }
900 return true;
901 }
902
903 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
904 {
905 if (sve_access_check(s)) {
906 TCGv_i64 start = cpu_reg(s, a->rn);
907 TCGv_i64 incr = tcg_const_i64(a->imm);
908 do_index(s, a->esz, a->rd, start, incr);
909 tcg_temp_free_i64(incr);
910 }
911 return true;
912 }
913
914 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
915 {
916 if (sve_access_check(s)) {
917 TCGv_i64 start = cpu_reg(s, a->rn);
918 TCGv_i64 incr = cpu_reg(s, a->rm);
919 do_index(s, a->esz, a->rd, start, incr);
920 }
921 return true;
922 }
923
924 /*
925 *** SVE Stack Allocation Group
926 */
927
928 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
929 {
930 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
931 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
932 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
933 return true;
934 }
935
936 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
937 {
938 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
939 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
940 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
941 return true;
942 }
943
944 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
945 {
946 TCGv_i64 reg = cpu_reg(s, a->rd);
947 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
948 return true;
949 }
950
951 /*
952 *** SVE Compute Vector Address Group
953 */
954
955 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
956 {
957 if (sve_access_check(s)) {
958 unsigned vsz = vec_full_reg_size(s);
959 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
960 vec_full_reg_offset(s, a->rn),
961 vec_full_reg_offset(s, a->rm),
962 vsz, vsz, a->imm, fn);
963 }
964 return true;
965 }
966
967 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
968 {
969 return do_adr(s, a, gen_helper_sve_adr_p32);
970 }
971
972 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
973 {
974 return do_adr(s, a, gen_helper_sve_adr_p64);
975 }
976
977 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
978 {
979 return do_adr(s, a, gen_helper_sve_adr_s32);
980 }
981
982 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
983 {
984 return do_adr(s, a, gen_helper_sve_adr_u32);
985 }
986
987 /*
988 *** SVE Integer Misc - Unpredicated Group
989 */
990
991 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
992 {
993 static gen_helper_gvec_2 * const fns[4] = {
994 NULL,
995 gen_helper_sve_fexpa_h,
996 gen_helper_sve_fexpa_s,
997 gen_helper_sve_fexpa_d,
998 };
999 if (a->esz == 0) {
1000 return false;
1001 }
1002 if (sve_access_check(s)) {
1003 unsigned vsz = vec_full_reg_size(s);
1004 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1005 vec_full_reg_offset(s, a->rn),
1006 vsz, vsz, 0, fns[a->esz]);
1007 }
1008 return true;
1009 }
1010
1011 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1012 {
1013 static gen_helper_gvec_3 * const fns[4] = {
1014 NULL,
1015 gen_helper_sve_ftssel_h,
1016 gen_helper_sve_ftssel_s,
1017 gen_helper_sve_ftssel_d,
1018 };
1019 if (a->esz == 0) {
1020 return false;
1021 }
1022 if (sve_access_check(s)) {
1023 unsigned vsz = vec_full_reg_size(s);
1024 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1025 vec_full_reg_offset(s, a->rn),
1026 vec_full_reg_offset(s, a->rm),
1027 vsz, vsz, 0, fns[a->esz]);
1028 }
1029 return true;
1030 }
1031
1032 /*
1033 *** SVE Predicate Logical Operations Group
1034 */
1035
1036 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1037 const GVecGen4 *gvec_op)
1038 {
1039 if (!sve_access_check(s)) {
1040 return true;
1041 }
1042
1043 unsigned psz = pred_gvec_reg_size(s);
1044 int dofs = pred_full_reg_offset(s, a->rd);
1045 int nofs = pred_full_reg_offset(s, a->rn);
1046 int mofs = pred_full_reg_offset(s, a->rm);
1047 int gofs = pred_full_reg_offset(s, a->pg);
1048
1049 if (psz == 8) {
1050 /* Do the operation and the flags generation in temps. */
1051 TCGv_i64 pd = tcg_temp_new_i64();
1052 TCGv_i64 pn = tcg_temp_new_i64();
1053 TCGv_i64 pm = tcg_temp_new_i64();
1054 TCGv_i64 pg = tcg_temp_new_i64();
1055
1056 tcg_gen_ld_i64(pn, cpu_env, nofs);
1057 tcg_gen_ld_i64(pm, cpu_env, mofs);
1058 tcg_gen_ld_i64(pg, cpu_env, gofs);
1059
1060 gvec_op->fni8(pd, pn, pm, pg);
1061 tcg_gen_st_i64(pd, cpu_env, dofs);
1062
1063 do_predtest1(pd, pg);
1064
1065 tcg_temp_free_i64(pd);
1066 tcg_temp_free_i64(pn);
1067 tcg_temp_free_i64(pm);
1068 tcg_temp_free_i64(pg);
1069 } else {
1070 /* The operation and flags generation is large. The computation
1071 * of the flags depends on the original contents of the guarding
1072 * predicate. If the destination overwrites the guarding predicate,
1073 * then the easiest way to get this right is to save a copy.
1074 */
1075 int tofs = gofs;
1076 if (a->rd == a->pg) {
1077 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1078 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1079 }
1080
1081 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1082 do_predtest(s, dofs, tofs, psz / 8);
1083 }
1084 return true;
1085 }
1086
1087 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1088 {
1089 tcg_gen_and_i64(pd, pn, pm);
1090 tcg_gen_and_i64(pd, pd, pg);
1091 }
1092
1093 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1094 TCGv_vec pm, TCGv_vec pg)
1095 {
1096 tcg_gen_and_vec(vece, pd, pn, pm);
1097 tcg_gen_and_vec(vece, pd, pd, pg);
1098 }
1099
1100 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1101 {
1102 static const GVecGen4 op = {
1103 .fni8 = gen_and_pg_i64,
1104 .fniv = gen_and_pg_vec,
1105 .fno = gen_helper_sve_and_pppp,
1106 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1107 };
1108 if (a->s) {
1109 return do_pppp_flags(s, a, &op);
1110 } else if (a->rn == a->rm) {
1111 if (a->pg == a->rn) {
1112 return do_mov_p(s, a->rd, a->rn);
1113 } else {
1114 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1115 }
1116 } else if (a->pg == a->rn || a->pg == a->rm) {
1117 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1118 } else {
1119 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1120 }
1121 }
1122
1123 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1124 {
1125 tcg_gen_andc_i64(pd, pn, pm);
1126 tcg_gen_and_i64(pd, pd, pg);
1127 }
1128
1129 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1130 TCGv_vec pm, TCGv_vec pg)
1131 {
1132 tcg_gen_andc_vec(vece, pd, pn, pm);
1133 tcg_gen_and_vec(vece, pd, pd, pg);
1134 }
1135
1136 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1137 {
1138 static const GVecGen4 op = {
1139 .fni8 = gen_bic_pg_i64,
1140 .fniv = gen_bic_pg_vec,
1141 .fno = gen_helper_sve_bic_pppp,
1142 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1143 };
1144 if (a->s) {
1145 return do_pppp_flags(s, a, &op);
1146 } else if (a->pg == a->rn) {
1147 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1148 } else {
1149 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1150 }
1151 }
1152
1153 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1154 {
1155 tcg_gen_xor_i64(pd, pn, pm);
1156 tcg_gen_and_i64(pd, pd, pg);
1157 }
1158
1159 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1160 TCGv_vec pm, TCGv_vec pg)
1161 {
1162 tcg_gen_xor_vec(vece, pd, pn, pm);
1163 tcg_gen_and_vec(vece, pd, pd, pg);
1164 }
1165
1166 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1167 {
1168 static const GVecGen4 op = {
1169 .fni8 = gen_eor_pg_i64,
1170 .fniv = gen_eor_pg_vec,
1171 .fno = gen_helper_sve_eor_pppp,
1172 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1173 };
1174 if (a->s) {
1175 return do_pppp_flags(s, a, &op);
1176 } else {
1177 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1178 }
1179 }
1180
1181 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1182 {
1183 tcg_gen_and_i64(pn, pn, pg);
1184 tcg_gen_andc_i64(pm, pm, pg);
1185 tcg_gen_or_i64(pd, pn, pm);
1186 }
1187
1188 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1189 TCGv_vec pm, TCGv_vec pg)
1190 {
1191 tcg_gen_and_vec(vece, pn, pn, pg);
1192 tcg_gen_andc_vec(vece, pm, pm, pg);
1193 tcg_gen_or_vec(vece, pd, pn, pm);
1194 }
1195
1196 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1197 {
1198 static const GVecGen4 op = {
1199 .fni8 = gen_sel_pg_i64,
1200 .fniv = gen_sel_pg_vec,
1201 .fno = gen_helper_sve_sel_pppp,
1202 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1203 };
1204 if (a->s) {
1205 return false;
1206 } else {
1207 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1208 }
1209 }
1210
1211 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1212 {
1213 tcg_gen_or_i64(pd, pn, pm);
1214 tcg_gen_and_i64(pd, pd, pg);
1215 }
1216
1217 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1218 TCGv_vec pm, TCGv_vec pg)
1219 {
1220 tcg_gen_or_vec(vece, pd, pn, pm);
1221 tcg_gen_and_vec(vece, pd, pd, pg);
1222 }
1223
1224 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1225 {
1226 static const GVecGen4 op = {
1227 .fni8 = gen_orr_pg_i64,
1228 .fniv = gen_orr_pg_vec,
1229 .fno = gen_helper_sve_orr_pppp,
1230 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1231 };
1232 if (a->s) {
1233 return do_pppp_flags(s, a, &op);
1234 } else if (a->pg == a->rn && a->rn == a->rm) {
1235 return do_mov_p(s, a->rd, a->rn);
1236 } else {
1237 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1238 }
1239 }
1240
1241 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1242 {
1243 tcg_gen_orc_i64(pd, pn, pm);
1244 tcg_gen_and_i64(pd, pd, pg);
1245 }
1246
1247 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1248 TCGv_vec pm, TCGv_vec pg)
1249 {
1250 tcg_gen_orc_vec(vece, pd, pn, pm);
1251 tcg_gen_and_vec(vece, pd, pd, pg);
1252 }
1253
1254 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1255 {
1256 static const GVecGen4 op = {
1257 .fni8 = gen_orn_pg_i64,
1258 .fniv = gen_orn_pg_vec,
1259 .fno = gen_helper_sve_orn_pppp,
1260 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1261 };
1262 if (a->s) {
1263 return do_pppp_flags(s, a, &op);
1264 } else {
1265 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1266 }
1267 }
1268
1269 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1270 {
1271 tcg_gen_or_i64(pd, pn, pm);
1272 tcg_gen_andc_i64(pd, pg, pd);
1273 }
1274
1275 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1276 TCGv_vec pm, TCGv_vec pg)
1277 {
1278 tcg_gen_or_vec(vece, pd, pn, pm);
1279 tcg_gen_andc_vec(vece, pd, pg, pd);
1280 }
1281
1282 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1283 {
1284 static const GVecGen4 op = {
1285 .fni8 = gen_nor_pg_i64,
1286 .fniv = gen_nor_pg_vec,
1287 .fno = gen_helper_sve_nor_pppp,
1288 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1289 };
1290 if (a->s) {
1291 return do_pppp_flags(s, a, &op);
1292 } else {
1293 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1294 }
1295 }
1296
1297 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1298 {
1299 tcg_gen_and_i64(pd, pn, pm);
1300 tcg_gen_andc_i64(pd, pg, pd);
1301 }
1302
1303 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1304 TCGv_vec pm, TCGv_vec pg)
1305 {
1306 tcg_gen_and_vec(vece, pd, pn, pm);
1307 tcg_gen_andc_vec(vece, pd, pg, pd);
1308 }
1309
1310 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1311 {
1312 static const GVecGen4 op = {
1313 .fni8 = gen_nand_pg_i64,
1314 .fniv = gen_nand_pg_vec,
1315 .fno = gen_helper_sve_nand_pppp,
1316 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1317 };
1318 if (a->s) {
1319 return do_pppp_flags(s, a, &op);
1320 } else {
1321 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1322 }
1323 }
1324
1325 /*
1326 *** SVE Predicate Misc Group
1327 */
1328
1329 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1330 {
1331 if (sve_access_check(s)) {
1332 int nofs = pred_full_reg_offset(s, a->rn);
1333 int gofs = pred_full_reg_offset(s, a->pg);
1334 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1335
1336 if (words == 1) {
1337 TCGv_i64 pn = tcg_temp_new_i64();
1338 TCGv_i64 pg = tcg_temp_new_i64();
1339
1340 tcg_gen_ld_i64(pn, cpu_env, nofs);
1341 tcg_gen_ld_i64(pg, cpu_env, gofs);
1342 do_predtest1(pn, pg);
1343
1344 tcg_temp_free_i64(pn);
1345 tcg_temp_free_i64(pg);
1346 } else {
1347 do_predtest(s, nofs, gofs, words);
1348 }
1349 }
1350 return true;
1351 }
1352
1353 /* See the ARM pseudocode DecodePredCount. */
1354 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1355 {
1356 unsigned elements = fullsz >> esz;
1357 unsigned bound;
1358
1359 switch (pattern) {
1360 case 0x0: /* POW2 */
1361 return pow2floor(elements);
1362 case 0x1: /* VL1 */
1363 case 0x2: /* VL2 */
1364 case 0x3: /* VL3 */
1365 case 0x4: /* VL4 */
1366 case 0x5: /* VL5 */
1367 case 0x6: /* VL6 */
1368 case 0x7: /* VL7 */
1369 case 0x8: /* VL8 */
1370 bound = pattern;
1371 break;
1372 case 0x9: /* VL16 */
1373 case 0xa: /* VL32 */
1374 case 0xb: /* VL64 */
1375 case 0xc: /* VL128 */
1376 case 0xd: /* VL256 */
1377 bound = 16 << (pattern - 9);
1378 break;
1379 case 0x1d: /* MUL4 */
1380 return elements - elements % 4;
1381 case 0x1e: /* MUL3 */
1382 return elements - elements % 3;
1383 case 0x1f: /* ALL */
1384 return elements;
1385 default: /* #uimm5 */
1386 return 0;
1387 }
1388 return elements >= bound ? bound : 0;
1389 }
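/* For example, with a 256-bit vector and 32-bit elements there are 8
 * elements: POW2 and ALL return 8, VL7 returns 7, VL16 returns 0 (the
 * bound exceeds the element count), MUL3 returns 6, and an unassigned
 * pattern (#uimm5) returns 0.
 */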
1390
1391 /* This handles all of the predicate initialization instructions,
1392 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1393 * so that decode_pred_count returns 0. For SETFFR, we will have
1394 * set RD == 16 == FFR.
1395 */
1396 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1397 {
1398 if (!sve_access_check(s)) {
1399 return true;
1400 }
1401
1402 unsigned fullsz = vec_full_reg_size(s);
1403 unsigned ofs = pred_full_reg_offset(s, rd);
1404 unsigned numelem, setsz, i;
1405 uint64_t word, lastword;
1406 TCGv_i64 t;
1407
1408 numelem = decode_pred_count(fullsz, pat, esz);
1409
1410 /* Determine what we must store into each bit, and how many. */
1411 if (numelem == 0) {
1412 lastword = word = 0;
1413 setsz = fullsz;
1414 } else {
1415 setsz = numelem << esz;
1416 lastword = word = pred_esz_masks[esz];
1417 if (setsz % 64) {
1418 lastword &= ~(-1ull << (setsz % 64));
1419 }
1420 }
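    /* For example, PTRUE P0.S, VL3 with a 32-byte vector gives numelem = 3,
     * so setsz is 12 predicate bits; word is 0x1111111111111111 and
     * lastword keeps only its low 12 bits (0x111), the bits above them
     * being written as zero.
     */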
1421
1422 t = tcg_temp_new_i64();
1423 if (fullsz <= 64) {
1424 tcg_gen_movi_i64(t, lastword);
1425 tcg_gen_st_i64(t, cpu_env, ofs);
1426 goto done;
1427 }
1428
1429 if (word == lastword) {
1430 unsigned maxsz = size_for_gvec(fullsz / 8);
1431 unsigned oprsz = size_for_gvec(setsz / 8);
1432
1433 if (oprsz * 8 == setsz) {
1434 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1435 goto done;
1436 }
1437 if (oprsz * 8 == setsz + 8) {
1438 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1439 tcg_gen_movi_i64(t, 0);
1440 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1441 goto done;
1442 }
1443 }
1444
1445 setsz /= 8;
1446 fullsz /= 8;
1447
1448 tcg_gen_movi_i64(t, word);
1449 for (i = 0; i < setsz; i += 8) {
1450 tcg_gen_st_i64(t, cpu_env, ofs + i);
1451 }
1452 if (lastword != word) {
1453 tcg_gen_movi_i64(t, lastword);
1454 tcg_gen_st_i64(t, cpu_env, ofs + i);
1455 i += 8;
1456 }
1457 if (i < fullsz) {
1458 tcg_gen_movi_i64(t, 0);
1459 for (; i < fullsz; i += 8) {
1460 tcg_gen_st_i64(t, cpu_env, ofs + i);
1461 }
1462 }
1463
1464 done:
1465 tcg_temp_free_i64(t);
1466
1467 /* PTRUES */
1468 if (setflag) {
1469 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1470 tcg_gen_movi_i32(cpu_CF, word == 0);
1471 tcg_gen_movi_i32(cpu_VF, 0);
1472 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1473 }
1474 return true;
1475 }
1476
1477 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1478 {
1479 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1480 }
1481
1482 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1483 {
1484 /* Note pat == 31 is #all, to set all elements. */
1485 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1486 }
1487
1488 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1489 {
1490 /* Note pat == 32 is #unimp, to set no elements. */
1491 return do_predset(s, 0, a->rd, 32, false);
1492 }
1493
1494 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1495 {
1496 /* The path through do_pppp_flags is complicated enough to want to avoid
1497 * duplication. Frob the arguments into the form of a predicated AND.
1498 */
1499 arg_rprr_s alt_a = {
1500 .rd = a->rd, .pg = a->pg, .s = a->s,
1501 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1502 };
1503 return trans_AND_pppp(s, &alt_a, insn);
1504 }
1505
1506 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1507 {
1508 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1509 }
1510
1511 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1512 {
1513 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1514 }
1515
1516 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1517 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1518 TCGv_ptr, TCGv_i32))
1519 {
1520 if (!sve_access_check(s)) {
1521 return true;
1522 }
1523
1524 TCGv_ptr t_pd = tcg_temp_new_ptr();
1525 TCGv_ptr t_pg = tcg_temp_new_ptr();
1526 TCGv_i32 t;
1527 unsigned desc;
1528
1529 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1530 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1531
1532 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1533 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1534 t = tcg_const_i32(desc);
1535
1536 gen_fn(t, t_pd, t_pg, t);
1537 tcg_temp_free_ptr(t_pd);
1538 tcg_temp_free_ptr(t_pg);
1539
1540 do_pred_flags(t);
1541 tcg_temp_free_i32(t);
1542 return true;
1543 }
1544
1545 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1546 {
1547 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1548 }
1549
1550 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1551 {
1552 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1553 }
1554
1555 /*
1556 *** SVE Element Count Group
1557 */
1558
1559 /* Perform an inline saturating addition of a 32-bit value within
1560 * a 64-bit register. The second operand is known to be positive,
1561 * which halves the comparisons we must perform to bound the result.
1562 */
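/* For example, an unsigned increment of UINT32_MAX by 1 produces
 * 0x100000000 in the 64-bit addition, which compares greater than the
 * UINT32_MAX bound and is therefore clamped back to UINT32_MAX.
 */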
1563 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1564 {
1565 int64_t ibound;
1566 TCGv_i64 bound;
1567 TCGCond cond;
1568
1569 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1570 if (u) {
1571 tcg_gen_ext32u_i64(reg, reg);
1572 } else {
1573 tcg_gen_ext32s_i64(reg, reg);
1574 }
1575 if (d) {
1576 tcg_gen_sub_i64(reg, reg, val);
1577 ibound = (u ? 0 : INT32_MIN);
1578 cond = TCG_COND_LT;
1579 } else {
1580 tcg_gen_add_i64(reg, reg, val);
1581 ibound = (u ? UINT32_MAX : INT32_MAX);
1582 cond = TCG_COND_GT;
1583 }
1584 bound = tcg_const_i64(ibound);
1585 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1586 tcg_temp_free_i64(bound);
1587 }
1588
1589 /* Similarly with 64-bit values. */
1590 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1591 {
1592 TCGv_i64 t0 = tcg_temp_new_i64();
1593 TCGv_i64 t1 = tcg_temp_new_i64();
1594 TCGv_i64 t2;
1595
1596 if (u) {
1597 if (d) {
1598 tcg_gen_sub_i64(t0, reg, val);
1599 tcg_gen_movi_i64(t1, 0);
1600 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1601 } else {
1602 tcg_gen_add_i64(t0, reg, val);
1603 tcg_gen_movi_i64(t1, -1);
1604 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1605 }
1606 } else {
1607 if (d) {
1608 /* Detect signed overflow for subtraction. */
1609 tcg_gen_xor_i64(t0, reg, val);
1610 tcg_gen_sub_i64(t1, reg, val);
1611             tcg_gen_xor_i64(reg, reg, t1);
1612 tcg_gen_and_i64(t0, t0, reg);
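            /* The sign bit of t0 is now set iff reg was negative while the
             * raw difference t1 is not, i.e. iff subtracting the (positive)
             * val overflowed.
             */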
1613
1614 /* Bound the result. */
1615 tcg_gen_movi_i64(reg, INT64_MIN);
1616 t2 = tcg_const_i64(0);
1617 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1618 } else {
1619 /* Detect signed overflow for addition. */
1620 tcg_gen_xor_i64(t0, reg, val);
1621 tcg_gen_add_i64(reg, reg, val);
1622 tcg_gen_xor_i64(t1, reg, val);
1623 tcg_gen_andc_i64(t0, t1, t0);
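            /* The sign bit of t0 is now set iff both operands had the same
             * sign and the sum's sign differs from it, i.e. iff the
             * addition overflowed.
             */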
1624
1625 /* Bound the result. */
1626 tcg_gen_movi_i64(t1, INT64_MAX);
1627 t2 = tcg_const_i64(0);
1628 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1629 }
1630 tcg_temp_free_i64(t2);
1631 }
1632 tcg_temp_free_i64(t0);
1633 tcg_temp_free_i64(t1);
1634 }
1635
1636 /* Similarly with a vector and a scalar operand. */
1637 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1638 TCGv_i64 val, bool u, bool d)
1639 {
1640 unsigned vsz = vec_full_reg_size(s);
1641 TCGv_ptr dptr, nptr;
1642 TCGv_i32 t32, desc;
1643 TCGv_i64 t64;
1644
1645 dptr = tcg_temp_new_ptr();
1646 nptr = tcg_temp_new_ptr();
1647 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1648 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1649 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1650
1651 switch (esz) {
1652 case MO_8:
1653 t32 = tcg_temp_new_i32();
1654 tcg_gen_extrl_i64_i32(t32, val);
1655 if (d) {
1656 tcg_gen_neg_i32(t32, t32);
1657 }
1658 if (u) {
1659 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1660 } else {
1661 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1662 }
1663 tcg_temp_free_i32(t32);
1664 break;
1665
1666 case MO_16:
1667 t32 = tcg_temp_new_i32();
1668 tcg_gen_extrl_i64_i32(t32, val);
1669 if (d) {
1670 tcg_gen_neg_i32(t32, t32);
1671 }
1672 if (u) {
1673 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1674 } else {
1675 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1676 }
1677 tcg_temp_free_i32(t32);
1678 break;
1679
1680 case MO_32:
1681 t64 = tcg_temp_new_i64();
1682 if (d) {
1683 tcg_gen_neg_i64(t64, val);
1684 } else {
1685 tcg_gen_mov_i64(t64, val);
1686 }
1687 if (u) {
1688 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1689 } else {
1690 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1691 }
1692 tcg_temp_free_i64(t64);
1693 break;
1694
1695 case MO_64:
1696 if (u) {
1697 if (d) {
1698 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1699 } else {
1700 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1701 }
1702 } else if (d) {
1703 t64 = tcg_temp_new_i64();
1704 tcg_gen_neg_i64(t64, val);
1705 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1706 tcg_temp_free_i64(t64);
1707 } else {
1708 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1709 }
1710 break;
1711
1712 default:
1713 g_assert_not_reached();
1714 }
1715
1716 tcg_temp_free_ptr(dptr);
1717 tcg_temp_free_ptr(nptr);
1718 tcg_temp_free_i32(desc);
1719 }
1720
1721 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1722 {
1723 if (sve_access_check(s)) {
1724 unsigned fullsz = vec_full_reg_size(s);
1725 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1726 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1727 }
1728 return true;
1729 }
1730
1731 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1732 {
1733 if (sve_access_check(s)) {
1734 unsigned fullsz = vec_full_reg_size(s);
1735 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736 int inc = numelem * a->imm * (a->d ? -1 : 1);
1737 TCGv_i64 reg = cpu_reg(s, a->rd);
1738
1739 tcg_gen_addi_i64(reg, reg, inc);
1740 }
1741 return true;
1742 }
1743
1744 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1745 uint32_t insn)
1746 {
1747 if (!sve_access_check(s)) {
1748 return true;
1749 }
1750
1751 unsigned fullsz = vec_full_reg_size(s);
1752 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753 int inc = numelem * a->imm;
1754 TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1757 if (inc == 0) {
1758 if (a->u) {
1759 tcg_gen_ext32u_i64(reg, reg);
1760 } else {
1761 tcg_gen_ext32s_i64(reg, reg);
1762 }
1763 } else {
1764 TCGv_i64 t = tcg_const_i64(inc);
1765 do_sat_addsub_32(reg, t, a->u, a->d);
1766 tcg_temp_free_i64(t);
1767 }
1768 return true;
1769 }
1770
1771 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1772 uint32_t insn)
1773 {
1774 if (!sve_access_check(s)) {
1775 return true;
1776 }
1777
1778 unsigned fullsz = vec_full_reg_size(s);
1779 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1780 int inc = numelem * a->imm;
1781 TCGv_i64 reg = cpu_reg(s, a->rd);
1782
1783 if (inc != 0) {
1784 TCGv_i64 t = tcg_const_i64(inc);
1785 do_sat_addsub_64(reg, t, a->u, a->d);
1786 tcg_temp_free_i64(t);
1787 }
1788 return true;
1789 }
1790
1791 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1792 {
1793 if (a->esz == 0) {
1794 return false;
1795 }
1796
1797 unsigned fullsz = vec_full_reg_size(s);
1798 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1799 int inc = numelem * a->imm;
1800
1801 if (inc != 0) {
1802 if (sve_access_check(s)) {
1803 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1804 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1805 vec_full_reg_offset(s, a->rn),
1806 t, fullsz, fullsz);
1807 tcg_temp_free_i64(t);
1808 }
1809 } else {
1810 do_mov_z(s, a->rd, a->rn);
1811 }
1812 return true;
1813 }
1814
1815 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1816 uint32_t insn)
1817 {
1818 if (a->esz == 0) {
1819 return false;
1820 }
1821
1822 unsigned fullsz = vec_full_reg_size(s);
1823 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1824 int inc = numelem * a->imm;
1825
1826 if (inc != 0) {
1827 if (sve_access_check(s)) {
1828 TCGv_i64 t = tcg_const_i64(inc);
1829 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1830 tcg_temp_free_i64(t);
1831 }
1832 } else {
1833 do_mov_z(s, a->rd, a->rn);
1834 }
1835 return true;
1836 }
1837
1838 /*
1839 *** SVE Bitwise Immediate Group
1840 */
1841
1842 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1843 {
1844 uint64_t imm;
1845 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1846 extract32(a->dbm, 0, 6),
1847 extract32(a->dbm, 6, 6))) {
1848 return false;
1849 }
1850 if (sve_access_check(s)) {
1851 unsigned vsz = vec_full_reg_size(s);
1852 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1853 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1854 }
1855 return true;
1856 }
1857
1858 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1859 {
1860 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1861 }
1862
1863 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1864 {
1865 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1866 }
1867
1868 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1869 {
1870 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1871 }
1872
1873 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1874 {
1875 uint64_t imm;
1876 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1877 extract32(a->dbm, 0, 6),
1878 extract32(a->dbm, 6, 6))) {
1879 return false;
1880 }
1881 if (sve_access_check(s)) {
1882 do_dupi_z(s, a->rd, imm);
1883 }
1884 return true;
1885 }
1886
1887 /*
1888 *** SVE Integer Wide Immediate - Predicated Group
1889 */
1890
1891 /* Implement all merging copies. This is used for CPY (immediate),
1892 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1893 */
1894 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1895 TCGv_i64 val)
1896 {
1897 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1898 static gen_cpy * const fns[4] = {
1899 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1900 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1901 };
1902 unsigned vsz = vec_full_reg_size(s);
1903 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1904 TCGv_ptr t_zd = tcg_temp_new_ptr();
1905 TCGv_ptr t_zn = tcg_temp_new_ptr();
1906 TCGv_ptr t_pg = tcg_temp_new_ptr();
1907
1908 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1909 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1910 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1911
1912 fns[esz](t_zd, t_zn, t_pg, val, desc);
1913
1914 tcg_temp_free_ptr(t_zd);
1915 tcg_temp_free_ptr(t_zn);
1916 tcg_temp_free_ptr(t_pg);
1917 tcg_temp_free_i32(desc);
1918 }
1919
1920 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1921 {
1922 if (a->esz == 0) {
1923 return false;
1924 }
1925 if (sve_access_check(s)) {
1926 /* Decode the VFP immediate. */
1927 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1928 TCGv_i64 t_imm = tcg_const_i64(imm);
1929 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1930 tcg_temp_free_i64(t_imm);
1931 }
1932 return true;
1933 }
1934
1935 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1936 {
1937 if (a->esz == 0 && extract32(insn, 13, 1)) {
1938 return false;
1939 }
1940 if (sve_access_check(s)) {
1941 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1942 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943 tcg_temp_free_i64(t_imm);
1944 }
1945 return true;
1946 }
1947
1948 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1949 {
1950 static gen_helper_gvec_2i * const fns[4] = {
1951 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1952 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1953 };
1954
1955 if (a->esz == 0 && extract32(insn, 13, 1)) {
1956 return false;
1957 }
1958 if (sve_access_check(s)) {
1959 unsigned vsz = vec_full_reg_size(s);
1960 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1961 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1962 pred_full_reg_offset(s, a->pg),
1963 t_imm, vsz, vsz, 0, fns[a->esz]);
1964 tcg_temp_free_i64(t_imm);
1965 }
1966 return true;
1967 }
1968
1969 /*
1970 *** SVE Permute Extract Group
1971 */
1972
1973 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1974 {
1975 if (!sve_access_check(s)) {
1976 return true;
1977 }
1978
1979 unsigned vsz = vec_full_reg_size(s);
1980 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1981 unsigned n_siz = vsz - n_ofs;
1982 unsigned d = vec_full_reg_offset(s, a->rd);
1983 unsigned n = vec_full_reg_offset(s, a->rn);
1984 unsigned m = vec_full_reg_offset(s, a->rm);
1985
1986 /* Use host vector move insns if we have appropriate sizes
1987 * and no unfortunate overlap.
1988 */
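    /* For example, with a 32-byte vector and imm == 16, Zd receives bytes
     * 16..31 of Zn followed by bytes 0..15 of Zm; both pieces are sizes
     * gvec accepts, so (when Zm does not overlap Zd) the two moves below
     * suffice without the out-of-line helper.
     */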
1989 if (m != d
1990 && n_ofs == size_for_gvec(n_ofs)
1991 && n_siz == size_for_gvec(n_siz)
1992 && (d != n || n_siz <= n_ofs)) {
1993 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1994 if (n_ofs != 0) {
1995 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1996 }
1997 } else {
1998 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1999 }
2000 return true;
2001 }
2002
2003 /*
2004 *** SVE Permute - Unpredicated Group
2005 */
2006
2007 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2008 {
2009 if (sve_access_check(s)) {
2010 unsigned vsz = vec_full_reg_size(s);
2011 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2012 vsz, vsz, cpu_reg_sp(s, a->rn));
2013 }
2014 return true;
2015 }
2016
2017 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2018 {
2019 if ((a->imm & 0x1f) == 0) {
2020 return false;
2021 }
2022 if (sve_access_check(s)) {
2023 unsigned vsz = vec_full_reg_size(s);
2024 unsigned dofs = vec_full_reg_offset(s, a->rd);
2025 unsigned esz, index;
2026
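/* Decode sketch (added commentary): the immediate concatenates the index
 * with the size field; the position of its lowest set bit selects the
 * element size, and the bits above it form the element index.  An index
 * beyond the current vector length zeroes the destination, handled by
 * the else branch below.
 */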
2027 esz = ctz32(a->imm);
2028 index = a->imm >> (esz + 1);
2029
2030 if ((index << esz) < vsz) {
2031 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2032 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2033 } else {
2034 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2035 }
2036 }
2037 return true;
2038 }
2039
2040 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2041 {
2042 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2043 static gen_insr * const fns[4] = {
2044 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2045 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2046 };
2047 unsigned vsz = vec_full_reg_size(s);
2048 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2049 TCGv_ptr t_zd = tcg_temp_new_ptr();
2050 TCGv_ptr t_zn = tcg_temp_new_ptr();
2051
2052 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2053 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2054
2055 fns[a->esz](t_zd, t_zn, val, desc);
2056
2057 tcg_temp_free_ptr(t_zd);
2058 tcg_temp_free_ptr(t_zn);
2059 tcg_temp_free_i32(desc);
2060 }
2061
2062 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2063 {
2064 if (sve_access_check(s)) {
2065 TCGv_i64 t = tcg_temp_new_i64();
2066 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2067 do_insr_i64(s, a, t);
2068 tcg_temp_free_i64(t);
2069 }
2070 return true;
2071 }
2072
2073 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2074 {
2075 if (sve_access_check(s)) {
2076 do_insr_i64(s, a, cpu_reg(s, a->rm));
2077 }
2078 return true;
2079 }
2080
2081 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2082 {
2083 static gen_helper_gvec_2 * const fns[4] = {
2084 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2085 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2086 };
2087
2088 if (sve_access_check(s)) {
2089 unsigned vsz = vec_full_reg_size(s);
2090 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2091 vec_full_reg_offset(s, a->rn),
2092 vsz, vsz, 0, fns[a->esz]);
2093 }
2094 return true;
2095 }
2096
2097 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2098 {
2099 static gen_helper_gvec_3 * const fns[4] = {
2100 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2101 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2102 };
2103
2104 if (sve_access_check(s)) {
2105 unsigned vsz = vec_full_reg_size(s);
2106 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2107 vec_full_reg_offset(s, a->rn),
2108 vec_full_reg_offset(s, a->rm),
2109 vsz, vsz, 0, fns[a->esz]);
2110 }
2111 return true;
2112 }
2113
2114 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2115 {
2116 static gen_helper_gvec_2 * const fns[4][2] = {
2117 { NULL, NULL },
2118 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2119 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2120 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2121 };
2122
2123 if (a->esz == 0) {
2124 return false;
2125 }
2126 if (sve_access_check(s)) {
2127 unsigned vsz = vec_full_reg_size(s);
2128 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2129 vec_full_reg_offset(s, a->rn)
2130 + (a->h ? vsz / 2 : 0),
2131 vsz, vsz, 0, fns[a->esz][a->u]);
2132 }
2133 return true;
2134 }
2135
2136 /*
2137 *** SVE Permute - Predicates Group
2138 */
2139
2140 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2141 gen_helper_gvec_3 *fn)
2142 {
2143 if (!sve_access_check(s)) {
2144 return true;
2145 }
2146
2147 unsigned vsz = pred_full_reg_size(s);
2148
2149 /* Predicate sizes may be smaller and cannot use simd_desc.
2150 We cannot round up, as we do elsewhere, because we need
2151 the exact size for ZIP2 and REV. We retain the style for
2152 the other helpers for consistency. */
2153 TCGv_ptr t_d = tcg_temp_new_ptr();
2154 TCGv_ptr t_n = tcg_temp_new_ptr();
2155 TCGv_ptr t_m = tcg_temp_new_ptr();
2156 TCGv_i32 t_desc;
2157 int desc;
2158
2159 desc = vsz - 2;
2160 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2161 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
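/* Rough illustration (helper-side details assumed): for a 256-bit VL,
 * pred_full_reg_size is 32, so the low bits carry 32 - 2 = 30 and the
 * helper is expected to add 2 back to recover the predicate size; the
 * element size and the high/odd selector travel in the two 2-bit fields
 * deposited at SIMD_DATA_SHIFT above.
 */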
2162
2163 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2164 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2165 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2166 t_desc = tcg_const_i32(desc);
2167
2168 fn(t_d, t_n, t_m, t_desc);
2169
2170 tcg_temp_free_ptr(t_d);
2171 tcg_temp_free_ptr(t_n);
2172 tcg_temp_free_ptr(t_m);
2173 tcg_temp_free_i32(t_desc);
2174 return true;
2175 }
2176
2177 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2178 gen_helper_gvec_2 *fn)
2179 {
2180 if (!sve_access_check(s)) {
2181 return true;
2182 }
2183
2184 unsigned vsz = pred_full_reg_size(s);
2185 TCGv_ptr t_d = tcg_temp_new_ptr();
2186 TCGv_ptr t_n = tcg_temp_new_ptr();
2187 TCGv_i32 t_desc;
2188 int desc;
2189
2190 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2191 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2192
2193 /* Predicate sizes may be smaller and cannot use simd_desc.
2194 We cannot round up, as we do elsewhere, because we need
2195 the exact size for ZIP2 and REV. We retain the style for
2196 the other helpers for consistency. */
2197
2198 desc = vsz - 2;
2199 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2200 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2201 t_desc = tcg_const_i32(desc);
2202
2203 fn(t_d, t_n, t_desc);
2204
2205 tcg_temp_free_i32(t_desc);
2206 tcg_temp_free_ptr(t_d);
2207 tcg_temp_free_ptr(t_n);
2208 return true;
2209 }
2210
2211 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2212 {
2213 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2214 }
2215
2216 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2217 {
2218 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2219 }
2220
2221 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2222 {
2223 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2224 }
2225
2226 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2227 {
2228 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2229 }
2230
2231 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2232 {
2233 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2234 }
2235
2236 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2237 {
2238 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2239 }
2240
2241 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2242 {
2243 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2244 }
2245
2246 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2247 {
2248 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2249 }
2250
2251 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2252 {
2253 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2254 }
2255
2256 /*
2257 *** SVE Permute - Interleaving Group
2258 */
2259
2260 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2261 {
2262 static gen_helper_gvec_3 * const fns[4] = {
2263 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2264 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2265 };
2266
2267 if (sve_access_check(s)) {
2268 unsigned vsz = vec_full_reg_size(s);
2269 unsigned high_ofs = high ? vsz / 2 : 0;
2270 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2271 vec_full_reg_offset(s, a->rn) + high_ofs,
2272 vec_full_reg_offset(s, a->rm) + high_ofs,
2273 vsz, vsz, 0, fns[a->esz]);
2274 }
2275 return true;
2276 }
2277
2278 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2279 gen_helper_gvec_3 *fn)
2280 {
2281 if (sve_access_check(s)) {
2282 unsigned vsz = vec_full_reg_size(s);
2283 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284 vec_full_reg_offset(s, a->rn),
2285 vec_full_reg_offset(s, a->rm),
2286 vsz, vsz, data, fn);
2287 }
2288 return true;
2289 }
2290
2291 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2292 {
2293 return do_zip(s, a, false);
2294 }
2295
2296 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2297 {
2298 return do_zip(s, a, true);
2299 }
2300
2301 static gen_helper_gvec_3 * const uzp_fns[4] = {
2302 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2303 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2304 };
2305
2306 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2307 {
2308 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2309 }
2310
2311 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2312 {
2313 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2314 }
2315
2316 static gen_helper_gvec_3 * const trn_fns[4] = {
2317 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2318 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2319 };
2320
2321 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2322 {
2323 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2324 }
2325
2326 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2327 {
2328 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2329 }
2330
2331 /*
2332 *** SVE Permute Vector - Predicated Group
2333 */
2334
2335 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2336 {
2337 static gen_helper_gvec_3 * const fns[4] = {
2338 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2339 };
2340 return do_zpz_ool(s, a, fns[a->esz]);
2341 }
2342
2343 /* Call the helper that computes the ARM LastActiveElement pseudocode
2344 * function, scaled by the element size. This includes the not found
2345 * indication; e.g. not found for esz=3 is -8.
2346 */
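/* Example (illustrative only): with esz = 2 and the last active element
 * at index 3, the helper returns 3 << 2 = 12, the byte offset of that
 * element; "not found" is -(1 << esz), here -4.
 */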
2347 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2348 {
2349 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2350 * round up, as we do elsewhere, because we need the exact size.
2351 */
2352 TCGv_ptr t_p = tcg_temp_new_ptr();
2353 TCGv_i32 t_desc;
2354 unsigned vsz = pred_full_reg_size(s);
2355 unsigned desc;
2356
2357 desc = vsz - 2;
2358 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2359
2360 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2361 t_desc = tcg_const_i32(desc);
2362
2363 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2364
2365 tcg_temp_free_i32(t_desc);
2366 tcg_temp_free_ptr(t_p);
2367 }
2368
2369 /* Increment LAST to the offset of the next element in the vector,
2370 * wrapping around to 0.
2371 */
2372 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 {
2374 unsigned vsz = vec_full_reg_size(s);
2375
2376 tcg_gen_addi_i32(last, last, 1 << esz);
2377 if (is_power_of_2(vsz)) {
2378 tcg_gen_andi_i32(last, last, vsz - 1);
2379 } else {
2380 TCGv_i32 max = tcg_const_i32(vsz);
2381 TCGv_i32 zero = tcg_const_i32(0);
2382 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2383 tcg_temp_free_i32(max);
2384 tcg_temp_free_i32(zero);
2385 }
2386 }
2387
2388 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2389 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391 unsigned vsz = vec_full_reg_size(s);
2392
2393 if (is_power_of_2(vsz)) {
2394 tcg_gen_andi_i32(last, last, vsz - 1);
2395 } else {
2396 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2397 TCGv_i32 zero = tcg_const_i32(0);
2398 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2399 tcg_temp_free_i32(max);
2400 tcg_temp_free_i32(zero);
2401 }
2402 }
2403
2404 /* Load an unsigned element of ESZ from BASE+OFS. */
2405 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2406 {
2407 TCGv_i64 r = tcg_temp_new_i64();
2408
2409 switch (esz) {
2410 case 0:
2411 tcg_gen_ld8u_i64(r, base, ofs);
2412 break;
2413 case 1:
2414 tcg_gen_ld16u_i64(r, base, ofs);
2415 break;
2416 case 2:
2417 tcg_gen_ld32u_i64(r, base, ofs);
2418 break;
2419 case 3:
2420 tcg_gen_ld_i64(r, base, ofs);
2421 break;
2422 default:
2423 g_assert_not_reached();
2424 }
2425 return r;
2426 }
2427
2428 /* Load an unsigned element of ESZ from RM[LAST]. */
2429 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2430 int rm, int esz)
2431 {
2432 TCGv_ptr p = tcg_temp_new_ptr();
2433 TCGv_i64 r;
2434
2435 /* Convert the offset within the vector into an offset within ENV.
2436 * The final adjustment for the vector register base
2437 * is added via a constant offset to the load.
2438 */
2439 #ifdef HOST_WORDS_BIGENDIAN
2440 /* Adjust for element ordering. See vec_reg_offset. */
2441 if (esz < 3) {
2442 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2443 }
2444 #endif
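/* E.g. for esz = 0 on a big-endian host, byte element i lives at offset
 * i ^ 7 within its 64-bit unit, hence the XOR with 8 - (1 << esz) above
 * (a reading of the code; the layout follows vec_reg_offset).
 */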
2445 tcg_gen_ext_i32_ptr(p, last);
2446 tcg_gen_add_ptr(p, p, cpu_env);
2447
2448 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2449 tcg_temp_free_ptr(p);
2450
2451 return r;
2452 }
2453
2454 /* Compute CLAST for a Zreg. */
2455 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2456 {
2457 TCGv_i32 last;
2458 TCGLabel *over;
2459 TCGv_i64 ele;
2460 unsigned vsz, esz = a->esz;
2461
2462 if (!sve_access_check(s)) {
2463 return true;
2464 }
2465
2466 last = tcg_temp_local_new_i32();
2467 over = gen_new_label();
2468
2469 find_last_active(s, last, esz, a->pg);
2470
2471 /* There is of course no movcond for a 2048-bit vector,
2472 * so we must branch over the actual store.
2473 */
2474 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2475
2476 if (!before) {
2477 incr_last_active(s, last, esz);
2478 }
2479
2480 ele = load_last_active(s, last, a->rm, esz);
2481 tcg_temp_free_i32(last);
2482
2483 vsz = vec_full_reg_size(s);
2484 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2485 tcg_temp_free_i64(ele);
2486
2487 /* If this insn used MOVPRFX, we may need a second move. */
2488 if (a->rd != a->rn) {
2489 TCGLabel *done = gen_new_label();
2490 tcg_gen_br(done);
2491
2492 gen_set_label(over);
2493 do_mov_z(s, a->rd, a->rn);
2494
2495 gen_set_label(done);
2496 } else {
2497 gen_set_label(over);
2498 }
2499 return true;
2500 }
2501
2502 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2503 {
2504 return do_clast_vector(s, a, false);
2505 }
2506
2507 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2508 {
2509 return do_clast_vector(s, a, true);
2510 }
2511
2512 /* Compute CLAST for a scalar. */
2513 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2514 bool before, TCGv_i64 reg_val)
2515 {
2516 TCGv_i32 last = tcg_temp_new_i32();
2517 TCGv_i64 ele, cmp, zero;
2518
2519 find_last_active(s, last, esz, pg);
2520
2521 /* Extend the original value of last prior to incrementing. */
2522 cmp = tcg_temp_new_i64();
2523 tcg_gen_ext_i32_i64(cmp, last);
2524
2525 if (!before) {
2526 incr_last_active(s, last, esz);
2527 }
2528
2529 /* The conceit here is that while last < 0 indicates not found, after
2530 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2531 * from which we can load garbage. We then discard the garbage with
2532 * a conditional move.
2533 */
2534 ele = load_last_active(s, last, rm, esz);
2535 tcg_temp_free_i32(last);
2536
2537 zero = tcg_const_i64(0);
2538 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
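/* Thus when no element is active, cmp is negative and the movcond keeps
 * reg_val, i.e. the destination's prior (possibly zero-extended) value,
 * which appears to match the scalar CLAST behaviour for an all-false
 * predicate.
 */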
2539
2540 tcg_temp_free_i64(zero);
2541 tcg_temp_free_i64(cmp);
2542 tcg_temp_free_i64(ele);
2543 }
2544
2545 /* Compute CLAST for a Vreg. */
2546 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2547 {
2548 if (sve_access_check(s)) {
2549 int esz = a->esz;
2550 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2551 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2552
2553 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2554 write_fp_dreg(s, a->rd, reg);
2555 tcg_temp_free_i64(reg);
2556 }
2557 return true;
2558 }
2559
2560 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2561 {
2562 return do_clast_fp(s, a, false);
2563 }
2564
2565 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2566 {
2567 return do_clast_fp(s, a, true);
2568 }
2569
2570 /* Compute CLAST for a Xreg. */
2571 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2572 {
2573 TCGv_i64 reg;
2574
2575 if (!sve_access_check(s)) {
2576 return true;
2577 }
2578
2579 reg = cpu_reg(s, a->rd);
2580 switch (a->esz) {
2581 case 0:
2582 tcg_gen_ext8u_i64(reg, reg);
2583 break;
2584 case 1:
2585 tcg_gen_ext16u_i64(reg, reg);
2586 break;
2587 case 2:
2588 tcg_gen_ext32u_i64(reg, reg);
2589 break;
2590 case 3:
2591 break;
2592 default:
2593 g_assert_not_reached();
2594 }
2595
2596 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2597 return true;
2598 }
2599
2600 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2601 {
2602 return do_clast_general(s, a, false);
2603 }
2604
2605 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2606 {
2607 return do_clast_general(s, a, true);
2608 }
2609
2610 /* Compute LAST for a scalar. */
2611 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2612 int pg, int rm, bool before)
2613 {
2614 TCGv_i32 last = tcg_temp_new_i32();
2615 TCGv_i64 ret;
2616
2617 find_last_active(s, last, esz, pg);
2618 if (before) {
2619 wrap_last_active(s, last, esz);
2620 } else {
2621 incr_last_active(s, last, esz);
2622 }
2623
2624 ret = load_last_active(s, last, rm, esz);
2625 tcg_temp_free_i32(last);
2626 return ret;
2627 }
2628
2629 /* Compute LAST for a Vreg. */
2630 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631 {
2632 if (sve_access_check(s)) {
2633 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2634 write_fp_dreg(s, a->rd, val);
2635 tcg_temp_free_i64(val);
2636 }
2637 return true;
2638 }
2639
2640 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2641 {
2642 return do_last_fp(s, a, false);
2643 }
2644
2645 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646 {
2647 return do_last_fp(s, a, true);
2648 }
2649
2650 /* Compute LAST for a Xreg. */
2651 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2652 {
2653 if (sve_access_check(s)) {
2654 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2655 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2656 tcg_temp_free_i64(val);
2657 }
2658 return true;
2659 }
2660
2661 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2662 {
2663 return do_last_general(s, a, false);
2664 }
2665
2666 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2667 {
2668 return do_last_general(s, a, true);
2669 }
2670
2671 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2672 {
2673 if (sve_access_check(s)) {
2674 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2675 }
2676 return true;
2677 }
2678
2679 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2680 {
2681 if (sve_access_check(s)) {
2682 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2683 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2684 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2685 tcg_temp_free_i64(t);
2686 }
2687 return true;
2688 }
2689
2690 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691 {
2692 static gen_helper_gvec_3 * const fns[4] = {
2693 NULL,
2694 gen_helper_sve_revb_h,
2695 gen_helper_sve_revb_s,
2696 gen_helper_sve_revb_d,
2697 };
2698 return do_zpz_ool(s, a, fns[a->esz]);
2699 }
2700
2701 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702 {
2703 static gen_helper_gvec_3 * const fns[4] = {
2704 NULL,
2705 NULL,
2706 gen_helper_sve_revh_s,
2707 gen_helper_sve_revh_d,
2708 };
2709 return do_zpz_ool(s, a, fns[a->esz]);
2710 }
2711
2712 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2713 {
2714 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2715 }
2716
2717 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2718 {
2719 static gen_helper_gvec_3 * const fns[4] = {
2720 gen_helper_sve_rbit_b,
2721 gen_helper_sve_rbit_h,
2722 gen_helper_sve_rbit_s,
2723 gen_helper_sve_rbit_d,
2724 };
2725 return do_zpz_ool(s, a, fns[a->esz]);
2726 }
2727
2728 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2729 {
2730 if (sve_access_check(s)) {
2731 unsigned vsz = vec_full_reg_size(s);
2732 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2733 vec_full_reg_offset(s, a->rn),
2734 vec_full_reg_offset(s, a->rm),
2735 pred_full_reg_offset(s, a->pg),
2736 vsz, vsz, a->esz, gen_helper_sve_splice);
2737 }
2738 return true;
2739 }
2740
2741 /*
2742 *** SVE Integer Compare - Vectors Group
2743 */
2744
2745 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2746 gen_helper_gvec_flags_4 *gen_fn)
2747 {
2748 TCGv_ptr pd, zn, zm, pg;
2749 unsigned vsz;
2750 TCGv_i32 t;
2751
2752 if (gen_fn == NULL) {
2753 return false;
2754 }
2755 if (!sve_access_check(s)) {
2756 return true;
2757 }
2758
2759 vsz = vec_full_reg_size(s);
2760 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2761 pd = tcg_temp_new_ptr();
2762 zn = tcg_temp_new_ptr();
2763 zm = tcg_temp_new_ptr();
2764 pg = tcg_temp_new_ptr();
2765
2766 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2767 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2768 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2769 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2770
2771 gen_fn(t, pd, zn, zm, pg, t);
2772
2773 tcg_temp_free_ptr(pd);
2774 tcg_temp_free_ptr(zn);
2775 tcg_temp_free_ptr(zm);
2776 tcg_temp_free_ptr(pg);
2777
2778 do_pred_flags(t);
2779
2780 tcg_temp_free_i32(t);
2781 return true;
2782 }
2783
2784 #define DO_PPZZ(NAME, name) \
2785 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2786 uint32_t insn) \
2787 { \
2788 static gen_helper_gvec_flags_4 * const fns[4] = { \
2789 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2790 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2791 }; \
2792 return do_ppzz_flags(s, a, fns[a->esz]); \
2793 }
2794
2795 DO_PPZZ(CMPEQ, cmpeq)
2796 DO_PPZZ(CMPNE, cmpne)
2797 DO_PPZZ(CMPGT, cmpgt)
2798 DO_PPZZ(CMPGE, cmpge)
2799 DO_PPZZ(CMPHI, cmphi)
2800 DO_PPZZ(CMPHS, cmphs)
2801
2802 #undef DO_PPZZ
2803
2804 #define DO_PPZW(NAME, name) \
2805 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2806 uint32_t insn) \
2807 { \
2808 static gen_helper_gvec_flags_4 * const fns[4] = { \
2809 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2810 gen_helper_sve_##name##_ppzw_s, NULL \
2811 }; \
2812 return do_ppzz_flags(s, a, fns[a->esz]); \
2813 }
2814
2815 DO_PPZW(CMPEQ, cmpeq)
2816 DO_PPZW(CMPNE, cmpne)
2817 DO_PPZW(CMPGT, cmpgt)
2818 DO_PPZW(CMPGE, cmpge)
2819 DO_PPZW(CMPHI, cmphi)
2820 DO_PPZW(CMPHS, cmphs)
2821 DO_PPZW(CMPLT, cmplt)
2822 DO_PPZW(CMPLE, cmple)
2823 DO_PPZW(CMPLO, cmplo)
2824 DO_PPZW(CMPLS, cmpls)
2825
2826 #undef DO_PPZW
2827
2828 /*
2829 *** SVE Integer Compare - Immediate Groups
2830 */
2831
2832 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2833 gen_helper_gvec_flags_3 *gen_fn)
2834 {
2835 TCGv_ptr pd, zn, pg;
2836 unsigned vsz;
2837 TCGv_i32 t;
2838
2839 if (gen_fn == NULL) {
2840 return false;
2841 }
2842 if (!sve_access_check(s)) {
2843 return true;
2844 }
2845
2846 vsz = vec_full_reg_size(s);
2847 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2848 pd = tcg_temp_new_ptr();
2849 zn = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2851
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2855
2856 gen_fn(t, pd, zn, pg, t);
2857
2858 tcg_temp_free_ptr(pd);
2859 tcg_temp_free_ptr(zn);
2860 tcg_temp_free_ptr(pg);
2861
2862 do_pred_flags(t);
2863
2864 tcg_temp_free_i32(t);
2865 return true;
2866 }
2867
2868 #define DO_PPZI(NAME, name) \
2869 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2870 uint32_t insn) \
2871 { \
2872 static gen_helper_gvec_flags_3 * const fns[4] = { \
2873 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2874 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2875 }; \
2876 return do_ppzi_flags(s, a, fns[a->esz]); \
2877 }
2878
2879 DO_PPZI(CMPEQ, cmpeq)
2880 DO_PPZI(CMPNE, cmpne)
2881 DO_PPZI(CMPGT, cmpgt)
2882 DO_PPZI(CMPGE, cmpge)
2883 DO_PPZI(CMPHI, cmphi)
2884 DO_PPZI(CMPHS, cmphs)
2885 DO_PPZI(CMPLT, cmplt)
2886 DO_PPZI(CMPLE, cmple)
2887 DO_PPZI(CMPLO, cmplo)
2888 DO_PPZI(CMPLS, cmpls)
2889
2890 #undef DO_PPZI
2891
2892 /*
2893 *** SVE Partition Break Group
2894 */
2895
2896 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2897 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2898 {
2899 if (!sve_access_check(s)) {
2900 return true;
2901 }
2902
2903 unsigned vsz = pred_full_reg_size(s);
2904
2905 /* Predicate sizes may be smaller and cannot use simd_desc. */
2906 TCGv_ptr d = tcg_temp_new_ptr();
2907 TCGv_ptr n = tcg_temp_new_ptr();
2908 TCGv_ptr m = tcg_temp_new_ptr();
2909 TCGv_ptr g = tcg_temp_new_ptr();
2910 TCGv_i32 t = tcg_const_i32(vsz - 2);
2911
2912 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2913 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2914 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2915 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2916
2917 if (a->s) {
2918 fn_s(t, d, n, m, g, t);
2919 do_pred_flags(t);
2920 } else {
2921 fn(d, n, m, g, t);
2922 }
2923 tcg_temp_free_ptr(d);
2924 tcg_temp_free_ptr(n);
2925 tcg_temp_free_ptr(m);
2926 tcg_temp_free_ptr(g);
2927 tcg_temp_free_i32(t);
2928 return true;
2929 }
2930
2931 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2932 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2933 {
2934 if (!sve_access_check(s)) {
2935 return true;
2936 }
2937
2938 unsigned vsz = pred_full_reg_size(s);
2939
2940 /* Predicate sizes may be smaller and cannot use simd_desc. */
2941 TCGv_ptr d = tcg_temp_new_ptr();
2942 TCGv_ptr n = tcg_temp_new_ptr();
2943 TCGv_ptr g = tcg_temp_new_ptr();
2944 TCGv_i32 t = tcg_const_i32(vsz - 2);
2945
2946 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2947 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2948 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2949
2950 if (a->s) {
2951 fn_s(t, d, n, g, t);
2952 do_pred_flags(t);
2953 } else {
2954 fn(d, n, g, t);
2955 }
2956 tcg_temp_free_ptr(d);
2957 tcg_temp_free_ptr(n);
2958 tcg_temp_free_ptr(g);
2959 tcg_temp_free_i32(t);
2960 return true;
2961 }
2962
2963 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2964 {
2965 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2966 }
2967
2968 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2969 {
2970 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2971 }
2972
2973 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2974 {
2975 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2976 }
2977
2978 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2979 {
2980 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2981 }
2982
2983 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2984 {
2985 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2986 }
2987
2988 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2989 {
2990 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2991 }
2992
2993 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2994 {
2995 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2996 }
2997
2998 /*
2999 *** SVE Predicate Count Group
3000 */
3001
3002 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3003 {
3004 unsigned psz = pred_full_reg_size(s);
3005
3006 if (psz <= 8) {
3007 uint64_t psz_mask;
3008
3009 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3010 if (pn != pg) {
3011 TCGv_i64 g = tcg_temp_new_i64();
3012 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3013 tcg_gen_and_i64(val, val, g);
3014 tcg_temp_free_i64(g);
3015 }
3016
3017 /* Reduce the pred_esz_masks value simply to reduce the
3018 * size of the code generated here.
3019 */
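/* For example, with a 128-bit VL the predicate occupies 2 bytes, so
 * psz_mask is 0xffff and only the low 16 predicate bits survive; for
 * esz = 2 the esz mask further keeps every fourth bit, one per 32-bit
 * element, before the popcount.
 */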
3020 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3021 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3022
3023 tcg_gen_ctpop_i64(val, val);
3024 } else {
3025 TCGv_ptr t_pn = tcg_temp_new_ptr();
3026 TCGv_ptr t_pg = tcg_temp_new_ptr();
3027 unsigned desc;
3028 TCGv_i32 t_desc;
3029
3030 desc = psz - 2;
3031 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3032
3033 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3034 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3035 t_desc = tcg_const_i32(desc);
3036
3037 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3038 tcg_temp_free_ptr(t_pn);
3039 tcg_temp_free_ptr(t_pg);
3040 tcg_temp_free_i32(t_desc);
3041 }
3042 }
3043
3044 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3045 {
3046 if (sve_access_check(s)) {
3047 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3048 }
3049 return true;
3050 }
3051
3052 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3053 uint32_t insn)
3054 {
3055 if (sve_access_check(s)) {
3056 TCGv_i64 reg = cpu_reg(s, a->rd);
3057 TCGv_i64 val = tcg_temp_new_i64();
3058
3059 do_cntp(s, val, a->esz, a->pg, a->pg);
3060 if (a->d) {
3061 tcg_gen_sub_i64(reg, reg, val);
3062 } else {
3063 tcg_gen_add_i64(reg, reg, val);
3064 }
3065 tcg_temp_free_i64(val);
3066 }
3067 return true;
3068 }
3069
3070 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3071 uint32_t insn)
3072 {
3073 if (a->esz == 0) {
3074 return false;
3075 }
3076 if (sve_access_check(s)) {
3077 unsigned vsz = vec_full_reg_size(s);
3078 TCGv_i64 val = tcg_temp_new_i64();
3079 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3080
3081 do_cntp(s, val, a->esz, a->pg, a->pg);
3082 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3083 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3084 }
3085 return true;
3086 }
3087
3088 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3089 uint32_t insn)
3090 {
3091 if (sve_access_check(s)) {
3092 TCGv_i64 reg = cpu_reg(s, a->rd);
3093 TCGv_i64 val = tcg_temp_new_i64();
3094
3095 do_cntp(s, val, a->esz, a->pg, a->pg);
3096 do_sat_addsub_32(reg, val, a->u, a->d);
3097 }
3098 return true;
3099 }
3100
3101 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3102 uint32_t insn)
3103 {
3104 if (sve_access_check(s)) {
3105 TCGv_i64 reg = cpu_reg(s, a->rd);
3106 TCGv_i64 val = tcg_temp_new_i64();
3107
3108 do_cntp(s, val, a->esz, a->pg, a->pg);
3109 do_sat_addsub_64(reg, val, a->u, a->d);
3110 }
3111 return true;
3112 }
3113
3114 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3115 uint32_t insn)
3116 {
3117 if (a->esz == 0) {
3118 return false;
3119 }
3120 if (sve_access_check(s)) {
3121 TCGv_i64 val = tcg_temp_new_i64();
3122 do_cntp(s, val, a->esz, a->pg, a->pg);
3123 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3124 }
3125 return true;
3126 }
3127
3128 /*
3129 *** SVE Integer Compare Scalars Group
3130 */
3131
3132 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3133 {
3134 if (!sve_access_check(s)) {
3135 return true;
3136 }
3137
3138 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3139 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3140 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3141 TCGv_i64 cmp = tcg_temp_new_i64();
3142
3143 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3144 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3145 tcg_temp_free_i64(cmp);
3146
3147 /* VF = !NF & !CF. */
3148 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3149 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3150
3151 /* Both NF and VF actually look at bit 31. */
3152 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3153 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3154 return true;
3155 }
3156
3157 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3158 {
3159 if (!sve_access_check(s)) {
3160 return true;
3161 }
3162
3163 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3164 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3165 TCGv_i64 t0 = tcg_temp_new_i64();
3166 TCGv_i64 t1 = tcg_temp_new_i64();
3167 TCGv_i32 t2, t3;
3168 TCGv_ptr ptr;
3169 unsigned desc, vsz = vec_full_reg_size(s);
3170 TCGCond cond;
3171
3172 if (!a->sf) {
3173 if (a->u) {
3174 tcg_gen_ext32u_i64(op0, op0);
3175 tcg_gen_ext32u_i64(op1, op1);
3176 } else {
3177 tcg_gen_ext32s_i64(op0, op0);
3178 tcg_gen_ext32s_i64(op1, op1);
3179 }
3180 }
3181
3182 /* For the helper, compress the different conditions into a computation
3183 * of the number of iterations for which the condition is true.
3184 *
3185 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3186 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3187 * aren't that large, so any value >= predicate size is sufficient.
3188 */
3189 tcg_gen_sub_i64(t0, op1, op0);
3190
3191 /* t0 = MIN(op1 - op0, vsz). */
3192 tcg_gen_movi_i64(t1, vsz);
3193 tcg_gen_umin_i64(t0, t0, t1);
3194 if (a->eq) {
3195 /* Equality means one more iteration. */
3196 tcg_gen_addi_i64(t0, t0, 1);
3197 }
3198
3199 /* t0 = (condition true ? t0 : 0). */
3200 cond = (a->u
3201 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3202 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3203 tcg_gen_movi_i64(t1, 0);
3204 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
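/* Worked example (illustrative): WHILELT with op0 = 5 and op1 = 8 gives
 * t0 = 3, i.e. three elements for which the condition holds; the
 * equality forms (a->eq) count one more.  If the initial comparison
 * already fails, the movcond above forces t0 to 0 and the helper
 * presumably produces an all-false predicate.
 */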
3205
3206 t2 = tcg_temp_new_i32();
3207 tcg_gen_extrl_i64_i32(t2, t0);
3208 tcg_temp_free_i64(t0);
3209 tcg_temp_free_i64(t1);
3210
3211 desc = (vsz / 8) - 2;
3212 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3213 t3 = tcg_const_i32(desc);
3214
3215 ptr = tcg_temp_new_ptr();
3216 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3217
3218 gen_helper_sve_while(t2, ptr, t2, t3);
3219 do_pred_flags(t2);
3220
3221 tcg_temp_free_ptr(ptr);
3222 tcg_temp_free_i32(t2);
3223 tcg_temp_free_i32(t3);
3224 return true;
3225 }
3226
3227 /*
3228 *** SVE Integer Wide Immediate - Unpredicated Group
3229 */
3230
3231 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3232 {
3233 if (a->esz == 0) {
3234 return false;
3235 }
3236 if (sve_access_check(s)) {
3237 unsigned vsz = vec_full_reg_size(s);
3238 int dofs = vec_full_reg_offset(s, a->rd);
3239 uint64_t imm;
3240
3241 /* Decode the VFP immediate. */
3242 imm = vfp_expand_imm(a->esz, a->imm);
3243 imm = dup_const(a->esz, imm);
3244
3245 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3246 }
3247 return true;
3248 }
3249
3250 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3251 {
3252 if (a->esz == 0 && extract32(insn, 13, 1)) {
3253 return false;
3254 }
3255 if (sve_access_check(s)) {
3256 unsigned vsz = vec_full_reg_size(s);
3257 int dofs = vec_full_reg_offset(s, a->rd);
3258
3259 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3260 }
3261 return true;
3262 }
3263
3264 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3265 {
3266 if (a->esz == 0 && extract32(insn, 13, 1)) {
3267 return false;
3268 }
3269 if (sve_access_check(s)) {
3270 unsigned vsz = vec_full_reg_size(s);
3271 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3272 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3273 }
3274 return true;
3275 }
3276
3277 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3278 {
3279 a->imm = -a->imm;
3280 return trans_ADD_zzi(s, a, insn);
3281 }
3282
3283 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3284 {
3285 static const GVecGen2s op[4] = {
3286 { .fni8 = tcg_gen_vec_sub8_i64,
3287 .fniv = tcg_gen_sub_vec,
3288 .fno = gen_helper_sve_subri_b,
3289 .opc = INDEX_op_sub_vec,
3290 .vece = MO_8,
3291 .scalar_first = true },
3292 { .fni8 = tcg_gen_vec_sub16_i64,
3293 .fniv = tcg_gen_sub_vec,
3294 .fno = gen_helper_sve_subri_h,
3295 .opc = INDEX_op_sub_vec,
3296 .vece = MO_16,
3297 .scalar_first = true },
3298 { .fni4 = tcg_gen_sub_i32,
3299 .fniv = tcg_gen_sub_vec,
3300 .fno = gen_helper_sve_subri_s,
3301 .opc = INDEX_op_sub_vec,
3302 .vece = MO_32,
3303 .scalar_first = true },
3304 { .fni8 = tcg_gen_sub_i64,
3305 .fniv = tcg_gen_sub_vec,
3306 .fno = gen_helper_sve_subri_d,
3307 .opc = INDEX_op_sub_vec,
3308 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3309 .vece = MO_64,
3310 .scalar_first = true }
3311 };
3312
3313 if (a->esz == 0 && extract32(insn, 13, 1)) {
3314 return false;
3315 }
3316 if (sve_access_check(s)) {
3317 unsigned vsz = vec_full_reg_size(s);
3318 TCGv_i64 c = tcg_const_i64(a->imm);
3319 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3320 vec_full_reg_offset(s, a->rn),
3321 vsz, vsz, c, &op[a->esz]);
3322 tcg_temp_free_i64(c);
3323 }
3324 return true;
3325 }
3326
3327 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3328 {
3329 if (sve_access_check(s)) {
3330 unsigned vsz = vec_full_reg_size(s);
3331 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3332 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3333 }
3334 return true;
3335 }
3336
3337 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3338 bool u, bool d)
3339 {
3340 if (a->esz == 0 && extract32(insn, 13, 1)) {
3341 return false;
3342 }
3343 if (sve_access_check(s)) {
3344 TCGv_i64 val = tcg_const_i64(a->imm);
3345 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3346 tcg_temp_free_i64(val);
3347 }
3348 return true;
3349 }
3350
3351 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3352 {
3353 return do_zzi_sat(s, a, insn, false, false);
3354 }
3355
3356 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3357 {
3358 return do_zzi_sat(s, a, insn, true, false);
3359 }
3360
3361 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3362 {
3363 return do_zzi_sat(s, a, insn, false, true);
3364 }
3365
3366 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3367 {
3368 return do_zzi_sat(s, a, insn, true, true);
3369 }
3370
3371 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3372 {
3373 if (sve_access_check(s)) {
3374 unsigned vsz = vec_full_reg_size(s);
3375 TCGv_i64 c = tcg_const_i64(a->imm);
3376
3377 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3378 vec_full_reg_offset(s, a->rn),
3379 c, vsz, vsz, 0, fn);
3380 tcg_temp_free_i64(c);
3381 }
3382 return true;
3383 }
3384
3385 #define DO_ZZI(NAME, name) \
3386 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3387 uint32_t insn) \
3388 { \
3389 static gen_helper_gvec_2i * const fns[4] = { \
3390 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3391 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3392 }; \
3393 return do_zzi_ool(s, a, fns[a->esz]); \
3394 }
3395
3396 DO_ZZI(SMAX, smax)
3397 DO_ZZI(UMAX, umax)
3398 DO_ZZI(SMIN, smin)
3399 DO_ZZI(UMIN, umin)
3400
3401 #undef DO_ZZI
3402
3403 /*
3404 *** SVE Floating Point Multiply-Add Indexed Group
3405 */
3406
3407 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3408 {
3409 static gen_helper_gvec_4_ptr * const fns[3] = {
3410 gen_helper_gvec_fmla_idx_h,
3411 gen_helper_gvec_fmla_idx_s,
3412 gen_helper_gvec_fmla_idx_d,
3413 };
3414
3415 if (sve_access_check(s)) {
3416 unsigned vsz = vec_full_reg_size(s);
3417 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3418 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3419 vec_full_reg_offset(s, a->rn),
3420 vec_full_reg_offset(s, a->rm),
3421 vec_full_reg_offset(s, a->ra),
3422 status, vsz, vsz, (a->index << 1) | a->sub,
3423 fns[a->esz - 1]);
3424 tcg_temp_free_ptr(status);
3425 }
3426 return true;
3427 }
3428
3429 /*
3430 *** SVE Floating Point Multiply Indexed Group
3431 */
3432
3433 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3434 {
3435 static gen_helper_gvec_3_ptr * const fns[3] = {
3436 gen_helper_gvec_fmul_idx_h,
3437 gen_helper_gvec_fmul_idx_s,
3438 gen_helper_gvec_fmul_idx_d,
3439 };
3440
3441 if (sve_access_check(s)) {
3442 unsigned vsz = vec_full_reg_size(s);
3443 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3444 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3445 vec_full_reg_offset(s, a->rn),
3446 vec_full_reg_offset(s, a->rm),
3447 status, vsz, vsz, a->index, fns[a->esz - 1]);
3448 tcg_temp_free_ptr(status);
3449 }
3450 return true;
3451 }
3452
3453 /*
3454 *** SVE Floating Point Accumulating Reduction Group
3455 */
3456
3457 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3458 {
3459 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3460 TCGv_ptr, TCGv_ptr, TCGv_i32);
3461 static fadda_fn * const fns[3] = {
3462 gen_helper_sve_fadda_h,
3463 gen_helper_sve_fadda_s,
3464 gen_helper_sve_fadda_d,
3465 };
3466 unsigned vsz = vec_full_reg_size(s);
3467 TCGv_ptr t_rm, t_pg, t_fpst;
3468 TCGv_i64 t_val;
3469 TCGv_i32 t_desc;
3470
3471 if (a->esz == 0) {
3472 return false;
3473 }
3474 if (!sve_access_check(s)) {
3475 return true;
3476 }
3477
3478 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3479 t_rm = tcg_temp_new_ptr();
3480 t_pg = tcg_temp_new_ptr();
3481 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3482 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3483 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3484 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3485
3486 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3487
3488 tcg_temp_free_i32(t_desc);
3489 tcg_temp_free_ptr(t_fpst);
3490 tcg_temp_free_ptr(t_pg);
3491 tcg_temp_free_ptr(t_rm);
3492
3493 write_fp_dreg(s, a->rd, t_val);
3494 tcg_temp_free_i64(t_val);
3495 return true;
3496 }
3497
3498 /*
3499 *** SVE Floating Point Arithmetic - Unpredicated Group
3500 */
3501
3502 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3503 gen_helper_gvec_3_ptr *fn)
3504 {
3505 if (fn == NULL) {
3506 return false;
3507 }
3508 if (sve_access_check(s)) {
3509 unsigned vsz = vec_full_reg_size(s);
3510 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3511 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3512 vec_full_reg_offset(s, a->rn),
3513 vec_full_reg_offset(s, a->rm),
3514 status, vsz, vsz, 0, fn);
3515 tcg_temp_free_ptr(status);
3516 }
3517 return true;
3518 }
3519
3520
3521 #define DO_FP3(NAME, name) \
3522 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3523 { \
3524 static gen_helper_gvec_3_ptr * const fns[4] = { \
3525 NULL, gen_helper_gvec_##name##_h, \
3526 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3527 }; \
3528 return do_zzz_fp(s, a, fns[a->esz]); \
3529 }
3530
3531 DO_FP3(FADD_zzz, fadd)
3532 DO_FP3(FSUB_zzz, fsub)
3533 DO_FP3(FMUL_zzz, fmul)
3534 DO_FP3(FTSMUL, ftsmul)
3535 DO_FP3(FRECPS, recps)
3536 DO_FP3(FRSQRTS, rsqrts)
3537
3538 #undef DO_FP3
3539
3540 /*
3541 *** SVE Floating Point Arithmetic - Predicated Group
3542 */
3543
3544 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3545 gen_helper_gvec_4_ptr *fn)
3546 {
3547 if (fn == NULL) {
3548 return false;
3549 }
3550 if (sve_access_check(s)) {
3551 unsigned vsz = vec_full_reg_size(s);
3552 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3553 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3554 vec_full_reg_offset(s, a->rn),
3555 vec_full_reg_offset(s, a->rm),
3556 pred_full_reg_offset(s, a->pg),
3557 status, vsz, vsz, 0, fn);
3558 tcg_temp_free_ptr(status);
3559 }
3560 return true;
3561 }
3562
3563 #define DO_FP3(NAME, name) \
3564 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3565 { \
3566 static gen_helper_gvec_4_ptr * const fns[4] = { \
3567 NULL, gen_helper_sve_##name##_h, \
3568 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3569 }; \
3570 return do_zpzz_fp(s, a, fns[a->esz]); \
3571 }
3572
3573 DO_FP3(FADD_zpzz, fadd)
3574 DO_FP3(FSUB_zpzz, fsub)
3575 DO_FP3(FMUL_zpzz, fmul)
3576 DO_FP3(FMIN_zpzz, fmin)
3577 DO_FP3(FMAX_zpzz, fmax)
3578 DO_FP3(FMINNM_zpzz, fminnum)
3579 DO_FP3(FMAXNM_zpzz, fmaxnum)
3580 DO_FP3(FABD, fabd)
3581 DO_FP3(FSCALE, fscalbn)
3582 DO_FP3(FDIV, fdiv)
3583 DO_FP3(FMULX, fmulx)
3584
3585 #undef DO_FP3
3586
3587 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3588 TCGv_i64, TCGv_ptr, TCGv_i32);
3589
3590 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3591 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3592 {
3593 unsigned vsz = vec_full_reg_size(s);
3594 TCGv_ptr t_zd, t_zn, t_pg, status;
3595 TCGv_i32 desc;
3596
3597 t_zd = tcg_temp_new_ptr();
3598 t_zn = tcg_temp_new_ptr();
3599 t_pg = tcg_temp_new_ptr();
3600 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3601 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3602 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3603
3604 status = get_fpstatus_ptr(is_fp16);
3605 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3606 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3607
3608 tcg_temp_free_i32(desc);
3609 tcg_temp_free_ptr(status);
3610 tcg_temp_free_ptr(t_pg);
3611 tcg_temp_free_ptr(t_zn);
3612 tcg_temp_free_ptr(t_zd);
3613 }
3614
3615 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3616 gen_helper_sve_fp2scalar *fn)
3617 {
3618 TCGv_i64 temp = tcg_const_i64(imm);
3619 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3620 tcg_temp_free_i64(temp);
3621 }
3622
3623 #define DO_FP_IMM(NAME, name, const0, const1) \
3624 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
3625 uint32_t insn) \
3626 { \
3627 static gen_helper_sve_fp2scalar * const fns[3] = { \
3628 gen_helper_sve_##name##_h, \
3629 gen_helper_sve_##name##_s, \
3630 gen_helper_sve_##name##_d \
3631 }; \
3632 static uint64_t const val[3][2] = { \
3633 { float16_##const0, float16_##const1 }, \
3634 { float32_##const0, float32_##const1 }, \
3635 { float64_##const0, float64_##const1 }, \
3636 }; \
3637 if (a->esz == 0) { \
3638 return false; \
3639 } \
3640 if (sve_access_check(s)) { \
3641 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3642 } \
3643 return true; \
3644 }
3645
3646 #define float16_two make_float16(0x4000)
3647 #define float32_two make_float32(0x40000000)
3648 #define float64_two make_float64(0x4000000000000000ULL)
3649
3650 DO_FP_IMM(FADD, fadds, half, one)
3651 DO_FP_IMM(FSUB, fsubs, half, one)
3652 DO_FP_IMM(FMUL, fmuls, half, two)
3653 DO_FP_IMM(FSUBR, fsubrs, half, one)
3654 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3655 DO_FP_IMM(FMINNM, fminnms, zero, one)
3656 DO_FP_IMM(FMAX, fmaxs, zero, one)
3657 DO_FP_IMM(FMIN, fmins, zero, one)
3658
3659 #undef DO_FP_IMM
3660
3661 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3662 gen_helper_gvec_4_ptr *fn)
3663 {
3664 if (fn == NULL) {
3665 return false;
3666 }
3667 if (sve_access_check(s)) {
3668 unsigned vsz = vec_full_reg_size(s);
3669 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3670 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3671 vec_full_reg_offset(s, a->rn),
3672 vec_full_reg_offset(s, a->rm),
3673 pred_full_reg_offset(s, a->pg),
3674 status, vsz, vsz, 0, fn);
3675 tcg_temp_free_ptr(status);
3676 }
3677 return true;
3678 }
3679
3680 #define DO_FPCMP(NAME, name) \
3681 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
3682 uint32_t insn) \
3683 { \
3684 static gen_helper_gvec_4_ptr * const fns[4] = { \
3685 NULL, gen_helper_sve_##name##_h, \
3686 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3687 }; \
3688 return do_fp_cmp(s, a, fns[a->esz]); \
3689 }
3690
3691 DO_FPCMP(FCMGE, fcmge)
3692 DO_FPCMP(FCMGT, fcmgt)
3693 DO_FPCMP(FCMEQ, fcmeq)
3694 DO_FPCMP(FCMNE, fcmne)
3695 DO_FPCMP(FCMUO, fcmuo)
3696 DO_FPCMP(FACGE, facge)
3697 DO_FPCMP(FACGT, facgt)
3698
3699 #undef DO_FPCMP
3700
3701 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3702
3703 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3704 {
3705 if (fn == NULL) {
3706 return false;
3707 }
3708 if (!sve_access_check(s)) {
3709 return true;
3710 }
3711
3712 unsigned vsz = vec_full_reg_size(s);
3713 unsigned desc;
3714 TCGv_i32 t_desc;
3715 TCGv_ptr pg = tcg_temp_new_ptr();
3716
3717 /* We would need 7 operands to pass these arguments "properly".
3718 * So we encode all the register numbers into the descriptor.
3719 */
3720 desc = deposit32(a->rd, 5, 5, a->rn);
3721 desc = deposit32(desc, 10, 5, a->rm);
3722 desc = deposit32(desc, 15, 5, a->ra);
3723 desc = simd_desc(vsz, vsz, desc);
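/* Data field layout as built above: bits [4:0] rd, [9:5] rn,
 * [14:10] rm, [19:15] ra.  The helper is assumed to pull the register
 * numbers back out of the descriptor and index the SVE registers in
 * CPUARMState directly.
 */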
3724
3725 t_desc = tcg_const_i32(desc);
3726 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3727 fn(cpu_env, pg, t_desc);
3728 tcg_temp_free_i32(t_desc);
3729 tcg_temp_free_ptr(pg);
3730 return true;
3731 }
3732
3733 #define DO_FMLA(NAME, name) \
3734 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3735 { \
3736 static gen_helper_sve_fmla * const fns[4] = { \
3737 NULL, gen_helper_sve_##name##_h, \
3738 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3739 }; \
3740 return do_fmla(s, a, fns[a->esz]); \
3741 }
3742
3743 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3744 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3745 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3746 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3747
3748 #undef DO_FMLA
3749
3750 /*
3751 *** SVE Floating Point Unary Operations Predicated Group
3752 */
3753
3754 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3755 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3756 {
3757 if (sve_access_check(s)) {
3758 unsigned vsz = vec_full_reg_size(s);
3759 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3760 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3761 vec_full_reg_offset(s, rn),
3762 pred_full_reg_offset(s, pg),
3763 status, vsz, vsz, 0, fn);
3764 tcg_temp_free_ptr(status);
3765 }
3766 return true;
3767 }
3768
3769 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3770 {
3771 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3772 }
3773
3774 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3775 {
3776 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3777 }
3778
3779 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3780 {
3781 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3782 }
3783
3784 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3785 {
3786 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3787 }
3788
3789 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3790 {
3791 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3792 }
3793
3794 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3795 {
3796 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3797 }
3798
3799 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3800 {
3801 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3802 }
3803
3804 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3805 {
3806 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3807 }
3808
3809 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3810 {
3811 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3812 }
3813
3814 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3815 {
3816 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3817 }
3818
3819 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3820 {
3821 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3822 }
3823
3824 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3825 {
3826 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3827 }
3828
3829 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3830 {
3831 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3832 }
3833
3834 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3835 {
3836 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3837 }
3838
3839 /*
3840 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3841 */
3842
3843 /* Subroutine loading a vector register at VOFS of LEN bytes.
3844 * The load should begin at the address Rn + IMM.
3845 */
3846
3847 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3848 int rn, int imm)
3849 {
3850 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3851 uint32_t len_remain = len % 8;
3852 uint32_t nparts = len / 8 + ctpop8(len_remain);
3853 int midx = get_mem_index(s);
3854 TCGv_i64 addr, t0, t1;
3855
3856 addr = tcg_temp_new_i64();
3857 t0 = tcg_temp_new_i64();
3858
3859 /* Note that unpredicated load/store of vector/predicate registers
3860 * are defined as a stream of bytes, which equates to little-endian
3861 * operations on larger quantities. There is no nice way to force
3862 * a little-endian load for aarch64_be-linux-user out of line.
3863 *
3864 * Attempt to keep code expansion to a minimum by limiting the
3865 * amount of unrolling done.
3866 */
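/* E.g. a 256-bit vector register is 32 bytes, i.e. 4 aligned parts, so
 * it takes the unrolled path below; a 2048-bit register (256 bytes,
 * 32 parts) uses the loop instead.
 */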
3867 if (nparts <= 4) {
3868 int i;
3869
3870 for (i = 0; i < len_align; i += 8) {
3871 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3872 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3873 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3874 }
3875 } else {
3876 TCGLabel *loop = gen_new_label();
3877 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3878
3879 gen_set_label(loop);
3880
3881 /* Minimize the number of local temps that must be re-read from
3882 * the stack each iteration. Instead, re-compute values other
3883 * than the loop counter.
3884 */
3885 tp = tcg_temp_new_ptr();
3886 tcg_gen_addi_ptr(tp, i, imm);
3887 tcg_gen_extu_ptr_i64(addr, tp);
3888 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3889
3890 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3891
3892 tcg_gen_add_ptr(tp, cpu_env, i);
3893 tcg_gen_addi_ptr(i, i, 8);
3894 tcg_gen_st_i64(t0, tp, vofs);
3895 tcg_temp_free_ptr(tp);
3896
3897 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3898 tcg_temp_free_ptr(i);
3899 }
3900
3901 /* Predicate register loads can be any multiple of 2.
3902 * Note that we still store the entire 64-bit unit into cpu_env.
3903 */
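/* For instance, a 384-bit VL gives a 6-byte predicate register:
 * len_remain = 6, split below into a 4-byte plus a 2-byte access.
 */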
3904 if (len_remain) {
3905 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3906
3907 switch (len_remain) {
3908 case 2:
3909 case 4:
3910 case 8:
3911 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3912 break;
3913
3914 case 6:
3915 t1 = tcg_temp_new_i64();
3916 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3917 tcg_gen_addi_i64(addr, addr, 4);
3918 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3919 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3920 tcg_temp_free_i64(t1);
3921 break;
3922
3923 default:
3924 g_assert_not_reached();
3925 }
3926 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3927 }
3928 tcg_temp_free_i64(addr);
3929 tcg_temp_free_i64(t0);
3930 }
3931
3932 /* Similarly for stores. */
3933 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3934 int rn, int imm)
3935 {
3936 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3937 uint32_t len_remain = len % 8;
3938 uint32_t nparts = len / 8 + ctpop8(len_remain);
3939 int midx = get_mem_index(s);
3940 TCGv_i64 addr, t0;
3941
3942 addr = tcg_temp_new_i64();
3943 t0 = tcg_temp_new_i64();
3944
3945 /* Note that unpredicated load/store of vector/predicate registers
3946 * are defined as a stream of bytes, which equates to little-endian
3947 * operations on larger quantities. There is no nice way to force
3948 * a little-endian store for aarch64_be-linux-user out of line.
3949 *
3950 * Attempt to keep code expansion to a minimum by limiting the
3951 * amount of unrolling done.
3952 */
3953 if (nparts <= 4) {
3954 int i;
3955
3956 for (i = 0; i < len_align; i += 8) {
3957 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3958 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3959 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3960 }
3961 } else {
3962 TCGLabel *loop = gen_new_label();
3963 TCGv_ptr t2, i = tcg_const_local_ptr(0);
3964
3965 gen_set_label(loop);
3966
3967 t2 = tcg_temp_new_ptr();
3968 tcg_gen_add_ptr(t2, cpu_env, i);
3969 tcg_gen_ld_i64(t0, t2, vofs);
3970
3971 /* Minimize the number of local temps that must be re-read from
3972 * the stack each iteration. Instead, re-compute values other
3973 * than the loop counter.
3974 */
3975 tcg_gen_addi_ptr(t2, i, imm);
3976 tcg_gen_extu_ptr_i64(addr, t2);
3977 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3978 tcg_temp_free_ptr(t2);
3979
3980 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3981
3982 tcg_gen_addi_ptr(i, i, 8);
3983
3984 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3985 tcg_temp_free_ptr(i);
3986 }
3987
3988 /* Predicate register stores can be any multiple of 2. */
3989 if (len_remain) {
3990 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3991 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3992
3993 switch (len_remain) {
3994 case 2:
3995 case 4:
3996 case 8:
3997 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3998 break;
3999
4000 case 6:
4001 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4002 tcg_gen_addi_i64(addr, addr, 4);
4003 tcg_gen_shri_i64(t0, t0, 32);
4004 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4005 break;
4006
4007 default:
4008 g_assert_not_reached();
4009 }
4010 }
4011 tcg_temp_free_i64(addr);
4012 tcg_temp_free_i64(t0);
4013 }
4014
4015 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4016 {
4017 if (sve_access_check(s)) {
4018 int size = vec_full_reg_size(s);
4019 int off = vec_full_reg_offset(s, a->rd);
4020 do_ldr(s, off, size, a->rn, a->imm * size);
4021 }
4022 return true;
4023 }
4024
4025 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4026 {
4027 if (sve_access_check(s)) {
4028 int size = pred_full_reg_size(s);
4029 int off = pred_full_reg_offset(s, a->rd);
4030 do_ldr(s, off, size, a->rn, a->imm * size);
4031 }
4032 return true;
4033 }
4034
4035 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4036 {
4037 if (sve_access_check(s)) {
4038 int size = vec_full_reg_size(s);
4039 int off = vec_full_reg_offset(s, a->rd);
4040 do_str(s, off, size, a->rn, a->imm * size);
4041 }
4042 return true;
4043 }
4044
4045 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4046 {
4047 if (sve_access_check(s)) {
4048 int size = pred_full_reg_size(s);
4049 int off = pred_full_reg_offset(s, a->rd);
4050 do_str(s, off, size, a->rn, a->imm * size);
4051 }
4052 return true;
4053 }
4054
4055 /*
4056 *** SVE Memory - Contiguous Load Group
4057 */
4058
4059 /* The memory mode of the dtype. */
4060 static const TCGMemOp dtype_mop[16] = {
4061 MO_UB, MO_UB, MO_UB, MO_UB,
4062 MO_SL, MO_UW, MO_UW, MO_UW,
4063 MO_SW, MO_SW, MO_UL, MO_UL,
4064 MO_SB, MO_SB, MO_SB, MO_Q
4065 };
4066
4067 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4068
4069 /* The vector element size of dtype. */
4070 static const uint8_t dtype_esz[16] = {
4071 0, 1, 2, 3,
4072 3, 1, 2, 3,
4073 3, 2, 2, 3,
4074 3, 2, 1, 3
4075 };
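/* As an illustration of the dtype encoding, which folds together the
 * memory access size/signedness and the destination element size:
 *
 *     dtype  0: LD1B  -> .b    MO_UB, esz 0
 *     dtype  1: LD1B  -> .h    MO_UB, esz 1
 *     dtype  4: LD1SW -> .d    MO_SL, esz 3
 *     dtype 15: LD1D  -> .d    MO_Q,  esz 3
 *
 * dtype_msz() extracts just the log2 of the memory access size, which is
 * what scales scalar offsets and index registers below.
 */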
4076
4077 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4078 gen_helper_gvec_mem *fn)
4079 {
4080 unsigned vsz = vec_full_reg_size(s);
4081 TCGv_ptr t_pg;
4082 TCGv_i32 desc;
4083
4084 /* For, e.g., LD4 there are not enough helper arguments to pass all
4085 * four registers as pointers, so encode the register number into the
4086 * descriptor's data field. For consistency, do this even for LD1.
4087 */
4088 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4089 t_pg = tcg_temp_new_ptr();
4090
4091 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4092 fn(cpu_env, t_pg, addr, desc);
4093
4094 tcg_temp_free_ptr(t_pg);
4095 tcg_temp_free_i32(desc);
4096 }
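/* A minimal sketch of how a helper decodes this, assuming the usual gvec
 * descriptor accessors and the vfp.zregs layout used by this file:
 *
 *     intptr_t oprsz = simd_oprsz(desc);    // vector length in bytes
 *     unsigned zt    = simd_data(desc);     // register number from above
 *     void *d        = &env->vfp.zregs[zt]; // first destination register
 *
 * which is why a single (env, pg, addr, desc) signature can serve LD1
 * through LD4.
 */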
4097
4098 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4099 TCGv_i64 addr, int dtype, int nreg)
4100 {
4101 static gen_helper_gvec_mem * const fns[16][4] = {
4102 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4103 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4104 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4105 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4106 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4107
4108 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4109 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4110 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4111 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4112 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4113
4114 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4115 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4116 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4117 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4118 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4119
4120 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4121 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4122 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4123 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4124 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4125 };
4126 gen_helper_gvec_mem *fn = fns[dtype][nreg];
4127
4128 /* While there are holes in the table, they are not
4129 * accessible via the instruction encoding.
4130 */
4131 assert(fn != NULL);
4132 do_mem_zpa(s, zt, pg, addr, fn);
4133 }
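/* For example, LD1H into .h elements (dtype 5, nreg 0) selects
 * gen_helper_sve_ld1hh_r, while LD4H (dtype 5, nreg 3) selects
 * gen_helper_sve_ld4hh_r. The NULL slots correspond to extending loads
 * (memory size smaller than the element size), which architecturally
 * exist only in single-register form.
 */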
4134
4135 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4136 {
4137 if (a->rm == 31) {
4138 return false;
4139 }
4140 if (sve_access_check(s)) {
4141 TCGv_i64 addr = new_tmp_a64(s);
4142 /* The index register is scaled by the element size alone (LSL #msz). */
4143 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4144 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4145 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4146 }
4147 return true;
4148 }
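/* For example, LD4H { Z0.H - Z3.H }, Pg/Z, [Xn, Xm, LSL #1] forms its
 * base address as Xn + (Xm << 1); the per-element stride of
 * (nreg + 1) << msz bytes is applied by the helper as it interleaves the
 * four destination registers.
 */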
4149
4150 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4151 {
4152 if (sve_access_check(s)) {
4153 int vsz = vec_full_reg_size(s);
4154 int elements = vsz >> dtype_esz[a->dtype];
4155 TCGv_i64 addr = new_tmp_a64(s);
4156
4157 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4158 (a->imm * elements * (a->nreg + 1))
4159 << dtype_msz(a->dtype));
4160 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4161 }
4162 return true;
4163 }
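/* The immediate form uses "MUL VL" addressing: a->imm counts blocks of
 * elements * (nreg + 1) << msz bytes, i.e. the memory footprint of one
 * complete set of registers. For an extending load such as LD1B into .d
 * elements that footprint is one eighth of a vector length rather than a
 * full vector.
 */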
4164
4165 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4166 {
4167 static gen_helper_gvec_mem * const fns[16] = {
4168 gen_helper_sve_ldff1bb_r,
4169 gen_helper_sve_ldff1bhu_r,
4170 gen_helper_sve_ldff1bsu_r,
4171 gen_helper_sve_ldff1bdu_r,
4172
4173 gen_helper_sve_ldff1sds_r,
4174 gen_helper_sve_ldff1hh_r,
4175 gen_helper_sve_ldff1hsu_r,
4176 gen_helper_sve_ldff1hdu_r,
4177
4178 gen_helper_sve_ldff1hds_r,
4179 gen_helper_sve_ldff1hss_r,
4180 gen_helper_sve_ldff1ss_r,
4181 gen_helper_sve_ldff1sdu_r,
4182
4183 gen_helper_sve_ldff1bds_r,
4184 gen_helper_sve_ldff1bss_r,
4185 gen_helper_sve_ldff1bhs_r,
4186 gen_helper_sve_ldff1dd_r,
4187 };
4188
4189 if (sve_access_check(s)) {
4190 TCGv_i64 addr = new_tmp_a64(s);
4191 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4192 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4193 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4194 }
4195 return true;
4196 }
4197
4198 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4199 {
4200 static gen_helper_gvec_mem * const fns[16] = {
4201 gen_helper_sve_ldnf1bb_r,
4202 gen_helper_sve_ldnf1bhu_r,
4203 gen_helper_sve_ldnf1bsu_r,
4204 gen_helper_sve_ldnf1bdu_r,
4205
4206 gen_helper_sve_ldnf1sds_r,
4207 gen_helper_sve_ldnf1hh_r,
4208 gen_helper_sve_ldnf1hsu_r,
4209 gen_helper_sve_ldnf1hdu_r,
4210
4211 gen_helper_sve_ldnf1hds_r,
4212 gen_helper_sve_ldnf1hss_r,
4213 gen_helper_sve_ldnf1ss_r,
4214 gen_helper_sve_ldnf1sdu_r,
4215
4216 gen_helper_sve_ldnf1bds_r,
4217 gen_helper_sve_ldnf1bss_r,
4218 gen_helper_sve_ldnf1bhs_r,
4219 gen_helper_sve_ldnf1dd_r,
4220 };
4221
4222 if (sve_access_check(s)) {
4223 int vsz = vec_full_reg_size(s);
4224 int elements = vsz >> dtype_esz[a->dtype];
4225 int off = (a->imm * elements) << dtype_msz(a->dtype);
4226 TCGv_i64 addr = new_tmp_a64(s);
4227
4228 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4229 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4230 }
4231 return true;
4232 }
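/* The translator only forms the base address here; the first-fault
 * (LDFF1) and non-fault (LDNF1) behaviour is left to the helpers, which
 * are expected to suppress the fault for elements that are allowed to
 * fail and instead clear the corresponding FFR bits, roughly:
 *
 *     if (fault && element_may_fail) {
 *         // clear FFR from this element upwards and stop loading
 *     }
 */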
4233
4234 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4235 {
4236 static gen_helper_gvec_mem * const fns[4] = {
4237 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4238 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4239 };
4240 unsigned vsz = vec_full_reg_size(s);
4241 TCGv_ptr t_pg;
4242 TCGv_i32 desc;
4243
4244 /* Load the first quadword using the normal predicated load helpers. */
4245 desc = tcg_const_i32(simd_desc(16, 16, zt));
4246 t_pg = tcg_temp_new_ptr();
4247
4248 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4249 fns[msz](cpu_env, t_pg, addr, desc);
4250
4251 tcg_temp_free_ptr(t_pg);
4252 tcg_temp_free_i32(desc);
4253
4254 /* Replicate that first quadword. */
4255 if (vsz > 16) {
4256 unsigned dofs = vec_full_reg_offset(s, zt);
4257 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4258 }
4259 }
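/* Note that the descriptor above is simd_desc(16, 16, zt) rather than
 * (vsz, vsz, zt), so the predicated helper only writes the first 16
 * bytes of Zt; tcg_gen_gvec_dup_mem with vece 4 (2**4 = 16-byte units)
 * then replicates that quadword across the remainder of the vector.
 */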
4260
4261 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4262 {
4263 if (a->rm == 31) {
4264 return false;
4265 }
4266 if (sve_access_check(s)) {
4267 int msz = dtype_msz(a->dtype);
4268 TCGv_i64 addr = new_tmp_a64(s);
4269 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4270 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4271 do_ldrq(s, a->rd, a->pg, addr, msz);
4272 }
4273 return true;
4274 }
4275
4276 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4277 {
4278 if (sve_access_check(s)) {
4279 TCGv_i64 addr = new_tmp_a64(s);
4280 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4281 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4282 }
4283 return true;
4284 }
4285
4286 /* Load and broadcast element. */
4287 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4288 {
4289 if (!sve_access_check(s)) {
4290 return true;
4291 }
4292
4293 unsigned vsz = vec_full_reg_size(s);
4294 unsigned psz = pred_full_reg_size(s);
4295 unsigned esz = dtype_esz[a->dtype];
4296 TCGLabel *over = gen_new_label();
4297 TCGv_i64 temp;
4298
4299 /* If the guarding predicate has no bits set, no load occurs. */
4300 if (psz <= 8) {
4301 /* Mask pred_esz_masks[esz] down to the actual predicate size so that
4302 * the constant, and hence the generated code, stays small.
4303 */
4304 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4305 temp = tcg_temp_new_i64();
4306 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4307 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4308 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4309 tcg_temp_free_i64(temp);
4310 } else {
4311 TCGv_i32 t32 = tcg_temp_new_i32();
4312 find_last_active(s, t32, esz, a->pg);
4313 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4314 tcg_temp_free_i32(t32);
4315 }
4316
4317 /* Load the data. */
4318 temp = tcg_temp_new_i64();
4319 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4320 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4321 s->be_data | dtype_mop[a->dtype]);
4322
4323 /* Broadcast to *all* elements. */
4324 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4325 vsz, vsz, temp);
4326 tcg_temp_free_i64(temp);
4327
4328 /* Zero the inactive elements. */
4329 gen_set_label(over);
4330 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4331 return true;
4332 }
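/* On the emptiness test above: when the whole predicate register fits in
 * 8 bytes (VL <= 512 bits), one 64-bit load ANDed with pred_esz_masks[esz]
 * (restricted to psz bytes) tells us whether any governing element is
 * active. For larger VL, find_last_active returns -1 for an all-false
 * predicate, which is what the signed "< 0" branch checks.
 */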
4333
4334 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4335 int msz, int esz, int nreg)
4336 {
4337 static gen_helper_gvec_mem * const fn_single[4][4] = {
4338 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4339 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4340 { NULL, gen_helper_sve_st1hh_r,
4341 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4342 { NULL, NULL,
4343 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4344 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4345 };
4346 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4347 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4348 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4349 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4350 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4351 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4352 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4353 };
4354 gen_helper_gvec_mem *fn;
4355
4356 if (nreg == 0) {
4357 /* ST1 */
4358 fn = fn_single[msz][esz];
4359 } else {
4360 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4361 assert(msz == esz);
4362 fn = fn_multiple[nreg - 1][msz];
4363 }
4364 assert(fn != NULL);
4365 do_mem_zpa(s, zt, pg, addr, fn);
4366 }
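/* Example of the table indexing: ST1B of .d elements (msz 0, esz 3)
 * selects gen_helper_sve_st1bd_r from fn_single, truncating each 64-bit
 * element to one byte in memory, while ST3W (a->nreg 2, msz 2) selects
 * gen_helper_sve_st3ss_r from fn_multiple. The NULL slots (msz > esz)
 * are rejected by the trans_* callers before reaching this point.
 */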
4367
4368 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4369 {
4370 if (a->rm == 31 || a->msz > a->esz) {
4371 return false;
4372 }
4373 if (sve_access_check(s)) {
4374 TCGv_i64 addr = new_tmp_a64(s);
4375 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
4376 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4377 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4378 }
4379 return true;
4380 }
4381
4382 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4383 {
4384 if (a->msz > a->esz) {
4385 return false;
4386 }
4387 if (sve_access_check(s)) {
4388 int vsz = vec_full_reg_size(s);
4389 int elements = vsz >> a->esz;
4390 TCGv_i64 addr = new_tmp_a64(s);
4391
4392 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4393 (a->imm * elements * (a->nreg + 1)) << a->msz);
4394 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4395 }
4396 return true;
4397 }
4398
4399 /*
4400 *** SVE gather loads / scatter stores
4401 */
4402
4403 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4404 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4405 {
4406 unsigned vsz = vec_full_reg_size(s);
4407 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4408 TCGv_ptr t_zm = tcg_temp_new_ptr();
4409 TCGv_ptr t_pg = tcg_temp_new_ptr();
4410 TCGv_ptr t_zt = tcg_temp_new_ptr();
4411
4412 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4413 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4414 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4415 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4416
4417 tcg_temp_free_ptr(t_zt);
4418 tcg_temp_free_ptr(t_zm);
4419 tcg_temp_free_ptr(t_pg);
4420 tcg_temp_free_i32(desc);
4421 }
4422
4423 /* Indexed by [ff][xs][u][msz]. */
4424 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4425 { { { gen_helper_sve_ldbss_zsu,
4426 gen_helper_sve_ldhss_zsu,
4427 NULL, },
4428 { gen_helper_sve_ldbsu_zsu,
4429 gen_helper_sve_ldhsu_zsu,
4430 gen_helper_sve_ldssu_zsu, } },
4431 { { gen_helper_sve_ldbss_zss,
4432 gen_helper_sve_ldhss_zss,
4433 NULL, },
4434 { gen_helper_sve_ldbsu_zss,
4435 gen_helper_sve_ldhsu_zss,
4436 gen_helper_sve_ldssu_zss, } } },
4437
4438 { { { gen_helper_sve_ldffbss_zsu,
4439 gen_helper_sve_ldffhss_zsu,
4440 NULL, },
4441 { gen_helper_sve_ldffbsu_zsu,
4442 gen_helper_sve_ldffhsu_zsu,
4443 gen_helper_sve_ldffssu_zsu, } },
4444 { { gen_helper_sve_ldffbss_zss,
4445 gen_helper_sve_ldffhss_zss,
4446 NULL, },
4447 { gen_helper_sve_ldffbsu_zss,
4448 gen_helper_sve_ldffhsu_zss,
4449 gen_helper_sve_ldffssu_zss, } } }
4450 };
4451
4452 /* Note that we overload xs=2 to indicate 64-bit offset. */
4453 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4454 { { { gen_helper_sve_ldbds_zsu,
4455 gen_helper_sve_ldhds_zsu,
4456 gen_helper_sve_ldsds_zsu,
4457 NULL, },
4458 { gen_helper_sve_ldbdu_zsu,
4459 gen_helper_sve_ldhdu_zsu,
4460 gen_helper_sve_ldsdu_zsu,
4461 gen_helper_sve_ldddu_zsu, } },
4462 { { gen_helper_sve_ldbds_zss,
4463 gen_helper_sve_ldhds_zss,
4464 gen_helper_sve_ldsds_zss,
4465 NULL, },
4466 { gen_helper_sve_ldbdu_zss,
4467 gen_helper_sve_ldhdu_zss,
4468 gen_helper_sve_ldsdu_zss,
4469 gen_helper_sve_ldddu_zss, } },
4470 { { gen_helper_sve_ldbds_zd,
4471 gen_helper_sve_ldhds_zd,
4472 gen_helper_sve_ldsds_zd,
4473 NULL, },
4474 { gen_helper_sve_ldbdu_zd,
4475 gen_helper_sve_ldhdu_zd,
4476 gen_helper_sve_ldsdu_zd,
4477 gen_helper_sve_ldddu_zd, } } },
4478
4479 { { { gen_helper_sve_ldffbds_zsu,
4480 gen_helper_sve_ldffhds_zsu,
4481 gen_helper_sve_ldffsds_zsu,
4482 NULL, },
4483 { gen_helper_sve_ldffbdu_zsu,
4484 gen_helper_sve_ldffhdu_zsu,
4485 gen_helper_sve_ldffsdu_zsu,
4486 gen_helper_sve_ldffddu_zsu, } },
4487 { { gen_helper_sve_ldffbds_zss,
4488 gen_helper_sve_ldffhds_zss,
4489 gen_helper_sve_ldffsds_zss,
4490 NULL, },
4491 { gen_helper_sve_ldffbdu_zss,
4492 gen_helper_sve_ldffhdu_zss,
4493 gen_helper_sve_ldffsdu_zss,
4494 gen_helper_sve_ldffddu_zss, } },
4495 { { gen_helper_sve_ldffbds_zd,
4496 gen_helper_sve_ldffhds_zd,
4497 gen_helper_sve_ldffsds_zd,
4498 NULL, },
4499 { gen_helper_sve_ldffbdu_zd,
4500 gen_helper_sve_ldffhdu_zd,
4501 gen_helper_sve_ldffsdu_zd,
4502 gen_helper_sve_ldffddu_zd, } } }
4503 };
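/* Reading these tables: the "zsu" and "zss" suffixes are 32-bit vector
 * offsets zero- or sign-extended, "zd" is a plain 64-bit vector offset,
 * and the leading ff index selects the first-faulting variant. For
 * instance, an unsigned 32-bit gather of words resolves to
 * gen_helper_sve_ldssu_zsu (ff 0, xs 0, u 1, msz 2).
 */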
4504
4505 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4506 {
4507 gen_helper_gvec_mem_scatter *fn = NULL;
4508
4509 if (!sve_access_check(s)) {
4510 return true;
4511 }
4512
4513 switch (a->esz) {
4514 case MO_32:
4515 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4516 break;
4517 case MO_64:
4518 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4519 break;
4520 }
4521 assert(fn != NULL);
4522
4523 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4524 cpu_reg_sp(s, a->rn), fn);
4525 return true;
4526 }
4527
4528 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4529 {
4530 gen_helper_gvec_mem_scatter *fn = NULL;
4531 TCGv_i64 imm;
4532
4533 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4534 return false;
4535 }
4536 if (!sve_access_check(s)) {
4537 return true;
4538 }
4539
4540 switch (a->esz) {
4541 case MO_32:
4542 fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4543 break;
4544 case MO_64:
4545 fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4546 break;
4547 }
4548 assert(fn != NULL);
4549
4550 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4551 * by loading the immediate into the scalar parameter.
4552 */
4553 imm = tcg_const_i64(a->imm << a->msz);
4554 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4555 tcg_temp_free_i64(imm);
4556 return true;
4557 }
4558
4559 /* Indexed by [xs][msz]. */
4560 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
4561 { gen_helper_sve_stbs_zsu,
4562 gen_helper_sve_sths_zsu,
4563 gen_helper_sve_stss_zsu, },
4564 { gen_helper_sve_stbs_zss,
4565 gen_helper_sve_sths_zss,
4566 gen_helper_sve_stss_zss, },
4567 };
4568
4569 /* Note that we overload xs=2 to indicate 64-bit offset. */
4570 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
4571 { gen_helper_sve_stbd_zsu,
4572 gen_helper_sve_sthd_zsu,
4573 gen_helper_sve_stsd_zsu,
4574 gen_helper_sve_stdd_zsu, },
4575 { gen_helper_sve_stbd_zss,
4576 gen_helper_sve_sthd_zss,
4577 gen_helper_sve_stsd_zss,
4578 gen_helper_sve_stdd_zss, },
4579 { gen_helper_sve_stbd_zd,
4580 gen_helper_sve_sthd_zd,
4581 gen_helper_sve_stsd_zd,
4582 gen_helper_sve_stdd_zd, },
4583 };
4584
4585 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4586 {
4587 gen_helper_gvec_mem_scatter *fn;
4588
4589 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4590 return false;
4591 }
4592 if (!sve_access_check(s)) {
4593 return true;
4594 }
4595 switch (a->esz) {
4596 case MO_32:
4597 fn = scatter_store_fn32[a->xs][a->msz];
4598 break;
4599 case MO_64:
4600 fn = scatter_store_fn64[a->xs][a->msz];
4601 break;
4602 default:
4603 g_assert_not_reached();
4604 }
4605 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4606 cpu_reg_sp(s, a->rn), fn);
4607 return true;
4608 }
4609
4610 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
4611 {
4612 gen_helper_gvec_mem_scatter *fn = NULL;
4613 TCGv_i64 imm;
4614
4615 if (a->esz < a->msz) {
4616 return false;
4617 }
4618 if (!sve_access_check(s)) {
4619 return true;
4620 }
4621
4622 switch (a->esz) {
4623 case MO_32:
4624 fn = scatter_store_fn32[0][a->msz];
4625 break;
4626 case MO_64:
4627 fn = scatter_store_fn64[2][a->msz];
4628 break;
4629 }
4630 assert(fn != NULL);
4631
4632 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
4633 * by loading the immediate into the scalar parameter.
4634 */
4635 imm = tcg_const_i64(a->imm << a->msz);
4636 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4637 tcg_temp_free_i64(imm);
4638 return true;
4639 }
4640
4641 /*
4642 * Prefetches
4643 */
4644
4645 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
4646 {
4647 /* Prefetch is a nop within QEMU. */
4648 sve_access_check(s);
4649 return true;
4650 }
4651
4652 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
4653 {
4654 if (a->rm == 31) {
4655 return false;
4656 }
4657 /* Prefetch is a nop within QEMU. */
4658 sve_access_check(s);
4659 return true;
4660 }
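/* sve_access_check() is still evaluated in both forms so that the usual
 * SVE enable and trap checks apply, even though the prefetch itself
 * expands to no code.
 */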