target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
/* Function type for a gvec expander that takes one TCGv_i64 operand in
 * addition to its integer arguments.
 * NOTE(review): presumably (vece, dofs, aofs, scalar, oprsz, maxsz) --
 * confirm against tcg-op-gvec.h.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Helper-generator types with a leading TCGv_i32, three or four pointer
 * operands and a trailing TCGv_i32.
 * NOTE(review): the leading TCGv_i32 is presumably the flags result and
 * the trailing one the descriptor -- confirm against the helpers.
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Helper-generator types that take the CPU env as first argument.
 * NOTE(review): judging by the names these are the memory (load/store)
 * and scatter/gather helpers; the TCGv_i64 is presumably the address.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
/* For each element size, the bits within a predicate word that are active.
 * Indexed by element size: entry 0 has every bit set, entry 1 every
 * second bit, entry 2 every fourth bit and entry 3 every eighth bit.
 * Not static, so it is visible outside this file.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 /* Select active elememnts from Zn and inactive elements from Zm,
 355  * storing the result in Zd.
 356  */
 357 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 358 {
 359     static gen_helper_gvec_4 * const fns[4] = {
 360         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 361         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 362     };
 363     unsigned vsz = vec_full_reg_size(s);
 364     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 365                        vec_full_reg_offset(s, rn),
 366                        vec_full_reg_offset(s, rm),
 367                        pred_full_reg_offset(s, pg),
 368                        vsz, vsz, 0, fns[esz]);
 369 }
 370
 371 #define DO_ZPZZ(NAME, name) \
 372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 373                                 uint32_t insn)                            \
 374 {                                                                         \
 375     static gen_helper_gvec_4 * const fns[4] = {                           \
 376         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 377         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 378     };                                                                    \
 379     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 380 }
 381
 382 DO_ZPZZ(AND, and)
 383 DO_ZPZZ(EOR, eor)
 384 DO_ZPZZ(ORR, orr)
 385 DO_ZPZZ(BIC, bic)
 386
 387 DO_ZPZZ(ADD, add)
 388 DO_ZPZZ(SUB, sub)
 389
 390 DO_ZPZZ(SMAX, smax)
 391 DO_ZPZZ(UMAX, umax)
 392 DO_ZPZZ(SMIN, smin)
 393 DO_ZPZZ(UMIN, umin)
 394 DO_ZPZZ(SABD, sabd)
 395 DO_ZPZZ(UABD, uabd)
 396
 397 DO_ZPZZ(MUL, mul)
 398 DO_ZPZZ(SMULH, smulh)
 399 DO_ZPZZ(UMULH, umulh)
 400
 401 DO_ZPZZ(ASR, asr)
 402 DO_ZPZZ(LSR, lsr)
 403 DO_ZPZZ(LSL, lsl)
 404
 405 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 406 {
 407     static gen_helper_gvec_4 * const fns[4] = {
 408         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 409     };
 410     return do_zpzz_ool(s, a, fns[a->esz]);
 411 }
 412
 413 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 414 {
 415     static gen_helper_gvec_4 * const fns[4] = {
 416         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 417     };
 418     return do_zpzz_ool(s, a, fns[a->esz]);
 419 }
 420
 421 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 422 {
 423     if (sve_access_check(s)) {
 424         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 425     }
 426     return true;
 427 }
 428
 429 #undef DO_ZPZZ
 430
 431 /*
 432  *** SVE Integer Arithmetic - Unary Predicated Group
 433  */
 434
 435 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 436 {
 437     if (fn == NULL) {
 438         return false;
 439     }
 440     if (sve_access_check(s)) {
 441         unsigned vsz = vec_full_reg_size(s);
 442         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 443                            vec_full_reg_offset(s, a->rn),
 444                            pred_full_reg_offset(s, a->pg),
 445                            vsz, vsz, 0, fn);
 446     }
 447     return true;
 448 }
 449
 450 #define DO_ZPZ(NAME, name) \
 451 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 452 {                                                                   \
 453     static gen_helper_gvec_3 * const fns[4] = {                     \
 454         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 455         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 456     };                                                              \
 457     return do_zpz_ool(s, a, fns[a->esz]);                           \
 458 }
 459
 460 DO_ZPZ(CLS, cls)
 461 DO_ZPZ(CLZ, clz)
 462 DO_ZPZ(CNT_zpz, cnt_zpz)
 463 DO_ZPZ(CNOT, cnot)
 464 DO_ZPZ(NOT_zpz, not_zpz)
 465 DO_ZPZ(ABS, abs)
 466 DO_ZPZ(NEG, neg)
 467
 468 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 469 {
 470     static gen_helper_gvec_3 * const fns[4] = {
 471         NULL,
 472         gen_helper_sve_fabs_h,
 473         gen_helper_sve_fabs_s,
 474         gen_helper_sve_fabs_d
 475     };
 476     return do_zpz_ool(s, a, fns[a->esz]);
 477 }
 478
 479 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 480 {
 481     static gen_helper_gvec_3 * const fns[4] = {
 482         NULL,
 483         gen_helper_sve_fneg_h,
 484         gen_helper_sve_fneg_s,
 485         gen_helper_sve_fneg_d
 486     };
 487     return do_zpz_ool(s, a, fns[a->esz]);
 488 }
 489
 490 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 491 {
 492     static gen_helper_gvec_3 * const fns[4] = {
 493         NULL,
 494         gen_helper_sve_sxtb_h,
 495         gen_helper_sve_sxtb_s,
 496         gen_helper_sve_sxtb_d
 497     };
 498     return do_zpz_ool(s, a, fns[a->esz]);
 499 }
 500
 501 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 502 {
 503     static gen_helper_gvec_3 * const fns[4] = {
 504         NULL,
 505         gen_helper_sve_uxtb_h,
 506         gen_helper_sve_uxtb_s,
 507         gen_helper_sve_uxtb_d
 508     };
 509     return do_zpz_ool(s, a, fns[a->esz]);
 510 }
 511
 512 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 513 {
 514     static gen_helper_gvec_3 * const fns[4] = {
 515         NULL, NULL,
 516         gen_helper_sve_sxth_s,
 517         gen_helper_sve_sxth_d
 518     };
 519     return do_zpz_ool(s, a, fns[a->esz]);
 520 }
 521
 522 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 523 {
 524     static gen_helper_gvec_3 * const fns[4] = {
 525         NULL, NULL,
 526         gen_helper_sve_uxth_s,
 527         gen_helper_sve_uxth_d
 528     };
 529     return do_zpz_ool(s, a, fns[a->esz]);
 530 }
 531
 532 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 535 }
 536
 537 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 538 {
 539     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 540 }
 541
 542 #undef DO_ZPZ
 543
 544 /*
 545  *** SVE Integer Reduction Group
 546  */
 547
 548 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 549 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 550                        gen_helper_gvec_reduc *fn)
 551 {
 552     unsigned vsz = vec_full_reg_size(s);
 553     TCGv_ptr t_zn, t_pg;
 554     TCGv_i32 desc;
 555     TCGv_i64 temp;
 556
 557     if (fn == NULL) {
 558         return false;
 559     }
 560     if (!sve_access_check(s)) {
 561         return true;
 562     }
 563
 564     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 565     temp = tcg_temp_new_i64();
 566     t_zn = tcg_temp_new_ptr();
 567     t_pg = tcg_temp_new_ptr();
 568
 569     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 570     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 571     fn(temp, t_zn, t_pg, desc);
 572     tcg_temp_free_ptr(t_zn);
 573     tcg_temp_free_ptr(t_pg);
 574     tcg_temp_free_i32(desc);
 575
 576     write_fp_dreg(s, a->rd, temp);
 577     tcg_temp_free_i64(temp);
 578     return true;
 579 }
 580
 581 #define DO_VPZ(NAME, name) \
 582 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 583 {                                                                        \
 584     static gen_helper_gvec_reduc * const fns[4] = {                      \
 585         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 586         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 587     };                                                                   \
 588     return do_vpz_ool(s, a, fns[a->esz]);                                \
 589 }
 590
 591 DO_VPZ(ORV, orv)
 592 DO_VPZ(ANDV, andv)
 593 DO_VPZ(EORV, eorv)
 594
 595 DO_VPZ(UADDV, uaddv)
 596 DO_VPZ(SMAXV, smaxv)
 597 DO_VPZ(UMAXV, umaxv)
 598 DO_VPZ(SMINV, sminv)
 599 DO_VPZ(UMINV, uminv)
 600
 601 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 602 {
 603     static gen_helper_gvec_reduc * const fns[4] = {
 604         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 605         gen_helper_sve_saddv_s, NULL
 606     };
 607     return do_vpz_ool(s, a, fns[a->esz]);
 608 }
 609
 610 #undef DO_VPZ
 611
 612 /*
 613  *** SVE Shift by Immediate - Predicated Group
 614  */
 615
 616 /* Store zero into every active element of Zd.  We will use this for two
 617  * and three-operand predicated instructions for which logic dictates a
 618  * zero result.
 619  */
 620 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 621 {
 622     static gen_helper_gvec_2 * const fns[4] = {
 623         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 624         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 625     };
 626     if (sve_access_check(s)) {
 627         unsigned vsz = vec_full_reg_size(s);
 628         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 629                            pred_full_reg_offset(s, pg),
 630                            vsz, vsz, 0, fns[esz]);
 631     }
 632     return true;
 633 }
 634
 635 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 636 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 637 {
 638     static gen_helper_gvec_3 * const fns[4] = {
 639         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 640         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 641     };
 642     unsigned vsz = vec_full_reg_size(s);
 643     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 644                        vec_full_reg_offset(s, rn),
 645                        pred_full_reg_offset(s, pg),
 646                        vsz, vsz, 0, fns[esz]);
 647 }
 648
 649 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 650                         gen_helper_gvec_3 *fn)
 651 {
 652     if (sve_access_check(s)) {
 653         unsigned vsz = vec_full_reg_size(s);
 654         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 655                            vec_full_reg_offset(s, a->rn),
 656                            pred_full_reg_offset(s, a->pg),
 657                            vsz, vsz, a->imm, fn);
 658     }
 659     return true;
 660 }
 661
 662 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 663 {
 664     static gen_helper_gvec_3 * const fns[4] = {
 665         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 666         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 667     };
 668     if (a->esz < 0) {
 669         /* Invalid tsz encoding -- see tszimm_esz. */
 670         return false;
 671     }
 672     /* Shift by element size is architecturally valid.  For
 673        arithmetic right-shift, it's the same as by one less. */
 674     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 675     return do_zpzi_ool(s, a, fns[a->esz]);
 676 }
 677
 678 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 679 {
 680     static gen_helper_gvec_3 * const fns[4] = {
 681         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 682         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 683     };
 684     if (a->esz < 0) {
 685         return false;
 686     }
 687     /* Shift by element size is architecturally valid.
 688        For logical shifts, it is a zeroing operation.  */
 689     if (a->imm >= (8 << a->esz)) {
 690         return do_clr_zp(s, a->rd, a->pg, a->esz);
 691     } else {
 692         return do_zpzi_ool(s, a, fns[a->esz]);
 693     }
 694 }
 695
 696 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 697 {
 698     static gen_helper_gvec_3 * const fns[4] = {
 699         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 700         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 701     };
 702     if (a->esz < 0) {
 703         return false;
 704     }
 705     /* Shift by element size is architecturally valid.
 706        For logical shifts, it is a zeroing operation.  */
 707     if (a->imm >= (8 << a->esz)) {
 708         return do_clr_zp(s, a->rd, a->pg, a->esz);
 709     } else {
 710         return do_zpzi_ool(s, a, fns[a->esz]);
 711     }
 712 }
 713
 714 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 715 {
 716     static gen_helper_gvec_3 * const fns[4] = {
 717         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 718         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 719     };
 720     if (a->esz < 0) {
 721         return false;
 722     }
 723     /* Shift by element size is architecturally valid.  For arithmetic
 724        right shift for division, it is a zeroing operation.  */
 725     if (a->imm >= (8 << a->esz)) {
 726         return do_clr_zp(s, a->rd, a->pg, a->esz);
 727     } else {
 728         return do_zpzi_ool(s, a, fns[a->esz]);
 729     }
 730 }
 731
 732 /*
 733  *** SVE Bitwise Shift - Predicated Group
 734  */
 735
 736 #define DO_ZPZW(NAME, name) \
 737 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 738                                 uint32_t insn)                            \
 739 {                                                                         \
 740     static gen_helper_gvec_4 * const fns[3] = {                           \
 741         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 742         gen_helper_sve_##name##_zpzw_s,                                   \
 743     };                                                                    \
 744     if (a->esz < 0 || a->esz >= 3) {                                      \
 745         return false;                                                     \
 746     }                                                                     \
 747     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 748 }
 749
 750 DO_ZPZW(ASR, asr)
 751 DO_ZPZW(LSR, lsr)
 752 DO_ZPZW(LSL, lsl)
 753
 754 #undef DO_ZPZW
 755
 756 /*
 757  *** SVE Bitwise Shift - Unpredicated Group
 758  */
 759
 760 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 761                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 762                                          int64_t, uint32_t, uint32_t))
 763 {
 764     if (a->esz < 0) {
 765         /* Invalid tsz encoding -- see tszimm_esz. */
 766         return false;
 767     }
 768     if (sve_access_check(s)) {
 769         unsigned vsz = vec_full_reg_size(s);
 770         /* Shift by element size is architecturally valid.  For
 771            arithmetic right-shift, it's the same as by one less.
 772            Otherwise it is a zeroing operation.  */
 773         if (a->imm >= 8 << a->esz) {
 774             if (asr) {
 775                 a->imm = (8 << a->esz) - 1;
 776             } else {
 777                 do_dupi_z(s, a->rd, 0);
 778                 return true;
 779             }
 780         }
 781         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 782                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 783     }
 784     return true;
 785 }
 786
 787 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 788 {
 789     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 790 }
 791
 792 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 793 {
 794     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 795 }
 796
 797 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 798 {
 799     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 800 }
 801
 802 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 803 {
 804     if (fn == NULL) {
 805         return false;
 806     }
 807     if (sve_access_check(s)) {
 808         unsigned vsz = vec_full_reg_size(s);
 809         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 810                            vec_full_reg_offset(s, a->rn),
 811                            vec_full_reg_offset(s, a->rm),
 812                            vsz, vsz, 0, fn);
 813     }
 814     return true;
 815 }
 816
 817 #define DO_ZZW(NAME, name) \
 818 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 819                                uint32_t insn)                             \
 820 {                                                                         \
 821     static gen_helper_gvec_3 * const fns[4] = {                           \
 822         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 823         gen_helper_sve_##name##_zzw_s, NULL                               \
 824     };                                                                    \
 825     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 826 }
 827
 828 DO_ZZW(ASR, asr)
 829 DO_ZZW(LSR, lsr)
 830 DO_ZZW(LSL, lsl)
 831
 832 #undef DO_ZZW
 833
 834 /*
 835  *** SVE Integer Multiply-Add Group
 836  */
 837
 838 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 839                          gen_helper_gvec_5 *fn)
 840 {
 841     if (sve_access_check(s)) {
 842         unsigned vsz = vec_full_reg_size(s);
 843         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 844                            vec_full_reg_offset(s, a->ra),
 845                            vec_full_reg_offset(s, a->rn),
 846                            vec_full_reg_offset(s, a->rm),
 847                            pred_full_reg_offset(s, a->pg),
 848                            vsz, vsz, 0, fn);
 849     }
 850     return true;
 851 }
 852
 853 #define DO_ZPZZZ(NAME, name) \
 854 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 855 {                                                                    \
 856     static gen_helper_gvec_5 * const fns[4] = {                      \
 857         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 858         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 859     };                                                               \
 860     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 861 }
 862
 863 DO_ZPZZZ(MLA, mla)
 864 DO_ZPZZZ(MLS, mls)
 865
 866 #undef DO_ZPZZZ
 867
 868 /*
 869  *** SVE Index Generation Group
 870  */
 871
 872 static void do_index(DisasContext *s, int esz, int rd,
 873                      TCGv_i64 start, TCGv_i64 incr)
 874 {
 875     unsigned vsz = vec_full_reg_size(s);
 876     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 877     TCGv_ptr t_zd = tcg_temp_new_ptr();
 878
 879     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 880     if (esz == 3) {
 881         gen_helper_sve_index_d(t_zd, start, incr, desc);
 882     } else {
 883         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 884         static index_fn * const fns[3] = {
 885             gen_helper_sve_index_b,
 886             gen_helper_sve_index_h,
 887             gen_helper_sve_index_s,
 888         };
 889         TCGv_i32 s32 = tcg_temp_new_i32();
 890         TCGv_i32 i32 = tcg_temp_new_i32();
 891
 892         tcg_gen_extrl_i64_i32(s32, start);
 893         tcg_gen_extrl_i64_i32(i32, incr);
 894         fns[esz](t_zd, s32, i32, desc);
 895
 896         tcg_temp_free_i32(s32);
 897         tcg_temp_free_i32(i32);
 898     }
 899     tcg_temp_free_ptr(t_zd);
 900     tcg_temp_free_i32(desc);
 901 }
 902
 903 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 904 {
 905     if (sve_access_check(s)) {
 906         TCGv_i64 start = tcg_const_i64(a->imm1);
 907         TCGv_i64 incr = tcg_const_i64(a->imm2);
 908         do_index(s, a->esz, a->rd, start, incr);
 909         tcg_temp_free_i64(start);
 910         tcg_temp_free_i64(incr);
 911     }
 912     return true;
 913 }
 914
 915 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 916 {
 917     if (sve_access_check(s)) {
 918         TCGv_i64 start = tcg_const_i64(a->imm);
 919         TCGv_i64 incr = cpu_reg(s, a->rm);
 920         do_index(s, a->esz, a->rd, start, incr);
 921         tcg_temp_free_i64(start);
 922     }
 923     return true;
 924 }
 925
 926 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 927 {
 928     if (sve_access_check(s)) {
 929         TCGv_i64 start = cpu_reg(s, a->rn);
 930         TCGv_i64 incr = tcg_const_i64(a->imm);
 931         do_index(s, a->esz, a->rd, start, incr);
 932         tcg_temp_free_i64(incr);
 933     }
 934     return true;
 935 }
 936
 937 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 938 {
 939     if (sve_access_check(s)) {
 940         TCGv_i64 start = cpu_reg(s, a->rn);
 941         TCGv_i64 incr = cpu_reg(s, a->rm);
 942         do_index(s, a->esz, a->rd, start, incr);
 943     }
 944     return true;
 945 }
 946
 947 /*
 948  *** SVE Stack Allocation Group
 949  */
 950
 951 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 952 {
 953     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 954     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 955     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 956     return true;
 957 }
 958
 959 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 960 {
 961     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 962     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 963     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 964     return true;
 965 }
 966
 967 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 968 {
 969     TCGv_i64 reg = cpu_reg(s, a->rd);
 970     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 971     return true;
 972 }
 973
 974 /*
 975  *** SVE Compute Vector Address Group
 976  */
 977
 978 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 979 {
 980     if (sve_access_check(s)) {
 981         unsigned vsz = vec_full_reg_size(s);
 982         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 983                            vec_full_reg_offset(s, a->rn),
 984                            vec_full_reg_offset(s, a->rm),
 985                            vsz, vsz, a->imm, fn);
 986     }
 987     return true;
 988 }
 989
 990 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 991 {
 992     return do_adr(s, a, gen_helper_sve_adr_p32);
 993 }
 994
 995 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 996 {
 997     return do_adr(s, a, gen_helper_sve_adr_p64);
 998 }
 999
1000 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001 {
1002     return do_adr(s, a, gen_helper_sve_adr_s32);
1003 }
1004
1005 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006 {
1007     return do_adr(s, a, gen_helper_sve_adr_u32);
1008 }
1009
1010 /*
1011  *** SVE Integer Misc - Unpredicated Group
1012  */
1013
1014 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015 {
1016     static gen_helper_gvec_2 * const fns[4] = {
1017         NULL,
1018         gen_helper_sve_fexpa_h,
1019         gen_helper_sve_fexpa_s,
1020         gen_helper_sve_fexpa_d,
1021     };
1022     if (a->esz == 0) {
1023         return false;
1024     }
1025     if (sve_access_check(s)) {
1026         unsigned vsz = vec_full_reg_size(s);
1027         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028                            vec_full_reg_offset(s, a->rn),
1029                            vsz, vsz, 0, fns[a->esz]);
1030     }
1031     return true;
1032 }
1033
1034 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035 {
1036     static gen_helper_gvec_3 * const fns[4] = {
1037         NULL,
1038         gen_helper_sve_ftssel_h,
1039         gen_helper_sve_ftssel_s,
1040         gen_helper_sve_ftssel_d,
1041     };
1042     if (a->esz == 0) {
1043         return false;
1044     }
1045     if (sve_access_check(s)) {
1046         unsigned vsz = vec_full_reg_size(s);
1047         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048                            vec_full_reg_offset(s, a->rn),
1049                            vec_full_reg_offset(s, a->rm),
1050                            vsz, vsz, 0, fns[a->esz]);
1051     }
1052     return true;
1053 }
1054
1055 /*
1056  *** SVE Predicate Logical Operations Group
1057  */
1058
1059 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1060                           const GVecGen4 *gvec_op)
1061 {
1062     if (!sve_access_check(s)) {
1063         return true;
1064     }
1065
1066     unsigned psz = pred_gvec_reg_size(s);
1067     int dofs = pred_full_reg_offset(s, a->rd);
1068     int nofs = pred_full_reg_offset(s, a->rn);
1069     int mofs = pred_full_reg_offset(s, a->rm);
1070     int gofs = pred_full_reg_offset(s, a->pg);
1071
1072     if (psz == 8) {
1073         /* Do the operation and the flags generation in temps.  */
1074         TCGv_i64 pd = tcg_temp_new_i64();
1075         TCGv_i64 pn = tcg_temp_new_i64();
1076         TCGv_i64 pm = tcg_temp_new_i64();
1077         TCGv_i64 pg = tcg_temp_new_i64();
1078
1079         tcg_gen_ld_i64(pn, cpu_env, nofs);
1080         tcg_gen_ld_i64(pm, cpu_env, mofs);
1081         tcg_gen_ld_i64(pg, cpu_env, gofs);
1082
1083         gvec_op->fni8(pd, pn, pm, pg);
1084         tcg_gen_st_i64(pd, cpu_env, dofs);
1085
1086         do_predtest1(pd, pg);
1087
1088         tcg_temp_free_i64(pd);
1089         tcg_temp_free_i64(pn);
1090         tcg_temp_free_i64(pm);
1091         tcg_temp_free_i64(pg);
1092     } else {
1093         /* The operation and flags generation is large.  The computation
1094          * of the flags depends on the original contents of the guarding
1095          * predicate.  If the destination overwrites the guarding predicate,
1096          * then the easiest way to get this right is to save a copy.
1097           */
1098         int tofs = gofs;
1099         if (a->rd == a->pg) {
1100             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1101             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1102         }
1103
1104         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1105         do_predtest(s, dofs, tofs, psz / 8);
1106     }
1107     return true;
1108 }
1109
1110 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 {
1112     tcg_gen_and_i64(pd, pn, pm);
1113     tcg_gen_and_i64(pd, pd, pg);
1114 }
1115
1116 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117                            TCGv_vec pm, TCGv_vec pg)
1118 {
1119     tcg_gen_and_vec(vece, pd, pn, pm);
1120     tcg_gen_and_vec(vece, pd, pd, pg);
1121 }
1122
1123 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 {
1125     static const GVecGen4 op = {
1126         .fni8 = gen_and_pg_i64,
1127         .fniv = gen_and_pg_vec,
1128         .fno = gen_helper_sve_and_pppp,
1129         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130     };
1131     if (a->s) {
1132         return do_pppp_flags(s, a, &op);
1133     } else if (a->rn == a->rm) {
1134         if (a->pg == a->rn) {
1135             return do_mov_p(s, a->rd, a->rn);
1136         } else {
1137             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138         }
1139     } else if (a->pg == a->rn || a->pg == a->rm) {
1140         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141     } else {
1142         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143     }
1144 }
1145
1146 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147 {
1148     tcg_gen_andc_i64(pd, pn, pm);
1149     tcg_gen_and_i64(pd, pd, pg);
1150 }
1151
1152 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153                            TCGv_vec pm, TCGv_vec pg)
1154 {
1155     tcg_gen_andc_vec(vece, pd, pn, pm);
1156     tcg_gen_and_vec(vece, pd, pd, pg);
1157 }
1158
1159 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160 {
1161     static const GVecGen4 op = {
1162         .fni8 = gen_bic_pg_i64,
1163         .fniv = gen_bic_pg_vec,
1164         .fno = gen_helper_sve_bic_pppp,
1165         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166     };
1167     if (a->s) {
1168         return do_pppp_flags(s, a, &op);
1169     } else if (a->pg == a->rn) {
1170         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171     } else {
1172         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173     }
1174 }
1175
1176 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177 {
1178     tcg_gen_xor_i64(pd, pn, pm);
1179     tcg_gen_and_i64(pd, pd, pg);
1180 }
1181
1182 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183                            TCGv_vec pm, TCGv_vec pg)
1184 {
1185     tcg_gen_xor_vec(vece, pd, pn, pm);
1186     tcg_gen_and_vec(vece, pd, pd, pg);
1187 }
1188
1189 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190 {
1191     static const GVecGen4 op = {
1192         .fni8 = gen_eor_pg_i64,
1193         .fniv = gen_eor_pg_vec,
1194         .fno = gen_helper_sve_eor_pppp,
1195         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196     };
1197     if (a->s) {
1198         return do_pppp_flags(s, a, &op);
1199     } else {
1200         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201     }
1202 }
1203
1204 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205 {
1206     tcg_gen_and_i64(pn, pn, pg);
1207     tcg_gen_andc_i64(pm, pm, pg);
1208     tcg_gen_or_i64(pd, pn, pm);
1209 }
1210
1211 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212                            TCGv_vec pm, TCGv_vec pg)
1213 {
1214     tcg_gen_and_vec(vece, pn, pn, pg);
1215     tcg_gen_andc_vec(vece, pm, pm, pg);
1216     tcg_gen_or_vec(vece, pd, pn, pm);
1217 }
1218
1219 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220 {
1221     static const GVecGen4 op = {
1222         .fni8 = gen_sel_pg_i64,
1223         .fniv = gen_sel_pg_vec,
1224         .fno = gen_helper_sve_sel_pppp,
1225         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226     };
1227     if (a->s) {
1228         return false;
1229     } else {
1230         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231     }
1232 }
1233
1234 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235 {
1236     tcg_gen_or_i64(pd, pn, pm);
1237     tcg_gen_and_i64(pd, pd, pg);
1238 }
1239
1240 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241                            TCGv_vec pm, TCGv_vec pg)
1242 {
1243     tcg_gen_or_vec(vece, pd, pn, pm);
1244     tcg_gen_and_vec(vece, pd, pd, pg);
1245 }
1246
1247 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248 {
1249     static const GVecGen4 op = {
1250         .fni8 = gen_orr_pg_i64,
1251         .fniv = gen_orr_pg_vec,
1252         .fno = gen_helper_sve_orr_pppp,
1253         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254     };
1255     if (a->s) {
1256         return do_pppp_flags(s, a, &op);
1257     } else if (a->pg == a->rn && a->rn == a->rm) {
1258         return do_mov_p(s, a->rd, a->rn);
1259     } else {
1260         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261     }
1262 }
1263
1264 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1265 {
1266     tcg_gen_orc_i64(pd, pn, pm);
1267     tcg_gen_and_i64(pd, pd, pg);
1268 }
1269
1270 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271                            TCGv_vec pm, TCGv_vec pg)
1272 {
1273     tcg_gen_orc_vec(vece, pd, pn, pm);
1274     tcg_gen_and_vec(vece, pd, pd, pg);
1275 }
1276
1277 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278 {
1279     static const GVecGen4 op = {
1280         .fni8 = gen_orn_pg_i64,
1281         .fniv = gen_orn_pg_vec,
1282         .fno = gen_helper_sve_orn_pppp,
1283         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284     };
1285     if (a->s) {
1286         return do_pppp_flags(s, a, &op);
1287     } else {
1288         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289     }
1290 }
1291
1292 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293 {
1294     tcg_gen_or_i64(pd, pn, pm);
1295     tcg_gen_andc_i64(pd, pg, pd);
1296 }
1297
1298 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299                            TCGv_vec pm, TCGv_vec pg)
1300 {
1301     tcg_gen_or_vec(vece, pd, pn, pm);
1302     tcg_gen_andc_vec(vece, pd, pg, pd);
1303 }
1304
1305 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306 {
1307     static const GVecGen4 op = {
1308         .fni8 = gen_nor_pg_i64,
1309         .fniv = gen_nor_pg_vec,
1310         .fno = gen_helper_sve_nor_pppp,
1311         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312     };
1313     if (a->s) {
1314         return do_pppp_flags(s, a, &op);
1315     } else {
1316         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317     }
1318 }
1319
1320 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1321 {
1322     tcg_gen_and_i64(pd, pn, pm);
1323     tcg_gen_andc_i64(pd, pg, pd);
1324 }
1325
1326 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1327                            TCGv_vec pm, TCGv_vec pg)
1328 {
1329     tcg_gen_and_vec(vece, pd, pn, pm);
1330     tcg_gen_andc_vec(vece, pd, pg, pd);
1331 }
1332
1333 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334 {
1335     static const GVecGen4 op = {
1336         .fni8 = gen_nand_pg_i64,
1337         .fniv = gen_nand_pg_vec,
1338         .fno = gen_helper_sve_nand_pppp,
1339         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340     };
1341     if (a->s) {
1342         return do_pppp_flags(s, a, &op);
1343     } else {
1344         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345     }
1346 }
1347
1348 /*
1349  *** SVE Predicate Misc Group
1350  */
1351
1352 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353 {
1354     if (sve_access_check(s)) {
1355         int nofs = pred_full_reg_offset(s, a->rn);
1356         int gofs = pred_full_reg_offset(s, a->pg);
1357         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359         if (words == 1) {
1360             TCGv_i64 pn = tcg_temp_new_i64();
1361             TCGv_i64 pg = tcg_temp_new_i64();
1362
1363             tcg_gen_ld_i64(pn, cpu_env, nofs);
1364             tcg_gen_ld_i64(pg, cpu_env, gofs);
1365             do_predtest1(pn, pg);
1366
1367             tcg_temp_free_i64(pn);
1368             tcg_temp_free_i64(pg);
1369         } else {
1370             do_predtest(s, nofs, gofs, words);
1371         }
1372     }
1373     return true;
1374 }
1375
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ gives the number of elements available.  PATTERN then
 * selects how many of them are counted:
 *   0x00        POW2  largest power of two not above the element count
 *   0x01..0x08  VL1..VL8
 *   0x09..0x0d  VL16, VL32, VL64, VL128, VL256
 *   0x1d        MUL4  largest multiple of 4
 *   0x1e        MUL3  largest multiple of 3
 *   0x1f        ALL
 * A fixed VLn count that does not fit, and any other pattern value
 * (#uimm5), yields 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        return pow2floor(elements);
    } else if (pattern >= 0x1 && pattern <= 0x8) {
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        bound = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {
        return elements - elements % 4;
    } else if (pattern == 0x1e) {
        return elements - elements % 3;
    } else if (pattern == 0x1f) {
        return elements;
    } else {
        return 0;
    }

    /* A fixed count is only honoured when that many elements exist.  */
    return bound <= elements ? bound : 0;
}
1413
1414 /* This handles all of the predicate initialization instructions,
1415  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1416  * so that decode_pred_count returns 0.  For SETFFR, we will have
1417  * set RD == 16 == FFR.
1418  */
1419 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1420 {
1421     if (!sve_access_check(s)) {
1422         return true;
1423     }
1424
1425     unsigned fullsz = vec_full_reg_size(s);
1426     unsigned ofs = pred_full_reg_offset(s, rd);
1427     unsigned numelem, setsz, i;
1428     uint64_t word, lastword;
1429     TCGv_i64 t;
1430
1431     numelem = decode_pred_count(fullsz, pat, esz);
1432
1433     /* Determine what we must store into each bit, and how many.  */
1434     if (numelem == 0) {
1435         lastword = word = 0;
1436         setsz = fullsz;
1437     } else {
1438         setsz = numelem << esz;
1439         lastword = word = pred_esz_masks[esz];
1440         if (setsz % 64) {
1441             lastword &= ~(-1ull << (setsz % 64));
1442         }
1443     }
1444
1445     t = tcg_temp_new_i64();
1446     if (fullsz <= 64) {
1447         tcg_gen_movi_i64(t, lastword);
1448         tcg_gen_st_i64(t, cpu_env, ofs);
1449         goto done;
1450     }
1451
1452     if (word == lastword) {
1453         unsigned maxsz = size_for_gvec(fullsz / 8);
1454         unsigned oprsz = size_for_gvec(setsz / 8);
1455
1456         if (oprsz * 8 == setsz) {
1457             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1458             goto done;
1459         }
1460         if (oprsz * 8 == setsz + 8) {
1461             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1462             tcg_gen_movi_i64(t, 0);
1463             tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1464             goto done;
1465         }
1466     }
1467
1468     setsz /= 8;
1469     fullsz /= 8;
1470
1471     tcg_gen_movi_i64(t, word);
1472     for (i = 0; i < setsz; i += 8) {
1473         tcg_gen_st_i64(t, cpu_env, ofs + i);
1474     }
1475     if (lastword != word) {
1476         tcg_gen_movi_i64(t, lastword);
1477         tcg_gen_st_i64(t, cpu_env, ofs + i);
1478         i += 8;
1479     }
1480     if (i < fullsz) {
1481         tcg_gen_movi_i64(t, 0);
1482         for (; i < fullsz; i += 8) {
1483             tcg_gen_st_i64(t, cpu_env, ofs + i);
1484         }
1485     }
1486
1487  done:
1488     tcg_temp_free_i64(t);
1489
1490     /* PTRUES */
1491     if (setflag) {
1492         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1493         tcg_gen_movi_i32(cpu_CF, word == 0);
1494         tcg_gen_movi_i32(cpu_VF, 0);
1495         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1496     }
1497     return true;
1498 }
1499
1500 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1501 {
1502     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1503 }
1504
1505 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1506 {
1507     /* Note pat == 31 is #all, to set all elements.  */
1508     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1509 }
1510
1511 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1512 {
1513     /* Note pat == 32 is #unimp, to set no elements.  */
1514     return do_predset(s, 0, a->rd, 32, false);
1515 }
1516
1517 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1518 {
1519     /* The path through do_pppp_flags is complicated enough to want to avoid
1520      * duplication.  Frob the arguments into the form of a predicated AND.
1521      */
1522     arg_rprr_s alt_a = {
1523         .rd = a->rd, .pg = a->pg, .s = a->s,
1524         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1525     };
1526     return trans_AND_pppp(s, &alt_a, insn);
1527 }
1528
1529 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1530 {
1531     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1532 }
1533
1534 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1535 {
1536     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1537 }
1538
1539 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1540                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1541                                            TCGv_ptr, TCGv_i32))
1542 {
1543     if (!sve_access_check(s)) {
1544         return true;
1545     }
1546
1547     TCGv_ptr t_pd = tcg_temp_new_ptr();
1548     TCGv_ptr t_pg = tcg_temp_new_ptr();
1549     TCGv_i32 t;
1550     unsigned desc;
1551
1552     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1553     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1554
1555     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1556     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1557     t = tcg_const_i32(desc);
1558
1559     gen_fn(t, t_pd, t_pg, t);
1560     tcg_temp_free_ptr(t_pd);
1561     tcg_temp_free_ptr(t_pg);
1562
1563     do_pred_flags(t);
1564     tcg_temp_free_i32(t);
1565     return true;
1566 }
1567
1568 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1569 {
1570     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1571 }
1572
1573 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1574 {
1575     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1576 }
1577
1578 /*
1579  *** SVE Element Count Group
1580  */
1581
1582 /* Perform an inline saturating addition of a 32-bit value within
1583  * a 64-bit register.  The second operand is known to be positive,
1584  * which halves the comparisions we must perform to bound the result.
1585  */
1586 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1587 {
1588     int64_t ibound;
1589     TCGv_i64 bound;
1590     TCGCond cond;
1591
1592     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1593     if (u) {
1594         tcg_gen_ext32u_i64(reg, reg);
1595     } else {
1596         tcg_gen_ext32s_i64(reg, reg);
1597     }
1598     if (d) {
1599         tcg_gen_sub_i64(reg, reg, val);
1600         ibound = (u ? 0 : INT32_MIN);
1601         cond = TCG_COND_LT;
1602     } else {
1603         tcg_gen_add_i64(reg, reg, val);
1604         ibound = (u ? UINT32_MAX : INT32_MAX);
1605         cond = TCG_COND_GT;
1606     }
1607     bound = tcg_const_i64(ibound);
1608     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1609     tcg_temp_free_i64(bound);
1610 }
1611
1612 /* Similarly with 64-bit values.  */
1613 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1614 {
1615     TCGv_i64 t0 = tcg_temp_new_i64();
1616     TCGv_i64 t1 = tcg_temp_new_i64();
1617     TCGv_i64 t2;
1618
1619     if (u) {
1620         if (d) {
1621             tcg_gen_sub_i64(t0, reg, val);
1622             tcg_gen_movi_i64(t1, 0);
1623             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1624         } else {
1625             tcg_gen_add_i64(t0, reg, val);
1626             tcg_gen_movi_i64(t1, -1);
1627             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1628         }
1629     } else {
1630         if (d) {
1631             /* Detect signed overflow for subtraction.  */
1632             tcg_gen_xor_i64(t0, reg, val);
1633             tcg_gen_sub_i64(t1, reg, val);
1634             tcg_gen_xor_i64(reg, reg, t0);
1635             tcg_gen_and_i64(t0, t0, reg);
1636
1637             /* Bound the result.  */
1638             tcg_gen_movi_i64(reg, INT64_MIN);
1639             t2 = tcg_const_i64(0);
1640             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1641         } else {
1642             /* Detect signed overflow for addition.  */
1643             tcg_gen_xor_i64(t0, reg, val);
1644             tcg_gen_add_i64(reg, reg, val);
1645             tcg_gen_xor_i64(t1, reg, val);
1646             tcg_gen_andc_i64(t0, t1, t0);
1647
1648             /* Bound the result.  */
1649             tcg_gen_movi_i64(t1, INT64_MAX);
1650             t2 = tcg_const_i64(0);
1651             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1652         }
1653         tcg_temp_free_i64(t2);
1654     }
1655     tcg_temp_free_i64(t0);
1656     tcg_temp_free_i64(t1);
1657 }
1658
1659 /* Similarly with a vector and a scalar operand.  */
1660 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1661                               TCGv_i64 val, bool u, bool d)
1662 {
1663     unsigned vsz = vec_full_reg_size(s);
1664     TCGv_ptr dptr, nptr;
1665     TCGv_i32 t32, desc;
1666     TCGv_i64 t64;
1667
1668     dptr = tcg_temp_new_ptr();
1669     nptr = tcg_temp_new_ptr();
1670     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1671     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1672     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1673
1674     switch (esz) {
1675     case MO_8:
1676         t32 = tcg_temp_new_i32();
1677         tcg_gen_extrl_i64_i32(t32, val);
1678         if (d) {
1679             tcg_gen_neg_i32(t32, t32);
1680         }
1681         if (u) {
1682             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1683         } else {
1684             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1685         }
1686         tcg_temp_free_i32(t32);
1687         break;
1688
1689     case MO_16:
1690         t32 = tcg_temp_new_i32();
1691         tcg_gen_extrl_i64_i32(t32, val);
1692         if (d) {
1693             tcg_gen_neg_i32(t32, t32);
1694         }
1695         if (u) {
1696             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1697         } else {
1698             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1699         }
1700         tcg_temp_free_i32(t32);
1701         break;
1702
1703     case MO_32:
1704         t64 = tcg_temp_new_i64();
1705         if (d) {
1706             tcg_gen_neg_i64(t64, val);
1707         } else {
1708             tcg_gen_mov_i64(t64, val);
1709         }
1710         if (u) {
1711             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1712         } else {
1713             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1714         }
1715         tcg_temp_free_i64(t64);
1716         break;
1717
1718     case MO_64:
1719         if (u) {
1720             if (d) {
1721                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1722             } else {
1723                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1724             }
1725         } else if (d) {
1726             t64 = tcg_temp_new_i64();
1727             tcg_gen_neg_i64(t64, val);
1728             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1729             tcg_temp_free_i64(t64);
1730         } else {
1731             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1732         }
1733         break;
1734
1735     default:
1736         g_assert_not_reached();
1737     }
1738
1739     tcg_temp_free_ptr(dptr);
1740     tcg_temp_free_ptr(nptr);
1741     tcg_temp_free_i32(desc);
1742 }
1743
1744 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1745 {
1746     if (sve_access_check(s)) {
1747         unsigned fullsz = vec_full_reg_size(s);
1748         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1749         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1750     }
1751     return true;
1752 }
1753
1754 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1755 {
1756     if (sve_access_check(s)) {
1757         unsigned fullsz = vec_full_reg_size(s);
1758         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1759         int inc = numelem * a->imm * (a->d ? -1 : 1);
1760         TCGv_i64 reg = cpu_reg(s, a->rd);
1761
1762         tcg_gen_addi_i64(reg, reg, inc);
1763     }
1764     return true;
1765 }
1766
1767 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1768                                uint32_t insn)
1769 {
1770     if (!sve_access_check(s)) {
1771         return true;
1772     }
1773
1774     unsigned fullsz = vec_full_reg_size(s);
1775     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1776     int inc = numelem * a->imm;
1777     TCGv_i64 reg = cpu_reg(s, a->rd);
1778
1779     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1780     if (inc == 0) {
1781         if (a->u) {
1782             tcg_gen_ext32u_i64(reg, reg);
1783         } else {
1784             tcg_gen_ext32s_i64(reg, reg);
1785         }
1786     } else {
1787         TCGv_i64 t = tcg_const_i64(inc);
1788         do_sat_addsub_32(reg, t, a->u, a->d);
1789         tcg_temp_free_i64(t);
1790     }
1791     return true;
1792 }
1793
1794 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1795                                uint32_t insn)
1796 {
1797     if (!sve_access_check(s)) {
1798         return true;
1799     }
1800
1801     unsigned fullsz = vec_full_reg_size(s);
1802     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1803     int inc = numelem * a->imm;
1804     TCGv_i64 reg = cpu_reg(s, a->rd);
1805
1806     if (inc != 0) {
1807         TCGv_i64 t = tcg_const_i64(inc);
1808         do_sat_addsub_64(reg, t, a->u, a->d);
1809         tcg_temp_free_i64(t);
1810     }
1811     return true;
1812 }
1813
1814 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1815 {
1816     if (a->esz == 0) {
1817         return false;
1818     }
1819
1820     unsigned fullsz = vec_full_reg_size(s);
1821     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1822     int inc = numelem * a->imm;
1823
1824     if (inc != 0) {
1825         if (sve_access_check(s)) {
1826             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1827             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1828                               vec_full_reg_offset(s, a->rn),
1829                               t, fullsz, fullsz);
1830             tcg_temp_free_i64(t);
1831         }
1832     } else {
1833         do_mov_z(s, a->rd, a->rn);
1834     }
1835     return true;
1836 }
1837
1838 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1839                             uint32_t insn)
1840 {
1841     if (a->esz == 0) {
1842         return false;
1843     }
1844
1845     unsigned fullsz = vec_full_reg_size(s);
1846     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1847     int inc = numelem * a->imm;
1848
1849     if (inc != 0) {
1850         if (sve_access_check(s)) {
1851             TCGv_i64 t = tcg_const_i64(inc);
1852             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1853             tcg_temp_free_i64(t);
1854         }
1855     } else {
1856         do_mov_z(s, a->rd, a->rn);
1857     }
1858     return true;
1859 }
1860
1861 /*
1862  *** SVE Bitwise Immediate Group
1863  */
1864
1865 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1866 {
1867     uint64_t imm;
1868     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1869                                 extract32(a->dbm, 0, 6),
1870                                 extract32(a->dbm, 6, 6))) {
1871         return false;
1872     }
1873     if (sve_access_check(s)) {
1874         unsigned vsz = vec_full_reg_size(s);
1875         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1876                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1877     }
1878     return true;
1879 }
1880
1881 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1884 }
1885
1886 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1887 {
1888     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1889 }
1890
1891 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1892 {
1893     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1894 }
1895
1896 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1897 {
1898     uint64_t imm;
1899     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1900                                 extract32(a->dbm, 0, 6),
1901                                 extract32(a->dbm, 6, 6))) {
1902         return false;
1903     }
1904     if (sve_access_check(s)) {
1905         do_dupi_z(s, a->rd, imm);
1906     }
1907     return true;
1908 }
1909
1910 /*
1911  *** SVE Integer Wide Immediate - Predicated Group
1912  */
1913
1914 /* Implement all merging copies.  This is used for CPY (immediate),
1915  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1916  */
1917 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1918                      TCGv_i64 val)
1919 {
1920     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1921     static gen_cpy * const fns[4] = {
1922         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1923         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1924     };
1925     unsigned vsz = vec_full_reg_size(s);
1926     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1927     TCGv_ptr t_zd = tcg_temp_new_ptr();
1928     TCGv_ptr t_zn = tcg_temp_new_ptr();
1929     TCGv_ptr t_pg = tcg_temp_new_ptr();
1930
1931     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1932     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1933     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1934
1935     fns[esz](t_zd, t_zn, t_pg, val, desc);
1936
1937     tcg_temp_free_ptr(t_zd);
1938     tcg_temp_free_ptr(t_zn);
1939     tcg_temp_free_ptr(t_pg);
1940     tcg_temp_free_i32(desc);
1941 }
1942
1943 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1944 {
1945     if (a->esz == 0) {
1946         return false;
1947     }
1948     if (sve_access_check(s)) {
1949         /* Decode the VFP immediate.  */
1950         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1951         TCGv_i64 t_imm = tcg_const_i64(imm);
1952         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1953         tcg_temp_free_i64(t_imm);
1954     }
1955     return true;
1956 }
1957
1958 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1959 {
1960     if (a->esz == 0 && extract32(insn, 13, 1)) {
1961         return false;
1962     }
1963     if (sve_access_check(s)) {
1964         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1965         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1966         tcg_temp_free_i64(t_imm);
1967     }
1968     return true;
1969 }
1970
1971 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1972 {
1973     static gen_helper_gvec_2i * const fns[4] = {
1974         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1975         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1976     };
1977
1978     if (a->esz == 0 && extract32(insn, 13, 1)) {
1979         return false;
1980     }
1981     if (sve_access_check(s)) {
1982         unsigned vsz = vec_full_reg_size(s);
1983         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1984         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1985                             pred_full_reg_offset(s, a->pg),
1986                             t_imm, vsz, vsz, 0, fns[a->esz]);
1987         tcg_temp_free_i64(t_imm);
1988     }
1989     return true;
1990 }
1991
1992 /*
1993  *** SVE Permute Extract Group
1994  */
1995
1996 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1997 {
1998     if (!sve_access_check(s)) {
1999         return true;
2000     }
2001
2002     unsigned vsz = vec_full_reg_size(s);
2003     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
2004     unsigned n_siz = vsz - n_ofs;
2005     unsigned d = vec_full_reg_offset(s, a->rd);
2006     unsigned n = vec_full_reg_offset(s, a->rn);
2007     unsigned m = vec_full_reg_offset(s, a->rm);
2008
2009     /* Use host vector move insns if we have appropriate sizes
2010      * and no unfortunate overlap.
2011      */
2012     if (m != d
2013         && n_ofs == size_for_gvec(n_ofs)
2014         && n_siz == size_for_gvec(n_siz)
2015         && (d != n || n_siz <= n_ofs)) {
2016         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2017         if (n_ofs != 0) {
2018             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2019         }
2020     } else {
2021         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2022     }
2023     return true;
2024 }
2025
2026 /*
2027  *** SVE Permute - Unpredicated Group
2028  */
2029
2030 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2031 {
2032     if (sve_access_check(s)) {
2033         unsigned vsz = vec_full_reg_size(s);
2034         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2035                              vsz, vsz, cpu_reg_sp(s, a->rn));
2036     }
2037     return true;
2038 }
2039
2040 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2041 {
2042     if ((a->imm & 0x1f) == 0) {
2043         return false;
2044     }
2045     if (sve_access_check(s)) {
2046         unsigned vsz = vec_full_reg_size(s);
2047         unsigned dofs = vec_full_reg_offset(s, a->rd);
2048         unsigned esz, index;
2049
2050         esz = ctz32(a->imm);
2051         index = a->imm >> (esz + 1);
2052
2053         if ((index << esz) < vsz) {
2054             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2055             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2056         } else {
2057             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2058         }
2059     }
2060     return true;
2061 }
2062
2063 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2064 {
2065     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2066     static gen_insr * const fns[4] = {
2067         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2068         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2069     };
2070     unsigned vsz = vec_full_reg_size(s);
2071     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2072     TCGv_ptr t_zd = tcg_temp_new_ptr();
2073     TCGv_ptr t_zn = tcg_temp_new_ptr();
2074
2075     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2076     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2077
2078     fns[a->esz](t_zd, t_zn, val, desc);
2079
2080     tcg_temp_free_ptr(t_zd);
2081     tcg_temp_free_ptr(t_zn);
2082     tcg_temp_free_i32(desc);
2083 }
2084
2085 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2086 {
2087     if (sve_access_check(s)) {
2088         TCGv_i64 t = tcg_temp_new_i64();
2089         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2090         do_insr_i64(s, a, t);
2091         tcg_temp_free_i64(t);
2092     }
2093     return true;
2094 }
2095
2096 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097 {
2098     if (sve_access_check(s)) {
2099         do_insr_i64(s, a, cpu_reg(s, a->rm));
2100     }
2101     return true;
2102 }
2103
2104 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2105 {
2106     static gen_helper_gvec_2 * const fns[4] = {
2107         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2108         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2109     };
2110
2111     if (sve_access_check(s)) {
2112         unsigned vsz = vec_full_reg_size(s);
2113         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2114                            vec_full_reg_offset(s, a->rn),
2115                            vsz, vsz, 0, fns[a->esz]);
2116     }
2117     return true;
2118 }
2119
2120 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2121 {
2122     static gen_helper_gvec_3 * const fns[4] = {
2123         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2124         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2125     };
2126
2127     if (sve_access_check(s)) {
2128         unsigned vsz = vec_full_reg_size(s);
2129         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2130                            vec_full_reg_offset(s, a->rn),
2131                            vec_full_reg_offset(s, a->rm),
2132                            vsz, vsz, 0, fns[a->esz]);
2133     }
2134     return true;
2135 }
2136
2137 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2138 {
2139     static gen_helper_gvec_2 * const fns[4][2] = {
2140         { NULL, NULL },
2141         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2142         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2143         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2144     };
2145
2146     if (a->esz == 0) {
2147         return false;
2148     }
2149     if (sve_access_check(s)) {
2150         unsigned vsz = vec_full_reg_size(s);
2151         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2152                            vec_full_reg_offset(s, a->rn)
2153                            + (a->h ? vsz / 2 : 0),
2154                            vsz, vsz, 0, fns[a->esz][a->u]);
2155     }
2156     return true;
2157 }
2158
2159 /*
2160  *** SVE Permute - Predicates Group
2161  */
2162
2163 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2164                           gen_helper_gvec_3 *fn)
2165 {
2166     if (!sve_access_check(s)) {
2167         return true;
2168     }
2169
2170     unsigned vsz = pred_full_reg_size(s);
2171
2172     /* Predicate sizes may be smaller and cannot use simd_desc.
2173        We cannot round up, as we do elsewhere, because we need
2174        the exact size for ZIP2 and REV.  We retain the style for
2175        the other helpers for consistency.  */
2176     TCGv_ptr t_d = tcg_temp_new_ptr();
2177     TCGv_ptr t_n = tcg_temp_new_ptr();
2178     TCGv_ptr t_m = tcg_temp_new_ptr();
2179     TCGv_i32 t_desc;
2180     int desc;
2181
2182     desc = vsz - 2;
2183     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2184     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2185
2186     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2187     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2188     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2189     t_desc = tcg_const_i32(desc);
2190
2191     fn(t_d, t_n, t_m, t_desc);
2192
2193     tcg_temp_free_ptr(t_d);
2194     tcg_temp_free_ptr(t_n);
2195     tcg_temp_free_ptr(t_m);
2196     tcg_temp_free_i32(t_desc);
2197     return true;
2198 }
2199
2200 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2201                           gen_helper_gvec_2 *fn)
2202 {
2203     if (!sve_access_check(s)) {
2204         return true;
2205     }
2206
2207     unsigned vsz = pred_full_reg_size(s);
2208     TCGv_ptr t_d = tcg_temp_new_ptr();
2209     TCGv_ptr t_n = tcg_temp_new_ptr();
2210     TCGv_i32 t_desc;
2211     int desc;
2212
2213     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2214     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2215
2216     /* Predicate sizes may be smaller and cannot use simd_desc.
2217        We cannot round up, as we do elsewhere, because we need
2218        the exact size for ZIP2 and REV.  We retain the style for
2219        the other helpers for consistency.  */
2220
2221     desc = vsz - 2;
2222     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2223     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2224     t_desc = tcg_const_i32(desc);
2225
2226     fn(t_d, t_n, t_desc);
2227
2228     tcg_temp_free_i32(t_desc);
2229     tcg_temp_free_ptr(t_d);
2230     tcg_temp_free_ptr(t_n);
2231     return true;
2232 }
2233
2234 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2235 {
2236     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2237 }
2238
2239 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2240 {
2241     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2242 }
2243
2244 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2245 {
2246     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2247 }
2248
2249 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2250 {
2251     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2252 }
2253
2254 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2255 {
2256     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2257 }
2258
2259 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2260 {
2261     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2262 }
2263
2264 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2265 {
2266     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2267 }
2268
2269 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2270 {
2271     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2272 }
2273
2274 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2275 {
2276     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2277 }
2278
2279 /*
2280  *** SVE Permute - Interleaving Group
2281  */
2282
2283 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2284 {
2285     static gen_helper_gvec_3 * const fns[4] = {
2286         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2287         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2288     };
2289
2290     if (sve_access_check(s)) {
2291         unsigned vsz = vec_full_reg_size(s);
2292         unsigned high_ofs = high ? vsz / 2 : 0;
2293         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2294                            vec_full_reg_offset(s, a->rn) + high_ofs,
2295                            vec_full_reg_offset(s, a->rm) + high_ofs,
2296                            vsz, vsz, 0, fns[a->esz]);
2297     }
2298     return true;
2299 }
2300
2301 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2302                             gen_helper_gvec_3 *fn)
2303 {
2304     if (sve_access_check(s)) {
2305         unsigned vsz = vec_full_reg_size(s);
2306         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2307                            vec_full_reg_offset(s, a->rn),
2308                            vec_full_reg_offset(s, a->rm),
2309                            vsz, vsz, data, fn);
2310     }
2311     return true;
2312 }
2313
2314 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2315 {
2316     return do_zip(s, a, false);
2317 }
2318
2319 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2320 {
2321     return do_zip(s, a, true);
2322 }
2323
2324 static gen_helper_gvec_3 * const uzp_fns[4] = {
2325     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2326     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2327 };
2328
2329 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2330 {
2331     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2332 }
2333
2334 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2335 {
2336     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2337 }
2338
2339 static gen_helper_gvec_3 * const trn_fns[4] = {
2340     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2341     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2342 };
2343
2344 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2345 {
2346     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2347 }
2348
2349 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2350 {
2351     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2352 }
2353
2354 /*
2355  *** SVE Permute Vector - Predicated Group
2356  */
2357
2358 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2359 {
2360     static gen_helper_gvec_3 * const fns[4] = {
2361         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2362     };
2363     return do_zpz_ool(s, a, fns[a->esz]);
2364 }
2365
2366 /* Call the helper that computes the ARM LastActiveElement pseudocode
2367  * function, scaled by the element size.  This includes the not found
2368  * indication; e.g. not found for esz=3 is -8.
2369  */
2370 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2371 {
2372     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2373      * round up, as we do elsewhere, because we need the exact size.
2374      */
2375     TCGv_ptr t_p = tcg_temp_new_ptr();
2376     TCGv_i32 t_desc;
2377     unsigned vsz = pred_full_reg_size(s);
2378     unsigned desc;
2379
2380     desc = vsz - 2;
2381     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2382
2383     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2384     t_desc = tcg_const_i32(desc);
2385
2386     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2387
2388     tcg_temp_free_i32(t_desc);
2389     tcg_temp_free_ptr(t_p);
2390 }
2391
2392 /* Increment LAST to the offset of the next element in the vector,
2393  * wrapping around to 0.
2394  */
2395 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2396 {
2397     unsigned vsz = vec_full_reg_size(s);
2398
2399     tcg_gen_addi_i32(last, last, 1 << esz);
2400     if (is_power_of_2(vsz)) {
2401         tcg_gen_andi_i32(last, last, vsz - 1);
2402     } else {
2403         TCGv_i32 max = tcg_const_i32(vsz);
2404         TCGv_i32 zero = tcg_const_i32(0);
2405         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2406         tcg_temp_free_i32(max);
2407         tcg_temp_free_i32(zero);
2408     }
2409 }
2410
2411 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2412 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2413 {
2414     unsigned vsz = vec_full_reg_size(s);
2415
2416     if (is_power_of_2(vsz)) {
2417         tcg_gen_andi_i32(last, last, vsz - 1);
2418     } else {
2419         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2420         TCGv_i32 zero = tcg_const_i32(0);
2421         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2422         tcg_temp_free_i32(max);
2423         tcg_temp_free_i32(zero);
2424     }
2425 }
2426
2427 /* Load an unsigned element of ESZ from BASE+OFS.  */
2428 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2429 {
2430     TCGv_i64 r = tcg_temp_new_i64();
2431
2432     switch (esz) {
2433     case 0:
2434         tcg_gen_ld8u_i64(r, base, ofs);
2435         break;
2436     case 1:
2437         tcg_gen_ld16u_i64(r, base, ofs);
2438         break;
2439     case 2:
2440         tcg_gen_ld32u_i64(r, base, ofs);
2441         break;
2442     case 3:
2443         tcg_gen_ld_i64(r, base, ofs);
2444         break;
2445     default:
2446         g_assert_not_reached();
2447     }
2448     return r;
2449 }
2450
2451 /* Load an unsigned element of ESZ from RM[LAST].  */
2452 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2453                                  int rm, int esz)
2454 {
2455     TCGv_ptr p = tcg_temp_new_ptr();
2456     TCGv_i64 r;
2457
2458     /* Convert offset into vector into offset into ENV.
2459      * The final adjustment for the vector register base
2460      * is added via constant offset to the load.
2461      */
2462 #ifdef HOST_WORDS_BIGENDIAN
2463     /* Adjust for element ordering.  See vec_reg_offset.  */
2464     if (esz < 3) {
2465         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2466     }
2467 #endif
2468     tcg_gen_ext_i32_ptr(p, last);
2469     tcg_gen_add_ptr(p, p, cpu_env);
2470
2471     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2472     tcg_temp_free_ptr(p);
2473
2474     return r;
2475 }
2476
2477 /* Compute CLAST for a Zreg.  */
2478 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2479 {
2480     TCGv_i32 last;
2481     TCGLabel *over;
2482     TCGv_i64 ele;
2483     unsigned vsz, esz = a->esz;
2484
2485     if (!sve_access_check(s)) {
2486         return true;
2487     }
2488
2489     last = tcg_temp_local_new_i32();
2490     over = gen_new_label();
2491
2492     find_last_active(s, last, esz, a->pg);
2493
2494     /* There is of course no movcond for a 2048-bit vector,
2495      * so we must branch over the actual store.
2496      */
2497     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2498
2499     if (!before) {
2500         incr_last_active(s, last, esz);
2501     }
2502
2503     ele = load_last_active(s, last, a->rm, esz);
2504     tcg_temp_free_i32(last);
2505
2506     vsz = vec_full_reg_size(s);
2507     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2508     tcg_temp_free_i64(ele);
2509
2510     /* If this insn used MOVPRFX, we may need a second move.  */
2511     if (a->rd != a->rn) {
2512         TCGLabel *done = gen_new_label();
2513         tcg_gen_br(done);
2514
2515         gen_set_label(over);
2516         do_mov_z(s, a->rd, a->rn);
2517
2518         gen_set_label(done);
2519     } else {
2520         gen_set_label(over);
2521     }
2522     return true;
2523 }
2524
2525 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2526 {
2527     return do_clast_vector(s, a, false);
2528 }
2529
2530 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2531 {
2532     return do_clast_vector(s, a, true);
2533 }
2534
2535 /* Compute CLAST for a scalar.  */
2536 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2537                             bool before, TCGv_i64 reg_val)
2538 {
2539     TCGv_i32 last = tcg_temp_new_i32();
2540     TCGv_i64 ele, cmp, zero;
2541
2542     find_last_active(s, last, esz, pg);
2543
2544     /* Extend the original value of last prior to incrementing.  */
2545     cmp = tcg_temp_new_i64();
2546     tcg_gen_ext_i32_i64(cmp, last);
2547
2548     if (!before) {
2549         incr_last_active(s, last, esz);
2550     }
2551
2552     /* The conceit here is that while last < 0 indicates not found, after
2553      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2554      * from which we can load garbage.  We then discard the garbage with
2555      * a conditional move.
2556      */
2557     ele = load_last_active(s, last, rm, esz);
2558     tcg_temp_free_i32(last);
2559
2560     zero = tcg_const_i64(0);
2561     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2562
2563     tcg_temp_free_i64(zero);
2564     tcg_temp_free_i64(cmp);
2565     tcg_temp_free_i64(ele);
2566 }
2567
2568 /* Compute CLAST for a Vreg.  */
2569 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2570 {
2571     if (sve_access_check(s)) {
2572         int esz = a->esz;
2573         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2574         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2575
2576         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2577         write_fp_dreg(s, a->rd, reg);
2578         tcg_temp_free_i64(reg);
2579     }
2580     return true;
2581 }
2582
2583 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584 {
2585     return do_clast_fp(s, a, false);
2586 }
2587
2588 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589 {
2590     return do_clast_fp(s, a, true);
2591 }
2592
2593 /* Compute CLAST for a Xreg.  */
2594 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2595 {
2596     TCGv_i64 reg;
2597
2598     if (!sve_access_check(s)) {
2599         return true;
2600     }
2601
2602     reg = cpu_reg(s, a->rd);
2603     switch (a->esz) {
2604     case 0:
2605         tcg_gen_ext8u_i64(reg, reg);
2606         break;
2607     case 1:
2608         tcg_gen_ext16u_i64(reg, reg);
2609         break;
2610     case 2:
2611         tcg_gen_ext32u_i64(reg, reg);
2612         break;
2613     case 3:
2614         break;
2615     default:
2616         g_assert_not_reached();
2617     }
2618
2619     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2620     return true;
2621 }
2622
2623 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624 {
2625     return do_clast_general(s, a, false);
2626 }
2627
2628 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629 {
2630     return do_clast_general(s, a, true);
2631 }
2632
2633 /* Compute LAST for a scalar.  */
2634 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2635                                int pg, int rm, bool before)
2636 {
2637     TCGv_i32 last = tcg_temp_new_i32();
2638     TCGv_i64 ret;
2639
2640     find_last_active(s, last, esz, pg);
2641     if (before) {
2642         wrap_last_active(s, last, esz);
2643     } else {
2644         incr_last_active(s, last, esz);
2645     }
2646
2647     ret = load_last_active(s, last, rm, esz);
2648     tcg_temp_free_i32(last);
2649     return ret;
2650 }
2651
2652 /* Compute LAST for a Vreg.  */
2653 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2654 {
2655     if (sve_access_check(s)) {
2656         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2657         write_fp_dreg(s, a->rd, val);
2658         tcg_temp_free_i64(val);
2659     }
2660     return true;
2661 }
2662
2663 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664 {
2665     return do_last_fp(s, a, false);
2666 }
2667
2668 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669 {
2670     return do_last_fp(s, a, true);
2671 }
2672
2673 /* Compute LAST for a Xreg.  */
2674 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2675 {
2676     if (sve_access_check(s)) {
2677         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2678         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2679         tcg_temp_free_i64(val);
2680     }
2681     return true;
2682 }
2683
2684 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686     return do_last_general(s, a, false);
2687 }
2688
2689 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691     return do_last_general(s, a, true);
2692 }
2693
2694 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2695 {
2696     if (sve_access_check(s)) {
2697         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2698     }
2699     return true;
2700 }
2701
2702 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2703 {
2704     if (sve_access_check(s)) {
2705         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2706         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2707         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2708         tcg_temp_free_i64(t);
2709     }
2710     return true;
2711 }
2712
2713 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2714 {
2715     static gen_helper_gvec_3 * const fns[4] = {
2716         NULL,
2717         gen_helper_sve_revb_h,
2718         gen_helper_sve_revb_s,
2719         gen_helper_sve_revb_d,
2720     };
2721     return do_zpz_ool(s, a, fns[a->esz]);
2722 }
2723
2724 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2725 {
2726     static gen_helper_gvec_3 * const fns[4] = {
2727         NULL,
2728         NULL,
2729         gen_helper_sve_revh_s,
2730         gen_helper_sve_revh_d,
2731     };
2732     return do_zpz_ool(s, a, fns[a->esz]);
2733 }
2734
2735 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2736 {
2737     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2738 }
2739
2740 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2741 {
2742     static gen_helper_gvec_3 * const fns[4] = {
2743         gen_helper_sve_rbit_b,
2744         gen_helper_sve_rbit_h,
2745         gen_helper_sve_rbit_s,
2746         gen_helper_sve_rbit_d,
2747     };
2748     return do_zpz_ool(s, a, fns[a->esz]);
2749 }
2750
2751 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2752 {
2753     if (sve_access_check(s)) {
2754         unsigned vsz = vec_full_reg_size(s);
2755         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2756                            vec_full_reg_offset(s, a->rn),
2757                            vec_full_reg_offset(s, a->rm),
2758                            pred_full_reg_offset(s, a->pg),
2759                            vsz, vsz, a->esz, gen_helper_sve_splice);
2760     }
2761     return true;
2762 }
2763
2764 /*
2765  *** SVE Integer Compare - Vectors Group
2766  */
2767
2768 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2769                           gen_helper_gvec_flags_4 *gen_fn)
2770 {
2771     TCGv_ptr pd, zn, zm, pg;
2772     unsigned vsz;
2773     TCGv_i32 t;
2774
2775     if (gen_fn == NULL) {
2776         return false;
2777     }
2778     if (!sve_access_check(s)) {
2779         return true;
2780     }
2781
2782     vsz = vec_full_reg_size(s);
2783     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2784     pd = tcg_temp_new_ptr();
2785     zn = tcg_temp_new_ptr();
2786     zm = tcg_temp_new_ptr();
2787     pg = tcg_temp_new_ptr();
2788
2789     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2790     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2791     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2792     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2793
2794     gen_fn(t, pd, zn, zm, pg, t);
2795
2796     tcg_temp_free_ptr(pd);
2797     tcg_temp_free_ptr(zn);
2798     tcg_temp_free_ptr(zm);
2799     tcg_temp_free_ptr(pg);
2800
2801     do_pred_flags(t);
2802
2803     tcg_temp_free_i32(t);
2804     return true;
2805 }
2806
2807 #define DO_PPZZ(NAME, name) \
2808 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2809                                 uint32_t insn)                            \
2810 {                                                                         \
2811     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2812         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2813         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2814     };                                                                    \
2815     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2816 }
2817
2818 DO_PPZZ(CMPEQ, cmpeq)
2819 DO_PPZZ(CMPNE, cmpne)
2820 DO_PPZZ(CMPGT, cmpgt)
2821 DO_PPZZ(CMPGE, cmpge)
2822 DO_PPZZ(CMPHI, cmphi)
2823 DO_PPZZ(CMPHS, cmphs)
2824
2825 #undef DO_PPZZ
2826
2827 #define DO_PPZW(NAME, name) \
2828 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2829                                 uint32_t insn)                            \
2830 {                                                                         \
2831     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2832         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2833         gen_helper_sve_##name##_ppzw_s, NULL                              \
2834     };                                                                    \
2835     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2836 }
2837
2838 DO_PPZW(CMPEQ, cmpeq)
2839 DO_PPZW(CMPNE, cmpne)
2840 DO_PPZW(CMPGT, cmpgt)
2841 DO_PPZW(CMPGE, cmpge)
2842 DO_PPZW(CMPHI, cmphi)
2843 DO_PPZW(CMPHS, cmphs)
2844 DO_PPZW(CMPLT, cmplt)
2845 DO_PPZW(CMPLE, cmple)
2846 DO_PPZW(CMPLO, cmplo)
2847 DO_PPZW(CMPLS, cmpls)
2848
2849 #undef DO_PPZW
2850
2851 /*
2852  *** SVE Integer Compare - Immediate Groups
2853  */
2854
2855 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2856                           gen_helper_gvec_flags_3 *gen_fn)
2857 {
2858     TCGv_ptr pd, zn, pg;
2859     unsigned vsz;
2860     TCGv_i32 t;
2861
2862     if (gen_fn == NULL) {
2863         return false;
2864     }
2865     if (!sve_access_check(s)) {
2866         return true;
2867     }
2868
2869     vsz = vec_full_reg_size(s);
2870     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2871     pd = tcg_temp_new_ptr();
2872     zn = tcg_temp_new_ptr();
2873     pg = tcg_temp_new_ptr();
2874
2875     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2876     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2877     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2878
2879     gen_fn(t, pd, zn, pg, t);
2880
2881     tcg_temp_free_ptr(pd);
2882     tcg_temp_free_ptr(zn);
2883     tcg_temp_free_ptr(pg);
2884
2885     do_pred_flags(t);
2886
2887     tcg_temp_free_i32(t);
2888     return true;
2889 }
2890
2891 #define DO_PPZI(NAME, name) \
2892 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
2893                                 uint32_t insn)                            \
2894 {                                                                         \
2895     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2896         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2897         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2898     };                                                                    \
2899     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2900 }
2901
2902 DO_PPZI(CMPEQ, cmpeq)
2903 DO_PPZI(CMPNE, cmpne)
2904 DO_PPZI(CMPGT, cmpgt)
2905 DO_PPZI(CMPGE, cmpge)
2906 DO_PPZI(CMPHI, cmphi)
2907 DO_PPZI(CMPHS, cmphs)
2908 DO_PPZI(CMPLT, cmplt)
2909 DO_PPZI(CMPLE, cmple)
2910 DO_PPZI(CMPLO, cmplo)
2911 DO_PPZI(CMPLS, cmpls)
2912
2913 #undef DO_PPZI
2914
2915 /*
2916  *** SVE Partition Break Group
2917  */
2918
2919 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2920                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2921 {
2922     if (!sve_access_check(s)) {
2923         return true;
2924     }
2925
2926     unsigned vsz = pred_full_reg_size(s);
2927
2928     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2929     TCGv_ptr d = tcg_temp_new_ptr();
2930     TCGv_ptr n = tcg_temp_new_ptr();
2931     TCGv_ptr m = tcg_temp_new_ptr();
2932     TCGv_ptr g = tcg_temp_new_ptr();
2933     TCGv_i32 t = tcg_const_i32(vsz - 2);
2934
2935     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2936     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2937     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2938     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2939
2940     if (a->s) {
2941         fn_s(t, d, n, m, g, t);
2942         do_pred_flags(t);
2943     } else {
2944         fn(d, n, m, g, t);
2945     }
2946     tcg_temp_free_ptr(d);
2947     tcg_temp_free_ptr(n);
2948     tcg_temp_free_ptr(m);
2949     tcg_temp_free_ptr(g);
2950     tcg_temp_free_i32(t);
2951     return true;
2952 }
2953
2954 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2955                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2956 {
2957     if (!sve_access_check(s)) {
2958         return true;
2959     }
2960
2961     unsigned vsz = pred_full_reg_size(s);
2962
2963     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2964     TCGv_ptr d = tcg_temp_new_ptr();
2965     TCGv_ptr n = tcg_temp_new_ptr();
2966     TCGv_ptr g = tcg_temp_new_ptr();
2967     TCGv_i32 t = tcg_const_i32(vsz - 2);
2968
2969     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2970     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2971     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2972
2973     if (a->s) {
2974         fn_s(t, d, n, g, t);
2975         do_pred_flags(t);
2976     } else {
2977         fn(d, n, g, t);
2978     }
2979     tcg_temp_free_ptr(d);
2980     tcg_temp_free_ptr(n);
2981     tcg_temp_free_ptr(g);
2982     tcg_temp_free_i32(t);
2983     return true;
2984 }
2985
2986 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2987 {
2988     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2989 }
2990
2991 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2992 {
2993     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2994 }
2995
2996 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2997 {
2998     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2999 }
3000
3001 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3002 {
3003     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3004 }
3005
3006 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3007 {
3008     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3009 }
3010
3011 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3012 {
3013     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3014 }
3015
3016 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3017 {
3018     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3019 }
3020
3021 /*
3022  *** SVE Predicate Count Group
3023  */
3024
3025 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3026 {
3027     unsigned psz = pred_full_reg_size(s);
3028
3029     if (psz <= 8) {
3030         uint64_t psz_mask;
3031
3032         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3033         if (pn != pg) {
3034             TCGv_i64 g = tcg_temp_new_i64();
3035             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3036             tcg_gen_and_i64(val, val, g);
3037             tcg_temp_free_i64(g);
3038         }
3039
3040         /* Reduce the pred_esz_masks value simply to reduce the
3041          * size of the code generated here.
3042          */
3043         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3044         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3045
3046         tcg_gen_ctpop_i64(val, val);
3047     } else {
3048         TCGv_ptr t_pn = tcg_temp_new_ptr();
3049         TCGv_ptr t_pg = tcg_temp_new_ptr();
3050         unsigned desc;
3051         TCGv_i32 t_desc;
3052
3053         desc = psz - 2;
3054         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3055
3056         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3057         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3058         t_desc = tcg_const_i32(desc);
3059
3060         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3061         tcg_temp_free_ptr(t_pn);
3062         tcg_temp_free_ptr(t_pg);
3063         tcg_temp_free_i32(t_desc);
3064     }
3065 }
3066
3067 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3068 {
3069     if (sve_access_check(s)) {
3070         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3071     }
3072     return true;
3073 }
3074
3075 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3076                             uint32_t insn)
3077 {
3078     if (sve_access_check(s)) {
3079         TCGv_i64 reg = cpu_reg(s, a->rd);
3080         TCGv_i64 val = tcg_temp_new_i64();
3081
3082         do_cntp(s, val, a->esz, a->pg, a->pg);
3083         if (a->d) {
3084             tcg_gen_sub_i64(reg, reg, val);
3085         } else {
3086             tcg_gen_add_i64(reg, reg, val);
3087         }
3088         tcg_temp_free_i64(val);
3089     }
3090     return true;
3091 }
3092
3093 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3094                             uint32_t insn)
3095 {
3096     if (a->esz == 0) {
3097         return false;
3098     }
3099     if (sve_access_check(s)) {
3100         unsigned vsz = vec_full_reg_size(s);
3101         TCGv_i64 val = tcg_temp_new_i64();
3102         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3103
3104         do_cntp(s, val, a->esz, a->pg, a->pg);
3105         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3106                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3107     }
3108     return true;
3109 }
3110
3111 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3112                                 uint32_t insn)
3113 {
3114     if (sve_access_check(s)) {
3115         TCGv_i64 reg = cpu_reg(s, a->rd);
3116         TCGv_i64 val = tcg_temp_new_i64();
3117
3118         do_cntp(s, val, a->esz, a->pg, a->pg);
3119         do_sat_addsub_32(reg, val, a->u, a->d);
3120     }
3121     return true;
3122 }
3123
3124 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3125                                 uint32_t insn)
3126 {
3127     if (sve_access_check(s)) {
3128         TCGv_i64 reg = cpu_reg(s, a->rd);
3129         TCGv_i64 val = tcg_temp_new_i64();
3130
3131         do_cntp(s, val, a->esz, a->pg, a->pg);
3132         do_sat_addsub_64(reg, val, a->u, a->d);
3133     }
3134     return true;
3135 }
3136
3137 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3138                              uint32_t insn)
3139 {
3140     if (a->esz == 0) {
3141         return false;
3142     }
3143     if (sve_access_check(s)) {
3144         TCGv_i64 val = tcg_temp_new_i64();
3145         do_cntp(s, val, a->esz, a->pg, a->pg);
3146         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3147     }
3148     return true;
3149 }
3150
3151 /*
3152  *** SVE Integer Compare Scalars Group
3153  */
3154
3155 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3156 {
3157     if (!sve_access_check(s)) {
3158         return true;
3159     }
3160
3161     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3162     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3163     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3164     TCGv_i64 cmp = tcg_temp_new_i64();
3165
3166     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3167     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3168     tcg_temp_free_i64(cmp);
3169
3170     /* VF = !NF & !CF.  */
3171     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3172     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3173
3174     /* Both NF and VF actually look at bit 31.  */
3175     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3176     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3177     return true;
3178 }
3179
/*
 * WHILE: build predicate rd from a comparison of general registers
 * rn and rm.
 *
 * The translator computes, inline, the number of iterations for which
 * the condition selected by a->u (unsigned) and a->eq (inclusive) holds,
 * and passes that count to the sve_while helper together with a
 * descriptor containing the element size.  The helper's 32-bit result
 * is forwarded to do_pred_flags().
 *
 * Always returns true, including when the SVE access check fails.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    /* For 32-bit operands, widen to 64 bits matching the signedness.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
     * aren't that large, so any value >= predicate size is sufficient.
     */
    tcg_gen_sub_i64(t0, op1, op0);

    /* t0 = MIN(op1 - op0, vsz).  */
    tcg_gen_movi_i64(t1, vsz);
    tcg_gen_umin_i64(t0, t0, t1);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);
    }

    /* t0 = (condition true ? t0 : 0).  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    /* The count was clamped above, so its low 32 bits hold it in full.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    /*
     * Descriptor: (vsz / 8) - 2 in the low bits, with the element size
     * in the 2-bit field at SIMD_DATA_SHIFT.
     * NOTE(review): vsz / 8 is presumably the predicate size in bytes,
     * matching the "size - 2" form used by do_brk2/do_brk3 -- confirm.
     */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* t2 is both the count passed in and the flags result coming back.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3249
3250 /*
3251  *** SVE Integer Wide Immediate - Unpredicated Group
3252  */
3253
3254 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3255 {
3256     if (a->esz == 0) {
3257         return false;
3258     }
3259     if (sve_access_check(s)) {
3260         unsigned vsz = vec_full_reg_size(s);
3261         int dofs = vec_full_reg_offset(s, a->rd);
3262         uint64_t imm;
3263
3264         /* Decode the VFP immediate.  */
3265         imm = vfp_expand_imm(a->esz, a->imm);
3266         imm = dup_const(a->esz, imm);
3267
3268         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3269     }
3270     return true;
3271 }
3272
3273 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3274 {
3275     if (a->esz == 0 && extract32(insn, 13, 1)) {
3276         return false;
3277     }
3278     if (sve_access_check(s)) {
3279         unsigned vsz = vec_full_reg_size(s);
3280         int dofs = vec_full_reg_offset(s, a->rd);
3281
3282         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3283     }
3284     return true;
3285 }
3286
3287 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3288 {
3289     if (a->esz == 0 && extract32(insn, 13, 1)) {
3290         return false;
3291     }
3292     if (sve_access_check(s)) {
3293         unsigned vsz = vec_full_reg_size(s);
3294         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3295                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3296     }
3297     return true;
3298 }
3299
3300 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3301 {
3302     a->imm = -a->imm;
3303     return trans_ADD_zzi(s, a, insn);
3304 }
3305
3306 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3307 {
3308     static const GVecGen2s op[4] = {
3309         { .fni8 = tcg_gen_vec_sub8_i64,
3310           .fniv = tcg_gen_sub_vec,
3311           .fno = gen_helper_sve_subri_b,
3312           .opc = INDEX_op_sub_vec,
3313           .vece = MO_8,
3314           .scalar_first = true },
3315         { .fni8 = tcg_gen_vec_sub16_i64,
3316           .fniv = tcg_gen_sub_vec,
3317           .fno = gen_helper_sve_subri_h,
3318           .opc = INDEX_op_sub_vec,
3319           .vece = MO_16,
3320           .scalar_first = true },
3321         { .fni4 = tcg_gen_sub_i32,
3322           .fniv = tcg_gen_sub_vec,
3323           .fno = gen_helper_sve_subri_s,
3324           .opc = INDEX_op_sub_vec,
3325           .vece = MO_32,
3326           .scalar_first = true },
3327         { .fni8 = tcg_gen_sub_i64,
3328           .fniv = tcg_gen_sub_vec,
3329           .fno = gen_helper_sve_subri_d,
3330           .opc = INDEX_op_sub_vec,
3331           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332           .vece = MO_64,
3333           .scalar_first = true }
3334     };
3335
3336     if (a->esz == 0 && extract32(insn, 13, 1)) {
3337         return false;
3338     }
3339     if (sve_access_check(s)) {
3340         unsigned vsz = vec_full_reg_size(s);
3341         TCGv_i64 c = tcg_const_i64(a->imm);
3342         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343                         vec_full_reg_offset(s, a->rn),
3344                         vsz, vsz, c, &op[a->esz]);
3345         tcg_temp_free_i64(c);
3346     }
3347     return true;
3348 }
3349
3350 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3351 {
3352     if (sve_access_check(s)) {
3353         unsigned vsz = vec_full_reg_size(s);
3354         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356     }
3357     return true;
3358 }
3359
3360 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3361                        bool u, bool d)
3362 {
3363     if (a->esz == 0 && extract32(insn, 13, 1)) {
3364         return false;
3365     }
3366     if (sve_access_check(s)) {
3367         TCGv_i64 val = tcg_const_i64(a->imm);
3368         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3369         tcg_temp_free_i64(val);
3370     }
3371     return true;
3372 }
3373
3374 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3375 {
3376     return do_zzi_sat(s, a, insn, false, false);
3377 }
3378
3379 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3380 {
3381     return do_zzi_sat(s, a, insn, true, false);
3382 }
3383
3384 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3385 {
3386     return do_zzi_sat(s, a, insn, false, true);
3387 }
3388
3389 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3390 {
3391     return do_zzi_sat(s, a, insn, true, true);
3392 }
3393
3394 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3395 {
3396     if (sve_access_check(s)) {
3397         unsigned vsz = vec_full_reg_size(s);
3398         TCGv_i64 c = tcg_const_i64(a->imm);
3399
3400         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3401                             vec_full_reg_offset(s, a->rn),
3402                             c, vsz, vsz, 0, fn);
3403         tcg_temp_free_i64(c);
3404     }
3405     return true;
3406 }
3407
3408 #define DO_ZZI(NAME, name) \
3409 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
3410                                uint32_t insn)                           \
3411 {                                                                       \
3412     static gen_helper_gvec_2i * const fns[4] = {                        \
3413         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3414         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3415     };                                                                  \
3416     return do_zzi_ool(s, a, fns[a->esz]);                               \
3417 }
3418
3419 DO_ZZI(SMAX, smax)
3420 DO_ZZI(UMAX, umax)
3421 DO_ZZI(SMIN, smin)
3422 DO_ZZI(UMIN, umin)
3423
3424 #undef DO_ZZI
3425
3426 /*
3427  *** SVE Floating Point Multiply-Add Indexed Group
3428  */
3429
3430 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3431 {
3432     static gen_helper_gvec_4_ptr * const fns[3] = {
3433         gen_helper_gvec_fmla_idx_h,
3434         gen_helper_gvec_fmla_idx_s,
3435         gen_helper_gvec_fmla_idx_d,
3436     };
3437
3438     if (sve_access_check(s)) {
3439         unsigned vsz = vec_full_reg_size(s);
3440         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3441         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3442                            vec_full_reg_offset(s, a->rn),
3443                            vec_full_reg_offset(s, a->rm),
3444                            vec_full_reg_offset(s, a->ra),
3445                            status, vsz, vsz, (a->index << 1) | a->sub,
3446                            fns[a->esz - 1]);
3447         tcg_temp_free_ptr(status);
3448     }
3449     return true;
3450 }
3451
3452 /*
3453  *** SVE Floating Point Multiply Indexed Group
3454  */
3455
3456 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3457 {
3458     static gen_helper_gvec_3_ptr * const fns[3] = {
3459         gen_helper_gvec_fmul_idx_h,
3460         gen_helper_gvec_fmul_idx_s,
3461         gen_helper_gvec_fmul_idx_d,
3462     };
3463
3464     if (sve_access_check(s)) {
3465         unsigned vsz = vec_full_reg_size(s);
3466         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3467         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3468                            vec_full_reg_offset(s, a->rn),
3469                            vec_full_reg_offset(s, a->rm),
3470                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3471         tcg_temp_free_ptr(status);
3472     }
3473     return true;
3474 }
3475
3476 /*
3477  *** SVE Floating Point Fast Reduction Group
3478  */
3479
/*
 * Signature of the helper generators used by do_reduce(): 64-bit
 * result, pointer to the source vector, pointer to the governing
 * predicate, pointer to the FP status, and the operation descriptor.
 */
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);
3482
3483 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3484                       gen_helper_fp_reduce *fn)
3485 {
3486     unsigned vsz = vec_full_reg_size(s);
3487     unsigned p2vsz = pow2ceil(vsz);
3488     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3489     TCGv_ptr t_zn, t_pg, status;
3490     TCGv_i64 temp;
3491
3492     temp = tcg_temp_new_i64();
3493     t_zn = tcg_temp_new_ptr();
3494     t_pg = tcg_temp_new_ptr();
3495
3496     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3497     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3498     status = get_fpstatus_ptr(a->esz == MO_16);
3499
3500     fn(temp, t_zn, t_pg, status, t_desc);
3501     tcg_temp_free_ptr(t_zn);
3502     tcg_temp_free_ptr(t_pg);
3503     tcg_temp_free_ptr(status);
3504     tcg_temp_free_i32(t_desc);
3505
3506     write_fp_dreg(s, a->rd, temp);
3507     tcg_temp_free_i64(temp);
3508 }
3509
3510 #define DO_VPZ(NAME, name) \
3511 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3512 {                                                                        \
3513     static gen_helper_fp_reduce * const fns[3] = {                       \
3514         gen_helper_sve_##name##_h,                                       \
3515         gen_helper_sve_##name##_s,                                       \
3516         gen_helper_sve_##name##_d,                                       \
3517     };                                                                   \
3518     if (a->esz == 0) {                                                   \
3519         return false;                                                    \
3520     }                                                                    \
3521     if (sve_access_check(s)) {                                           \
3522         do_reduce(s, a, fns[a->esz - 1]);                                \
3523     }                                                                    \
3524     return true;                                                         \
3525 }
3526
3527 DO_VPZ(FADDV, faddv)
3528 DO_VPZ(FMINNMV, fminnmv)
3529 DO_VPZ(FMAXNMV, fmaxnmv)
3530 DO_VPZ(FMINV, fminv)
3531 DO_VPZ(FMAXV, fmaxv)
3532
3533 /*
3534  *** SVE Floating Point Unary Operations - Unpredicated Group
3535  */
3536
3537 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3538 {
3539     unsigned vsz = vec_full_reg_size(s);
3540     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3541
3542     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3543                        vec_full_reg_offset(s, a->rn),
3544                        status, vsz, vsz, 0, fn);
3545     tcg_temp_free_ptr(status);
3546 }
3547
3548 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3549 {
3550     static gen_helper_gvec_2_ptr * const fns[3] = {
3551         gen_helper_gvec_frecpe_h,
3552         gen_helper_gvec_frecpe_s,
3553         gen_helper_gvec_frecpe_d,
3554     };
3555     if (a->esz == 0) {
3556         return false;
3557     }
3558     if (sve_access_check(s)) {
3559         do_zz_fp(s, a, fns[a->esz - 1]);
3560     }
3561     return true;
3562 }
3563
3564 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3565 {
3566     static gen_helper_gvec_2_ptr * const fns[3] = {
3567         gen_helper_gvec_frsqrte_h,
3568         gen_helper_gvec_frsqrte_s,
3569         gen_helper_gvec_frsqrte_d,
3570     };
3571     if (a->esz == 0) {
3572         return false;
3573     }
3574     if (sve_access_check(s)) {
3575         do_zz_fp(s, a, fns[a->esz - 1]);
3576     }
3577     return true;
3578 }
3579
3580 /*
3581  *** SVE Floating Point Compare with Zero Group
3582  */
3583
3584 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3585                       gen_helper_gvec_3_ptr *fn)
3586 {
3587     unsigned vsz = vec_full_reg_size(s);
3588     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3589
3590     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3591                        vec_full_reg_offset(s, a->rn),
3592                        pred_full_reg_offset(s, a->pg),
3593                        status, vsz, vsz, 0, fn);
3594     tcg_temp_free_ptr(status);
3595 }
3596
3597 #define DO_PPZ(NAME, name) \
3598 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3599 {                                                                 \
3600     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3601         gen_helper_sve_##name##_h,                                \
3602         gen_helper_sve_##name##_s,                                \
3603         gen_helper_sve_##name##_d,                                \
3604     };                                                            \
3605     if (a->esz == 0) {                                            \
3606         return false;                                             \
3607     }                                                             \
3608     if (sve_access_check(s)) {                                    \
3609         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3610     }                                                             \
3611     return true;                                                  \
3612 }
3613
3614 DO_PPZ(FCMGE_ppz0, fcmge0)
3615 DO_PPZ(FCMGT_ppz0, fcmgt0)
3616 DO_PPZ(FCMLE_ppz0, fcmle0)
3617 DO_PPZ(FCMLT_ppz0, fcmlt0)
3618 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3619 DO_PPZ(FCMNE_ppz0, fcmne0)
3620
3621 #undef DO_PPZ
3622
3623 /*
3624  *** SVE floating-point trig multiply-add coefficient
3625  */
3626
3627 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3628 {
3629     static gen_helper_gvec_3_ptr * const fns[3] = {
3630         gen_helper_sve_ftmad_h,
3631         gen_helper_sve_ftmad_s,
3632         gen_helper_sve_ftmad_d,
3633     };
3634
3635     if (a->esz == 0) {
3636         return false;
3637     }
3638     if (sve_access_check(s)) {
3639         unsigned vsz = vec_full_reg_size(s);
3640         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3641         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3642                            vec_full_reg_offset(s, a->rn),
3643                            vec_full_reg_offset(s, a->rm),
3644                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3645         tcg_temp_free_ptr(status);
3646     }
3647     return true;
3648 }
3649
3650 /*
3651  *** SVE Floating Point Accumulating Reduction Group
3652  */
3653
3654 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3655 {
3656     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3657                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3658     static fadda_fn * const fns[3] = {
3659         gen_helper_sve_fadda_h,
3660         gen_helper_sve_fadda_s,
3661         gen_helper_sve_fadda_d,
3662     };
3663     unsigned vsz = vec_full_reg_size(s);
3664     TCGv_ptr t_rm, t_pg, t_fpst;
3665     TCGv_i64 t_val;
3666     TCGv_i32 t_desc;
3667
3668     if (a->esz == 0) {
3669         return false;
3670     }
3671     if (!sve_access_check(s)) {
3672         return true;
3673     }
3674
3675     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3676     t_rm = tcg_temp_new_ptr();
3677     t_pg = tcg_temp_new_ptr();
3678     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3679     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3680     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3681     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3682
3683     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3684
3685     tcg_temp_free_i32(t_desc);
3686     tcg_temp_free_ptr(t_fpst);
3687     tcg_temp_free_ptr(t_pg);
3688     tcg_temp_free_ptr(t_rm);
3689
3690     write_fp_dreg(s, a->rd, t_val);
3691     tcg_temp_free_i64(t_val);
3692     return true;
3693 }
3694
3695 /*
3696  *** SVE Floating Point Arithmetic - Unpredicated Group
3697  */
3698
3699 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3700                       gen_helper_gvec_3_ptr *fn)
3701 {
3702     if (fn == NULL) {
3703         return false;
3704     }
3705     if (sve_access_check(s)) {
3706         unsigned vsz = vec_full_reg_size(s);
3707         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3708         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3709                            vec_full_reg_offset(s, a->rn),
3710                            vec_full_reg_offset(s, a->rm),
3711                            status, vsz, vsz, 0, fn);
3712         tcg_temp_free_ptr(status);
3713     }
3714     return true;
3715 }
3716
3717
3718 #define DO_FP3(NAME, name) \
3719 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3720 {                                                                   \
3721     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3722         NULL, gen_helper_gvec_##name##_h,                           \
3723         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3724     };                                                              \
3725     return do_zzz_fp(s, a, fns[a->esz]);                            \
3726 }
3727
3728 DO_FP3(FADD_zzz, fadd)
3729 DO_FP3(FSUB_zzz, fsub)
3730 DO_FP3(FMUL_zzz, fmul)
3731 DO_FP3(FTSMUL, ftsmul)
3732 DO_FP3(FRECPS, recps)
3733 DO_FP3(FRSQRTS, rsqrts)
3734
3735 #undef DO_FP3
3736
3737 /*
3738  *** SVE Floating Point Arithmetic - Predicated Group
3739  */
3740
3741 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3742                        gen_helper_gvec_4_ptr *fn)
3743 {
3744     if (fn == NULL) {
3745         return false;
3746     }
3747     if (sve_access_check(s)) {
3748         unsigned vsz = vec_full_reg_size(s);
3749         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3750         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3751                            vec_full_reg_offset(s, a->rn),
3752                            vec_full_reg_offset(s, a->rm),
3753                            pred_full_reg_offset(s, a->pg),
3754                            status, vsz, vsz, 0, fn);
3755         tcg_temp_free_ptr(status);
3756     }
3757     return true;
3758 }
3759
3760 #define DO_FP3(NAME, name) \
3761 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3762 {                                                                   \
3763     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3764         NULL, gen_helper_sve_##name##_h,                            \
3765         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3766     };                                                              \
3767     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3768 }
3769
3770 DO_FP3(FADD_zpzz, fadd)
3771 DO_FP3(FSUB_zpzz, fsub)
3772 DO_FP3(FMUL_zpzz, fmul)
3773 DO_FP3(FMIN_zpzz, fmin)
3774 DO_FP3(FMAX_zpzz, fmax)
3775 DO_FP3(FMINNM_zpzz, fminnum)
3776 DO_FP3(FMAXNM_zpzz, fmaxnum)
3777 DO_FP3(FABD, fabd)
3778 DO_FP3(FSCALE, fscalbn)
3779 DO_FP3(FDIV, fdiv)
3780 DO_FP3(FMULX, fmulx)
3781
3782 #undef DO_FP3
3783
3784 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3785                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3786
3787 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3788                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3789 {
3790     unsigned vsz = vec_full_reg_size(s);
3791     TCGv_ptr t_zd, t_zn, t_pg, status;
3792     TCGv_i32 desc;
3793
3794     t_zd = tcg_temp_new_ptr();
3795     t_zn = tcg_temp_new_ptr();
3796     t_pg = tcg_temp_new_ptr();
3797     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3798     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3799     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3800
3801     status = get_fpstatus_ptr(is_fp16);
3802     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3803     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3804
3805     tcg_temp_free_i32(desc);
3806     tcg_temp_free_ptr(status);
3807     tcg_temp_free_ptr(t_pg);
3808     tcg_temp_free_ptr(t_zn);
3809     tcg_temp_free_ptr(t_zd);
3810 }
3811
3812 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3813                       gen_helper_sve_fp2scalar *fn)
3814 {
3815     TCGv_i64 temp = tcg_const_i64(imm);
3816     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3817     tcg_temp_free_i64(temp);
3818 }
3819
3820 #define DO_FP_IMM(NAME, name, const0, const1) \
3821 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
3822                                 uint32_t insn)                            \
3823 {                                                                         \
3824     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3825         gen_helper_sve_##name##_h,                                        \
3826         gen_helper_sve_##name##_s,                                        \
3827         gen_helper_sve_##name##_d                                         \
3828     };                                                                    \
3829     static uint64_t const val[3][2] = {                                   \
3830         { float16_##const0, float16_##const1 },                           \
3831         { float32_##const0, float32_##const1 },                           \
3832         { float64_##const0, float64_##const1 },                           \
3833     };                                                                    \
3834     if (a->esz == 0) {                                                    \
3835         return false;                                                     \
3836     }                                                                     \
3837     if (sve_access_check(s)) {                                            \
3838         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3839     }                                                                     \
3840     return true;                                                          \
3841 }
3842
3843 #define float16_two  make_float16(0x4000)
3844 #define float32_two  make_float32(0x40000000)
3845 #define float64_two  make_float64(0x4000000000000000ULL)
3846
3847 DO_FP_IMM(FADD, fadds, half, one)
3848 DO_FP_IMM(FSUB, fsubs, half, one)
3849 DO_FP_IMM(FMUL, fmuls, half, two)
3850 DO_FP_IMM(FSUBR, fsubrs, half, one)
3851 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3852 DO_FP_IMM(FMINNM, fminnms, zero, one)
3853 DO_FP_IMM(FMAX, fmaxs, zero, one)
3854 DO_FP_IMM(FMIN, fmins, zero, one)
3855
3856 #undef DO_FP_IMM
3857
3858 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3859                       gen_helper_gvec_4_ptr *fn)
3860 {
3861     if (fn == NULL) {
3862         return false;
3863     }
3864     if (sve_access_check(s)) {
3865         unsigned vsz = vec_full_reg_size(s);
3866         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3867         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3868                            vec_full_reg_offset(s, a->rn),
3869                            vec_full_reg_offset(s, a->rm),
3870                            pred_full_reg_offset(s, a->pg),
3871                            status, vsz, vsz, 0, fn);
3872         tcg_temp_free_ptr(status);
3873     }
3874     return true;
3875 }
3876
3877 #define DO_FPCMP(NAME, name) \
3878 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
3879                                 uint32_t insn)                        \
3880 {                                                                     \
3881     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3882         NULL, gen_helper_sve_##name##_h,                              \
3883         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3884     };                                                                \
3885     return do_fp_cmp(s, a, fns[a->esz]);                              \
3886 }
3887
3888 DO_FPCMP(FCMGE, fcmge)
3889 DO_FPCMP(FCMGT, fcmgt)
3890 DO_FPCMP(FCMEQ, fcmeq)
3891 DO_FPCMP(FCMNE, fcmne)
3892 DO_FPCMP(FCMUO, fcmuo)
3893 DO_FPCMP(FACGE, facge)
3894 DO_FPCMP(FACGT, facgt)
3895
3896 #undef DO_FPCMP
3897
3898 static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3899 {
3900     static gen_helper_gvec_4_ptr * const fns[3] = {
3901         gen_helper_sve_fcadd_h,
3902         gen_helper_sve_fcadd_s,
3903         gen_helper_sve_fcadd_d
3904     };
3905
3906     if (a->esz == 0) {
3907         return false;
3908     }
3909     if (sve_access_check(s)) {
3910         unsigned vsz = vec_full_reg_size(s);
3911         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3912         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3913                            vec_full_reg_offset(s, a->rn),
3914                            vec_full_reg_offset(s, a->rm),
3915                            pred_full_reg_offset(s, a->pg),
3916                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3917         tcg_temp_free_ptr(status);
3918     }
3919     return true;
3920 }
3921
3922 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3923
3924 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3925 {
3926     if (fn == NULL) {
3927         return false;
3928     }
3929     if (!sve_access_check(s)) {
3930         return true;
3931     }
3932
3933     unsigned vsz = vec_full_reg_size(s);
3934     unsigned desc;
3935     TCGv_i32 t_desc;
3936     TCGv_ptr pg = tcg_temp_new_ptr();
3937
3938     /* We would need 7 operands to pass these arguments "properly".
3939      * So we encode all the register numbers into the descriptor.
3940      */
3941     desc = deposit32(a->rd, 5, 5, a->rn);
3942     desc = deposit32(desc, 10, 5, a->rm);
3943     desc = deposit32(desc, 15, 5, a->ra);
3944     desc = simd_desc(vsz, vsz, desc);
3945
3946     t_desc = tcg_const_i32(desc);
3947     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3948     fn(cpu_env, pg, t_desc);
3949     tcg_temp_free_i32(t_desc);
3950     tcg_temp_free_ptr(pg);
3951     return true;
3952 }
3953
3954 #define DO_FMLA(NAME, name) \
3955 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3956 {                                                                    \
3957     static gen_helper_sve_fmla * const fns[4] = {                    \
3958         NULL, gen_helper_sve_##name##_h,                             \
3959         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3960     };                                                               \
3961     return do_fmla(s, a, fns[a->esz]);                               \
3962 }
3963
3964 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3965 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3966 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3967 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3968
3969 #undef DO_FMLA
3970
3971 /*
3972  *** SVE Floating Point Unary Operations Predicated Group
3973  */
3974
3975 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3976                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
3977 {
3978     if (sve_access_check(s)) {
3979         unsigned vsz = vec_full_reg_size(s);
3980         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3981         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3982                            vec_full_reg_offset(s, rn),
3983                            pred_full_reg_offset(s, pg),
3984                            status, vsz, vsz, 0, fn);
3985         tcg_temp_free_ptr(status);
3986     }
3987     return true;
3988 }
3989
3990 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3991 {
3992     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
3993 }
3994
3995 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3996 {
3997     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3998 }
3999
4000 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4001 {
4002     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
4003 }
4004
4005 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4006 {
4007     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4008 }
4009
4010 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4011 {
4012     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4013 }
4014
4015 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4016 {
4017     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4018 }
4019
4020 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4021 {
4022     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4023 }
4024
4025 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4026 {
4027     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4028 }
4029
4030 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4031 {
4032     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4033 }
4034
4035 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4036 {
4037     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4038 }
4039
4040 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4041 {
4042     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4043 }
4044
4045 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4046 {
4047     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4048 }
4049
4050 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4051 {
4052     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4053 }
4054
4055 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4056 {
4057     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4058 }
4059
4060 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4061 {
4062     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4063 }
4064
4065 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4066 {
4067     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4068 }
4069
4070 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4071 {
4072     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4073 }
4074
4075 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4076 {
4077     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4078 }
4079
4080 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4081 {
4082     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4083 }
4084
4085 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4086 {
4087     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4088 }
4089
4090 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4091     gen_helper_sve_frint_h,
4092     gen_helper_sve_frint_s,
4093     gen_helper_sve_frint_d
4094 };
4095
4096 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4097 {
4098     if (a->esz == 0) {
4099         return false;
4100     }
4101     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4102                       frint_fns[a->esz - 1]);
4103 }
4104
4105 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4106 {
4107     static gen_helper_gvec_3_ptr * const fns[3] = {
4108         gen_helper_sve_frintx_h,
4109         gen_helper_sve_frintx_s,
4110         gen_helper_sve_frintx_d
4111     };
4112     if (a->esz == 0) {
4113         return false;
4114     }
4115     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4116 }
4117
4118 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4119 {
4120     if (a->esz == 0) {
4121         return false;
4122     }
4123     if (sve_access_check(s)) {
4124         unsigned vsz = vec_full_reg_size(s);
4125         TCGv_i32 tmode = tcg_const_i32(mode);
4126         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4127
4128         gen_helper_set_rmode(tmode, tmode, status);
4129
4130         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4131                            vec_full_reg_offset(s, a->rn),
4132                            pred_full_reg_offset(s, a->pg),
4133                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4134
4135         gen_helper_set_rmode(tmode, tmode, status);
4136         tcg_temp_free_i32(tmode);
4137         tcg_temp_free_ptr(status);
4138     }
4139     return true;
4140 }
4141
4142 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4143 {
4144     return do_frint_mode(s, a, float_round_nearest_even);
4145 }
4146
4147 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4148 {
4149     return do_frint_mode(s, a, float_round_up);
4150 }
4151
4152 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4153 {
4154     return do_frint_mode(s, a, float_round_down);
4155 }
4156
4157 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4158 {
4159     return do_frint_mode(s, a, float_round_to_zero);
4160 }
4161
4162 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4163 {
4164     return do_frint_mode(s, a, float_round_ties_away);
4165 }
4166
4167 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4168 {
4169     static gen_helper_gvec_3_ptr * const fns[3] = {
4170         gen_helper_sve_frecpx_h,
4171         gen_helper_sve_frecpx_s,
4172         gen_helper_sve_frecpx_d
4173     };
4174     if (a->esz == 0) {
4175         return false;
4176     }
4177     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4178 }
4179
4180 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4181 {
4182     static gen_helper_gvec_3_ptr * const fns[3] = {
4183         gen_helper_sve_fsqrt_h,
4184         gen_helper_sve_fsqrt_s,
4185         gen_helper_sve_fsqrt_d
4186     };
4187     if (a->esz == 0) {
4188         return false;
4189     }
4190     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4191 }
4192
4193 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4194 {
4195     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4196 }
4197
4198 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4199 {
4200     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4201 }
4202
4203 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4204 {
4205     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4206 }
4207
4208 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4209 {
4210     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4211 }
4212
4213 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4214 {
4215     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4216 }
4217
4218 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4219 {
4220     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4221 }
4222
4223 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4224 {
4225     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4226 }
4227
4228 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4229 {
4230     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4231 }
4232
4233 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4234 {
4235     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4236 }
4237
4238 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4239 {
4240     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4241 }
4242
4243 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4244 {
4245     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4246 }
4247
4248 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4249 {
4250     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4251 }
4252
4253 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4254 {
4255     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4256 }
4257
4258 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4259 {
4260     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4261 }
4262
4263 /*
4264  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4265  */
4266
4267 /* Subroutine loading a vector register at VOFS of LEN bytes.
4268  * The load should begin at the address Rn + IMM.
4269  */
4270
4271 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
4272                    int rn, int imm)
4273 {
4274     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4275     uint32_t len_remain = len % 8;
4276     uint32_t nparts = len / 8 + ctpop8(len_remain);
4277     int midx = get_mem_index(s);
4278     TCGv_i64 addr, t0, t1;
4279
4280     addr = tcg_temp_new_i64();
4281     t0 = tcg_temp_new_i64();
4282
4283     /* Note that unpredicated load/store of vector/predicate registers
4284      * are defined as a stream of bytes, which equates to little-endian
4285      * operations on larger quantities.  There is no nice way to force
4286      * a little-endian load for aarch64_be-linux-user out of line.
4287      *
4288      * Attempt to keep code expansion to a minimum by limiting the
4289      * amount of unrolling done.
4290      */
4291     if (nparts <= 4) {
4292         int i;
4293
4294         for (i = 0; i < len_align; i += 8) {
4295             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4296             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4297             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4298         }
4299     } else {
4300         TCGLabel *loop = gen_new_label();
4301         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4302
4303         gen_set_label(loop);
4304
4305         /* Minimize the number of local temps that must be re-read from
4306          * the stack each iteration.  Instead, re-compute values other
4307          * than the loop counter.
4308          */
4309         tp = tcg_temp_new_ptr();
4310         tcg_gen_addi_ptr(tp, i, imm);
4311         tcg_gen_extu_ptr_i64(addr, tp);
4312         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4313
4314         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4315
4316         tcg_gen_add_ptr(tp, cpu_env, i);
4317         tcg_gen_addi_ptr(i, i, 8);
4318         tcg_gen_st_i64(t0, tp, vofs);
4319         tcg_temp_free_ptr(tp);
4320
4321         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4322         tcg_temp_free_ptr(i);
4323     }
4324
4325     /* Predicate register loads can be any multiple of 2.
4326      * Note that we still store the entire 64-bit unit into cpu_env.
4327      */
4328     if (len_remain) {
4329         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4330
4331         switch (len_remain) {
4332         case 2:
4333         case 4:
4334         case 8:
4335             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4336             break;
4337
4338         case 6:
4339             t1 = tcg_temp_new_i64();
4340             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4341             tcg_gen_addi_i64(addr, addr, 4);
4342             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4343             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4344             tcg_temp_free_i64(t1);
4345             break;
4346
4347         default:
4348             g_assert_not_reached();
4349         }
4350         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4351     }
4352     tcg_temp_free_i64(addr);
4353     tcg_temp_free_i64(t0);
4354 }
4355
4356 /* Similarly for stores.  */
4357 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
4358                    int rn, int imm)
4359 {
4360     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4361     uint32_t len_remain = len % 8;
4362     uint32_t nparts = len / 8 + ctpop8(len_remain);
4363     int midx = get_mem_index(s);
4364     TCGv_i64 addr, t0;
4365
4366     addr = tcg_temp_new_i64();
4367     t0 = tcg_temp_new_i64();
4368
4369     /* Note that unpredicated load/store of vector/predicate registers
4370      * are defined as a stream of bytes, which equates to little-endian
4371      * operations on larger quantities.  There is no nice way to force
4372      * a little-endian store for aarch64_be-linux-user out of line.
4373      *
4374      * Attempt to keep code expansion to a minimum by limiting the
4375      * amount of unrolling done.
4376      */
4377     if (nparts <= 4) {
4378         int i;
4379
4380         for (i = 0; i < len_align; i += 8) {
4381             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4382             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4383             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4384         }
4385     } else {
4386         TCGLabel *loop = gen_new_label();
4387         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4388
4389         gen_set_label(loop);
4390
4391         t2 = tcg_temp_new_ptr();
4392         tcg_gen_add_ptr(t2, cpu_env, i);
4393         tcg_gen_ld_i64(t0, t2, vofs);
4394
4395         /* Minimize the number of local temps that must be re-read from
4396          * the stack each iteration.  Instead, re-compute values other
4397          * than the loop counter.
4398          */
4399         tcg_gen_addi_ptr(t2, i, imm);
4400         tcg_gen_extu_ptr_i64(addr, t2);
4401         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4402         tcg_temp_free_ptr(t2);
4403
4404         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4405
4406         tcg_gen_addi_ptr(i, i, 8);
4407
4408         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4409         tcg_temp_free_ptr(i);
4410     }
4411
4412     /* Predicate register stores can be any multiple of 2.  */
4413     if (len_remain) {
4414         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4415         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4416
4417         switch (len_remain) {
4418         case 2:
4419         case 4:
4420         case 8:
4421             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4422             break;
4423
4424         case 6:
4425             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4426             tcg_gen_addi_i64(addr, addr, 4);
4427             tcg_gen_shri_i64(t0, t0, 32);
4428             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4429             break;
4430
4431         default:
4432             g_assert_not_reached();
4433         }
4434     }
4435     tcg_temp_free_i64(addr);
4436     tcg_temp_free_i64(t0);
4437 }
4438
4439 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4440 {
4441     if (sve_access_check(s)) {
4442         int size = vec_full_reg_size(s);
4443         int off = vec_full_reg_offset(s, a->rd);
4444         do_ldr(s, off, size, a->rn, a->imm * size);
4445     }
4446     return true;
4447 }
4448
4449 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4450 {
4451     if (sve_access_check(s)) {
4452         int size = pred_full_reg_size(s);
4453         int off = pred_full_reg_offset(s, a->rd);
4454         do_ldr(s, off, size, a->rn, a->imm * size);
4455     }
4456     return true;
4457 }
4458
4459 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4460 {
4461     if (sve_access_check(s)) {
4462         int size = vec_full_reg_size(s);
4463         int off = vec_full_reg_offset(s, a->rd);
4464         do_str(s, off, size, a->rn, a->imm * size);
4465     }
4466     return true;
4467 }
4468
4469 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4470 {
4471     if (sve_access_check(s)) {
4472         int size = pred_full_reg_size(s);
4473         int off = pred_full_reg_offset(s, a->rd);
4474         do_str(s, off, size, a->rn, a->imm * size);
4475     }
4476     return true;
4477 }
4478
4479 /*
4480  *** SVE Memory - Contiguous Load Group
4481  */
4482
/* The memory mode of the dtype.
 * Indexed by the 4-bit dtype value (callers pass a->dtype).  Each entry
 * is the TCGMemOp describing one element as it is accessed in memory.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* Size field of the memory mode; callers in this file use it as a
 * left-shift count when scaling indices and offsets.
 */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4492
/* The vector element size of dtype.
 * Indexed by the 4-bit dtype value.  Callers use the entry as a shift
 * count: vsz >> dtype_esz[dtype] yields the number of elements in a
 * vector of vsz bytes.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4500
4501 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4502                        gen_helper_gvec_mem *fn)
4503 {
4504     unsigned vsz = vec_full_reg_size(s);
4505     TCGv_ptr t_pg;
4506     TCGv_i32 desc;
4507
4508     /* For e.g. LD4, there are not enough arguments to pass all 4
4509      * registers as pointers, so encode the regno into the data field.
4510      * For consistency, do this even for LD1.
4511      */
4512     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4513     t_pg = tcg_temp_new_ptr();
4514
4515     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4516     fn(cpu_env, t_pg, addr, desc);
4517
4518     tcg_temp_free_ptr(t_pg);
4519     tcg_temp_free_i32(desc);
4520 }
4521
4522 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4523                       TCGv_i64 addr, int dtype, int nreg)
4524 {
4525     static gen_helper_gvec_mem * const fns[16][4] = {
4526         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4527           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4528         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4529         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4530         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4531
4532         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4533         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4534           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4535         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4536         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4537
4538         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4539         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4540         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4541           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4542         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4543
4544         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4545         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4546         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4547         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4548           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4549     };
4550     gen_helper_gvec_mem *fn = fns[dtype][nreg];
4551
4552     /* While there are holes in the table, they are not
4553      * accessible via the instruction encoding.
4554      */
4555     assert(fn != NULL);
4556     do_mem_zpa(s, zt, pg, addr, fn);
4557 }
4558
4559 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4560 {
4561     if (a->rm == 31) {
4562         return false;
4563     }
4564     if (sve_access_check(s)) {
4565         TCGv_i64 addr = new_tmp_a64(s);
4566         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4567                          (a->nreg + 1) << dtype_msz(a->dtype));
4568         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4569         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4570     }
4571     return true;
4572 }
4573
4574 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4575 {
4576     if (sve_access_check(s)) {
4577         int vsz = vec_full_reg_size(s);
4578         int elements = vsz >> dtype_esz[a->dtype];
4579         TCGv_i64 addr = new_tmp_a64(s);
4580
4581         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4582                          (a->imm * elements * (a->nreg + 1))
4583                          << dtype_msz(a->dtype));
4584         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4585     }
4586     return true;
4587 }
4588
4589 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4590 {
4591     static gen_helper_gvec_mem * const fns[16] = {
4592         gen_helper_sve_ldff1bb_r,
4593         gen_helper_sve_ldff1bhu_r,
4594         gen_helper_sve_ldff1bsu_r,
4595         gen_helper_sve_ldff1bdu_r,
4596
4597         gen_helper_sve_ldff1sds_r,
4598         gen_helper_sve_ldff1hh_r,
4599         gen_helper_sve_ldff1hsu_r,
4600         gen_helper_sve_ldff1hdu_r,
4601
4602         gen_helper_sve_ldff1hds_r,
4603         gen_helper_sve_ldff1hss_r,
4604         gen_helper_sve_ldff1ss_r,
4605         gen_helper_sve_ldff1sdu_r,
4606
4607         gen_helper_sve_ldff1bds_r,
4608         gen_helper_sve_ldff1bss_r,
4609         gen_helper_sve_ldff1bhs_r,
4610         gen_helper_sve_ldff1dd_r,
4611     };
4612
4613     if (sve_access_check(s)) {
4614         TCGv_i64 addr = new_tmp_a64(s);
4615         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4616         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4617         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4618     }
4619     return true;
4620 }
4621
4622 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4623 {
4624     static gen_helper_gvec_mem * const fns[16] = {
4625         gen_helper_sve_ldnf1bb_r,
4626         gen_helper_sve_ldnf1bhu_r,
4627         gen_helper_sve_ldnf1bsu_r,
4628         gen_helper_sve_ldnf1bdu_r,
4629
4630         gen_helper_sve_ldnf1sds_r,
4631         gen_helper_sve_ldnf1hh_r,
4632         gen_helper_sve_ldnf1hsu_r,
4633         gen_helper_sve_ldnf1hdu_r,
4634
4635         gen_helper_sve_ldnf1hds_r,
4636         gen_helper_sve_ldnf1hss_r,
4637         gen_helper_sve_ldnf1ss_r,
4638         gen_helper_sve_ldnf1sdu_r,
4639
4640         gen_helper_sve_ldnf1bds_r,
4641         gen_helper_sve_ldnf1bss_r,
4642         gen_helper_sve_ldnf1bhs_r,
4643         gen_helper_sve_ldnf1dd_r,
4644     };
4645
4646     if (sve_access_check(s)) {
4647         int vsz = vec_full_reg_size(s);
4648         int elements = vsz >> dtype_esz[a->dtype];
4649         int off = (a->imm * elements) << dtype_msz(a->dtype);
4650         TCGv_i64 addr = new_tmp_a64(s);
4651
4652         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4653         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4654     }
4655     return true;
4656 }
4657
4658 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4659 {
4660     static gen_helper_gvec_mem * const fns[4] = {
4661         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4662         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4663     };
4664     unsigned vsz = vec_full_reg_size(s);
4665     TCGv_ptr t_pg;
4666     TCGv_i32 desc;
4667
4668     /* Load the first quadword using the normal predicated load helpers.  */
4669     desc = tcg_const_i32(simd_desc(16, 16, zt));
4670     t_pg = tcg_temp_new_ptr();
4671
4672     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4673     fns[msz](cpu_env, t_pg, addr, desc);
4674
4675     tcg_temp_free_ptr(t_pg);
4676     tcg_temp_free_i32(desc);
4677
4678     /* Replicate that first quadword.  */
4679     if (vsz > 16) {
4680         unsigned dofs = vec_full_reg_offset(s, zt);
4681         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4682     }
4683 }
4684
4685 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4686 {
4687     if (a->rm == 31) {
4688         return false;
4689     }
4690     if (sve_access_check(s)) {
4691         int msz = dtype_msz(a->dtype);
4692         TCGv_i64 addr = new_tmp_a64(s);
4693         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4694         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4695         do_ldrq(s, a->rd, a->pg, addr, msz);
4696     }
4697     return true;
4698 }
4699
4700 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4701 {
4702     if (sve_access_check(s)) {
4703         TCGv_i64 addr = new_tmp_a64(s);
4704         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4705         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4706     }
4707     return true;
4708 }
4709
4710 /* Load and broadcast element.  */
4711 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4712 {
4713     if (!sve_access_check(s)) {
4714         return true;
4715     }
4716
4717     unsigned vsz = vec_full_reg_size(s);
4718     unsigned psz = pred_full_reg_size(s);
4719     unsigned esz = dtype_esz[a->dtype];
4720     TCGLabel *over = gen_new_label();
4721     TCGv_i64 temp;
4722
4723     /* If the guarding predicate has no bits set, no load occurs.  */
4724     if (psz <= 8) {
4725         /* Reduce the pred_esz_masks value simply to reduce the
4726          * size of the code generated here.
4727          */
4728         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4729         temp = tcg_temp_new_i64();
4730         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4731         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4732         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4733         tcg_temp_free_i64(temp);
4734     } else {
4735         TCGv_i32 t32 = tcg_temp_new_i32();
4736         find_last_active(s, t32, esz, a->pg);
4737         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4738         tcg_temp_free_i32(t32);
4739     }
4740
4741     /* Load the data.  */
4742     temp = tcg_temp_new_i64();
4743     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4744     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4745                         s->be_data | dtype_mop[a->dtype]);
4746
4747     /* Broadcast to *all* elements.  */
4748     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4749                          vsz, vsz, temp);
4750     tcg_temp_free_i64(temp);
4751
4752     /* Zero the inactive elements.  */
4753     gen_set_label(over);
4754     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4755     return true;
4756 }
4757
4758 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4759                       int msz, int esz, int nreg)
4760 {
4761     static gen_helper_gvec_mem * const fn_single[4][4] = {
4762         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4763           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4764         { NULL,                   gen_helper_sve_st1hh_r,
4765           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4766         { NULL, NULL,
4767           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4768         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4769     };
4770     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4771         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4772           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4773         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4774           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4775         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4776           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4777     };
4778     gen_helper_gvec_mem *fn;
4779
4780     if (nreg == 0) {
4781         /* ST1 */
4782         fn = fn_single[msz][esz];
4783     } else {
4784         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4785         assert(msz == esz);
4786         fn = fn_multiple[nreg - 1][msz];
4787     }
4788     assert(fn != NULL);
4789     do_mem_zpa(s, zt, pg, addr, fn);
4790 }
4791
4792 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4793 {
4794     if (a->rm == 31 || a->msz > a->esz) {
4795         return false;
4796     }
4797     if (sve_access_check(s)) {
4798         TCGv_i64 addr = new_tmp_a64(s);
4799         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4800         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4801         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4802     }
4803     return true;
4804 }
4805
4806 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4807 {
4808     if (a->msz > a->esz) {
4809         return false;
4810     }
4811     if (sve_access_check(s)) {
4812         int vsz = vec_full_reg_size(s);
4813         int elements = vsz >> a->esz;
4814         TCGv_i64 addr = new_tmp_a64(s);
4815
4816         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4817                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4818         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4819     }
4820     return true;
4821 }
4822
4823 /*
4824  *** SVE gather loads / scatter stores
4825  */
4826
4827 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4828                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4829 {
4830     unsigned vsz = vec_full_reg_size(s);
4831     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4832     TCGv_ptr t_zm = tcg_temp_new_ptr();
4833     TCGv_ptr t_pg = tcg_temp_new_ptr();
4834     TCGv_ptr t_zt = tcg_temp_new_ptr();
4835
4836     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4837     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4838     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4839     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4840
4841     tcg_temp_free_ptr(t_zt);
4842     tcg_temp_free_ptr(t_zm);
4843     tcg_temp_free_ptr(t_pg);
4844     tcg_temp_free_i32(desc);
4845 }
4846
4847 /* Indexed by [ff][xs][u][msz].  */
4848 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4849     { { { gen_helper_sve_ldbss_zsu,
4850           gen_helper_sve_ldhss_zsu,
4851           NULL, },
4852         { gen_helper_sve_ldbsu_zsu,
4853           gen_helper_sve_ldhsu_zsu,
4854           gen_helper_sve_ldssu_zsu, } },
4855       { { gen_helper_sve_ldbss_zss,
4856           gen_helper_sve_ldhss_zss,
4857           NULL, },
4858         { gen_helper_sve_ldbsu_zss,
4859           gen_helper_sve_ldhsu_zss,
4860           gen_helper_sve_ldssu_zss, } } },
4861
4862     { { { gen_helper_sve_ldffbss_zsu,
4863           gen_helper_sve_ldffhss_zsu,
4864           NULL, },
4865         { gen_helper_sve_ldffbsu_zsu,
4866           gen_helper_sve_ldffhsu_zsu,
4867           gen_helper_sve_ldffssu_zsu, } },
4868       { { gen_helper_sve_ldffbss_zss,
4869           gen_helper_sve_ldffhss_zss,
4870           NULL, },
4871         { gen_helper_sve_ldffbsu_zss,
4872           gen_helper_sve_ldffhsu_zss,
4873           gen_helper_sve_ldffssu_zss, } } }
4874 };
4875
4876 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4877 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4878     { { { gen_helper_sve_ldbds_zsu,
4879           gen_helper_sve_ldhds_zsu,
4880           gen_helper_sve_ldsds_zsu,
4881           NULL, },
4882         { gen_helper_sve_ldbdu_zsu,
4883           gen_helper_sve_ldhdu_zsu,
4884           gen_helper_sve_ldsdu_zsu,
4885           gen_helper_sve_ldddu_zsu, } },
4886       { { gen_helper_sve_ldbds_zss,
4887           gen_helper_sve_ldhds_zss,
4888           gen_helper_sve_ldsds_zss,
4889           NULL, },
4890         { gen_helper_sve_ldbdu_zss,
4891           gen_helper_sve_ldhdu_zss,
4892           gen_helper_sve_ldsdu_zss,
4893           gen_helper_sve_ldddu_zss, } },
4894       { { gen_helper_sve_ldbds_zd,
4895           gen_helper_sve_ldhds_zd,
4896           gen_helper_sve_ldsds_zd,
4897           NULL, },
4898         { gen_helper_sve_ldbdu_zd,
4899           gen_helper_sve_ldhdu_zd,
4900           gen_helper_sve_ldsdu_zd,
4901           gen_helper_sve_ldddu_zd, } } },
4902
4903     { { { gen_helper_sve_ldffbds_zsu,
4904           gen_helper_sve_ldffhds_zsu,
4905           gen_helper_sve_ldffsds_zsu,
4906           NULL, },
4907         { gen_helper_sve_ldffbdu_zsu,
4908           gen_helper_sve_ldffhdu_zsu,
4909           gen_helper_sve_ldffsdu_zsu,
4910           gen_helper_sve_ldffddu_zsu, } },
4911       { { gen_helper_sve_ldffbds_zss,
4912           gen_helper_sve_ldffhds_zss,
4913           gen_helper_sve_ldffsds_zss,
4914           NULL, },
4915         { gen_helper_sve_ldffbdu_zss,
4916           gen_helper_sve_ldffhdu_zss,
4917           gen_helper_sve_ldffsdu_zss,
4918           gen_helper_sve_ldffddu_zss, } },
4919       { { gen_helper_sve_ldffbds_zd,
4920           gen_helper_sve_ldffhds_zd,
4921           gen_helper_sve_ldffsds_zd,
4922           NULL, },
4923         { gen_helper_sve_ldffbdu_zd,
4924           gen_helper_sve_ldffhdu_zd,
4925           gen_helper_sve_ldffsdu_zd,
4926           gen_helper_sve_ldffddu_zd, } } }
4927 };
4928
4929 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4930 {
4931     gen_helper_gvec_mem_scatter *fn = NULL;
4932
4933     if (!sve_access_check(s)) {
4934         return true;
4935     }
4936
4937     switch (a->esz) {
4938     case MO_32:
4939         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4940         break;
4941     case MO_64:
4942         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4943         break;
4944     }
4945     assert(fn != NULL);
4946
4947     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4948                cpu_reg_sp(s, a->rn), fn);
4949     return true;
4950 }
4951
4952 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4953 {
4954     gen_helper_gvec_mem_scatter *fn = NULL;
4955     TCGv_i64 imm;
4956
4957     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4958         return false;
4959     }
4960     if (!sve_access_check(s)) {
4961         return true;
4962     }
4963
4964     switch (a->esz) {
4965     case MO_32:
4966         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4967         break;
4968     case MO_64:
4969         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4970         break;
4971     }
4972     assert(fn != NULL);
4973
4974     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4975      * by loading the immediate into the scalar parameter.
4976      */
4977     imm = tcg_const_i64(a->imm << a->msz);
4978     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4979     tcg_temp_free_i64(imm);
4980     return true;
4981 }
4982
4983 /* Indexed by [xs][msz].  */
4984 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
4985     { gen_helper_sve_stbs_zsu,
4986       gen_helper_sve_sths_zsu,
4987       gen_helper_sve_stss_zsu, },
4988     { gen_helper_sve_stbs_zss,
4989       gen_helper_sve_sths_zss,
4990       gen_helper_sve_stss_zss, },
4991 };
4992
4993 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4994 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
4995     { gen_helper_sve_stbd_zsu,
4996       gen_helper_sve_sthd_zsu,
4997       gen_helper_sve_stsd_zsu,
4998       gen_helper_sve_stdd_zsu, },
4999     { gen_helper_sve_stbd_zss,
5000       gen_helper_sve_sthd_zss,
5001       gen_helper_sve_stsd_zss,
5002       gen_helper_sve_stdd_zss, },
5003     { gen_helper_sve_stbd_zd,
5004       gen_helper_sve_sthd_zd,
5005       gen_helper_sve_stsd_zd,
5006       gen_helper_sve_stdd_zd, },
5007 };
5008
5009 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5010 {
5011     gen_helper_gvec_mem_scatter *fn;
5012
5013     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5014         return false;
5015     }
5016     if (!sve_access_check(s)) {
5017         return true;
5018     }
5019     switch (a->esz) {
5020     case MO_32:
5021         fn = scatter_store_fn32[a->xs][a->msz];
5022         break;
5023     case MO_64:
5024         fn = scatter_store_fn64[a->xs][a->msz];
5025         break;
5026     default:
5027         g_assert_not_reached();
5028     }
5029     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5030                cpu_reg_sp(s, a->rn), fn);
5031     return true;
5032 }
5033
5034 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5035 {
5036     gen_helper_gvec_mem_scatter *fn = NULL;
5037     TCGv_i64 imm;
5038
5039     if (a->esz < a->msz) {
5040         return false;
5041     }
5042     if (!sve_access_check(s)) {
5043         return true;
5044     }
5045
5046     switch (a->esz) {
5047     case MO_32:
5048         fn = scatter_store_fn32[0][a->msz];
5049         break;
5050     case MO_64:
5051         fn = scatter_store_fn64[2][a->msz];
5052         break;
5053     }
5054     assert(fn != NULL);
5055
5056     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5057      * by loading the immediate into the scalar parameter.
5058      */
5059     imm = tcg_const_i64(a->imm << a->msz);
5060     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5061     tcg_temp_free_i64(imm);
5062     return true;
5063 }
5064
5065 /*
5066  * Prefetches
5067  */
5068
5069 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5070 {
5071     /* Prefetch is a nop within QEMU.  */
5072     sve_access_check(s);
5073     return true;
5074 }
5075
5076 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5077 {
5078     if (a->rm == 31) {
5079         return false;
5080     }
5081     /* Prefetch is a nop within QEMU.  */
5082     sve_access_check(s);
5083     return true;
5084 }
5085
5086 /*
5087  * Move Prefix
5088  *
5089  * TODO: The implementation so far could handle predicated merging movprfx.
5090  * The helper functions as written take an extra source register to
5091  * use in the operation, but the result is only written when predication
5092  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5093  * to allow the final write back to the destination to be unconditional.
5094  * For predicated zeroing movprfx, we need to rearrange the helpers to
5095  * allow the final write back to zero inactives.
5096  *
5097  * In the meantime, just emit the moves.
5098  */
5099
5100 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
5101 {
5102     return do_mov_z(s, a->rd, a->rn);
5103 }
5104
5105 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5106 {
5107     if (sve_access_check(s)) {
5108         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5109     }
5110     return true;
5111 }
5112
5113 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5114 {
5115     if (sve_access_check(s)) {
5116         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5117     }
5118     return true;
5119 }