/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
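
/* Worked example for the tsz:imm3 encodings above: for byte elements
 * tsz == 0b0001, so the combined field x runs from 8 to 15.  E.g. x = 13
 * gives tszimm_esz(13) = 31 - clz32(1) = 0 (MO_8), a right-shift count of
 * (16 << 0) - 13 = 3, and a left-shift count of 13 - (8 << 0) = 5.
 */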

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
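
/* E.g. x = 0x1ab: bit 8 is set, so the low byte 0xab is shifted up,
 * giving 0xffffab00 (-21760) from expand_imm_sh8s and 0x0000ab00
 * (43776) from expand_imm_sh8u; with bit 8 clear, x = 0xab would
 * yield -85 and 171 respectively.
 */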

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
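
/* E.g. a 2-byte predicate (VL = 128 bits) rounds up to 8 bytes, and an
 * 18-byte predicate (VL = 1152 bits) rounds up to 32 bytes.
 */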

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
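
/* The predtest helpers pack the PredTest flags into one value: N in
 * bit 31, "some element was active" in bit 1 and C in bit 0.  Given
 * QEMU's conventions (N is the sign bit of cpu_NF, Z is set iff
 * cpu_ZF == 0, cpu_CF holds C directly), the moves above unpack that
 * value; PredTest always yields V == 0.
 */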

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
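
/* A predicate has one bit per byte of the vector, so wider elements use
 * only every 2nd, 4th or 8th bit; e.g. for halfwords (esz == 1) the
 * 0x5555... mask selects bit 0 of each 2-bit predicate element.
 */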

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
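
/* E.g. DO_ZPZZ(ADD, add) expands to trans_ADD_zpzz, which selects
 * gen_helper_sve_add_zpzz_{b,h,s,d} by element size and hands it to
 * do_zpzz_ool above.
 */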

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
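
/* The reduction produces a scalar: write_fp_dreg stores it to the low
 * 64 bits of Vd and zeroes the remainder of the register, per the
 * usual AArch64 scalar-write semantics.
 */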

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
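
/* E.g. for byte elements, "lsr z0.b, z1.b, #8" zeroes the destination
 * outright, while "asr z0.b, z1.b, #8" is clamped to a shift by 7,
 * which produces the same result (all sign bits).
 */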

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
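
/* E.g. "index z0.s, #1, #2" fills element n of z0 with 1 + 2 * n,
 * i.e. 1, 3, 5, ... across the whole vector.
 */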

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
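
/* The psz == 8 fast path covers vector lengths up to 512 bits, where
 * the whole predicate fits in one 64-bit word and both the operation
 * and PredTest can be done inline.
 */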

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
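
/* E.g. with a 256-bit vector and word elements there are 8 elements:
 * POW2 gives 8, VL7 gives 7, VL16 gives 0 (the bound exceeds the
 * vector), MUL3 gives 6 and ALL gives 8.
 */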

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
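
/* E.g. "ptrue p0.s, vl3" at VL = 256 bits: numelem = 3, so the low
 * 3 * 4 = 12 predicate bits receive the 0x1111... word-element pattern
 * and the rest are zeroed, i.e. p0 = 0x111.
 */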

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
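
/* Since the value was extended to 64 bits first, the true result cannot
 * wrap, so a single movcond clamping against the one relevant bound is
 * enough: e.g. a decrement of an unsigned value compares LT against 0.
 */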

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
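
/* The signed cases use the usual XOR trick: for addition, overflow
 * occurred iff the operands had the same sign and the result's sign
 * differs, so (result ^ val) & ~(reg ^ val) has its top bit set; the
 * movcond then selects the saturated bound when that value is < 0.
 */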

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
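
/* E.g. with a 32-byte vector and imm == 5, the destination receives
 * bytes 5..31 of Zn followed by bytes 0..4 of Zm, i.e. a byte-granular
 * extract from the concatenation of the two registers.
 */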
1974
1975 /*
1976 *** SVE Permute - Unpredicated Group
1977 */
1978
1979 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1980 {
1981 if (sve_access_check(s)) {
1982 unsigned vsz = vec_full_reg_size(s);
1983 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1984 vsz, vsz, cpu_reg_sp(s, a->rn));
1985 }
1986 return true;
1987 }
1988
1989 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1990 {
1991 if ((a->imm & 0x1f) == 0) {
1992 return false;
1993 }
1994 if (sve_access_check(s)) {
1995 unsigned vsz = vec_full_reg_size(s);
1996 unsigned dofs = vec_full_reg_offset(s, a->rd);
1997 unsigned esz, index;
1998
1999 esz = ctz32(a->imm);
2000 index = a->imm >> (esz + 1);
2001
2002 if ((index << esz) < vsz) {
2003 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2004 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2005 } else {
2006 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2007 }
2008 }
2009 return true;
2010 }
2011
2012 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2013 {
2014 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2015 static gen_insr * const fns[4] = {
2016 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2017 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2018 };
2019 unsigned vsz = vec_full_reg_size(s);
2020 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2021 TCGv_ptr t_zd = tcg_temp_new_ptr();
2022 TCGv_ptr t_zn = tcg_temp_new_ptr();
2023
2024 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2025 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2026
2027 fns[a->esz](t_zd, t_zn, val, desc);
2028
2029 tcg_temp_free_ptr(t_zd);
2030 tcg_temp_free_ptr(t_zn);
2031 tcg_temp_free_i32(desc);
2032 }
2033
2034 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2035 {
2036 if (sve_access_check(s)) {
2037 TCGv_i64 t = tcg_temp_new_i64();
2038 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2039 do_insr_i64(s, a, t);
2040 tcg_temp_free_i64(t);
2041 }
2042 return true;
2043 }
2044
2045 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2046 {
2047 if (sve_access_check(s)) {
2048 do_insr_i64(s, a, cpu_reg(s, a->rm));
2049 }
2050 return true;
2051 }
2052
2053 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2054 {
2055 static gen_helper_gvec_2 * const fns[4] = {
2056 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2057 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2058 };
2059
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vsz, vsz, 0, fns[a->esz]);
2065 }
2066 return true;
2067 }
2068
2069 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2070 {
2071 static gen_helper_gvec_3 * const fns[4] = {
2072 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2073 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2074 };
2075
2076 if (sve_access_check(s)) {
2077 unsigned vsz = vec_full_reg_size(s);
2078 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2079 vec_full_reg_offset(s, a->rn),
2080 vec_full_reg_offset(s, a->rm),
2081 vsz, vsz, 0, fns[a->esz]);
2082 }
2083 return true;
2084 }
2085
2086 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2087 {
2088 static gen_helper_gvec_2 * const fns[4][2] = {
2089 { NULL, NULL },
2090 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2091 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2092 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2093 };
2094
2095 if (a->esz == 0) {
2096 return false;
2097 }
2098 if (sve_access_check(s)) {
2099 unsigned vsz = vec_full_reg_size(s);
2100 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2101 vec_full_reg_offset(s, a->rn)
2102 + (a->h ? vsz / 2 : 0),
2103 vsz, vsz, 0, fns[a->esz][a->u]);
2104 }
2105 return true;
2106 }
2107
2108 /*
2109 *** SVE Permute - Predicates Group
2110 */
2111
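/*
 * Note on the hand-rolled descriptor used below: predicate sizes run
 * from 2 to 256 bytes in multiples of 2, which simd_desc() cannot
 * encode, so the low bits carry (psz - 2) directly and the helpers
 * reconstruct the size by adding the 2 back.  The element size and the
 * high/odd flag are deposited at SIMD_DATA_SHIFT; e.g. ZIP2 .s at a
 * 256-bit VL (psz = 4) gives
 * desc = 2 | (2 << SIMD_DATA_SHIFT) | (1 << (SIMD_DATA_SHIFT + 2)).
 */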
2112 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2113 gen_helper_gvec_3 *fn)
2114 {
2115 if (!sve_access_check(s)) {
2116 return true;
2117 }
2118
2119 unsigned vsz = pred_full_reg_size(s);
2120
2121 /* Predicate sizes may be smaller than vector sizes, and need not be
2122 a multiple of 8, so we cannot use simd_desc. We cannot round up,
2123 as we do elsewhere, because we need the exact size for ZIP2 and
2124 REV. We retain the style of the other helpers for consistency. */
2125 TCGv_ptr t_d = tcg_temp_new_ptr();
2126 TCGv_ptr t_n = tcg_temp_new_ptr();
2127 TCGv_ptr t_m = tcg_temp_new_ptr();
2128 TCGv_i32 t_desc;
2129 int desc;
2130
2131 desc = vsz - 2;
2132 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2133 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2134
2135 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2136 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2137 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2138 t_desc = tcg_const_i32(desc);
2139
2140 fn(t_d, t_n, t_m, t_desc);
2141
2142 tcg_temp_free_ptr(t_d);
2143 tcg_temp_free_ptr(t_n);
2144 tcg_temp_free_ptr(t_m);
2145 tcg_temp_free_i32(t_desc);
2146 return true;
2147 }
2148
2149 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2150 gen_helper_gvec_2 *fn)
2151 {
2152 if (!sve_access_check(s)) {
2153 return true;
2154 }
2155
2156 unsigned vsz = pred_full_reg_size(s);
2157 TCGv_ptr t_d = tcg_temp_new_ptr();
2158 TCGv_ptr t_n = tcg_temp_new_ptr();
2159 TCGv_i32 t_desc;
2160 int desc;
2161
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164
2165 /* Predicate sizes may be smaller than vector sizes, and need not be
2166 a multiple of 8, so we cannot use simd_desc. We cannot round up,
2167 as we do elsewhere, because we need the exact size for ZIP2 and
2168 REV. We retain the style of the other helpers for consistency. */
2169
2170 desc = vsz - 2;
2171 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2172 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2173 t_desc = tcg_const_i32(desc);
2174
2175 fn(t_d, t_n, t_desc);
2176
2177 tcg_temp_free_i32(t_desc);
2178 tcg_temp_free_ptr(t_d);
2179 tcg_temp_free_ptr(t_n);
2180 return true;
2181 }
2182
2183 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2184 {
2185 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2186 }
2187
2188 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2189 {
2190 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2191 }
2192
2193 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2194 {
2195 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2196 }
2197
2198 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2199 {
2200 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2201 }
2202
2203 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2204 {
2205 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2206 }
2207
2208 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2209 {
2210 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2211 }
2212
2213 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2214 {
2215 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2216 }
2217
2218 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2219 {
2220 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2221 }
2222
2223 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2224 {
2225 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2226 }
2227
2228 /*
2229 *** SVE Permute - Interleaving Group
2230 */
2231
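/*
 * The "2" variants below operate on high halves or odd elements:
 * ZIP2 reads both sources at offset vsz/2, while UZP2/TRN2 pass
 * (1 << esz), the byte offset of element 1, as the data value to the
 * shared helper.
 */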
2232 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2233 {
2234 static gen_helper_gvec_3 * const fns[4] = {
2235 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2236 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2237 };
2238
2239 if (sve_access_check(s)) {
2240 unsigned vsz = vec_full_reg_size(s);
2241 unsigned high_ofs = high ? vsz / 2 : 0;
2242 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2243 vec_full_reg_offset(s, a->rn) + high_ofs,
2244 vec_full_reg_offset(s, a->rm) + high_ofs,
2245 vsz, vsz, 0, fns[a->esz]);
2246 }
2247 return true;
2248 }
2249
2250 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2251 gen_helper_gvec_3 *fn)
2252 {
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2256 vec_full_reg_offset(s, a->rn),
2257 vec_full_reg_offset(s, a->rm),
2258 vsz, vsz, data, fn);
2259 }
2260 return true;
2261 }
2262
2263 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2264 {
2265 return do_zip(s, a, false);
2266 }
2267
2268 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2269 {
2270 return do_zip(s, a, true);
2271 }
2272
2273 static gen_helper_gvec_3 * const uzp_fns[4] = {
2274 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2275 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2276 };
2277
2278 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2279 {
2280 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2281 }
2282
2283 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2284 {
2285 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2286 }
2287
2288 static gen_helper_gvec_3 * const trn_fns[4] = {
2289 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2290 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2291 };
2292
2293 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2294 {
2295 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2296 }
2297
2298 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2299 {
2300 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2301 }
2302
2303 /*
2304 *** SVE Permute Vector - Predicated Group
2305 */
2306
2307 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2308 {
2309 static gen_helper_gvec_3 * const fns[4] = {
2310 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2311 };
2312 return do_zpz_ool(s, a, fns[a->esz]);
2313 }
2314
2315 /* Call the helper that computes the ARM LastActiveElement pseudocode
2316 * function, scaled by the element size. This includes the not found
2317 * indication; e.g. not found for esz=3 is -8.
2318 */
2319 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2320 {
2321 /* Predicate sizes need not be a multiple of 8, so we cannot use
2322 * simd_desc. We cannot round up, as we do elsewhere, because we
2323 * need the exact size. */
2324 TCGv_ptr t_p = tcg_temp_new_ptr();
2325 TCGv_i32 t_desc;
2326 unsigned vsz = pred_full_reg_size(s);
2327 unsigned desc;
2328
2329 desc = vsz - 2;
2330 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2331
2332 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2333 t_desc = tcg_const_i32(desc);
2334
2335 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2336
2337 tcg_temp_free_i32(t_desc);
2338 tcg_temp_free_ptr(t_p);
2339 }
2340
2341 /* Increment LAST to the offset of the next element in the vector,
2342 * wrapping around to 0.
2343 */
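/*
 * The vector length need not be a power of two (any multiple of 128
 * bits up to 2048 is legal), so the general case wraps with a
 * compare-and-select rather than a mask; e.g. with vsz = 48 (384-bit
 * VL) and esz = 0, last = 47 increments to 48 and is folded back to 0.
 */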
2344 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2345 {
2346 unsigned vsz = vec_full_reg_size(s);
2347
2348 tcg_gen_addi_i32(last, last, 1 << esz);
2349 if (is_power_of_2(vsz)) {
2350 tcg_gen_andi_i32(last, last, vsz - 1);
2351 } else {
2352 TCGv_i32 max = tcg_const_i32(vsz);
2353 TCGv_i32 zero = tcg_const_i32(0);
2354 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2355 tcg_temp_free_i32(max);
2356 tcg_temp_free_i32(zero);
2357 }
2358 }
2359
2360 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2361 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2362 {
2363 unsigned vsz = vec_full_reg_size(s);
2364
2365 if (is_power_of_2(vsz)) {
2366 tcg_gen_andi_i32(last, last, vsz - 1);
2367 } else {
2368 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2369 TCGv_i32 zero = tcg_const_i32(0);
2370 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2371 tcg_temp_free_i32(max);
2372 tcg_temp_free_i32(zero);
2373 }
2374 }
2375
2376 /* Load an unsigned element of ESZ from BASE+OFS. */
2377 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2378 {
2379 TCGv_i64 r = tcg_temp_new_i64();
2380
2381 switch (esz) {
2382 case 0:
2383 tcg_gen_ld8u_i64(r, base, ofs);
2384 break;
2385 case 1:
2386 tcg_gen_ld16u_i64(r, base, ofs);
2387 break;
2388 case 2:
2389 tcg_gen_ld32u_i64(r, base, ofs);
2390 break;
2391 case 3:
2392 tcg_gen_ld_i64(r, base, ofs);
2393 break;
2394 default:
2395 g_assert_not_reached();
2396 }
2397 return r;
2398 }
2399
2400 /* Load an unsigned element of ESZ from RM[LAST]. */
2401 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2402 int rm, int esz)
2403 {
2404 TCGv_ptr p = tcg_temp_new_ptr();
2405 TCGv_i64 r;
2406
2407 /* Convert the offset within the vector into an offset within ENV.
2408 * The final adjustment for the vector register base
2409 * is added via constant offset to the load.
2410 */
2411 #ifdef HOST_WORDS_BIGENDIAN
2412 /* Adjust for element ordering. See vec_reg_offset. */
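/* E.g. for .h elements (esz=1), XOR with 6 maps the little-endian
 * byte offsets {0,2,4,6} within each 64-bit unit to the big-endian
 * host offsets {6,4,2,0}.
 */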
2413 if (esz < 3) {
2414 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2415 }
2416 #endif
2417 tcg_gen_ext_i32_ptr(p, last);
2418 tcg_gen_add_ptr(p, p, cpu_env);
2419
2420 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2421 tcg_temp_free_ptr(p);
2422
2423 return r;
2424 }
2425
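/*
 * CLASTA copies the element after the last active one, CLASTB the
 * last active element itself; hence the conditional increment below.
 * If no element is active, Zd is left holding Zn (the value supplied
 * by MOVPRFX) via the branch-over path.
 */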
2426 /* Compute CLAST for a Zreg. */
2427 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2428 {
2429 TCGv_i32 last;
2430 TCGLabel *over;
2431 TCGv_i64 ele;
2432 unsigned vsz, esz = a->esz;
2433
2434 if (!sve_access_check(s)) {
2435 return true;
2436 }
2437
2438 last = tcg_temp_local_new_i32();
2439 over = gen_new_label();
2440
2441 find_last_active(s, last, esz, a->pg);
2442
2443 /* There is of course no movcond for a 2048-bit vector,
2444 * so we must branch over the actual store.
2445 */
2446 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2447
2448 if (!before) {
2449 incr_last_active(s, last, esz);
2450 }
2451
2452 ele = load_last_active(s, last, a->rm, esz);
2453 tcg_temp_free_i32(last);
2454
2455 vsz = vec_full_reg_size(s);
2456 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2457 tcg_temp_free_i64(ele);
2458
2459 /* If this insn used MOVPRFX, we may need a second move. */
2460 if (a->rd != a->rn) {
2461 TCGLabel *done = gen_new_label();
2462 tcg_gen_br(done);
2463
2464 gen_set_label(over);
2465 do_mov_z(s, a->rd, a->rn);
2466
2467 gen_set_label(done);
2468 } else {
2469 gen_set_label(over);
2470 }
2471 return true;
2472 }
2473
2474 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2475 {
2476 return do_clast_vector(s, a, false);
2477 }
2478
2479 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2480 {
2481 return do_clast_vector(s, a, true);
2482 }
2483
2484 /* Compute CLAST for a scalar. */
2485 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2486 bool before, TCGv_i64 reg_val)
2487 {
2488 TCGv_i32 last = tcg_temp_new_i32();
2489 TCGv_i64 ele, cmp, zero;
2490
2491 find_last_active(s, last, esz, pg);
2492
2493 /* Extend the original value of last prior to incrementing. */
2494 cmp = tcg_temp_new_i64();
2495 tcg_gen_ext_i32_i64(cmp, last);
2496
2497 if (!before) {
2498 incr_last_active(s, last, esz);
2499 }
2500
2501 /* The conceit here is that while last < 0 indicates not found, after
2502 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2503 * from which we can load garbage. We then discard the garbage with
2504 * a conditional move.
2505 */
2506 ele = load_last_active(s, last, rm, esz);
2507 tcg_temp_free_i32(last);
2508
2509 zero = tcg_const_i64(0);
2510 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2511
2512 tcg_temp_free_i64(zero);
2513 tcg_temp_free_i64(cmp);
2514 tcg_temp_free_i64(ele);
2515 }
2516
2517 /* Compute CLAST for a Vreg. */
2518 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2519 {
2520 if (sve_access_check(s)) {
2521 int esz = a->esz;
2522 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2523 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2524
2525 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2526 write_fp_dreg(s, a->rd, reg);
2527 tcg_temp_free_i64(reg);
2528 }
2529 return true;
2530 }
2531
2532 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2533 {
2534 return do_clast_fp(s, a, false);
2535 }
2536
2537 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2538 {
2539 return do_clast_fp(s, a, true);
2540 }
2541
2542 /* Compute CLAST for a Xreg. */
2543 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2544 {
2545 TCGv_i64 reg;
2546
2547 if (!sve_access_check(s)) {
2548 return true;
2549 }
2550
2551 reg = cpu_reg(s, a->rd);
2552 switch (a->esz) {
2553 case 0:
2554 tcg_gen_ext8u_i64(reg, reg);
2555 break;
2556 case 1:
2557 tcg_gen_ext16u_i64(reg, reg);
2558 break;
2559 case 2:
2560 tcg_gen_ext32u_i64(reg, reg);
2561 break;
2562 case 3:
2563 break;
2564 default:
2565 g_assert_not_reached();
2566 }
2567
2568 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2569 return true;
2570 }
2571
2572 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2573 {
2574 return do_clast_general(s, a, false);
2575 }
2576
2577 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578 {
2579 return do_clast_general(s, a, true);
2580 }
2581
2582 /* Compute LAST for a scalar. */
2583 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2584 int pg, int rm, bool before)
2585 {
2586 TCGv_i32 last = tcg_temp_new_i32();
2587 TCGv_i64 ret;
2588
2589 find_last_active(s, last, esz, pg);
2590 if (before) {
2591 wrap_last_active(s, last, esz);
2592 } else {
2593 incr_last_active(s, last, esz);
2594 }
2595
2596 ret = load_last_active(s, last, rm, esz);
2597 tcg_temp_free_i32(last);
2598 return ret;
2599 }
2600
2601 /* Compute LAST for a Vreg. */
2602 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2603 {
2604 if (sve_access_check(s)) {
2605 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2606 write_fp_dreg(s, a->rd, val);
2607 tcg_temp_free_i64(val);
2608 }
2609 return true;
2610 }
2611
2612 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2613 {
2614 return do_last_fp(s, a, false);
2615 }
2616
2617 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618 {
2619 return do_last_fp(s, a, true);
2620 }
2621
2622 /* Compute LAST for a Xreg. */
2623 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2624 {
2625 if (sve_access_check(s)) {
2626 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2627 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2628 tcg_temp_free_i64(val);
2629 }
2630 return true;
2631 }
2632
2633 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2634 {
2635 return do_last_general(s, a, false);
2636 }
2637
2638 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2639 {
2640 return do_last_general(s, a, true);
2641 }
2642
2643 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2644 {
2645 if (sve_access_check(s)) {
2646 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2647 }
2648 return true;
2649 }
2650
2651 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2652 {
2653 if (sve_access_check(s)) {
2654 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2655 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2656 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2657 tcg_temp_free_i64(t);
2658 }
2659 return true;
2660 }
2661
2662 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664 static gen_helper_gvec_3 * const fns[4] = {
2665 NULL,
2666 gen_helper_sve_revb_h,
2667 gen_helper_sve_revb_s,
2668 gen_helper_sve_revb_d,
2669 };
2670 return do_zpz_ool(s, a, fns[a->esz]);
2671 }
2672
2673 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674 {
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 NULL,
2678 gen_helper_sve_revh_s,
2679 gen_helper_sve_revh_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682 }
2683
2684 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2687 }
2688
2689 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 gen_helper_sve_rbit_b,
2693 gen_helper_sve_rbit_h,
2694 gen_helper_sve_rbit_s,
2695 gen_helper_sve_rbit_d,
2696 };
2697 return do_zpz_ool(s, a, fns[a->esz]);
2698 }
2699
2700 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2701 {
2702 if (sve_access_check(s)) {
2703 unsigned vsz = vec_full_reg_size(s);
2704 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2705 vec_full_reg_offset(s, a->rn),
2706 vec_full_reg_offset(s, a->rm),
2707 pred_full_reg_offset(s, a->pg),
2708 vsz, vsz, a->esz, gen_helper_sve_splice);
2709 }
2710 return true;
2711 }
2712
2713 /*
2714 *** SVE Integer Compare - Vectors Group
2715 */
2716
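/*
 * The compare helpers return the predicate-test result in their i32
 * argument, which do_pred_flags() unpacks into NF/ZF/CF/VF; below, T
 * is reused as both the descriptor input and the flags output to save
 * a temporary.
 */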
2717 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2718 gen_helper_gvec_flags_4 *gen_fn)
2719 {
2720 TCGv_ptr pd, zn, zm, pg;
2721 unsigned vsz;
2722 TCGv_i32 t;
2723
2724 if (gen_fn == NULL) {
2725 return false;
2726 }
2727 if (!sve_access_check(s)) {
2728 return true;
2729 }
2730
2731 vsz = vec_full_reg_size(s);
2732 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2733 pd = tcg_temp_new_ptr();
2734 zn = tcg_temp_new_ptr();
2735 zm = tcg_temp_new_ptr();
2736 pg = tcg_temp_new_ptr();
2737
2738 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2739 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2740 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2741 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2742
2743 gen_fn(t, pd, zn, zm, pg, t);
2744
2745 tcg_temp_free_ptr(pd);
2746 tcg_temp_free_ptr(zn);
2747 tcg_temp_free_ptr(zm);
2748 tcg_temp_free_ptr(pg);
2749
2750 do_pred_flags(t);
2751
2752 tcg_temp_free_i32(t);
2753 return true;
2754 }
2755
2756 #define DO_PPZZ(NAME, name) \
2757 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2758 uint32_t insn) \
2759 { \
2760 static gen_helper_gvec_flags_4 * const fns[4] = { \
2761 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2762 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2763 }; \
2764 return do_ppzz_flags(s, a, fns[a->esz]); \
2765 }
2766
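/* E.g. DO_PPZZ(CMPEQ, cmpeq) defines trans_CMPEQ_ppzz(), dispatching
 * to gen_helper_sve_cmpeq_ppzz_{b,h,s,d} by element size.
 */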
2767 DO_PPZZ(CMPEQ, cmpeq)
2768 DO_PPZZ(CMPNE, cmpne)
2769 DO_PPZZ(CMPGT, cmpgt)
2770 DO_PPZZ(CMPGE, cmpge)
2771 DO_PPZZ(CMPHI, cmphi)
2772 DO_PPZZ(CMPHS, cmphs)
2773
2774 #undef DO_PPZZ
2775
2776 #define DO_PPZW(NAME, name) \
2777 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2778 uint32_t insn) \
2779 { \
2780 static gen_helper_gvec_flags_4 * const fns[4] = { \
2781 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2782 gen_helper_sve_##name##_ppzw_s, NULL \
2783 }; \
2784 return do_ppzz_flags(s, a, fns[a->esz]); \
2785 }
2786
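/*
 * The wide forms compare each element of Zn against the overlapping
 * 64-bit element of Zm.  A doubleword variant would be identical to
 * the vector form above, so esz == 3 is unallocated (NULL).
 */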
2787 DO_PPZW(CMPEQ, cmpeq)
2788 DO_PPZW(CMPNE, cmpne)
2789 DO_PPZW(CMPGT, cmpgt)
2790 DO_PPZW(CMPGE, cmpge)
2791 DO_PPZW(CMPHI, cmphi)
2792 DO_PPZW(CMPHS, cmphs)
2793 DO_PPZW(CMPLT, cmplt)
2794 DO_PPZW(CMPLE, cmple)
2795 DO_PPZW(CMPLO, cmplo)
2796 DO_PPZW(CMPLS, cmpls)
2797
2798 #undef DO_PPZW
2799
2800 /*
2801 *** SVE Integer Compare - Immediate Groups
2802 */
2803
2804 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2805 gen_helper_gvec_flags_3 *gen_fn)
2806 {
2807 TCGv_ptr pd, zn, pg;
2808 unsigned vsz;
2809 TCGv_i32 t;
2810
2811 if (gen_fn == NULL) {
2812 return false;
2813 }
2814 if (!sve_access_check(s)) {
2815 return true;
2816 }
2817
2818 vsz = vec_full_reg_size(s);
2819 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2820 pd = tcg_temp_new_ptr();
2821 zn = tcg_temp_new_ptr();
2822 pg = tcg_temp_new_ptr();
2823
2824 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2825 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2826 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2827
2828 gen_fn(t, pd, zn, pg, t);
2829
2830 tcg_temp_free_ptr(pd);
2831 tcg_temp_free_ptr(zn);
2832 tcg_temp_free_ptr(pg);
2833
2834 do_pred_flags(t);
2835
2836 tcg_temp_free_i32(t);
2837 return true;
2838 }
2839
2840 #define DO_PPZI(NAME, name) \
2841 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2842 uint32_t insn) \
2843 { \
2844 static gen_helper_gvec_flags_3 * const fns[4] = { \
2845 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2846 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2847 }; \
2848 return do_ppzi_flags(s, a, fns[a->esz]); \
2849 }
2850
2851 DO_PPZI(CMPEQ, cmpeq)
2852 DO_PPZI(CMPNE, cmpne)
2853 DO_PPZI(CMPGT, cmpgt)
2854 DO_PPZI(CMPGE, cmpge)
2855 DO_PPZI(CMPHI, cmphi)
2856 DO_PPZI(CMPHS, cmphs)
2857 DO_PPZI(CMPLT, cmplt)
2858 DO_PPZI(CMPLE, cmple)
2859 DO_PPZI(CMPLO, cmplo)
2860 DO_PPZI(CMPLS, cmpls)
2861
2862 #undef DO_PPZI
2863
2864 /*
2865 *** SVE Partition Break Group
2866 */
2867
2868 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2869 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2870 {
2871 if (!sve_access_check(s)) {
2872 return true;
2873 }
2874
2875 unsigned vsz = pred_full_reg_size(s);
2876
2877 /* Predicate sizes need not be a multiple of 8, so we cannot use simd_desc. */
2878 TCGv_ptr d = tcg_temp_new_ptr();
2879 TCGv_ptr n = tcg_temp_new_ptr();
2880 TCGv_ptr m = tcg_temp_new_ptr();
2881 TCGv_ptr g = tcg_temp_new_ptr();
2882 TCGv_i32 t = tcg_const_i32(vsz - 2);
2883
2884 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2885 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2886 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2887 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2888
2889 if (a->s) {
2890 fn_s(t, d, n, m, g, t);
2891 do_pred_flags(t);
2892 } else {
2893 fn(d, n, m, g, t);
2894 }
2895 tcg_temp_free_ptr(d);
2896 tcg_temp_free_ptr(n);
2897 tcg_temp_free_ptr(m);
2898 tcg_temp_free_ptr(g);
2899 tcg_temp_free_i32(t);
2900 return true;
2901 }
2902
2903 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2904 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2905 {
2906 if (!sve_access_check(s)) {
2907 return true;
2908 }
2909
2910 unsigned vsz = pred_full_reg_size(s);
2911
2912 /* Predicate sizes need not be a multiple of 8, so we cannot use simd_desc. */
2913 TCGv_ptr d = tcg_temp_new_ptr();
2914 TCGv_ptr n = tcg_temp_new_ptr();
2915 TCGv_ptr g = tcg_temp_new_ptr();
2916 TCGv_i32 t = tcg_const_i32(vsz - 2);
2917
2918 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2919 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2920 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2921
2922 if (a->s) {
2923 fn_s(t, d, n, g, t);
2924 do_pred_flags(t);
2925 } else {
2926 fn(d, n, g, t);
2927 }
2928 tcg_temp_free_ptr(d);
2929 tcg_temp_free_ptr(n);
2930 tcg_temp_free_ptr(g);
2931 tcg_temp_free_i32(t);
2932 return true;
2933 }
2934
2935 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2936 {
2937 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2938 }
2939
2940 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2941 {
2942 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2943 }
2944
2945 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2946 {
2947 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2948 }
2949
2950 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2951 {
2952 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2953 }
2954
2955 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2956 {
2957 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2958 }
2959
2960 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2961 {
2962 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2963 }
2964
2965 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2966 {
2967 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2968 }
2969
2970 /*
2971 *** SVE Predicate Count Group
2972 */
2973
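/*
 * Two strategies: when the whole predicate fits in one 64-bit load
 * (psz <= 8, i.e. VL <= 512) the count is done inline by masking to
 * one bit per element and using ctpop; e.g. pred_esz_masks[2] is
 * 0x1111111111111111ull, one predicate bit per .s element.  Larger
 * predicates go out of line, with the same hand-rolled (psz - 2)
 * descriptor used by the predicate permute helpers.
 */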
2974 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2975 {
2976 unsigned psz = pred_full_reg_size(s);
2977
2978 if (psz <= 8) {
2979 uint64_t psz_mask;
2980
2981 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2982 if (pn != pg) {
2983 TCGv_i64 g = tcg_temp_new_i64();
2984 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2985 tcg_gen_and_i64(val, val, g);
2986 tcg_temp_free_i64(g);
2987 }
2988
2989 /* Narrow the pred_esz_masks constant to the psz bytes actually
2990 * present, simply to reduce the size of the code generated here.
2991 */
2992 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2993 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2994
2995 tcg_gen_ctpop_i64(val, val);
2996 } else {
2997 TCGv_ptr t_pn = tcg_temp_new_ptr();
2998 TCGv_ptr t_pg = tcg_temp_new_ptr();
2999 unsigned desc;
3000 TCGv_i32 t_desc;
3001
3002 desc = psz - 2;
3003 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3004
3005 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3006 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3007 t_desc = tcg_const_i32(desc);
3008
3009 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3010 tcg_temp_free_ptr(t_pn);
3011 tcg_temp_free_ptr(t_pg);
3012 tcg_temp_free_i32(t_desc);
3013 }
3014 }
3015
3016 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3017 {
3018 if (sve_access_check(s)) {
3019 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3020 }
3021 return true;
3022 }
3023
3024 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3025 uint32_t insn)
3026 {
3027 if (sve_access_check(s)) {
3028 TCGv_i64 reg = cpu_reg(s, a->rd);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030
3031 do_cntp(s, val, a->esz, a->pg, a->pg);
3032 if (a->d) {
3033 tcg_gen_sub_i64(reg, reg, val);
3034 } else {
3035 tcg_gen_add_i64(reg, reg, val);
3036 }
3037 tcg_temp_free_i64(val);
3038 }
3039 return true;
3040 }
3041
3042 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3043 uint32_t insn)
3044 {
3045 if (a->esz == 0) {
3046 return false;
3047 }
3048 if (sve_access_check(s)) {
3049 unsigned vsz = vec_full_reg_size(s);
3050 TCGv_i64 val = tcg_temp_new_i64();
3051 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3052
3053 do_cntp(s, val, a->esz, a->pg, a->pg);
3054 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3055 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3056 }
3057 return true;
3058 }
3059
3060 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3061 uint32_t insn)
3062 {
3063 if (sve_access_check(s)) {
3064 TCGv_i64 reg = cpu_reg(s, a->rd);
3065 TCGv_i64 val = tcg_temp_new_i64();
3066
3067 do_cntp(s, val, a->esz, a->pg, a->pg);
3068 do_sat_addsub_32(reg, val, a->u, a->d);
3069 }
3070 return true;
3071 }
3072
3073 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3074 uint32_t insn)
3075 {
3076 if (sve_access_check(s)) {
3077 TCGv_i64 reg = cpu_reg(s, a->rd);
3078 TCGv_i64 val = tcg_temp_new_i64();
3079
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 do_sat_addsub_64(reg, val, a->u, a->d);
3082 }
3083 return true;
3084 }
3085
3086 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3087 uint32_t insn)
3088 {
3089 if (a->esz == 0) {
3090 return false;
3091 }
3092 if (sve_access_check(s)) {
3093 TCGv_i64 val = tcg_temp_new_i64();
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3096 }
3097 return true;
3098 }
3099
3100 /*
3101 *** SVE Integer Compare Scalars Group
3102 */
3103
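/*
 * CTERMEQ/CTERMNE update only N and V: N is the comparison result and
 * V = !N && !C, with C and Z left over from the previous flag-setting
 * predicate operation, so that a single conditional branch can test
 * both loop-termination conditions.
 */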
3104 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3105 {
3106 if (!sve_access_check(s)) {
3107 return true;
3108 }
3109
3110 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3111 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3112 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3113 TCGv_i64 cmp = tcg_temp_new_i64();
3114
3115 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3116 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3117 tcg_temp_free_i64(cmp);
3118
3119 /* VF = !NF & !CF. */
3120 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3121 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3122
3123 /* Both NF and VF actually look at bit 31. */
3124 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3125 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3126 return true;
3127 }
3128
3129 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3130 {
3131 if (!sve_access_check(s)) {
3132 return true;
3133 }
3134
3135 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3136 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3137 TCGv_i64 t0 = tcg_temp_new_i64();
3138 TCGv_i64 t1 = tcg_temp_new_i64();
3139 TCGv_i32 t2, t3;
3140 TCGv_ptr ptr;
3141 unsigned desc, vsz = vec_full_reg_size(s);
3142 TCGCond cond;
3143
3144 if (!a->sf) {
3145 if (a->u) {
3146 tcg_gen_ext32u_i64(op0, op0);
3147 tcg_gen_ext32u_i64(op1, op1);
3148 } else {
3149 tcg_gen_ext32s_i64(op0, op0);
3150 tcg_gen_ext32s_i64(op1, op1);
3151 }
3152 }
3153
3154 /* For the helper, compress the different conditions into a computation
3155 * of how many iterations for which the condition is true.
3156 *
3157 * This is slightly complicated by the case 0 <= UINT64_MAX, nominally
3158 * 2**64 iterations, which overflows to 0. Of course, predicate registers
3159 * aren't that large, so any count >= the predicate size is sufficient.
3160 */
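/*
 * Worked example: WHILELT with x(rn)=5, x(rm)=9, .s elements, 256-bit
 * VL: t0 = 9 - 5 = 4, min(4, 32) = 4, the signed LT condition holds,
 * so the helper sets the first four .s elements of Pd.
 */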
3161 tcg_gen_sub_i64(t0, op1, op0);
3162
3163 /* t0 = MIN(op1 - op0, vsz). */
3164 tcg_gen_movi_i64(t1, vsz);
3165 tcg_gen_umin_i64(t0, t0, t1);
3166 if (a->eq) {
3167 /* Equality means one more iteration. */
3168 tcg_gen_addi_i64(t0, t0, 1);
3169 }
3170
3171 /* t0 = (condition true ? t0 : 0). */
3172 cond = (a->u
3173 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3174 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3175 tcg_gen_movi_i64(t1, 0);
3176 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3177
3178 t2 = tcg_temp_new_i32();
3179 tcg_gen_extrl_i64_i32(t2, t0);
3180 tcg_temp_free_i64(t0);
3181 tcg_temp_free_i64(t1);
3182
3183 desc = (vsz / 8) - 2;
3184 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3185 t3 = tcg_const_i32(desc);
3186
3187 ptr = tcg_temp_new_ptr();
3188 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3189
3190 gen_helper_sve_while(t2, ptr, t2, t3);
3191 do_pred_flags(t2);
3192
3193 tcg_temp_free_ptr(ptr);
3194 tcg_temp_free_i32(t2);
3195 tcg_temp_free_i32(t3);
3196 return true;
3197 }
3198
3199 /*
3200 *** SVE Integer Wide Immediate - Unpredicated Group
3201 */
3202
3203 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3204 {
3205 if (a->esz == 0) {
3206 return false;
3207 }
3208 if (sve_access_check(s)) {
3209 unsigned vsz = vec_full_reg_size(s);
3210 int dofs = vec_full_reg_offset(s, a->rd);
3211 uint64_t imm;
3212
3213 /* Decode the VFP immediate. */
3214 imm = vfp_expand_imm(a->esz, a->imm);
3215 imm = dup_const(a->esz, imm);
3216
3217 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3218 }
3219 return true;
3220 }
3221
3222 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3223 {
3224 if (a->esz == 0 && extract32(insn, 13, 1)) {
3225 return false;
3226 }
3227 if (sve_access_check(s)) {
3228 unsigned vsz = vec_full_reg_size(s);
3229 int dofs = vec_full_reg_offset(s, a->rd);
3230
3231 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3232 }
3233 return true;
3234 }
3235
3236 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3237 {
3238 if (a->esz == 0 && extract32(insn, 13, 1)) {
3239 return false;
3240 }
3241 if (sve_access_check(s)) {
3242 unsigned vsz = vec_full_reg_size(s);
3243 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3244 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3245 }
3246 return true;
3247 }
3248
3249 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3250 {
3251 a->imm = -a->imm;
3252 return trans_ADD_zzi(s, a, insn);
3253 }
3254
3255 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3256 {
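/* SUBR is a reversed subtract, computing imm - Zn; .scalar_first
 * below makes the gvec expansion use the scalar as the first operand.
 */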
3257 static const GVecGen2s op[4] = {
3258 { .fni8 = tcg_gen_vec_sub8_i64,
3259 .fniv = tcg_gen_sub_vec,
3260 .fno = gen_helper_sve_subri_b,
3261 .opc = INDEX_op_sub_vec,
3262 .vece = MO_8,
3263 .scalar_first = true },
3264 { .fni8 = tcg_gen_vec_sub16_i64,
3265 .fniv = tcg_gen_sub_vec,
3266 .fno = gen_helper_sve_subri_h,
3267 .opc = INDEX_op_sub_vec,
3268 .vece = MO_16,
3269 .scalar_first = true },
3270 { .fni4 = tcg_gen_sub_i32,
3271 .fniv = tcg_gen_sub_vec,
3272 .fno = gen_helper_sve_subri_s,
3273 .opc = INDEX_op_sub_vec,
3274 .vece = MO_32,
3275 .scalar_first = true },
3276 { .fni8 = tcg_gen_sub_i64,
3277 .fniv = tcg_gen_sub_vec,
3278 .fno = gen_helper_sve_subri_d,
3279 .opc = INDEX_op_sub_vec,
3280 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3281 .vece = MO_64,
3282 .scalar_first = true }
3283 };
3284
3285 if (a->esz == 0 && extract32(insn, 13, 1)) {
3286 return false;
3287 }
3288 if (sve_access_check(s)) {
3289 unsigned vsz = vec_full_reg_size(s);
3290 TCGv_i64 c = tcg_const_i64(a->imm);
3291 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3292 vec_full_reg_offset(s, a->rn),
3293 vsz, vsz, c, &op[a->esz]);
3294 tcg_temp_free_i64(c);
3295 }
3296 return true;
3297 }
3298
3299 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3300 {
3301 if (sve_access_check(s)) {
3302 unsigned vsz = vec_full_reg_size(s);
3303 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3304 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3305 }
3306 return true;
3307 }
3308
3309 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3310 bool u, bool d)
3311 {
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3313 return false;
3314 }
3315 if (sve_access_check(s)) {
3316 TCGv_i64 val = tcg_const_i64(a->imm);
3317 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3318 tcg_temp_free_i64(val);
3319 }
3320 return true;
3321 }
3322
3323 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3324 {
3325 return do_zzi_sat(s, a, insn, false, false);
3326 }
3327
3328 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3329 {
3330 return do_zzi_sat(s, a, insn, true, false);
3331 }
3332
3333 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3334 {
3335 return do_zzi_sat(s, a, insn, false, true);
3336 }
3337
3338 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3339 {
3340 return do_zzi_sat(s, a, insn, true, true);
3341 }
3342
3343 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3344 {
3345 if (sve_access_check(s)) {
3346 unsigned vsz = vec_full_reg_size(s);
3347 TCGv_i64 c = tcg_const_i64(a->imm);
3348
3349 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3350 vec_full_reg_offset(s, a->rn),
3351 c, vsz, vsz, 0, fn);
3352 tcg_temp_free_i64(c);
3353 }
3354 return true;
3355 }
3356
3357 #define DO_ZZI(NAME, name) \
3358 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3359 uint32_t insn) \
3360 { \
3361 static gen_helper_gvec_2i * const fns[4] = { \
3362 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3363 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3364 }; \
3365 return do_zzi_ool(s, a, fns[a->esz]); \
3366 }
3367
3368 DO_ZZI(SMAX, smax)
3369 DO_ZZI(UMAX, umax)
3370 DO_ZZI(SMIN, smin)
3371 DO_ZZI(UMIN, umin)
3372
3373 #undef DO_ZZI
3374
3375 /*
3376 *** SVE Floating Point Arithmetic - Unpredicated Group
3377 */
3378
3379 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3380 gen_helper_gvec_3_ptr *fn)
3381 {
3382 if (fn == NULL) {
3383 return false;
3384 }
3385 if (sve_access_check(s)) {
3386 unsigned vsz = vec_full_reg_size(s);
3387 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3388 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3389 vec_full_reg_offset(s, a->rn),
3390 vec_full_reg_offset(s, a->rm),
3391 status, vsz, vsz, 0, fn);
3392 tcg_temp_free_ptr(status);
3393 }
3394 return true;
3395 }
3396
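/*
 * Half-precision operations use the separate FP16 float_status (with
 * its own flush-to-zero control), hence get_fpstatus_ptr(esz == MO_16)
 * above.
 */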
3397
3398 #define DO_FP3(NAME, name) \
3399 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3400 { \
3401 static gen_helper_gvec_3_ptr * const fns[4] = { \
3402 NULL, gen_helper_gvec_##name##_h, \
3403 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3404 }; \
3405 return do_zzz_fp(s, a, fns[a->esz]); \
3406 }
3407
3408 DO_FP3(FADD_zzz, fadd)
3409 DO_FP3(FSUB_zzz, fsub)
3410 DO_FP3(FMUL_zzz, fmul)
3411 DO_FP3(FTSMUL, ftsmul)
3412 DO_FP3(FRECPS, recps)
3413 DO_FP3(FRSQRTS, rsqrts)
3414
3415 #undef DO_FP3
3416
3417 /*
3418 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3419 */
3420
3421 /* Subroutine loading a vector register at VOFS of LEN bytes.
3422 * The load should begin at the address Rn + IMM.
3423 */
3424
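/*
 * Strategy: when the transfer breaks into at most four pieces it is
 * fully unrolled; larger sizes use a TCG loop over the 8-byte-aligned
 * portion.  A tail occurs only for predicate loads, whose size is any
 * multiple of 2; e.g. a 1408-bit VL gives a 22-byte predicate, split
 * as 16 aligned bytes plus a 6-byte remainder, the latter loaded as
 * 4 bytes plus 2 bytes below.
 */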
3425 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3426 int rn, int imm)
3427 {
3428 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3429 uint32_t len_remain = len % 8;
3430 uint32_t nparts = len / 8 + ctpop8(len_remain);
3431 int midx = get_mem_index(s);
3432 TCGv_i64 addr, t0, t1;
3433
3434 addr = tcg_temp_new_i64();
3435 t0 = tcg_temp_new_i64();
3436
3437 /* Note that unpredicated load/store of vector/predicate registers
3438 * are defined as a stream of bytes, which equates to little-endian
3439 * operations on larger quantities. There is no nice way to force
3440 * a little-endian load for aarch64_be-linux-user out of line.
3441 *
3442 * Attempt to keep code expansion to a minimum by limiting the
3443 * amount of unrolling done.
3444 */
3445 if (nparts <= 4) {
3446 int i;
3447
3448 for (i = 0; i < len_align; i += 8) {
3449 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3450 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3451 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3452 }
3453 } else {
3454 TCGLabel *loop = gen_new_label();
3455 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3456
3457 gen_set_label(loop);
3458
3459 /* Minimize the number of local temps that must be re-read from
3460 * the stack each iteration. Instead, re-compute values other
3461 * than the loop counter.
3462 */
3463 tp = tcg_temp_new_ptr();
3464 tcg_gen_addi_ptr(tp, i, imm);
3465 tcg_gen_extu_ptr_i64(addr, tp);
3466 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3467
3468 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3469
3470 tcg_gen_add_ptr(tp, cpu_env, i);
3471 tcg_gen_addi_ptr(i, i, 8);
3472 tcg_gen_st_i64(t0, tp, vofs);
3473 tcg_temp_free_ptr(tp);
3474
3475 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3476 tcg_temp_free_ptr(i);
3477 }
3478
3479 /* Predicate register loads can be any multiple of 2.
3480 * Note that we still store the entire 64-bit unit into cpu_env.
3481 */
3482 if (len_remain) {
3483 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3484
3485 switch (len_remain) {
3486 case 2:
3487 case 4:
3488 case 8:
3489 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3490 break;
3491
3492 case 6:
3493 t1 = tcg_temp_new_i64();
3494 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3495 tcg_gen_addi_i64(addr, addr, 4);
3496 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3497 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3498 tcg_temp_free_i64(t1);
3499 break;
3500
3501 default:
3502 g_assert_not_reached();
3503 }
3504 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3505 }
3506 tcg_temp_free_i64(addr);
3507 tcg_temp_free_i64(t0);
3508 }
3509
3510 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3511 {
3512 if (sve_access_check(s)) {
3513 int size = vec_full_reg_size(s);
3514 int off = vec_full_reg_offset(s, a->rd);
3515 do_ldr(s, off, size, a->rn, a->imm * size);
3516 }
3517 return true;
3518 }
3519
3520 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3521 {
3522 if (sve_access_check(s)) {
3523 int size = pred_full_reg_size(s);
3524 int off = pred_full_reg_offset(s, a->rd);
3525 do_ldr(s, off, size, a->rn, a->imm * size);
3526 }
3527 return true;
3528 }