1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35
36
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
45 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
46
47 /*
48 * Helpers for extracting complex instruction fields.
49 */
50
51 /* See e.g. ASR (immediate, predicated).
52  * Returns -1 for an unallocated encoding; diagnose later.
53 */
54 static int tszimm_esz(int x)
55 {
56 x >>= 3; /* discard imm3 */
57 return 31 - clz32(x);
58 }
59
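/* See e.g. LSR (immediate, predicated): the field encodes (2 * esize) - shift. */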
60 static int tszimm_shr(int x)
61 {
62 return (16 << tszimm_esz(x)) - x;
63 }
64
65 /* See e.g. LSL (immediate, predicated). */
66 static int tszimm_shl(int x)
67 {
68 return x - (8 << tszimm_esz(x));
69 }
70
71 static inline int plus1(int x)
72 {
73 return x + 1;
74 }
75
76 /* The SH bit is in bit 8. Extract the low 8 and shift. */
77 static inline int expand_imm_sh8s(int x)
78 {
79 return (int8_t)x << (x & 0x100 ? 8 : 0);
80 }
81
82 static inline int expand_imm_sh8u(int x)
83 {
84 return (uint8_t)x << (x & 0x100 ? 8 : 0);
85 }
86
87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
88  * with unsigned data. Cf. SVE Memory Contiguous Load Group.
89 */
90 static inline int msz_dtype(int msz)
91 {
92 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
93 return dtype[msz];
94 }
95
96 /*
97 * Include the generated decoder.
98 */
99
100 #include "decode-sve.inc.c"
101
102 /*
103 * Implement all of the translator functions referenced by the decoder.
104 */
105
106 /* Return the offset into CPUARMState of the predicate vector register Pn.
107 * Note for this purpose, FFR is P16.
108 */
109 static inline int pred_full_reg_offset(DisasContext *s, int regno)
110 {
111 return offsetof(CPUARMState, vfp.pregs[regno]);
112 }
113
114 /* Return the byte size of the whole predicate register, VL / 64. */
115 static inline int pred_full_reg_size(DisasContext *s)
116 {
117 return s->sve_len >> 3;
118 }
119
120 /* Round up the size of a register to a size allowed by
121 * the tcg vector infrastructure. Any operation which uses this
122 * size may assume that the bits above pred_full_reg_size are zero,
123  * and must leave them that way.
124 *
125 * Note that this is not needed for the vector registers as they
126 * are always properly sized for tcg vectors.
127 */
128 static int size_for_gvec(int size)
129 {
130 if (size <= 8) {
131 return 8;
132 } else {
133 return QEMU_ALIGN_UP(size, 16);
134 }
135 }
136
137 static int pred_gvec_reg_size(DisasContext *s)
138 {
139 return size_for_gvec(pred_full_reg_size(s));
140 }
141
142 /* Invoke a vector expander on two Zregs. */
143 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
144 int esz, int rd, int rn)
145 {
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 gvec_fn(esz, vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn), vsz, vsz);
150 }
151 return true;
152 }
153
154 /* Invoke a vector expander on three Zregs. */
155 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
156 int esz, int rd, int rn, int rm)
157 {
158 if (sve_access_check(s)) {
159 unsigned vsz = vec_full_reg_size(s);
160 gvec_fn(esz, vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm), vsz, vsz);
163 }
164 return true;
165 }
166
167 /* Invoke a vector move on two Zregs. */
168 static bool do_mov_z(DisasContext *s, int rd, int rn)
169 {
170 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
171 }
172
173 /* Initialize a Zreg with replications of a 64-bit immediate. */
174 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
175 {
176 unsigned vsz = vec_full_reg_size(s);
177 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
178 }
179
180 /* Invoke a vector expander on two Pregs. */
181 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
182 int esz, int rd, int rn)
183 {
184 if (sve_access_check(s)) {
185 unsigned psz = pred_gvec_reg_size(s);
186 gvec_fn(esz, pred_full_reg_offset(s, rd),
187 pred_full_reg_offset(s, rn), psz, psz);
188 }
189 return true;
190 }
191
192 /* Invoke a vector expander on three Pregs. */
193 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
194 int esz, int rd, int rn, int rm)
195 {
196 if (sve_access_check(s)) {
197 unsigned psz = pred_gvec_reg_size(s);
198 gvec_fn(esz, pred_full_reg_offset(s, rd),
199 pred_full_reg_offset(s, rn),
200 pred_full_reg_offset(s, rm), psz, psz);
201 }
202 return true;
203 }
204
205 /* Invoke a vector operation on four Pregs. */
206 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
207 int rd, int rn, int rm, int rg)
208 {
209 if (sve_access_check(s)) {
210 unsigned psz = pred_gvec_reg_size(s);
211 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
212 pred_full_reg_offset(s, rn),
213 pred_full_reg_offset(s, rm),
214 pred_full_reg_offset(s, rg),
215 psz, psz, gvec_op);
216 }
217 return true;
218 }
219
220 /* Invoke a vector move on two Pregs. */
221 static bool do_mov_p(DisasContext *s, int rd, int rn)
222 {
223 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
224 }
225
226 /* Set the cpu flags as per a return from an SVE helper. */
227 static void do_pred_flags(TCGv_i32 t)
228 {
229 tcg_gen_mov_i32(cpu_NF, t);
230 tcg_gen_andi_i32(cpu_ZF, t, 2);
231 tcg_gen_andi_i32(cpu_CF, t, 1);
232 tcg_gen_movi_i32(cpu_VF, 0);
233 }
234
235 /* Subroutines computing the ARM PredTest pseudofunction. */
236 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
237 {
238 TCGv_i32 t = tcg_temp_new_i32();
239
240 gen_helper_sve_predtest1(t, d, g);
241 do_pred_flags(t);
242 tcg_temp_free_i32(t);
243 }
244
245 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
246 {
247 TCGv_ptr dptr = tcg_temp_new_ptr();
248 TCGv_ptr gptr = tcg_temp_new_ptr();
249 TCGv_i32 t;
250
251 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
252 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
253 t = tcg_const_i32(words);
254
255 gen_helper_sve_predtest(t, dptr, gptr, t);
256 tcg_temp_free_ptr(dptr);
257 tcg_temp_free_ptr(gptr);
258
259 do_pred_flags(t);
260 tcg_temp_free_i32(t);
261 }
262
263 /* For each element size, the bits within a predicate word that are active. */
264 const uint64_t pred_esz_masks[4] = {
265 0xffffffffffffffffull, 0x5555555555555555ull,
266 0x1111111111111111ull, 0x0101010101010101ull
267 };
268
269 /*
270 *** SVE Logical - Unpredicated Group
271 */
272
273 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
274 {
275 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
276 }
277
278 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
279 {
280 if (a->rn == a->rm) { /* MOV */
281 return do_mov_z(s, a->rd, a->rn);
282 } else {
283 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
284 }
285 }
286
287 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
288 {
289 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
290 }
291
292 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
293 {
294 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
295 }
296
297 /*
298 *** SVE Integer Arithmetic - Unpredicated Group
299 */
300
301 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
302 {
303 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
304 }
305
306 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
307 {
308 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
309 }
310
311 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
312 {
313 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
314 }
315
316 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
317 {
318 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
319 }
320
321 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
322 {
323 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
324 }
325
326 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
327 {
328 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
329 }
330
331 /*
332 *** SVE Integer Arithmetic - Binary Predicated Group
333 */
334
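/* Expand a predicated operation on two Zregs via an out-of-line helper. */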
335 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
336 {
337 unsigned vsz = vec_full_reg_size(s);
338 if (fn == NULL) {
339 return false;
340 }
341 if (sve_access_check(s)) {
342 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
343 vec_full_reg_offset(s, a->rn),
344 vec_full_reg_offset(s, a->rm),
345 pred_full_reg_offset(s, a->pg),
346 vsz, vsz, 0, fn);
347 }
348 return true;
349 }
350
351 #define DO_ZPZZ(NAME, name) \
352 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
353 uint32_t insn) \
354 { \
355 static gen_helper_gvec_4 * const fns[4] = { \
356 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
357 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
358 }; \
359 return do_zpzz_ool(s, a, fns[a->esz]); \
360 }
361
362 DO_ZPZZ(AND, and)
363 DO_ZPZZ(EOR, eor)
364 DO_ZPZZ(ORR, orr)
365 DO_ZPZZ(BIC, bic)
366
367 DO_ZPZZ(ADD, add)
368 DO_ZPZZ(SUB, sub)
369
370 DO_ZPZZ(SMAX, smax)
371 DO_ZPZZ(UMAX, umax)
372 DO_ZPZZ(SMIN, smin)
373 DO_ZPZZ(UMIN, umin)
374 DO_ZPZZ(SABD, sabd)
375 DO_ZPZZ(UABD, uabd)
376
377 DO_ZPZZ(MUL, mul)
378 DO_ZPZZ(SMULH, smulh)
379 DO_ZPZZ(UMULH, umulh)
380
381 DO_ZPZZ(ASR, asr)
382 DO_ZPZZ(LSR, lsr)
383 DO_ZPZZ(LSL, lsl)
384
385 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
386 {
387 static gen_helper_gvec_4 * const fns[4] = {
388 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
389 };
390 return do_zpzz_ool(s, a, fns[a->esz]);
391 }
392
393 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
394 {
395 static gen_helper_gvec_4 * const fns[4] = {
396 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
397 };
398 return do_zpzz_ool(s, a, fns[a->esz]);
399 }
400
401 DO_ZPZZ(SEL, sel)
402
403 #undef DO_ZPZZ
404
405 /*
406 *** SVE Integer Arithmetic - Unary Predicated Group
407 */
408
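/* Expand a predicated unary Zreg operation via an out-of-line helper. */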
409 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
410 {
411 if (fn == NULL) {
412 return false;
413 }
414 if (sve_access_check(s)) {
415 unsigned vsz = vec_full_reg_size(s);
416 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
417 vec_full_reg_offset(s, a->rn),
418 pred_full_reg_offset(s, a->pg),
419 vsz, vsz, 0, fn);
420 }
421 return true;
422 }
423
424 #define DO_ZPZ(NAME, name) \
425 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
426 { \
427 static gen_helper_gvec_3 * const fns[4] = { \
428 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
429 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
430 }; \
431 return do_zpz_ool(s, a, fns[a->esz]); \
432 }
433
434 DO_ZPZ(CLS, cls)
435 DO_ZPZ(CLZ, clz)
436 DO_ZPZ(CNT_zpz, cnt_zpz)
437 DO_ZPZ(CNOT, cnot)
438 DO_ZPZ(NOT_zpz, not_zpz)
439 DO_ZPZ(ABS, abs)
440 DO_ZPZ(NEG, neg)
441
442 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
443 {
444 static gen_helper_gvec_3 * const fns[4] = {
445 NULL,
446 gen_helper_sve_fabs_h,
447 gen_helper_sve_fabs_s,
448 gen_helper_sve_fabs_d
449 };
450 return do_zpz_ool(s, a, fns[a->esz]);
451 }
452
453 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
454 {
455 static gen_helper_gvec_3 * const fns[4] = {
456 NULL,
457 gen_helper_sve_fneg_h,
458 gen_helper_sve_fneg_s,
459 gen_helper_sve_fneg_d
460 };
461 return do_zpz_ool(s, a, fns[a->esz]);
462 }
463
464 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
465 {
466 static gen_helper_gvec_3 * const fns[4] = {
467 NULL,
468 gen_helper_sve_sxtb_h,
469 gen_helper_sve_sxtb_s,
470 gen_helper_sve_sxtb_d
471 };
472 return do_zpz_ool(s, a, fns[a->esz]);
473 }
474
475 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
476 {
477 static gen_helper_gvec_3 * const fns[4] = {
478 NULL,
479 gen_helper_sve_uxtb_h,
480 gen_helper_sve_uxtb_s,
481 gen_helper_sve_uxtb_d
482 };
483 return do_zpz_ool(s, a, fns[a->esz]);
484 }
485
486 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
487 {
488 static gen_helper_gvec_3 * const fns[4] = {
489 NULL, NULL,
490 gen_helper_sve_sxth_s,
491 gen_helper_sve_sxth_d
492 };
493 return do_zpz_ool(s, a, fns[a->esz]);
494 }
495
496 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
497 {
498 static gen_helper_gvec_3 * const fns[4] = {
499 NULL, NULL,
500 gen_helper_sve_uxth_s,
501 gen_helper_sve_uxth_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504 }
505
506 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
507 {
508 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
509 }
510
511 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
512 {
513 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
514 }
515
516 #undef DO_ZPZ
517
518 /*
519 *** SVE Integer Reduction Group
520 */
521
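/* Reduce Zn, governed by Pg, to a scalar; write the result to the FP register Vd. */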
522 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
523 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
524 gen_helper_gvec_reduc *fn)
525 {
526 unsigned vsz = vec_full_reg_size(s);
527 TCGv_ptr t_zn, t_pg;
528 TCGv_i32 desc;
529 TCGv_i64 temp;
530
531 if (fn == NULL) {
532 return false;
533 }
534 if (!sve_access_check(s)) {
535 return true;
536 }
537
538 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
539 temp = tcg_temp_new_i64();
540 t_zn = tcg_temp_new_ptr();
541 t_pg = tcg_temp_new_ptr();
542
543 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
544 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
545 fn(temp, t_zn, t_pg, desc);
546 tcg_temp_free_ptr(t_zn);
547 tcg_temp_free_ptr(t_pg);
548 tcg_temp_free_i32(desc);
549
550 write_fp_dreg(s, a->rd, temp);
551 tcg_temp_free_i64(temp);
552 return true;
553 }
554
555 #define DO_VPZ(NAME, name) \
556 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
557 { \
558 static gen_helper_gvec_reduc * const fns[4] = { \
559 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
560 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
561 }; \
562 return do_vpz_ool(s, a, fns[a->esz]); \
563 }
564
565 DO_VPZ(ORV, orv)
566 DO_VPZ(ANDV, andv)
567 DO_VPZ(EORV, eorv)
568
569 DO_VPZ(UADDV, uaddv)
570 DO_VPZ(SMAXV, smaxv)
571 DO_VPZ(UMAXV, umaxv)
572 DO_VPZ(SMINV, sminv)
573 DO_VPZ(UMINV, uminv)
574
575 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
576 {
577 static gen_helper_gvec_reduc * const fns[4] = {
578 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
579 gen_helper_sve_saddv_s, NULL
580 };
581 return do_vpz_ool(s, a, fns[a->esz]);
582 }
583
584 #undef DO_VPZ
585
586 /*
587 *** SVE Shift by Immediate - Predicated Group
588 */
589
590 /* Store zero into every active element of Zd. We will use this for two-
591 * and three-operand predicated instructions for which logic dictates a
592 * zero result.
593 */
594 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
595 {
596 static gen_helper_gvec_2 * const fns[4] = {
597 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
598 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
599 };
600 if (sve_access_check(s)) {
601 unsigned vsz = vec_full_reg_size(s);
602 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
603 pred_full_reg_offset(s, pg),
604 vsz, vsz, 0, fns[esz]);
605 }
606 return true;
607 }
608
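/* Expand a predicated shift by immediate, passing the immediate via simd_data. */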
609 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
610 gen_helper_gvec_3 *fn)
611 {
612 if (sve_access_check(s)) {
613 unsigned vsz = vec_full_reg_size(s);
614 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
615 vec_full_reg_offset(s, a->rn),
616 pred_full_reg_offset(s, a->pg),
617 vsz, vsz, a->imm, fn);
618 }
619 return true;
620 }
621
622 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
623 {
624 static gen_helper_gvec_3 * const fns[4] = {
625 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
626 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
627 };
628 if (a->esz < 0) {
629 /* Invalid tsz encoding -- see tszimm_esz. */
630 return false;
631 }
632 /* Shift by element size is architecturally valid. For
633 arithmetic right-shift, it's the same as by one less. */
634 a->imm = MIN(a->imm, (8 << a->esz) - 1);
635 return do_zpzi_ool(s, a, fns[a->esz]);
636 }
637
638 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
639 {
640 static gen_helper_gvec_3 * const fns[4] = {
641 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
642 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
643 };
644 if (a->esz < 0) {
645 return false;
646 }
647 /* Shift by element size is architecturally valid.
648 For logical shifts, it is a zeroing operation. */
649 if (a->imm >= (8 << a->esz)) {
650 return do_clr_zp(s, a->rd, a->pg, a->esz);
651 } else {
652 return do_zpzi_ool(s, a, fns[a->esz]);
653 }
654 }
655
656 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
657 {
658 static gen_helper_gvec_3 * const fns[4] = {
659 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
660 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
661 };
662 if (a->esz < 0) {
663 return false;
664 }
665 /* Shift by element size is architecturally valid.
666 For logical shifts, it is a zeroing operation. */
667 if (a->imm >= (8 << a->esz)) {
668 return do_clr_zp(s, a->rd, a->pg, a->esz);
669 } else {
670 return do_zpzi_ool(s, a, fns[a->esz]);
671 }
672 }
673
674 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
675 {
676 static gen_helper_gvec_3 * const fns[4] = {
677 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
678 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
679 };
680 if (a->esz < 0) {
681 return false;
682 }
683 /* Shift by element size is architecturally valid. For arithmetic
684 right shift for division, it is a zeroing operation. */
685 if (a->imm >= (8 << a->esz)) {
686 return do_clr_zp(s, a->rd, a->pg, a->esz);
687 } else {
688 return do_zpzi_ool(s, a, fns[a->esz]);
689 }
690 }
691
692 /*
693 *** SVE Bitwise Shift - Predicated Group
694 */
695
696 #define DO_ZPZW(NAME, name) \
697 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
698 uint32_t insn) \
699 { \
700 static gen_helper_gvec_4 * const fns[3] = { \
701 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
702 gen_helper_sve_##name##_zpzw_s, \
703 }; \
704 if (a->esz < 0 || a->esz >= 3) { \
705 return false; \
706 } \
707 return do_zpzz_ool(s, a, fns[a->esz]); \
708 }
709
710 DO_ZPZW(ASR, asr)
711 DO_ZPZW(LSR, lsr)
712 DO_ZPZW(LSL, lsl)
713
714 #undef DO_ZPZW
715
716 /*
717 *** SVE Bitwise Shift - Unpredicated Group
718 */
719
720 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
721 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
722 int64_t, uint32_t, uint32_t))
723 {
724 if (a->esz < 0) {
725 /* Invalid tsz encoding -- see tszimm_esz. */
726 return false;
727 }
728 if (sve_access_check(s)) {
729 unsigned vsz = vec_full_reg_size(s);
730 /* Shift by element size is architecturally valid. For
731 arithmetic right-shift, it's the same as by one less.
732 Otherwise it is a zeroing operation. */
733 if (a->imm >= 8 << a->esz) {
734 if (asr) {
735 a->imm = (8 << a->esz) - 1;
736 } else {
737 do_dupi_z(s, a->rd, 0);
738 return true;
739 }
740 }
741 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
742 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
743 }
744 return true;
745 }
746
747 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
748 {
749 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
750 }
751
752 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
753 {
754 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
755 }
756
757 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
758 {
759 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
760 }
761
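/* Expand an unpredicated operation on two Zreg inputs via an out-of-line helper. */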
762 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
763 {
764 if (fn == NULL) {
765 return false;
766 }
767 if (sve_access_check(s)) {
768 unsigned vsz = vec_full_reg_size(s);
769 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
770 vec_full_reg_offset(s, a->rn),
771 vec_full_reg_offset(s, a->rm),
772 vsz, vsz, 0, fn);
773 }
774 return true;
775 }
776
777 #define DO_ZZW(NAME, name) \
778 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
779 uint32_t insn) \
780 { \
781 static gen_helper_gvec_3 * const fns[4] = { \
782 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
783 gen_helper_sve_##name##_zzw_s, NULL \
784 }; \
785 return do_zzw_ool(s, a, fns[a->esz]); \
786 }
787
788 DO_ZZW(ASR, asr)
789 DO_ZZW(LSR, lsr)
790 DO_ZZW(LSL, lsl)
791
792 #undef DO_ZZW
793
794 /*
795 *** SVE Integer Multiply-Add Group
796 */
797
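/* Expand a predicated three-input Zreg operation (MLA/MLS) via an out-of-line helper. */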
798 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
799 gen_helper_gvec_5 *fn)
800 {
801 if (sve_access_check(s)) {
802 unsigned vsz = vec_full_reg_size(s);
803 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
804 vec_full_reg_offset(s, a->ra),
805 vec_full_reg_offset(s, a->rn),
806 vec_full_reg_offset(s, a->rm),
807 pred_full_reg_offset(s, a->pg),
808 vsz, vsz, 0, fn);
809 }
810 return true;
811 }
812
813 #define DO_ZPZZZ(NAME, name) \
814 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
815 { \
816 static gen_helper_gvec_5 * const fns[4] = { \
817 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
818 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
819 }; \
820 return do_zpzzz_ool(s, a, fns[a->esz]); \
821 }
822
823 DO_ZPZZZ(MLA, mla)
824 DO_ZPZZZ(MLS, mls)
825
826 #undef DO_ZPZZZ
827
828 /*
829 *** SVE Index Generation Group
830 */
831
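/* Expand INDEX: each element of Zd is start + element_number * incr. */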
832 static void do_index(DisasContext *s, int esz, int rd,
833 TCGv_i64 start, TCGv_i64 incr)
834 {
835 unsigned vsz = vec_full_reg_size(s);
836 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
837 TCGv_ptr t_zd = tcg_temp_new_ptr();
838
839 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
840 if (esz == 3) {
841 gen_helper_sve_index_d(t_zd, start, incr, desc);
842 } else {
843 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
844 static index_fn * const fns[3] = {
845 gen_helper_sve_index_b,
846 gen_helper_sve_index_h,
847 gen_helper_sve_index_s,
848 };
849 TCGv_i32 s32 = tcg_temp_new_i32();
850 TCGv_i32 i32 = tcg_temp_new_i32();
851
852 tcg_gen_extrl_i64_i32(s32, start);
853 tcg_gen_extrl_i64_i32(i32, incr);
854 fns[esz](t_zd, s32, i32, desc);
855
856 tcg_temp_free_i32(s32);
857 tcg_temp_free_i32(i32);
858 }
859 tcg_temp_free_ptr(t_zd);
860 tcg_temp_free_i32(desc);
861 }
862
863 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
864 {
865 if (sve_access_check(s)) {
866 TCGv_i64 start = tcg_const_i64(a->imm1);
867 TCGv_i64 incr = tcg_const_i64(a->imm2);
868 do_index(s, a->esz, a->rd, start, incr);
869 tcg_temp_free_i64(start);
870 tcg_temp_free_i64(incr);
871 }
872 return true;
873 }
874
875 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
876 {
877 if (sve_access_check(s)) {
878 TCGv_i64 start = tcg_const_i64(a->imm);
879 TCGv_i64 incr = cpu_reg(s, a->rm);
880 do_index(s, a->esz, a->rd, start, incr);
881 tcg_temp_free_i64(start);
882 }
883 return true;
884 }
885
886 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
887 {
888 if (sve_access_check(s)) {
889 TCGv_i64 start = cpu_reg(s, a->rn);
890 TCGv_i64 incr = tcg_const_i64(a->imm);
891 do_index(s, a->esz, a->rd, start, incr);
892 tcg_temp_free_i64(incr);
893 }
894 return true;
895 }
896
897 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
898 {
899 if (sve_access_check(s)) {
900 TCGv_i64 start = cpu_reg(s, a->rn);
901 TCGv_i64 incr = cpu_reg(s, a->rm);
902 do_index(s, a->esz, a->rd, start, incr);
903 }
904 return true;
905 }
906
907 /*
908 *** SVE Stack Allocation Group
909 */
910
911 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
912 {
913 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
914 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
915 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
916 return true;
917 }
918
919 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
920 {
921 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
922 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
923 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
924 return true;
925 }
926
927 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
928 {
929 TCGv_i64 reg = cpu_reg(s, a->rd);
930 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
931 return true;
932 }
933
934 /*
935 *** SVE Compute Vector Address Group
936 */
937
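/* Expand an ADR form, passing the index shift amount via simd_data. */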
938 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
939 {
940 if (sve_access_check(s)) {
941 unsigned vsz = vec_full_reg_size(s);
942 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
943 vec_full_reg_offset(s, a->rn),
944 vec_full_reg_offset(s, a->rm),
945 vsz, vsz, a->imm, fn);
946 }
947 return true;
948 }
949
950 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
951 {
952 return do_adr(s, a, gen_helper_sve_adr_p32);
953 }
954
955 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
956 {
957 return do_adr(s, a, gen_helper_sve_adr_p64);
958 }
959
960 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
961 {
962 return do_adr(s, a, gen_helper_sve_adr_s32);
963 }
964
965 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
966 {
967 return do_adr(s, a, gen_helper_sve_adr_u32);
968 }
969
970 /*
971 *** SVE Integer Misc - Unpredicated Group
972 */
973
974 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
975 {
976 static gen_helper_gvec_2 * const fns[4] = {
977 NULL,
978 gen_helper_sve_fexpa_h,
979 gen_helper_sve_fexpa_s,
980 gen_helper_sve_fexpa_d,
981 };
982 if (a->esz == 0) {
983 return false;
984 }
985 if (sve_access_check(s)) {
986 unsigned vsz = vec_full_reg_size(s);
987 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
988 vec_full_reg_offset(s, a->rn),
989 vsz, vsz, 0, fns[a->esz]);
990 }
991 return true;
992 }
993
994 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
995 {
996 static gen_helper_gvec_3 * const fns[4] = {
997 NULL,
998 gen_helper_sve_ftssel_h,
999 gen_helper_sve_ftssel_s,
1000 gen_helper_sve_ftssel_d,
1001 };
1002 if (a->esz == 0) {
1003 return false;
1004 }
1005 if (sve_access_check(s)) {
1006 unsigned vsz = vec_full_reg_size(s);
1007 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1008 vec_full_reg_offset(s, a->rn),
1009 vec_full_reg_offset(s, a->rm),
1010 vsz, vsz, 0, fns[a->esz]);
1011 }
1012 return true;
1013 }
1014
1015 /*
1016 *** SVE Predicate Logical Operations Group
1017 */
1018
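/* Expand a flag-setting predicate logical operation: perform the operation,
 * then apply PredTest to the result against the governing predicate.
 */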
1019 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1020 const GVecGen4 *gvec_op)
1021 {
1022 if (!sve_access_check(s)) {
1023 return true;
1024 }
1025
1026 unsigned psz = pred_gvec_reg_size(s);
1027 int dofs = pred_full_reg_offset(s, a->rd);
1028 int nofs = pred_full_reg_offset(s, a->rn);
1029 int mofs = pred_full_reg_offset(s, a->rm);
1030 int gofs = pred_full_reg_offset(s, a->pg);
1031
1032 if (psz == 8) {
1033 /* Do the operation and the flags generation in temps. */
1034 TCGv_i64 pd = tcg_temp_new_i64();
1035 TCGv_i64 pn = tcg_temp_new_i64();
1036 TCGv_i64 pm = tcg_temp_new_i64();
1037 TCGv_i64 pg = tcg_temp_new_i64();
1038
1039 tcg_gen_ld_i64(pn, cpu_env, nofs);
1040 tcg_gen_ld_i64(pm, cpu_env, mofs);
1041 tcg_gen_ld_i64(pg, cpu_env, gofs);
1042
1043 gvec_op->fni8(pd, pn, pm, pg);
1044 tcg_gen_st_i64(pd, cpu_env, dofs);
1045
1046 do_predtest1(pd, pg);
1047
1048 tcg_temp_free_i64(pd);
1049 tcg_temp_free_i64(pn);
1050 tcg_temp_free_i64(pm);
1051 tcg_temp_free_i64(pg);
1052 } else {
1053 /* The operation and flags generation is large. The computation
1054 * of the flags depends on the original contents of the guarding
1055 * predicate. If the destination overwrites the guarding predicate,
1056 * then the easiest way to get this right is to save a copy.
1057 */
1058 int tofs = gofs;
1059 if (a->rd == a->pg) {
1060 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1061 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1062 }
1063
1064 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1065 do_predtest(s, dofs, tofs, psz / 8);
1066 }
1067 return true;
1068 }
1069
1070 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1071 {
1072 tcg_gen_and_i64(pd, pn, pm);
1073 tcg_gen_and_i64(pd, pd, pg);
1074 }
1075
1076 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1077 TCGv_vec pm, TCGv_vec pg)
1078 {
1079 tcg_gen_and_vec(vece, pd, pn, pm);
1080 tcg_gen_and_vec(vece, pd, pd, pg);
1081 }
1082
1083 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1084 {
1085 static const GVecGen4 op = {
1086 .fni8 = gen_and_pg_i64,
1087 .fniv = gen_and_pg_vec,
1088 .fno = gen_helper_sve_and_pppp,
1089 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1090 };
1091 if (a->s) {
1092 return do_pppp_flags(s, a, &op);
1093 } else if (a->rn == a->rm) {
1094 if (a->pg == a->rn) {
1095 return do_mov_p(s, a->rd, a->rn);
1096 } else {
1097 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1098 }
1099 } else if (a->pg == a->rn || a->pg == a->rm) {
1100 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1101 } else {
1102 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1103 }
1104 }
1105
1106 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1107 {
1108 tcg_gen_andc_i64(pd, pn, pm);
1109 tcg_gen_and_i64(pd, pd, pg);
1110 }
1111
1112 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1113 TCGv_vec pm, TCGv_vec pg)
1114 {
1115 tcg_gen_andc_vec(vece, pd, pn, pm);
1116 tcg_gen_and_vec(vece, pd, pd, pg);
1117 }
1118
1119 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1120 {
1121 static const GVecGen4 op = {
1122 .fni8 = gen_bic_pg_i64,
1123 .fniv = gen_bic_pg_vec,
1124 .fno = gen_helper_sve_bic_pppp,
1125 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1126 };
1127 if (a->s) {
1128 return do_pppp_flags(s, a, &op);
1129 } else if (a->pg == a->rn) {
1130 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1131 } else {
1132 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1133 }
1134 }
1135
1136 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1137 {
1138 tcg_gen_xor_i64(pd, pn, pm);
1139 tcg_gen_and_i64(pd, pd, pg);
1140 }
1141
1142 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1143 TCGv_vec pm, TCGv_vec pg)
1144 {
1145 tcg_gen_xor_vec(vece, pd, pn, pm);
1146 tcg_gen_and_vec(vece, pd, pd, pg);
1147 }
1148
1149 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1150 {
1151 static const GVecGen4 op = {
1152 .fni8 = gen_eor_pg_i64,
1153 .fniv = gen_eor_pg_vec,
1154 .fno = gen_helper_sve_eor_pppp,
1155 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1156 };
1157 if (a->s) {
1158 return do_pppp_flags(s, a, &op);
1159 } else {
1160 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1161 }
1162 }
1163
1164 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1165 {
1166 tcg_gen_and_i64(pn, pn, pg);
1167 tcg_gen_andc_i64(pm, pm, pg);
1168 tcg_gen_or_i64(pd, pn, pm);
1169 }
1170
1171 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1172 TCGv_vec pm, TCGv_vec pg)
1173 {
1174 tcg_gen_and_vec(vece, pn, pn, pg);
1175 tcg_gen_andc_vec(vece, pm, pm, pg);
1176 tcg_gen_or_vec(vece, pd, pn, pm);
1177 }
1178
1179 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1180 {
1181 static const GVecGen4 op = {
1182 .fni8 = gen_sel_pg_i64,
1183 .fniv = gen_sel_pg_vec,
1184 .fno = gen_helper_sve_sel_pppp,
1185 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1186 };
1187 if (a->s) {
1188 return false;
1189 } else {
1190 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1191 }
1192 }
1193
1194 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1195 {
1196 tcg_gen_or_i64(pd, pn, pm);
1197 tcg_gen_and_i64(pd, pd, pg);
1198 }
1199
1200 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1201 TCGv_vec pm, TCGv_vec pg)
1202 {
1203 tcg_gen_or_vec(vece, pd, pn, pm);
1204 tcg_gen_and_vec(vece, pd, pd, pg);
1205 }
1206
1207 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1208 {
1209 static const GVecGen4 op = {
1210 .fni8 = gen_orr_pg_i64,
1211 .fniv = gen_orr_pg_vec,
1212 .fno = gen_helper_sve_orr_pppp,
1213 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1214 };
1215 if (a->s) {
1216 return do_pppp_flags(s, a, &op);
1217 } else if (a->pg == a->rn && a->rn == a->rm) {
1218 return do_mov_p(s, a->rd, a->rn);
1219 } else {
1220 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1221 }
1222 }
1223
1224 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1225 {
1226 tcg_gen_orc_i64(pd, pn, pm);
1227 tcg_gen_and_i64(pd, pd, pg);
1228 }
1229
1230 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1231 TCGv_vec pm, TCGv_vec pg)
1232 {
1233 tcg_gen_orc_vec(vece, pd, pn, pm);
1234 tcg_gen_and_vec(vece, pd, pd, pg);
1235 }
1236
1237 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1238 {
1239 static const GVecGen4 op = {
1240 .fni8 = gen_orn_pg_i64,
1241 .fniv = gen_orn_pg_vec,
1242 .fno = gen_helper_sve_orn_pppp,
1243 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1244 };
1245 if (a->s) {
1246 return do_pppp_flags(s, a, &op);
1247 } else {
1248 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1249 }
1250 }
1251
1252 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1253 {
1254 tcg_gen_or_i64(pd, pn, pm);
1255 tcg_gen_andc_i64(pd, pg, pd);
1256 }
1257
1258 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1259 TCGv_vec pm, TCGv_vec pg)
1260 {
1261 tcg_gen_or_vec(vece, pd, pn, pm);
1262 tcg_gen_andc_vec(vece, pd, pg, pd);
1263 }
1264
1265 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1266 {
1267 static const GVecGen4 op = {
1268 .fni8 = gen_nor_pg_i64,
1269 .fniv = gen_nor_pg_vec,
1270 .fno = gen_helper_sve_nor_pppp,
1271 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1272 };
1273 if (a->s) {
1274 return do_pppp_flags(s, a, &op);
1275 } else {
1276 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1277 }
1278 }
1279
1280 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1281 {
1282 tcg_gen_and_i64(pd, pn, pm);
1283 tcg_gen_andc_i64(pd, pg, pd);
1284 }
1285
1286 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1287 TCGv_vec pm, TCGv_vec pg)
1288 {
1289 tcg_gen_and_vec(vece, pd, pn, pm);
1290 tcg_gen_andc_vec(vece, pd, pg, pd);
1291 }
1292
1293 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1294 {
1295 static const GVecGen4 op = {
1296 .fni8 = gen_nand_pg_i64,
1297 .fniv = gen_nand_pg_vec,
1298 .fno = gen_helper_sve_nand_pppp,
1299 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1300 };
1301 if (a->s) {
1302 return do_pppp_flags(s, a, &op);
1303 } else {
1304 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1305 }
1306 }
1307
1308 /*
1309 *** SVE Predicate Misc Group
1310 */
1311
1312 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1313 {
1314 if (sve_access_check(s)) {
1315 int nofs = pred_full_reg_offset(s, a->rn);
1316 int gofs = pred_full_reg_offset(s, a->pg);
1317 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1318
1319 if (words == 1) {
1320 TCGv_i64 pn = tcg_temp_new_i64();
1321 TCGv_i64 pg = tcg_temp_new_i64();
1322
1323 tcg_gen_ld_i64(pn, cpu_env, nofs);
1324 tcg_gen_ld_i64(pg, cpu_env, gofs);
1325 do_predtest1(pn, pg);
1326
1327 tcg_temp_free_i64(pn);
1328 tcg_temp_free_i64(pg);
1329 } else {
1330 do_predtest(s, nofs, gofs, words);
1331 }
1332 }
1333 return true;
1334 }
1335
1336 /* See the ARM pseudocode DecodePredCount. */
1337 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1338 {
1339 unsigned elements = fullsz >> esz;
1340 unsigned bound;
1341
1342 switch (pattern) {
1343 case 0x0: /* POW2 */
1344 return pow2floor(elements);
1345 case 0x1: /* VL1 */
1346 case 0x2: /* VL2 */
1347 case 0x3: /* VL3 */
1348 case 0x4: /* VL4 */
1349 case 0x5: /* VL5 */
1350 case 0x6: /* VL6 */
1351 case 0x7: /* VL7 */
1352 case 0x8: /* VL8 */
1353 bound = pattern;
1354 break;
1355 case 0x9: /* VL16 */
1356 case 0xa: /* VL32 */
1357 case 0xb: /* VL64 */
1358 case 0xc: /* VL128 */
1359 case 0xd: /* VL256 */
1360 bound = 16 << (pattern - 9);
1361 break;
1362 case 0x1d: /* MUL4 */
1363 return elements - elements % 4;
1364 case 0x1e: /* MUL3 */
1365 return elements - elements % 3;
1366 case 0x1f: /* ALL */
1367 return elements;
1368 default: /* #uimm5 */
1369 return 0;
1370 }
1371 return elements >= bound ? bound : 0;
1372 }
1373
1374 /* This handles all of the predicate initialization instructions,
1375 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1376 * so that decode_pred_count returns 0. For SETFFR, we will have
1377 * set RD == 16 == FFR.
1378 */
1379 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1380 {
1381 if (!sve_access_check(s)) {
1382 return true;
1383 }
1384
1385 unsigned fullsz = vec_full_reg_size(s);
1386 unsigned ofs = pred_full_reg_offset(s, rd);
1387 unsigned numelem, setsz, i;
1388 uint64_t word, lastword;
1389 TCGv_i64 t;
1390
1391 numelem = decode_pred_count(fullsz, pat, esz);
1392
1393 /* Determine what we must store into each bit, and how many. */
1394 if (numelem == 0) {
1395 lastword = word = 0;
1396 setsz = fullsz;
1397 } else {
1398 setsz = numelem << esz;
1399 lastword = word = pred_esz_masks[esz];
1400 if (setsz % 64) {
1401 lastword &= ~(-1ull << (setsz % 64));
1402 }
1403 }
1404
1405 t = tcg_temp_new_i64();
1406 if (fullsz <= 64) {
1407 tcg_gen_movi_i64(t, lastword);
1408 tcg_gen_st_i64(t, cpu_env, ofs);
1409 goto done;
1410 }
1411
1412 if (word == lastword) {
1413 unsigned maxsz = size_for_gvec(fullsz / 8);
1414 unsigned oprsz = size_for_gvec(setsz / 8);
1415
1416 if (oprsz * 8 == setsz) {
1417 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1418 goto done;
1419 }
1420 if (oprsz * 8 == setsz + 8) {
1421 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1422 tcg_gen_movi_i64(t, 0);
1423 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1424 goto done;
1425 }
1426 }
1427
1428 setsz /= 8;
1429 fullsz /= 8;
1430
1431 tcg_gen_movi_i64(t, word);
1432 for (i = 0; i < setsz; i += 8) {
1433 tcg_gen_st_i64(t, cpu_env, ofs + i);
1434 }
1435 if (lastword != word) {
1436 tcg_gen_movi_i64(t, lastword);
1437 tcg_gen_st_i64(t, cpu_env, ofs + i);
1438 i += 8;
1439 }
1440 if (i < fullsz) {
1441 tcg_gen_movi_i64(t, 0);
1442 for (; i < fullsz; i += 8) {
1443 tcg_gen_st_i64(t, cpu_env, ofs + i);
1444 }
1445 }
1446
1447 done:
1448 tcg_temp_free_i64(t);
1449
1450 /* PTRUES */
1451 if (setflag) {
1452 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1453 tcg_gen_movi_i32(cpu_CF, word == 0);
1454 tcg_gen_movi_i32(cpu_VF, 0);
1455 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1456 }
1457 return true;
1458 }
1459
1460 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1461 {
1462 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1463 }
1464
1465 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1466 {
1467 /* Note pat == 31 is #all, to set all elements. */
1468 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1469 }
1470
1471 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1472 {
1473 /* Note pat == 32 is #unimp, to set no elements. */
1474 return do_predset(s, 0, a->rd, 32, false);
1475 }
1476
1477 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1478 {
1479 /* The path through do_pppp_flags is complicated enough to want to avoid
1480 * duplication. Frob the arguments into the form of a predicated AND.
1481 */
1482 arg_rprr_s alt_a = {
1483 .rd = a->rd, .pg = a->pg, .s = a->s,
1484 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1485 };
1486 return trans_AND_pppp(s, &alt_a, insn);
1487 }
1488
1489 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1490 {
1491 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1492 }
1493
1494 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1495 {
1496 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1497 }
1498
1499 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1500 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1501 TCGv_ptr, TCGv_i32))
1502 {
1503 if (!sve_access_check(s)) {
1504 return true;
1505 }
1506
1507 TCGv_ptr t_pd = tcg_temp_new_ptr();
1508 TCGv_ptr t_pg = tcg_temp_new_ptr();
1509 TCGv_i32 t;
1510 unsigned desc;
1511
1512 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1513 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1514
1515 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1516 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1517 t = tcg_const_i32(desc);
1518
1519 gen_fn(t, t_pd, t_pg, t);
1520 tcg_temp_free_ptr(t_pd);
1521 tcg_temp_free_ptr(t_pg);
1522
1523 do_pred_flags(t);
1524 tcg_temp_free_i32(t);
1525 return true;
1526 }
1527
1528 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1529 {
1530 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1531 }
1532
1533 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1534 {
1535 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1536 }
1537
1538 /*
1539 *** SVE Element Count Group
1540 */
1541
1542 /* Perform an inline saturating addition of a 32-bit value within
1543 * a 64-bit register. The second operand is known to be positive,
1544  * which halves the comparisons we must perform to bound the result.
1545 */
1546 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1547 {
1548 int64_t ibound;
1549 TCGv_i64 bound;
1550 TCGCond cond;
1551
1552 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1553 if (u) {
1554 tcg_gen_ext32u_i64(reg, reg);
1555 } else {
1556 tcg_gen_ext32s_i64(reg, reg);
1557 }
1558 if (d) {
1559 tcg_gen_sub_i64(reg, reg, val);
1560 ibound = (u ? 0 : INT32_MIN);
1561 cond = TCG_COND_LT;
1562 } else {
1563 tcg_gen_add_i64(reg, reg, val);
1564 ibound = (u ? UINT32_MAX : INT32_MAX);
1565 cond = TCG_COND_GT;
1566 }
1567 bound = tcg_const_i64(ibound);
1568 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1569 tcg_temp_free_i64(bound);
1570 }
1571
1572 /* Similarly with 64-bit values. */
1573 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1574 {
1575 TCGv_i64 t0 = tcg_temp_new_i64();
1576 TCGv_i64 t1 = tcg_temp_new_i64();
1577 TCGv_i64 t2;
1578
1579 if (u) {
1580 if (d) {
1581 tcg_gen_sub_i64(t0, reg, val);
1582 tcg_gen_movi_i64(t1, 0);
1583 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1584 } else {
1585 tcg_gen_add_i64(t0, reg, val);
1586 tcg_gen_movi_i64(t1, -1);
1587 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1588 }
1589 } else {
1590 if (d) {
1591 /* Detect signed overflow for subtraction. */
1592 tcg_gen_xor_i64(t0, reg, val);
1593 tcg_gen_sub_i64(t1, reg, val);
1594 tcg_gen_xor_i64(reg, reg, t0);
1595 tcg_gen_and_i64(t0, t0, reg);
1596
1597 /* Bound the result. */
1598 tcg_gen_movi_i64(reg, INT64_MIN);
1599 t2 = tcg_const_i64(0);
1600 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1601 } else {
1602 /* Detect signed overflow for addition. */
1603 tcg_gen_xor_i64(t0, reg, val);
1604 tcg_gen_add_i64(reg, reg, val);
1605 tcg_gen_xor_i64(t1, reg, val);
1606 tcg_gen_andc_i64(t0, t1, t0);
1607
1608 /* Bound the result. */
1609 tcg_gen_movi_i64(t1, INT64_MAX);
1610 t2 = tcg_const_i64(0);
1611 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1612 }
1613 tcg_temp_free_i64(t2);
1614 }
1615 tcg_temp_free_i64(t0);
1616 tcg_temp_free_i64(t1);
1617 }
1618
1619 /* Similarly with a vector and a scalar operand. */
1620 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1621 TCGv_i64 val, bool u, bool d)
1622 {
1623 unsigned vsz = vec_full_reg_size(s);
1624 TCGv_ptr dptr, nptr;
1625 TCGv_i32 t32, desc;
1626 TCGv_i64 t64;
1627
1628 dptr = tcg_temp_new_ptr();
1629 nptr = tcg_temp_new_ptr();
1630 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1631 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1632 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1633
1634 switch (esz) {
1635 case MO_8:
1636 t32 = tcg_temp_new_i32();
1637 tcg_gen_extrl_i64_i32(t32, val);
1638 if (d) {
1639 tcg_gen_neg_i32(t32, t32);
1640 }
1641 if (u) {
1642 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1643 } else {
1644 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1645 }
1646 tcg_temp_free_i32(t32);
1647 break;
1648
1649 case MO_16:
1650 t32 = tcg_temp_new_i32();
1651 tcg_gen_extrl_i64_i32(t32, val);
1652 if (d) {
1653 tcg_gen_neg_i32(t32, t32);
1654 }
1655 if (u) {
1656 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1657 } else {
1658 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1659 }
1660 tcg_temp_free_i32(t32);
1661 break;
1662
1663 case MO_32:
1664 t64 = tcg_temp_new_i64();
1665 if (d) {
1666 tcg_gen_neg_i64(t64, val);
1667 } else {
1668 tcg_gen_mov_i64(t64, val);
1669 }
1670 if (u) {
1671 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1672 } else {
1673 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1674 }
1675 tcg_temp_free_i64(t64);
1676 break;
1677
1678 case MO_64:
1679 if (u) {
1680 if (d) {
1681 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1682 } else {
1683 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1684 }
1685 } else if (d) {
1686 t64 = tcg_temp_new_i64();
1687 tcg_gen_neg_i64(t64, val);
1688 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1689 tcg_temp_free_i64(t64);
1690 } else {
1691 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1692 }
1693 break;
1694
1695 default:
1696 g_assert_not_reached();
1697 }
1698
1699 tcg_temp_free_ptr(dptr);
1700 tcg_temp_free_ptr(nptr);
1701 tcg_temp_free_i32(desc);
1702 }
1703
1704 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1705 {
1706 if (sve_access_check(s)) {
1707 unsigned fullsz = vec_full_reg_size(s);
1708 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1709 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1710 }
1711 return true;
1712 }
1713
1714 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1715 {
1716 if (sve_access_check(s)) {
1717 unsigned fullsz = vec_full_reg_size(s);
1718 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1719 int inc = numelem * a->imm * (a->d ? -1 : 1);
1720 TCGv_i64 reg = cpu_reg(s, a->rd);
1721
1722 tcg_gen_addi_i64(reg, reg, inc);
1723 }
1724 return true;
1725 }
1726
1727 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1728 uint32_t insn)
1729 {
1730 if (!sve_access_check(s)) {
1731 return true;
1732 }
1733
1734 unsigned fullsz = vec_full_reg_size(s);
1735 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736 int inc = numelem * a->imm;
1737 TCGv_i64 reg = cpu_reg(s, a->rd);
1738
1739 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1740 if (inc == 0) {
1741 if (a->u) {
1742 tcg_gen_ext32u_i64(reg, reg);
1743 } else {
1744 tcg_gen_ext32s_i64(reg, reg);
1745 }
1746 } else {
1747 TCGv_i64 t = tcg_const_i64(inc);
1748 do_sat_addsub_32(reg, t, a->u, a->d);
1749 tcg_temp_free_i64(t);
1750 }
1751 return true;
1752 }
1753
1754 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1755 uint32_t insn)
1756 {
1757 if (!sve_access_check(s)) {
1758 return true;
1759 }
1760
1761 unsigned fullsz = vec_full_reg_size(s);
1762 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1763 int inc = numelem * a->imm;
1764 TCGv_i64 reg = cpu_reg(s, a->rd);
1765
1766 if (inc != 0) {
1767 TCGv_i64 t = tcg_const_i64(inc);
1768 do_sat_addsub_64(reg, t, a->u, a->d);
1769 tcg_temp_free_i64(t);
1770 }
1771 return true;
1772 }
1773
1774 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1775 {
1776 if (a->esz == 0) {
1777 return false;
1778 }
1779
1780 unsigned fullsz = vec_full_reg_size(s);
1781 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1782 int inc = numelem * a->imm;
1783
1784 if (inc != 0) {
1785 if (sve_access_check(s)) {
1786 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1787 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1788 vec_full_reg_offset(s, a->rn),
1789 t, fullsz, fullsz);
1790 tcg_temp_free_i64(t);
1791 }
1792 } else {
1793 do_mov_z(s, a->rd, a->rn);
1794 }
1795 return true;
1796 }
1797
1798 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1799 uint32_t insn)
1800 {
1801 if (a->esz == 0) {
1802 return false;
1803 }
1804
1805 unsigned fullsz = vec_full_reg_size(s);
1806 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1807 int inc = numelem * a->imm;
1808
1809 if (inc != 0) {
1810 if (sve_access_check(s)) {
1811 TCGv_i64 t = tcg_const_i64(inc);
1812 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1813 tcg_temp_free_i64(t);
1814 }
1815 } else {
1816 do_mov_z(s, a->rd, a->rn);
1817 }
1818 return true;
1819 }
1820
1821 /*
1822 *** SVE Bitwise Immediate Group
1823 */
1824
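/* Expand a bitwise operation with an AArch64 bitmask (wmask) immediate. */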
1825 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1826 {
1827 uint64_t imm;
1828 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1829 extract32(a->dbm, 0, 6),
1830 extract32(a->dbm, 6, 6))) {
1831 return false;
1832 }
1833 if (sve_access_check(s)) {
1834 unsigned vsz = vec_full_reg_size(s);
1835 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1836 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1837 }
1838 return true;
1839 }
1840
1841 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1842 {
1843 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1844 }
1845
1846 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1847 {
1848 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1849 }
1850
1851 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1852 {
1853 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1854 }
1855
1856 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1857 {
1858 uint64_t imm;
1859 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1860 extract32(a->dbm, 0, 6),
1861 extract32(a->dbm, 6, 6))) {
1862 return false;
1863 }
1864 if (sve_access_check(s)) {
1865 do_dupi_z(s, a->rd, imm);
1866 }
1867 return true;
1868 }
1869
1870 /*
1871 *** SVE Integer Wide Immediate - Predicated Group
1872 */
1873
1874 /* Implement all merging copies. This is used for CPY (immediate),
1875 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1876 */
1877 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1878 TCGv_i64 val)
1879 {
1880 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1881 static gen_cpy * const fns[4] = {
1882 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1883 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1884 };
1885 unsigned vsz = vec_full_reg_size(s);
1886 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1887 TCGv_ptr t_zd = tcg_temp_new_ptr();
1888 TCGv_ptr t_zn = tcg_temp_new_ptr();
1889 TCGv_ptr t_pg = tcg_temp_new_ptr();
1890
1891 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1892 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1893 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1894
1895 fns[esz](t_zd, t_zn, t_pg, val, desc);
1896
1897 tcg_temp_free_ptr(t_zd);
1898 tcg_temp_free_ptr(t_zn);
1899 tcg_temp_free_ptr(t_pg);
1900 tcg_temp_free_i32(desc);
1901 }
1902
1903 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1904 {
1905 if (a->esz == 0) {
1906 return false;
1907 }
1908 if (sve_access_check(s)) {
1909 /* Decode the VFP immediate. */
1910 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1911 TCGv_i64 t_imm = tcg_const_i64(imm);
1912 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1913 tcg_temp_free_i64(t_imm);
1914 }
1915 return true;
1916 }
1917
1918 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1919 {
1920 if (a->esz == 0 && extract32(insn, 13, 1)) {
1921 return false;
1922 }
1923 if (sve_access_check(s)) {
1924 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1925 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1926 tcg_temp_free_i64(t_imm);
1927 }
1928 return true;
1929 }
1930
1931 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1932 {
1933 static gen_helper_gvec_2i * const fns[4] = {
1934 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1935 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1936 };
1937
1938 if (a->esz == 0 && extract32(insn, 13, 1)) {
1939 return false;
1940 }
1941 if (sve_access_check(s)) {
1942 unsigned vsz = vec_full_reg_size(s);
1943 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1944 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1945 pred_full_reg_offset(s, a->pg),
1946 t_imm, vsz, vsz, 0, fns[a->esz]);
1947 tcg_temp_free_i64(t_imm);
1948 }
1949 return true;
1950 }
1951
1952 /*
1953 *** SVE Permute Extract Group
1954 */
1955
1956 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1957 {
1958 if (!sve_access_check(s)) {
1959 return true;
1960 }
1961
1962 unsigned vsz = vec_full_reg_size(s);
1963 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1964 unsigned n_siz = vsz - n_ofs;
1965 unsigned d = vec_full_reg_offset(s, a->rd);
1966 unsigned n = vec_full_reg_offset(s, a->rn);
1967 unsigned m = vec_full_reg_offset(s, a->rm);
1968
1969 /* Use host vector move insns if we have appropriate sizes
1970 * and no unfortunate overlap.
1971 */
1972 if (m != d
1973 && n_ofs == size_for_gvec(n_ofs)
1974 && n_siz == size_for_gvec(n_siz)
1975 && (d != n || n_siz <= n_ofs)) {
1976 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1977 if (n_ofs != 0) {
1978 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1979 }
1980 } else {
1981 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1982 }
1983 return true;
1984 }
1985
1986 /*
1987 *** SVE Permute - Unpredicated Group
1988 */
1989
1990 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1991 {
1992 if (sve_access_check(s)) {
1993 unsigned vsz = vec_full_reg_size(s);
1994 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1995 vsz, vsz, cpu_reg_sp(s, a->rn));
1996 }
1997 return true;
1998 }
1999
2000 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2001 {
2002 if ((a->imm & 0x1f) == 0) {
2003 return false;
2004 }
2005 if (sve_access_check(s)) {
2006 unsigned vsz = vec_full_reg_size(s);
2007 unsigned dofs = vec_full_reg_offset(s, a->rd);
2008 unsigned esz, index;
2009
2010 esz = ctz32(a->imm);
2011 index = a->imm >> (esz + 1);
2012
2013 if ((index << esz) < vsz) {
2014 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2015 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2016 } else {
2017 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2018 }
2019 }
2020 return true;
2021 }
2022
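/* Expand INSR: insert a 64-bit value at element 0, shifting the other elements up. */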
2023 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2024 {
2025 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2026 static gen_insr * const fns[4] = {
2027 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2028 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2029 };
2030 unsigned vsz = vec_full_reg_size(s);
2031 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2032 TCGv_ptr t_zd = tcg_temp_new_ptr();
2033 TCGv_ptr t_zn = tcg_temp_new_ptr();
2034
2035 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2036 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2037
2038 fns[a->esz](t_zd, t_zn, val, desc);
2039
2040 tcg_temp_free_ptr(t_zd);
2041 tcg_temp_free_ptr(t_zn);
2042 tcg_temp_free_i32(desc);
2043 }
2044
2045 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2046 {
2047 if (sve_access_check(s)) {
2048 TCGv_i64 t = tcg_temp_new_i64();
2049 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2050 do_insr_i64(s, a, t);
2051 tcg_temp_free_i64(t);
2052 }
2053 return true;
2054 }
2055
2056 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2057 {
2058 if (sve_access_check(s)) {
2059 do_insr_i64(s, a, cpu_reg(s, a->rm));
2060 }
2061 return true;
2062 }
2063
2064 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2065 {
2066 static gen_helper_gvec_2 * const fns[4] = {
2067 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2068 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2069 };
2070
2071 if (sve_access_check(s)) {
2072 unsigned vsz = vec_full_reg_size(s);
2073 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2074 vec_full_reg_offset(s, a->rn),
2075 vsz, vsz, 0, fns[a->esz]);
2076 }
2077 return true;
2078 }
2079
2080 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2081 {
2082 static gen_helper_gvec_3 * const fns[4] = {
2083 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2084 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2085 };
2086
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vec_full_reg_offset(s, a->rm),
2092 vsz, vsz, 0, fns[a->esz]);
2093 }
2094 return true;
2095 }
2096
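/* SUNPK{LO,HI} and UUNPK{LO,HI} widen the low or high half of Zn into
 * elements of twice the size; a->u selects unsigned extension and a->h
 * selects the high half, applied below as a VL/2 byte offset into Zn.
 */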
2097 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2098 {
2099 static gen_helper_gvec_2 * const fns[4][2] = {
2100 { NULL, NULL },
2101 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2102 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2103 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2104 };
2105
2106 if (a->esz == 0) {
2107 return false;
2108 }
2109 if (sve_access_check(s)) {
2110 unsigned vsz = vec_full_reg_size(s);
2111 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2112 vec_full_reg_offset(s, a->rn)
2113 + (a->h ? vsz / 2 : 0),
2114 vsz, vsz, 0, fns[a->esz][a->u]);
2115 }
2116 return true;
2117 }
2118
2119 /*
2120 *** SVE Permute - Predicates Group
2121 */
2122
2123 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2124 gen_helper_gvec_3 *fn)
2125 {
2126 if (!sve_access_check(s)) {
2127 return true;
2128 }
2129
2130 unsigned vsz = pred_full_reg_size(s);
2131
2132 /* Predicate sizes may be smaller and cannot use simd_desc.
2133 We cannot round up, as we do elsewhere, because we need
2134 the exact size for ZIP2 and REV. We retain the style for
2135 the other helpers for consistency. */
2136 TCGv_ptr t_d = tcg_temp_new_ptr();
2137 TCGv_ptr t_n = tcg_temp_new_ptr();
2138 TCGv_ptr t_m = tcg_temp_new_ptr();
2139 TCGv_i32 t_desc;
2140 int desc;
2141
2142 desc = vsz - 2;
2143 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2144 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2145
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2148 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2149 t_desc = tcg_const_i32(desc);
2150
2151 fn(t_d, t_n, t_m, t_desc);
2152
2153 tcg_temp_free_ptr(t_d);
2154 tcg_temp_free_ptr(t_n);
2155 tcg_temp_free_ptr(t_m);
2156 tcg_temp_free_i32(t_desc);
2157 return true;
2158 }
2159
2160 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2161 gen_helper_gvec_2 *fn)
2162 {
2163 if (!sve_access_check(s)) {
2164 return true;
2165 }
2166
2167 unsigned vsz = pred_full_reg_size(s);
2168 TCGv_ptr t_d = tcg_temp_new_ptr();
2169 TCGv_ptr t_n = tcg_temp_new_ptr();
2170 TCGv_i32 t_desc;
2171 int desc;
2172
2173 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2174 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2175
2176 /* Predicate sizes may be smaller and cannot use simd_desc.
2177 We cannot round up, as we do elsewhere, because we need
2178 the exact size for ZIP2 and REV. We retain the style for
2179 the other helpers for consistency. */
2180
2181 desc = vsz - 2;
2182 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2183 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2184 t_desc = tcg_const_i32(desc);
2185
2186 fn(t_d, t_n, t_desc);
2187
2188 tcg_temp_free_i32(t_desc);
2189 tcg_temp_free_ptr(t_d);
2190 tcg_temp_free_ptr(t_n);
2191 return true;
2192 }
2193
2194 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195 {
2196 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2197 }
2198
2199 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200 {
2201 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2202 }
2203
2204 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205 {
2206 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2207 }
2208
2209 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2210 {
2211 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2212 }
2213
2214 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2215 {
2216 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2217 }
2218
2219 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2220 {
2221 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2222 }
2223
2224 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2225 {
2226 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2227 }
2228
2229 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2230 {
2231 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2232 }
2233
2234 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2235 {
2236 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2237 }
2238
2239 /*
2240 *** SVE Permute - Interleaving Group
2241 */
2242
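/* ZIP1 interleaves elements from the low halves of Zn and Zm; ZIP2 from
 * the high halves, selected here by offsetting both sources by VL/2 bytes.
 */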
2243 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2244 {
2245 static gen_helper_gvec_3 * const fns[4] = {
2246 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2247 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2248 };
2249
2250 if (sve_access_check(s)) {
2251 unsigned vsz = vec_full_reg_size(s);
2252 unsigned high_ofs = high ? vsz / 2 : 0;
2253 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2254 vec_full_reg_offset(s, a->rn) + high_ofs,
2255 vec_full_reg_offset(s, a->rm) + high_ofs,
2256 vsz, vsz, 0, fns[a->esz]);
2257 }
2258 return true;
2259 }
2260
2261 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2262 gen_helper_gvec_3 *fn)
2263 {
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
2266 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2267 vec_full_reg_offset(s, a->rn),
2268 vec_full_reg_offset(s, a->rm),
2269 vsz, vsz, data, fn);
2270 }
2271 return true;
2272 }
2273
2274 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2275 {
2276 return do_zip(s, a, false);
2277 }
2278
2279 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280 {
2281 return do_zip(s, a, true);
2282 }
2283
2284 static gen_helper_gvec_3 * const uzp_fns[4] = {
2285 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2286 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2287 };
2288
2289 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2290 {
2291 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2292 }
2293
2294 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295 {
2296 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2297 }
2298
2299 static gen_helper_gvec_3 * const trn_fns[4] = {
2300 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2301 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2302 };
2303
2304 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2305 {
2306 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2307 }
2308
2309 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2310 {
2311 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2312 }
2313
2314 /*
2315 *** SVE Permute Vector - Predicated Group
2316 */
2317
2318 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2319 {
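/* COMPACT is only defined for word and doubleword elements. */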
2320 static gen_helper_gvec_3 * const fns[4] = {
2321 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2322 };
2323 return do_zpz_ool(s, a, fns[a->esz]);
2324 }
2325
2326 /* Call the helper that computes the ARM LastActiveElement pseudocode
2327 * function, scaled by the element size. This includes the not found
2328 * indication; e.g. not found for esz=3 is -8.
2329 */
2330 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2331 {
2332 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2333 * round up, as we do elsewhere, because we need the exact size.
2334 */
2335 TCGv_ptr t_p = tcg_temp_new_ptr();
2336 TCGv_i32 t_desc;
2337 unsigned vsz = pred_full_reg_size(s);
2338 unsigned desc;
2339
2340 desc = vsz - 2;
2341 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2342
2343 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2344 t_desc = tcg_const_i32(desc);
2345
2346 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2347
2348 tcg_temp_free_i32(t_desc);
2349 tcg_temp_free_ptr(t_p);
2350 }
2351
2352 /* Increment LAST to the offset of the next element in the vector,
2353 * wrapping around to 0.
2354 */
2355 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2356 {
2357 unsigned vsz = vec_full_reg_size(s);
2358
2359 tcg_gen_addi_i32(last, last, 1 << esz);
2360 if (is_power_of_2(vsz)) {
2361 tcg_gen_andi_i32(last, last, vsz - 1);
2362 } else {
2363 TCGv_i32 max = tcg_const_i32(vsz);
2364 TCGv_i32 zero = tcg_const_i32(0);
2365 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2366 tcg_temp_free_i32(max);
2367 tcg_temp_free_i32(zero);
2368 }
2369 }
2370
2371 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2372 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 {
2374 unsigned vsz = vec_full_reg_size(s);
2375
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2384 }
2385 }
2386
2387 /* Load an unsigned element of ESZ from BASE+OFS. */
2388 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2389 {
2390 TCGv_i64 r = tcg_temp_new_i64();
2391
2392 switch (esz) {
2393 case 0:
2394 tcg_gen_ld8u_i64(r, base, ofs);
2395 break;
2396 case 1:
2397 tcg_gen_ld16u_i64(r, base, ofs);
2398 break;
2399 case 2:
2400 tcg_gen_ld32u_i64(r, base, ofs);
2401 break;
2402 case 3:
2403 tcg_gen_ld_i64(r, base, ofs);
2404 break;
2405 default:
2406 g_assert_not_reached();
2407 }
2408 return r;
2409 }
2410
2411 /* Load an unsigned element of ESZ from RM[LAST]. */
2412 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2413 int rm, int esz)
2414 {
2415 TCGv_ptr p = tcg_temp_new_ptr();
2416 TCGv_i64 r;
2417
2418 /* Convert the offset within the vector into an offset within ENV.
2419 * The final adjustment for the vector register base
2420 * is added via a constant offset to the load.
2421 */
2422 #ifdef HOST_WORDS_BIGENDIAN
2423 /* Adjust for element ordering. See vec_reg_offset. */
2424 if (esz < 3) {
2425 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2426 }
2427 #endif
2428 tcg_gen_ext_i32_ptr(p, last);
2429 tcg_gen_add_ptr(p, p, cpu_env);
2430
2431 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2432 tcg_temp_free_ptr(p);
2433
2434 return r;
2435 }
2436
2437 /* Compute CLAST for a Zreg. */
2438 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2439 {
2440 TCGv_i32 last;
2441 TCGLabel *over;
2442 TCGv_i64 ele;
2443 unsigned vsz, esz = a->esz;
2444
2445 if (!sve_access_check(s)) {
2446 return true;
2447 }
2448
2449 last = tcg_temp_local_new_i32();
2450 over = gen_new_label();
2451
2452 find_last_active(s, last, esz, a->pg);
2453
2454 /* There is of course no movcond for a 2048-bit vector,
2455 * so we must branch over the actual store.
2456 */
2457 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2458
2459 if (!before) {
2460 incr_last_active(s, last, esz);
2461 }
2462
2463 ele = load_last_active(s, last, a->rm, esz);
2464 tcg_temp_free_i32(last);
2465
2466 vsz = vec_full_reg_size(s);
2467 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2468 tcg_temp_free_i64(ele);
2469
2470 /* If this insn used MOVPRFX, we may need a second move. */
2471 if (a->rd != a->rn) {
2472 TCGLabel *done = gen_new_label();
2473 tcg_gen_br(done);
2474
2475 gen_set_label(over);
2476 do_mov_z(s, a->rd, a->rn);
2477
2478 gen_set_label(done);
2479 } else {
2480 gen_set_label(over);
2481 }
2482 return true;
2483 }
2484
2485 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2486 {
2487 return do_clast_vector(s, a, false);
2488 }
2489
2490 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2491 {
2492 return do_clast_vector(s, a, true);
2493 }
2494
2495 /* Compute CLAST for a scalar. */
2496 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2497 bool before, TCGv_i64 reg_val)
2498 {
2499 TCGv_i32 last = tcg_temp_new_i32();
2500 TCGv_i64 ele, cmp, zero;
2501
2502 find_last_active(s, last, esz, pg);
2503
2504 /* Extend the original value of last prior to incrementing. */
2505 cmp = tcg_temp_new_i64();
2506 tcg_gen_ext_i32_i64(cmp, last);
2507
2508 if (!before) {
2509 incr_last_active(s, last, esz);
2510 }
2511
2512 /* The conceit here is that while last < 0 indicates not found, after
2513 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2514 * from which we can load garbage. We then discard the garbage with
2515 * a conditional move.
2516 */
2517 ele = load_last_active(s, last, rm, esz);
2518 tcg_temp_free_i32(last);
2519
2520 zero = tcg_const_i64(0);
2521 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2522
2523 tcg_temp_free_i64(zero);
2524 tcg_temp_free_i64(cmp);
2525 tcg_temp_free_i64(ele);
2526 }
2527
2528 /* Compute CLAST for a Vreg. */
2529 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2530 {
2531 if (sve_access_check(s)) {
2532 int esz = a->esz;
2533 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2534 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2535
2536 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2537 write_fp_dreg(s, a->rd, reg);
2538 tcg_temp_free_i64(reg);
2539 }
2540 return true;
2541 }
2542
2543 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2544 {
2545 return do_clast_fp(s, a, false);
2546 }
2547
2548 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2549 {
2550 return do_clast_fp(s, a, true);
2551 }
2552
2553 /* Compute CLAST for a Xreg. */
2554 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2555 {
2556 TCGv_i64 reg;
2557
2558 if (!sve_access_check(s)) {
2559 return true;
2560 }
2561
2562 reg = cpu_reg(s, a->rd);
2563 switch (a->esz) {
2564 case 0:
2565 tcg_gen_ext8u_i64(reg, reg);
2566 break;
2567 case 1:
2568 tcg_gen_ext16u_i64(reg, reg);
2569 break;
2570 case 2:
2571 tcg_gen_ext32u_i64(reg, reg);
2572 break;
2573 case 3:
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2578
2579 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2580 return true;
2581 }
2582
2583 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584 {
2585 return do_clast_general(s, a, false);
2586 }
2587
2588 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589 {
2590 return do_clast_general(s, a, true);
2591 }
2592
2593 /* Compute LAST for a scalar. */
2594 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2595 int pg, int rm, bool before)
2596 {
2597 TCGv_i32 last = tcg_temp_new_i32();
2598 TCGv_i64 ret;
2599
2600 find_last_active(s, last, esz, pg);
2601 if (before) {
2602 wrap_last_active(s, last, esz);
2603 } else {
2604 incr_last_active(s, last, esz);
2605 }
2606
2607 ret = load_last_active(s, last, rm, esz);
2608 tcg_temp_free_i32(last);
2609 return ret;
2610 }
2611
2612 /* Compute LAST for a Vreg. */
2613 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2614 {
2615 if (sve_access_check(s)) {
2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2617 write_fp_dreg(s, a->rd, val);
2618 tcg_temp_free_i64(val);
2619 }
2620 return true;
2621 }
2622
2623 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624 {
2625 return do_last_fp(s, a, false);
2626 }
2627
2628 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629 {
2630 return do_last_fp(s, a, true);
2631 }
2632
2633 /* Compute LAST for a Xreg. */
2634 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2635 {
2636 if (sve_access_check(s)) {
2637 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2638 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2639 tcg_temp_free_i64(val);
2640 }
2641 return true;
2642 }
2643
2644 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645 {
2646 return do_last_general(s, a, false);
2647 }
2648
2649 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2650 {
2651 return do_last_general(s, a, true);
2652 }
2653
2654 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2655 {
2656 if (sve_access_check(s)) {
2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2658 }
2659 return true;
2660 }
2661
2662 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664 if (sve_access_check(s)) {
2665 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2666 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2667 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2668 tcg_temp_free_i64(t);
2669 }
2670 return true;
2671 }
2672
2673 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674 {
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 gen_helper_sve_revb_h,
2678 gen_helper_sve_revb_s,
2679 gen_helper_sve_revb_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682 }
2683
2684 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 NULL,
2688 NULL,
2689 gen_helper_sve_revh_s,
2690 gen_helper_sve_revh_d,
2691 };
2692 return do_zpz_ool(s, a, fns[a->esz]);
2693 }
2694
2695 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2696 {
2697 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2698 }
2699
2700 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701 {
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 gen_helper_sve_rbit_b,
2704 gen_helper_sve_rbit_h,
2705 gen_helper_sve_rbit_s,
2706 gen_helper_sve_rbit_d,
2707 };
2708 return do_zpz_ool(s, a, fns[a->esz]);
2709 }
2710
2711 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2712 {
2713 if (sve_access_check(s)) {
2714 unsigned vsz = vec_full_reg_size(s);
2715 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2716 vec_full_reg_offset(s, a->rn),
2717 vec_full_reg_offset(s, a->rm),
2718 pred_full_reg_offset(s, a->pg),
2719 vsz, vsz, a->esz, gen_helper_sve_splice);
2720 }
2721 return true;
2722 }
2723
2724 /*
2725 *** SVE Integer Compare - Vectors Group
2726 */
2727
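/* Expand a predicated vector-vs-vector compare that also sets the flags.
 * Note that T does double duty: it carries the simd descriptor into the
 * helper and returns the predicate-test result, which do_pred_flags then
 * transfers to the NZCV flags.
 */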
2728 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2729 gen_helper_gvec_flags_4 *gen_fn)
2730 {
2731 TCGv_ptr pd, zn, zm, pg;
2732 unsigned vsz;
2733 TCGv_i32 t;
2734
2735 if (gen_fn == NULL) {
2736 return false;
2737 }
2738 if (!sve_access_check(s)) {
2739 return true;
2740 }
2741
2742 vsz = vec_full_reg_size(s);
2743 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2744 pd = tcg_temp_new_ptr();
2745 zn = tcg_temp_new_ptr();
2746 zm = tcg_temp_new_ptr();
2747 pg = tcg_temp_new_ptr();
2748
2749 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2750 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2751 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2752 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2753
2754 gen_fn(t, pd, zn, zm, pg, t);
2755
2756 tcg_temp_free_ptr(pd);
2757 tcg_temp_free_ptr(zn);
2758 tcg_temp_free_ptr(zm);
2759 tcg_temp_free_ptr(pg);
2760
2761 do_pred_flags(t);
2762
2763 tcg_temp_free_i32(t);
2764 return true;
2765 }
2766
2767 #define DO_PPZZ(NAME, name) \
2768 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2769 uint32_t insn) \
2770 { \
2771 static gen_helper_gvec_flags_4 * const fns[4] = { \
2772 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2773 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2774 }; \
2775 return do_ppzz_flags(s, a, fns[a->esz]); \
2776 }
2777
2778 DO_PPZZ(CMPEQ, cmpeq)
2779 DO_PPZZ(CMPNE, cmpne)
2780 DO_PPZZ(CMPGT, cmpgt)
2781 DO_PPZZ(CMPGE, cmpge)
2782 DO_PPZZ(CMPHI, cmphi)
2783 DO_PPZZ(CMPHS, cmphs)
2784
2785 #undef DO_PPZZ
2786
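/* The wide forms compare each element of Zn against the 64-bit element of
 * Zm occupying the same 64-bit lane, so the fns table has no .d entry.
 */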
2787 #define DO_PPZW(NAME, name) \
2788 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2789 uint32_t insn) \
2790 { \
2791 static gen_helper_gvec_flags_4 * const fns[4] = { \
2792 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2793 gen_helper_sve_##name##_ppzw_s, NULL \
2794 }; \
2795 return do_ppzz_flags(s, a, fns[a->esz]); \
2796 }
2797
2798 DO_PPZW(CMPEQ, cmpeq)
2799 DO_PPZW(CMPNE, cmpne)
2800 DO_PPZW(CMPGT, cmpgt)
2801 DO_PPZW(CMPGE, cmpge)
2802 DO_PPZW(CMPHI, cmphi)
2803 DO_PPZW(CMPHS, cmphs)
2804 DO_PPZW(CMPLT, cmplt)
2805 DO_PPZW(CMPLE, cmple)
2806 DO_PPZW(CMPLO, cmplo)
2807 DO_PPZW(CMPLS, cmpls)
2808
2809 #undef DO_PPZW
2810
2811 /*
2812 *** SVE Integer Compare - Immediate Groups
2813 */
2814
2815 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2816 gen_helper_gvec_flags_3 *gen_fn)
2817 {
2818 TCGv_ptr pd, zn, pg;
2819 unsigned vsz;
2820 TCGv_i32 t;
2821
2822 if (gen_fn == NULL) {
2823 return false;
2824 }
2825 if (!sve_access_check(s)) {
2826 return true;
2827 }
2828
2829 vsz = vec_full_reg_size(s);
2830 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2831 pd = tcg_temp_new_ptr();
2832 zn = tcg_temp_new_ptr();
2833 pg = tcg_temp_new_ptr();
2834
2835 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2836 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2837 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2838
2839 gen_fn(t, pd, zn, pg, t);
2840
2841 tcg_temp_free_ptr(pd);
2842 tcg_temp_free_ptr(zn);
2843 tcg_temp_free_ptr(pg);
2844
2845 do_pred_flags(t);
2846
2847 tcg_temp_free_i32(t);
2848 return true;
2849 }
2850
2851 #define DO_PPZI(NAME, name) \
2852 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2853 uint32_t insn) \
2854 { \
2855 static gen_helper_gvec_flags_3 * const fns[4] = { \
2856 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2857 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2858 }; \
2859 return do_ppzi_flags(s, a, fns[a->esz]); \
2860 }
2861
2862 DO_PPZI(CMPEQ, cmpeq)
2863 DO_PPZI(CMPNE, cmpne)
2864 DO_PPZI(CMPGT, cmpgt)
2865 DO_PPZI(CMPGE, cmpge)
2866 DO_PPZI(CMPHI, cmphi)
2867 DO_PPZI(CMPHS, cmphs)
2868 DO_PPZI(CMPLT, cmplt)
2869 DO_PPZI(CMPLE, cmple)
2870 DO_PPZI(CMPLO, cmplo)
2871 DO_PPZI(CMPLS, cmpls)
2872
2873 #undef DO_PPZI
2874
2875 /*
2876 *** SVE Partition Break Group
2877 */
2878
2879 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2880 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2881 {
2882 if (!sve_access_check(s)) {
2883 return true;
2884 }
2885
2886 unsigned vsz = pred_full_reg_size(s);
2887
2888 /* Predicate sizes may be smaller and cannot use simd_desc. */
2889 TCGv_ptr d = tcg_temp_new_ptr();
2890 TCGv_ptr n = tcg_temp_new_ptr();
2891 TCGv_ptr m = tcg_temp_new_ptr();
2892 TCGv_ptr g = tcg_temp_new_ptr();
2893 TCGv_i32 t = tcg_const_i32(vsz - 2);
2894
2895 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2896 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2897 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2898 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2899
2900 if (a->s) {
2901 fn_s(t, d, n, m, g, t);
2902 do_pred_flags(t);
2903 } else {
2904 fn(d, n, m, g, t);
2905 }
2906 tcg_temp_free_ptr(d);
2907 tcg_temp_free_ptr(n);
2908 tcg_temp_free_ptr(m);
2909 tcg_temp_free_ptr(g);
2910 tcg_temp_free_i32(t);
2911 return true;
2912 }
2913
2914 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2915 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2916 {
2917 if (!sve_access_check(s)) {
2918 return true;
2919 }
2920
2921 unsigned vsz = pred_full_reg_size(s);
2922
2923 /* Predicate sizes may be smaller and cannot use simd_desc. */
2924 TCGv_ptr d = tcg_temp_new_ptr();
2925 TCGv_ptr n = tcg_temp_new_ptr();
2926 TCGv_ptr g = tcg_temp_new_ptr();
2927 TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2932
2933 if (a->s) {
2934 fn_s(t, d, n, g, t);
2935 do_pred_flags(t);
2936 } else {
2937 fn(d, n, g, t);
2938 }
2939 tcg_temp_free_ptr(d);
2940 tcg_temp_free_ptr(n);
2941 tcg_temp_free_ptr(g);
2942 tcg_temp_free_i32(t);
2943 return true;
2944 }
2945
2946 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2947 {
2948 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2949 }
2950
2951 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2952 {
2953 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2954 }
2955
2956 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957 {
2958 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2959 }
2960
2961 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962 {
2963 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2964 }
2965
2966 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2967 {
2968 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2969 }
2970
2971 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2972 {
2973 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2974 }
2975
2976 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2977 {
2978 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2979 }
2980
2981 /*
2982 *** SVE Predicate Count Group
2983 */
2984
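/* Set VAL to the number of active elements of size ESZ in PN, as governed
 * by PG.  Small predicates (psz <= 8 bytes) are counted inline with a
 * masked popcount; larger predicates go through the helper.
 */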
2985 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2986 {
2987 unsigned psz = pred_full_reg_size(s);
2988
2989 if (psz <= 8) {
2990 uint64_t psz_mask;
2991
2992 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2993 if (pn != pg) {
2994 TCGv_i64 g = tcg_temp_new_i64();
2995 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2996 tcg_gen_and_i64(val, val, g);
2997 tcg_temp_free_i64(g);
2998 }
2999
3000 /* Reduce the pred_esz_masks value simply to reduce the
3001 * size of the code generated here.
3002 */
3003 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3004 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3005
3006 tcg_gen_ctpop_i64(val, val);
3007 } else {
3008 TCGv_ptr t_pn = tcg_temp_new_ptr();
3009 TCGv_ptr t_pg = tcg_temp_new_ptr();
3010 unsigned desc;
3011 TCGv_i32 t_desc;
3012
3013 desc = psz - 2;
3014 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3015
3016 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3017 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3018 t_desc = tcg_const_i32(desc);
3019
3020 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3021 tcg_temp_free_ptr(t_pn);
3022 tcg_temp_free_ptr(t_pg);
3023 tcg_temp_free_i32(t_desc);
3024 }
3025 }
3026
3027 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3028 {
3029 if (sve_access_check(s)) {
3030 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3031 }
3032 return true;
3033 }
3034
3035 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3036 uint32_t insn)
3037 {
3038 if (sve_access_check(s)) {
3039 TCGv_i64 reg = cpu_reg(s, a->rd);
3040 TCGv_i64 val = tcg_temp_new_i64();
3041
3042 do_cntp(s, val, a->esz, a->pg, a->pg);
3043 if (a->d) {
3044 tcg_gen_sub_i64(reg, reg, val);
3045 } else {
3046 tcg_gen_add_i64(reg, reg, val);
3047 }
3048 tcg_temp_free_i64(val);
3049 }
3050 return true;
3051 }
3052
3053 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3054 uint32_t insn)
3055 {
3056 if (a->esz == 0) {
3057 return false;
3058 }
3059 if (sve_access_check(s)) {
3060 unsigned vsz = vec_full_reg_size(s);
3061 TCGv_i64 val = tcg_temp_new_i64();
3062 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3063
3064 do_cntp(s, val, a->esz, a->pg, a->pg);
3065 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3066 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3067 }
3068 return true;
3069 }
3070
3071 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3072 uint32_t insn)
3073 {
3074 if (sve_access_check(s)) {
3075 TCGv_i64 reg = cpu_reg(s, a->rd);
3076 TCGv_i64 val = tcg_temp_new_i64();
3077
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 do_sat_addsub_32(reg, val, a->u, a->d);
3080 }
3081 return true;
3082 }
3083
3084 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3085 uint32_t insn)
3086 {
3087 if (sve_access_check(s)) {
3088 TCGv_i64 reg = cpu_reg(s, a->rd);
3089 TCGv_i64 val = tcg_temp_new_i64();
3090
3091 do_cntp(s, val, a->esz, a->pg, a->pg);
3092 do_sat_addsub_64(reg, val, a->u, a->d);
3093 }
3094 return true;
3095 }
3096
3097 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3098 uint32_t insn)
3099 {
3100 if (a->esz == 0) {
3101 return false;
3102 }
3103 if (sve_access_check(s)) {
3104 TCGv_i64 val = tcg_temp_new_i64();
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3107 }
3108 return true;
3109 }
3110
3111 /*
3112 *** SVE Integer Compare Scalars Group
3113 */
3114
3115 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3116 {
3117 if (!sve_access_check(s)) {
3118 return true;
3119 }
3120
3121 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3122 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3123 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3124 TCGv_i64 cmp = tcg_temp_new_i64();
3125
3126 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3127 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3128 tcg_temp_free_i64(cmp);
3129
3130 /* VF = !NF & !CF. */
3131 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3132 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3133
3134 /* Both NF and VF actually look at bit 31. */
3135 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3136 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3137 return true;
3138 }
3139
3140 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3141 {
3142 if (!sve_access_check(s)) {
3143 return true;
3144 }
3145
3146 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3147 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3148 TCGv_i64 t0 = tcg_temp_new_i64();
3149 TCGv_i64 t1 = tcg_temp_new_i64();
3150 TCGv_i32 t2, t3;
3151 TCGv_ptr ptr;
3152 unsigned desc, vsz = vec_full_reg_size(s);
3153 TCGCond cond;
3154
3155 if (!a->sf) {
3156 if (a->u) {
3157 tcg_gen_ext32u_i64(op0, op0);
3158 tcg_gen_ext32u_i64(op1, op1);
3159 } else {
3160 tcg_gen_ext32s_i64(op0, op0);
3161 tcg_gen_ext32s_i64(op1, op1);
3162 }
3163 }
3164
3165 /* For the helper, compress the different conditions into a computation
3166 * of how many iterations for which the condition is true.
3167 *
3168 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3169 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3170 * aren't that large, so any value >= predicate size is sufficient.
3171 */
3172 tcg_gen_sub_i64(t0, op1, op0);
3173
3174 /* t0 = MIN(op1 - op0, vsz). */
3175 tcg_gen_movi_i64(t1, vsz);
3176 tcg_gen_umin_i64(t0, t0, t1);
3177 if (a->eq) {
3178 /* Equality means one more iteration. */
3179 tcg_gen_addi_i64(t0, t0, 1);
3180 }
3181
3182 /* t0 = (condition true ? t0 : 0). */
3183 cond = (a->u
3184 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3185 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3186 tcg_gen_movi_i64(t1, 0);
3187 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3188
3189 t2 = tcg_temp_new_i32();
3190 tcg_gen_extrl_i64_i32(t2, t0);
3191 tcg_temp_free_i64(t0);
3192 tcg_temp_free_i64(t1);
3193
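/* As with the other predicate helpers, the descriptor is the predicate
 * register size in bytes minus 2, with the element size in the data field.
 */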
3194 desc = (vsz / 8) - 2;
3195 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3196 t3 = tcg_const_i32(desc);
3197
3198 ptr = tcg_temp_new_ptr();
3199 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3200
3201 gen_helper_sve_while(t2, ptr, t2, t3);
3202 do_pred_flags(t2);
3203
3204 tcg_temp_free_ptr(ptr);
3205 tcg_temp_free_i32(t2);
3206 tcg_temp_free_i32(t3);
3207 return true;
3208 }
3209
3210 /*
3211 *** SVE Integer Wide Immediate - Unpredicated Group
3212 */
3213
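/* FDUP broadcasts an 8-bit floating-point immediate, expanded to the
 * element size, to every element of Zd; dup_const replicates the expanded
 * value across a 64-bit dup unit before the gvec broadcast.
 */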
3214 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3215 {
3216 if (a->esz == 0) {
3217 return false;
3218 }
3219 if (sve_access_check(s)) {
3220 unsigned vsz = vec_full_reg_size(s);
3221 int dofs = vec_full_reg_offset(s, a->rd);
3222 uint64_t imm;
3223
3224 /* Decode the VFP immediate. */
3225 imm = vfp_expand_imm(a->esz, a->imm);
3226 imm = dup_const(a->esz, imm);
3227
3228 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3229 }
3230 return true;
3231 }
3232
3233 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3234 {
3235 if (a->esz == 0 && extract32(insn, 13, 1)) {
3236 return false;
3237 }
3238 if (sve_access_check(s)) {
3239 unsigned vsz = vec_full_reg_size(s);
3240 int dofs = vec_full_reg_offset(s, a->rd);
3241
3242 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3243 }
3244 return true;
3245 }
3246
3247 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3248 {
3249 if (a->esz == 0 && extract32(insn, 13, 1)) {
3250 return false;
3251 }
3252 if (sve_access_check(s)) {
3253 unsigned vsz = vec_full_reg_size(s);
3254 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3255 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3256 }
3257 return true;
3258 }
3259
3260 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3261 {
3262 a->imm = -a->imm;
3263 return trans_ADD_zzi(s, a, insn);
3264 }
3265
3266 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3267 {
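/* .scalar_first loads the immediate as the first source operand, so these
 * expansions compute imm - Zn as SUBR requires, rather than Zn - imm.
 */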
3268 static const GVecGen2s op[4] = {
3269 { .fni8 = tcg_gen_vec_sub8_i64,
3270 .fniv = tcg_gen_sub_vec,
3271 .fno = gen_helper_sve_subri_b,
3272 .opc = INDEX_op_sub_vec,
3273 .vece = MO_8,
3274 .scalar_first = true },
3275 { .fni8 = tcg_gen_vec_sub16_i64,
3276 .fniv = tcg_gen_sub_vec,
3277 .fno = gen_helper_sve_subri_h,
3278 .opc = INDEX_op_sub_vec,
3279 .vece = MO_16,
3280 .scalar_first = true },
3281 { .fni4 = tcg_gen_sub_i32,
3282 .fniv = tcg_gen_sub_vec,
3283 .fno = gen_helper_sve_subri_s,
3284 .opc = INDEX_op_sub_vec,
3285 .vece = MO_32,
3286 .scalar_first = true },
3287 { .fni8 = tcg_gen_sub_i64,
3288 .fniv = tcg_gen_sub_vec,
3289 .fno = gen_helper_sve_subri_d,
3290 .opc = INDEX_op_sub_vec,
3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3292 .vece = MO_64,
3293 .scalar_first = true }
3294 };
3295
3296 if (a->esz == 0 && extract32(insn, 13, 1)) {
3297 return false;
3298 }
3299 if (sve_access_check(s)) {
3300 unsigned vsz = vec_full_reg_size(s);
3301 TCGv_i64 c = tcg_const_i64(a->imm);
3302 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3303 vec_full_reg_offset(s, a->rn),
3304 vsz, vsz, c, &op[a->esz]);
3305 tcg_temp_free_i64(c);
3306 }
3307 return true;
3308 }
3309
3310 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3311 {
3312 if (sve_access_check(s)) {
3313 unsigned vsz = vec_full_reg_size(s);
3314 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3315 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3316 }
3317 return true;
3318 }
3319
3320 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3321 bool u, bool d)
3322 {
3323 if (a->esz == 0 && extract32(insn, 13, 1)) {
3324 return false;
3325 }
3326 if (sve_access_check(s)) {
3327 TCGv_i64 val = tcg_const_i64(a->imm);
3328 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3329 tcg_temp_free_i64(val);
3330 }
3331 return true;
3332 }
3333
3334 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3335 {
3336 return do_zzi_sat(s, a, insn, false, false);
3337 }
3338
3339 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3340 {
3341 return do_zzi_sat(s, a, insn, true, false);
3342 }
3343
3344 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345 {
3346 return do_zzi_sat(s, a, insn, false, true);
3347 }
3348
3349 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3350 {
3351 return do_zzi_sat(s, a, insn, true, true);
3352 }
3353
3354 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3355 {
3356 if (sve_access_check(s)) {
3357 unsigned vsz = vec_full_reg_size(s);
3358 TCGv_i64 c = tcg_const_i64(a->imm);
3359
3360 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3361 vec_full_reg_offset(s, a->rn),
3362 c, vsz, vsz, 0, fn);
3363 tcg_temp_free_i64(c);
3364 }
3365 return true;
3366 }
3367
3368 #define DO_ZZI(NAME, name) \
3369 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3370 uint32_t insn) \
3371 { \
3372 static gen_helper_gvec_2i * const fns[4] = { \
3373 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3374 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3375 }; \
3376 return do_zzi_ool(s, a, fns[a->esz]); \
3377 }
3378
3379 DO_ZZI(SMAX, smax)
3380 DO_ZZI(UMAX, umax)
3381 DO_ZZI(SMIN, smin)
3382 DO_ZZI(UMIN, umin)
3383
3384 #undef DO_ZZI
3385
3386 /*
3387 *** SVE Floating Point Arithmetic - Unpredicated Group
3388 */
3389
3390 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3391 gen_helper_gvec_3_ptr *fn)
3392 {
3393 if (fn == NULL) {
3394 return false;
3395 }
3396 if (sve_access_check(s)) {
3397 unsigned vsz = vec_full_reg_size(s);
3398 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3399 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3400 vec_full_reg_offset(s, a->rn),
3401 vec_full_reg_offset(s, a->rm),
3402 status, vsz, vsz, 0, fn);
3403 tcg_temp_free_ptr(status);
3404 }
3405 return true;
3406 }
3407
3408
3409 #define DO_FP3(NAME, name) \
3410 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3411 { \
3412 static gen_helper_gvec_3_ptr * const fns[4] = { \
3413 NULL, gen_helper_gvec_##name##_h, \
3414 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3415 }; \
3416 return do_zzz_fp(s, a, fns[a->esz]); \
3417 }
3418
3419 DO_FP3(FADD_zzz, fadd)
3420 DO_FP3(FSUB_zzz, fsub)
3421 DO_FP3(FMUL_zzz, fmul)
3422 DO_FP3(FTSMUL, ftsmul)
3423 DO_FP3(FRECPS, recps)
3424 DO_FP3(FRSQRTS, rsqrts)
3425
3426 #undef DO_FP3
3427
3428
3429 /*
3430 *** SVE Floating Point Unary Operations Predicated Group
3431 */
3432
3433 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3434 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3435 {
3436 if (sve_access_check(s)) {
3437 unsigned vsz = vec_full_reg_size(s);
3438 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3439 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3440 vec_full_reg_offset(s, rn),
3441 pred_full_reg_offset(s, pg),
3442 status, vsz, vsz, 0, fn);
3443 tcg_temp_free_ptr(status);
3444 }
3445 return true;
3446 }
3447
3448 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3449 {
3450 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3451 }
3452
3453 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3454 {
3455 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3456 }
3457
3458 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3459 {
3460 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3461 }
3462
3463 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3464 {
3465 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3466 }
3467
3468 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3469 {
3470 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3471 }
3472
3473 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3474 {
3475 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3476 }
3477
3478 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3479 {
3480 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3481 }
3482
3483 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3484 {
3485 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3486 }
3487
3488 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3489 {
3490 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3491 }
3492
3493 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3494 {
3495 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3496 }
3497
3498 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3499 {
3500 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3501 }
3502
3503 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3504 {
3505 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3506 }
3507
3508 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3509 {
3510 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3511 }
3512
3513 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3514 {
3515 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3516 }
3517
3518 /*
3519 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3520 */
3521
3522 /* Subroutine loading a vector register at VOFS of LEN bytes.
3523 * The load should begin at the address Rn + IMM.
3524 */
3525
3526 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3527 int rn, int imm)
3528 {
3529 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3530 uint32_t len_remain = len % 8;
3531 uint32_t nparts = len / 8 + ctpop8(len_remain);
3532 int midx = get_mem_index(s);
3533 TCGv_i64 addr, t0, t1;
3534
3535 addr = tcg_temp_new_i64();
3536 t0 = tcg_temp_new_i64();
3537
3538 /* Note that unpredicated load/store of vector/predicate registers
3539 * are defined as a stream of bytes, which equates to little-endian
3540 * operations on larger quantities. There is no nice way to force
3541 * a little-endian load for aarch64_be-linux-user out of line.
3542 *
3543 * Attempt to keep code expansion to a minimum by limiting the
3544 * amount of unrolling done.
3545 */
3546 if (nparts <= 4) {
3547 int i;
3548
3549 for (i = 0; i < len_align; i += 8) {
3550 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3551 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3552 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3553 }
3554 } else {
3555 TCGLabel *loop = gen_new_label();
3556 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3557
3558 gen_set_label(loop);
3559
3560 /* Minimize the number of local temps that must be re-read from
3561 * the stack each iteration. Instead, re-compute values other
3562 * than the loop counter.
3563 */
3564 tp = tcg_temp_new_ptr();
3565 tcg_gen_addi_ptr(tp, i, imm);
3566 tcg_gen_extu_ptr_i64(addr, tp);
3567 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3568
3569 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3570
3571 tcg_gen_add_ptr(tp, cpu_env, i);
3572 tcg_gen_addi_ptr(i, i, 8);
3573 tcg_gen_st_i64(t0, tp, vofs);
3574 tcg_temp_free_ptr(tp);
3575
3576 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3577 tcg_temp_free_ptr(i);
3578 }
3579
3580 /* Predicate register loads can be any multiple of 2 bytes.
3581 * Note that we still store the entire 64-bit unit into cpu_env.
3582 */
3583 if (len_remain) {
3584 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3585
3586 switch (len_remain) {
3587 case 2:
3588 case 4:
3589 case 8:
3590 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3591 break;
3592
3593 case 6:
3594 t1 = tcg_temp_new_i64();
3595 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3596 tcg_gen_addi_i64(addr, addr, 4);
3597 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3598 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3599 tcg_temp_free_i64(t1);
3600 break;
3601
3602 default:
3603 g_assert_not_reached();
3604 }
3605 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3606 }
3607 tcg_temp_free_i64(addr);
3608 tcg_temp_free_i64(t0);
3609 }
3610
3611 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3612 {
3613 if (sve_access_check(s)) {
3614 int size = vec_full_reg_size(s);
3615 int off = vec_full_reg_offset(s, a->rd);
3616 do_ldr(s, off, size, a->rn, a->imm * size);
3617 }
3618 return true;
3619 }
3620
3621 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3622 {
3623 if (sve_access_check(s)) {
3624 int size = pred_full_reg_size(s);
3625 int off = pred_full_reg_offset(s, a->rd);
3626 do_ldr(s, off, size, a->rn, a->imm * size);
3627 }
3628 return true;
3629 }
3630
3631 /*
3632 *** SVE Memory - Contiguous Load Group
3633 */
3634
3635 /* The memory mode of the dtype. */
3636 static const TCGMemOp dtype_mop[16] = {
3637 MO_UB, MO_UB, MO_UB, MO_UB,
3638 MO_SL, MO_UW, MO_UW, MO_UW,
3639 MO_SW, MO_SW, MO_UL, MO_UL,
3640 MO_SB, MO_SB, MO_SB, MO_Q
3641 };
3642
3643 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3644
3645 /* The vector element size of dtype. */
3646 static const uint8_t dtype_esz[16] = {
3647 0, 1, 2, 3,
3648 3, 1, 2, 3,
3649 3, 2, 2, 3,
3650 3, 2, 1, 3
3651 };
3652
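/* Worked example: dtype 3 is LD1B into .D elements -- dtype_mop[3] is
 * MO_UB (a byte memory access) and dtype_esz[3] is 3, so each byte is
 * zero-extended into a 64-bit vector element.
 */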
3653 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3654 gen_helper_gvec_mem *fn)
3655 {
3656 unsigned vsz = vec_full_reg_size(s);
3657 TCGv_ptr t_pg;
3658 TCGv_i32 desc;
3659
3660 /* For e.g. LD4, there are not enough arguments to pass all 4
3661 * registers as pointers, so encode the regno into the data field.
3662 * For consistency, do this even for LD1.
3663 */
3664 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3665 t_pg = tcg_temp_new_ptr();
3666
3667 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3668 fn(cpu_env, t_pg, addr, desc);
3669
3670 tcg_temp_free_ptr(t_pg);
3671 tcg_temp_free_i32(desc);
3672 }
3673
3674 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3675 TCGv_i64 addr, int dtype, int nreg)
3676 {
3677 static gen_helper_gvec_mem * const fns[16][4] = {
3678 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3679 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3680 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3681 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3682 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3683
3684 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3685 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3686 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3687 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3688 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3689
3690 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3691 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3692 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3693 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3694 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3695
3696 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3697 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3698 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3699 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3700 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3701 };
3702 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3703
3704 /* While there are holes in the table, they are not
3705 * accessible via the instruction encoding.
3706 */
3707 assert(fn != NULL);
3708 do_mem_zpa(s, zt, pg, addr, fn);
3709 }
3710
3711 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3712 {
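/* Rm == 31 would be XZR; the scalar-plus-scalar forms treat that
 * encoding as unallocated.
 */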
3713 if (a->rm == 31) {
3714 return false;
3715 }
3716 if (sve_access_check(s)) {
3717 TCGv_i64 addr = new_tmp_a64(s);
3718 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3719 (a->nreg + 1) << dtype_msz(a->dtype));
3720 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3721 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3722 }
3723 return true;
3724 }
3725
3726 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3727 {
3728 if (sve_access_check(s)) {
3729 int vsz = vec_full_reg_size(s);
3730 int elements = vsz >> dtype_esz[a->dtype];
3731 TCGv_i64 addr = new_tmp_a64(s);
3732
3733 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3734 (a->imm * elements * (a->nreg + 1))
3735 << dtype_msz(a->dtype));
3736 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3737 }
3738 return true;
3739 }
3740
3741 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3742 {
3743 static gen_helper_gvec_mem * const fns[16] = {
3744 gen_helper_sve_ldff1bb_r,
3745 gen_helper_sve_ldff1bhu_r,
3746 gen_helper_sve_ldff1bsu_r,
3747 gen_helper_sve_ldff1bdu_r,
3748
3749 gen_helper_sve_ldff1sds_r,
3750 gen_helper_sve_ldff1hh_r,
3751 gen_helper_sve_ldff1hsu_r,
3752 gen_helper_sve_ldff1hdu_r,
3753
3754 gen_helper_sve_ldff1hds_r,
3755 gen_helper_sve_ldff1hss_r,
3756 gen_helper_sve_ldff1ss_r,
3757 gen_helper_sve_ldff1sdu_r,
3758
3759 gen_helper_sve_ldff1bds_r,
3760 gen_helper_sve_ldff1bss_r,
3761 gen_helper_sve_ldff1bhs_r,
3762 gen_helper_sve_ldff1dd_r,
3763 };
3764
3765 if (sve_access_check(s)) {
3766 TCGv_i64 addr = new_tmp_a64(s);
3767 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
3768 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3769 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3770 }
3771 return true;
3772 }
3773
3774 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3775 {
3776 static gen_helper_gvec_mem * const fns[16] = {
3777 gen_helper_sve_ldnf1bb_r,
3778 gen_helper_sve_ldnf1bhu_r,
3779 gen_helper_sve_ldnf1bsu_r,
3780 gen_helper_sve_ldnf1bdu_r,
3781
3782 gen_helper_sve_ldnf1sds_r,
3783 gen_helper_sve_ldnf1hh_r,
3784 gen_helper_sve_ldnf1hsu_r,
3785 gen_helper_sve_ldnf1hdu_r,
3786
3787 gen_helper_sve_ldnf1hds_r,
3788 gen_helper_sve_ldnf1hss_r,
3789 gen_helper_sve_ldnf1ss_r,
3790 gen_helper_sve_ldnf1sdu_r,
3791
3792 gen_helper_sve_ldnf1bds_r,
3793 gen_helper_sve_ldnf1bss_r,
3794 gen_helper_sve_ldnf1bhs_r,
3795 gen_helper_sve_ldnf1dd_r,
3796 };
3797
3798 if (sve_access_check(s)) {
3799 int vsz = vec_full_reg_size(s);
3800 int elements = vsz >> dtype_esz[a->dtype];
3801 int off = (a->imm * elements) << dtype_msz(a->dtype);
3802 TCGv_i64 addr = new_tmp_a64(s);
3803
3804 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
3805 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3806 }
3807 return true;
3808 }
3809
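/* LD1RQ: load a single 16-byte quadword under control of the predicate
 * and replicate it to fill the destination vector.
 */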
3810 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
3811 {
3812 static gen_helper_gvec_mem * const fns[4] = {
3813 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
3814 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
3815 };
3816 unsigned vsz = vec_full_reg_size(s);
3817 TCGv_ptr t_pg;
3818 TCGv_i32 desc;
3819
3820 /* Load the first quadword using the normal predicated load helpers. */
3821 desc = tcg_const_i32(simd_desc(16, 16, zt));
3822 t_pg = tcg_temp_new_ptr();
3823
3824 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3825 fns[msz](cpu_env, t_pg, addr, desc);
3826
3827 tcg_temp_free_ptr(t_pg);
3828 tcg_temp_free_i32(desc);
3829
3830 /* Replicate that first quadword. */
3831 if (vsz > 16) {
3832 unsigned dofs = vec_full_reg_offset(s, zt);
3833 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
3834 }
3835 }
3836
3837 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3838 {
3839 if (a->rm == 31) {
3840 return false;
3841 }
3842 if (sve_access_check(s)) {
3843 int msz = dtype_msz(a->dtype);
3844 TCGv_i64 addr = new_tmp_a64(s);
3845 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
3846 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3847 do_ldrq(s, a->rd, a->pg, addr, msz);
3848 }
3849 return true;
3850 }
3851
3852 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3853 {
3854 if (sve_access_check(s)) {
3855 TCGv_i64 addr = new_tmp_a64(s);
3856 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
3857 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
3858 }
3859 return true;
3860 }
3861
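/* fn_single is indexed [msz][esz]; entries with esz > msz are the
 * truncating ST1 forms (e.g. ST1B of .d elements).  msz > esz is rejected
 * by the callers, so those slots are NULL.
 */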
3862 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3863 int msz, int esz, int nreg)
3864 {
3865 static gen_helper_gvec_mem * const fn_single[4][4] = {
3866 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
3867 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
3868 { NULL, gen_helper_sve_st1hh_r,
3869 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
3870 { NULL, NULL,
3871 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
3872 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
3873 };
3874 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
3875 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
3876 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
3877 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
3878 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
3879 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
3880 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
3881 };
3882 gen_helper_gvec_mem *fn;
3883
3884 if (nreg == 0) {
3885 /* ST1 */
3886 fn = fn_single[msz][esz];
3887 } else {
3888 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
3889 assert(msz == esz);
3890 fn = fn_multiple[nreg - 1][msz];
3891 }
3892 assert(fn != NULL);
3893 do_mem_zpa(s, zt, pg, addr, fn);
3894 }
3895
3896 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
3897 {
3898 if (a->rm == 31 || a->msz > a->esz) {
3899 return false;
3900 }
3901 if (sve_access_check(s)) {
3902 TCGv_i64 addr = new_tmp_a64(s);
3903 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
3904 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3905 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
3906 }
3907 return true;
3908 }
3909
3910 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
3911 {
3912 if (a->msz > a->esz) {
3913 return false;
3914 }
3915 if (sve_access_check(s)) {
3916 int vsz = vec_full_reg_size(s);
3917 int elements = vsz >> a->esz;
3918 TCGv_i64 addr = new_tmp_a64(s);
3919
3920 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3921 (a->imm * elements * (a->nreg + 1)) << a->msz);
3922 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
3923 }
3924 return true;
3925 }