target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg/tcg-op.h"
  24 #include "tcg/tcg-op-gvec.h"
  25 #include "tcg/tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
/* Signature of a gvec expander that takes a 64-bit TCG value between
 * the two offsets and the two sizes.
 * NOTE(review): presumably (vece, dofs, aofs, scalar, oprsz, maxsz) as
 * for the tcg_gen_gvec_*s expanders -- confirm against tcg-op-gvec.h.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Helper generators producing an i32 result from three (resp. four)
 * pointer operands and an i32 descriptor.
 * NOTE(review): the name suggests the i32 result carries flags -- confirm.
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Helper generators that take the CPU env plus pointer operands, a 64-bit
 * value and an i32 descriptor.
 * NOTE(review): presumably the i64 is a guest address -- confirm at use sites.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(DisasContext *s, int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(DisasContext *s, int x)
  64 {
  65     return (16 << tszimm_esz(s, x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(DisasContext *s, int x)
  70 {
  71     return x - (8 << tszimm_esz(s, x));
  72 }
  73
  74 static inline int plus1(DisasContext *s, int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(DisasContext *s, int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(DisasContext *s, int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(DisasContext *s, int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284 }
 285
 286 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287 {
 288     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289 }
 290
 291 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292 {
 293     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294 }
 295
 296 /*
 297  *** SVE Integer Arithmetic - Unpredicated Group
 298  */
 299
 300 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301 {
 302     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303 }
 304
 305 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306 {
 307     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308 }
 309
 310 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311 {
 312     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313 }
 314
 315 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316 {
 317     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318 }
 319
 320 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321 {
 322     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323 }
 324
 325 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326 {
 327     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328 }
 329
 330 /*
 331  *** SVE Integer Arithmetic - Binary Predicated Group
 332  */
 333
 334 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335 {
 336     unsigned vsz = vec_full_reg_size(s);
 337     if (fn == NULL) {
 338         return false;
 339     }
 340     if (sve_access_check(s)) {
 341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                            vec_full_reg_offset(s, a->rn),
 343                            vec_full_reg_offset(s, a->rm),
 344                            pred_full_reg_offset(s, a->pg),
 345                            vsz, vsz, 0, fn);
 346     }
 347     return true;
 348 }
 349
 350 /* Select active elememnts from Zn and inactive elements from Zm,
 351  * storing the result in Zd.
 352  */
 353 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354 {
 355     static gen_helper_gvec_4 * const fns[4] = {
 356         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358     };
 359     unsigned vsz = vec_full_reg_size(s);
 360     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                        vec_full_reg_offset(s, rn),
 362                        vec_full_reg_offset(s, rm),
 363                        pred_full_reg_offset(s, pg),
 364                        vsz, vsz, 0, fns[esz]);
 365 }
 366
 367 #define DO_ZPZZ(NAME, name) \
 368 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369 {                                                                         \
 370     static gen_helper_gvec_4 * const fns[4] = {                           \
 371         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373     };                                                                    \
 374     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375 }
 376
 377 DO_ZPZZ(AND, and)
 378 DO_ZPZZ(EOR, eor)
 379 DO_ZPZZ(ORR, orr)
 380 DO_ZPZZ(BIC, bic)
 381
 382 DO_ZPZZ(ADD, add)
 383 DO_ZPZZ(SUB, sub)
 384
 385 DO_ZPZZ(SMAX, smax)
 386 DO_ZPZZ(UMAX, umax)
 387 DO_ZPZZ(SMIN, smin)
 388 DO_ZPZZ(UMIN, umin)
 389 DO_ZPZZ(SABD, sabd)
 390 DO_ZPZZ(UABD, uabd)
 391
 392 DO_ZPZZ(MUL, mul)
 393 DO_ZPZZ(SMULH, smulh)
 394 DO_ZPZZ(UMULH, umulh)
 395
 396 DO_ZPZZ(ASR, asr)
 397 DO_ZPZZ(LSR, lsr)
 398 DO_ZPZZ(LSL, lsl)
 399
 400 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401 {
 402     static gen_helper_gvec_4 * const fns[4] = {
 403         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404     };
 405     return do_zpzz_ool(s, a, fns[a->esz]);
 406 }
 407
 408 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409 {
 410     static gen_helper_gvec_4 * const fns[4] = {
 411         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412     };
 413     return do_zpzz_ool(s, a, fns[a->esz]);
 414 }
 415
 416 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417 {
 418     if (sve_access_check(s)) {
 419         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420     }
 421     return true;
 422 }
 423
 424 #undef DO_ZPZZ
 425
 426 /*
 427  *** SVE Integer Arithmetic - Unary Predicated Group
 428  */
 429
 430 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431 {
 432     if (fn == NULL) {
 433         return false;
 434     }
 435     if (sve_access_check(s)) {
 436         unsigned vsz = vec_full_reg_size(s);
 437         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                            vec_full_reg_offset(s, a->rn),
 439                            pred_full_reg_offset(s, a->pg),
 440                            vsz, vsz, 0, fn);
 441     }
 442     return true;
 443 }
 444
 445 #define DO_ZPZ(NAME, name) \
 446 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447 {                                                                   \
 448     static gen_helper_gvec_3 * const fns[4] = {                     \
 449         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451     };                                                              \
 452     return do_zpz_ool(s, a, fns[a->esz]);                           \
 453 }
 454
 455 DO_ZPZ(CLS, cls)
 456 DO_ZPZ(CLZ, clz)
 457 DO_ZPZ(CNT_zpz, cnt_zpz)
 458 DO_ZPZ(CNOT, cnot)
 459 DO_ZPZ(NOT_zpz, not_zpz)
 460 DO_ZPZ(ABS, abs)
 461 DO_ZPZ(NEG, neg)
 462
 463 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464 {
 465     static gen_helper_gvec_3 * const fns[4] = {
 466         NULL,
 467         gen_helper_sve_fabs_h,
 468         gen_helper_sve_fabs_s,
 469         gen_helper_sve_fabs_d
 470     };
 471     return do_zpz_ool(s, a, fns[a->esz]);
 472 }
 473
 474 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475 {
 476     static gen_helper_gvec_3 * const fns[4] = {
 477         NULL,
 478         gen_helper_sve_fneg_h,
 479         gen_helper_sve_fneg_s,
 480         gen_helper_sve_fneg_d
 481     };
 482     return do_zpz_ool(s, a, fns[a->esz]);
 483 }
 484
 485 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486 {
 487     static gen_helper_gvec_3 * const fns[4] = {
 488         NULL,
 489         gen_helper_sve_sxtb_h,
 490         gen_helper_sve_sxtb_s,
 491         gen_helper_sve_sxtb_d
 492     };
 493     return do_zpz_ool(s, a, fns[a->esz]);
 494 }
 495
 496 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497 {
 498     static gen_helper_gvec_3 * const fns[4] = {
 499         NULL,
 500         gen_helper_sve_uxtb_h,
 501         gen_helper_sve_uxtb_s,
 502         gen_helper_sve_uxtb_d
 503     };
 504     return do_zpz_ool(s, a, fns[a->esz]);
 505 }
 506
 507 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508 {
 509     static gen_helper_gvec_3 * const fns[4] = {
 510         NULL, NULL,
 511         gen_helper_sve_sxth_s,
 512         gen_helper_sve_sxth_d
 513     };
 514     return do_zpz_ool(s, a, fns[a->esz]);
 515 }
 516
 517 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518 {
 519     static gen_helper_gvec_3 * const fns[4] = {
 520         NULL, NULL,
 521         gen_helper_sve_uxth_s,
 522         gen_helper_sve_uxth_d
 523     };
 524     return do_zpz_ool(s, a, fns[a->esz]);
 525 }
 526
 527 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528 {
 529     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530 }
 531
 532 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535 }
 536
 537 #undef DO_ZPZ
 538
 539 /*
 540  *** SVE Integer Reduction Group
 541  */
 542
 543 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                        gen_helper_gvec_reduc *fn)
 546 {
 547     unsigned vsz = vec_full_reg_size(s);
 548     TCGv_ptr t_zn, t_pg;
 549     TCGv_i32 desc;
 550     TCGv_i64 temp;
 551
 552     if (fn == NULL) {
 553         return false;
 554     }
 555     if (!sve_access_check(s)) {
 556         return true;
 557     }
 558
 559     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560     temp = tcg_temp_new_i64();
 561     t_zn = tcg_temp_new_ptr();
 562     t_pg = tcg_temp_new_ptr();
 563
 564     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566     fn(temp, t_zn, t_pg, desc);
 567     tcg_temp_free_ptr(t_zn);
 568     tcg_temp_free_ptr(t_pg);
 569     tcg_temp_free_i32(desc);
 570
 571     write_fp_dreg(s, a->rd, temp);
 572     tcg_temp_free_i64(temp);
 573     return true;
 574 }
 575
 576 #define DO_VPZ(NAME, name) \
 577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578 {                                                                        \
 579     static gen_helper_gvec_reduc * const fns[4] = {                      \
 580         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582     };                                                                   \
 583     return do_vpz_ool(s, a, fns[a->esz]);                                \
 584 }
 585
 586 DO_VPZ(ORV, orv)
 587 DO_VPZ(ANDV, andv)
 588 DO_VPZ(EORV, eorv)
 589
 590 DO_VPZ(UADDV, uaddv)
 591 DO_VPZ(SMAXV, smaxv)
 592 DO_VPZ(UMAXV, umaxv)
 593 DO_VPZ(SMINV, sminv)
 594 DO_VPZ(UMINV, uminv)
 595
 596 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597 {
 598     static gen_helper_gvec_reduc * const fns[4] = {
 599         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600         gen_helper_sve_saddv_s, NULL
 601     };
 602     return do_vpz_ool(s, a, fns[a->esz]);
 603 }
 604
 605 #undef DO_VPZ
 606
 607 /*
 608  *** SVE Shift by Immediate - Predicated Group
 609  */
 610
 611 /* Store zero into every active element of Zd.  We will use this for two
 612  * and three-operand predicated instructions for which logic dictates a
 613  * zero result.
 614  */
 615 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616 {
 617     static gen_helper_gvec_2 * const fns[4] = {
 618         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620     };
 621     if (sve_access_check(s)) {
 622         unsigned vsz = vec_full_reg_size(s);
 623         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                            pred_full_reg_offset(s, pg),
 625                            vsz, vsz, 0, fns[esz]);
 626     }
 627     return true;
 628 }
 629
 630 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 631 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632 {
 633     static gen_helper_gvec_3 * const fns[4] = {
 634         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636     };
 637     unsigned vsz = vec_full_reg_size(s);
 638     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                        vec_full_reg_offset(s, rn),
 640                        pred_full_reg_offset(s, pg),
 641                        vsz, vsz, 0, fns[esz]);
 642 }
 643
 644 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                         gen_helper_gvec_3 *fn)
 646 {
 647     if (sve_access_check(s)) {
 648         unsigned vsz = vec_full_reg_size(s);
 649         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                            vec_full_reg_offset(s, a->rn),
 651                            pred_full_reg_offset(s, a->pg),
 652                            vsz, vsz, a->imm, fn);
 653     }
 654     return true;
 655 }
 656
 657 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658 {
 659     static gen_helper_gvec_3 * const fns[4] = {
 660         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662     };
 663     if (a->esz < 0) {
 664         /* Invalid tsz encoding -- see tszimm_esz. */
 665         return false;
 666     }
 667     /* Shift by element size is architecturally valid.  For
 668        arithmetic right-shift, it's the same as by one less. */
 669     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670     return do_zpzi_ool(s, a, fns[a->esz]);
 671 }
 672
 673 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.
 701        For logical shifts, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710 {
 711     static gen_helper_gvec_3 * const fns[4] = {
 712         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714     };
 715     if (a->esz < 0) {
 716         return false;
 717     }
 718     /* Shift by element size is architecturally valid.  For arithmetic
 719        right shift for division, it is a zeroing operation.  */
 720     if (a->imm >= (8 << a->esz)) {
 721         return do_clr_zp(s, a->rd, a->pg, a->esz);
 722     } else {
 723         return do_zpzi_ool(s, a, fns[a->esz]);
 724     }
 725 }
 726
 727 /*
 728  *** SVE Bitwise Shift - Predicated Group
 729  */
 730
 731 #define DO_ZPZW(NAME, name) \
 732 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733 {                                                                         \
 734     static gen_helper_gvec_4 * const fns[3] = {                           \
 735         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736         gen_helper_sve_##name##_zpzw_s,                                   \
 737     };                                                                    \
 738     if (a->esz < 0 || a->esz >= 3) {                                      \
 739         return false;                                                     \
 740     }                                                                     \
 741     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742 }
 743
 744 DO_ZPZW(ASR, asr)
 745 DO_ZPZW(LSR, lsr)
 746 DO_ZPZW(LSL, lsl)
 747
 748 #undef DO_ZPZW
 749
 750 /*
 751  *** SVE Bitwise Shift - Unpredicated Group
 752  */
 753
 754 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                          int64_t, uint32_t, uint32_t))
 757 {
 758     if (a->esz < 0) {
 759         /* Invalid tsz encoding -- see tszimm_esz. */
 760         return false;
 761     }
 762     if (sve_access_check(s)) {
 763         unsigned vsz = vec_full_reg_size(s);
 764         /* Shift by element size is architecturally valid.  For
 765            arithmetic right-shift, it's the same as by one less.
 766            Otherwise it is a zeroing operation.  */
 767         if (a->imm >= 8 << a->esz) {
 768             if (asr) {
 769                 a->imm = (8 << a->esz) - 1;
 770             } else {
 771                 do_dupi_z(s, a->rd, 0);
 772                 return true;
 773             }
 774         }
 775         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777     }
 778     return true;
 779 }
 780
 781 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782 {
 783     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784 }
 785
 786 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787 {
 788     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789 }
 790
 791 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792 {
 793     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794 }
 795
 796 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797 {
 798     if (fn == NULL) {
 799         return false;
 800     }
 801     if (sve_access_check(s)) {
 802         unsigned vsz = vec_full_reg_size(s);
 803         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                            vec_full_reg_offset(s, a->rn),
 805                            vec_full_reg_offset(s, a->rm),
 806                            vsz, vsz, 0, fn);
 807     }
 808     return true;
 809 }
 810
 811 #define DO_ZZW(NAME, name) \
 812 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813 {                                                                         \
 814     static gen_helper_gvec_3 * const fns[4] = {                           \
 815         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816         gen_helper_sve_##name##_zzw_s, NULL                               \
 817     };                                                                    \
 818     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819 }
 820
 821 DO_ZZW(ASR, asr)
 822 DO_ZZW(LSR, lsr)
 823 DO_ZZW(LSL, lsl)
 824
 825 #undef DO_ZZW
 826
 827 /*
 828  *** SVE Integer Multiply-Add Group
 829  */
 830
 831 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                          gen_helper_gvec_5 *fn)
 833 {
 834     if (sve_access_check(s)) {
 835         unsigned vsz = vec_full_reg_size(s);
 836         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                            vec_full_reg_offset(s, a->ra),
 838                            vec_full_reg_offset(s, a->rn),
 839                            vec_full_reg_offset(s, a->rm),
 840                            pred_full_reg_offset(s, a->pg),
 841                            vsz, vsz, 0, fn);
 842     }
 843     return true;
 844 }
 845
 846 #define DO_ZPZZZ(NAME, name) \
 847 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848 {                                                                    \
 849     static gen_helper_gvec_5 * const fns[4] = {                      \
 850         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852     };                                                               \
 853     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854 }
 855
 856 DO_ZPZZZ(MLA, mla)
 857 DO_ZPZZZ(MLS, mls)
 858
 859 #undef DO_ZPZZZ
 860
 861 /*
 862  *** SVE Index Generation Group
 863  */
 864
 865 static void do_index(DisasContext *s, int esz, int rd,
 866                      TCGv_i64 start, TCGv_i64 incr)
 867 {
 868     unsigned vsz = vec_full_reg_size(s);
 869     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870     TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873     if (esz == 3) {
 874         gen_helper_sve_index_d(t_zd, start, incr, desc);
 875     } else {
 876         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877         static index_fn * const fns[3] = {
 878             gen_helper_sve_index_b,
 879             gen_helper_sve_index_h,
 880             gen_helper_sve_index_s,
 881         };
 882         TCGv_i32 s32 = tcg_temp_new_i32();
 883         TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885         tcg_gen_extrl_i64_i32(s32, start);
 886         tcg_gen_extrl_i64_i32(i32, incr);
 887         fns[esz](t_zd, s32, i32, desc);
 888
 889         tcg_temp_free_i32(s32);
 890         tcg_temp_free_i32(i32);
 891     }
 892     tcg_temp_free_ptr(t_zd);
 893     tcg_temp_free_i32(desc);
 894 }
 895
 896 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897 {
 898     if (sve_access_check(s)) {
 899         TCGv_i64 start = tcg_const_i64(a->imm1);
 900         TCGv_i64 incr = tcg_const_i64(a->imm2);
 901         do_index(s, a->esz, a->rd, start, incr);
 902         tcg_temp_free_i64(start);
 903         tcg_temp_free_i64(incr);
 904     }
 905     return true;
 906 }
 907
 908 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909 {
 910     if (sve_access_check(s)) {
 911         TCGv_i64 start = tcg_const_i64(a->imm);
 912         TCGv_i64 incr = cpu_reg(s, a->rm);
 913         do_index(s, a->esz, a->rd, start, incr);
 914         tcg_temp_free_i64(start);
 915     }
 916     return true;
 917 }
 918
 919 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920 {
 921     if (sve_access_check(s)) {
 922         TCGv_i64 start = cpu_reg(s, a->rn);
 923         TCGv_i64 incr = tcg_const_i64(a->imm);
 924         do_index(s, a->esz, a->rd, start, incr);
 925         tcg_temp_free_i64(incr);
 926     }
 927     return true;
 928 }
 929
 930 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931 {
 932     if (sve_access_check(s)) {
 933         TCGv_i64 start = cpu_reg(s, a->rn);
 934         TCGv_i64 incr = cpu_reg(s, a->rm);
 935         do_index(s, a->esz, a->rd, start, incr);
 936     }
 937     return true;
 938 }
 939
 940 /*
 941  *** SVE Stack Allocation Group
 942  */
 943
 944 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945 {
 946     if (sve_access_check(s)) {
 947         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 948         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 949         tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 950     }
 951     return true;
 952 }
 953
 954 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 955 {
 956     if (sve_access_check(s)) {
 957         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 958         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 959         tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 960     }
 961     return true;
 962 }
 963
 964 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965 {
 966     if (sve_access_check(s)) {
 967         TCGv_i64 reg = cpu_reg(s, a->rd);
 968         tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 969     }
 970     return true;
 971 }
 972
 973 /*
 974  *** SVE Compute Vector Address Group
 975  */
 976
 977 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978 {
 979     if (sve_access_check(s)) {
 980         unsigned vsz = vec_full_reg_size(s);
 981         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                            vec_full_reg_offset(s, a->rn),
 983                            vec_full_reg_offset(s, a->rm),
 984                            vsz, vsz, a->imm, fn);
 985     }
 986     return true;
 987 }
 988
 989 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 990 {
 991     return do_adr(s, a, gen_helper_sve_adr_p32);
 992 }
 993
 994 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 995 {
 996     return do_adr(s, a, gen_helper_sve_adr_p64);
 997 }
 998
 999 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000 {
1001     return do_adr(s, a, gen_helper_sve_adr_s32);
1002 }
1003
1004 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005 {
1006     return do_adr(s, a, gen_helper_sve_adr_u32);
1007 }
1008
1009 /*
1010  *** SVE Integer Misc - Unpredicated Group
1011  */
1012
1013 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014 {
1015     static gen_helper_gvec_2 * const fns[4] = {
1016         NULL,
1017         gen_helper_sve_fexpa_h,
1018         gen_helper_sve_fexpa_s,
1019         gen_helper_sve_fexpa_d,
1020     };
1021     if (a->esz == 0) {
1022         return false;
1023     }
1024     if (sve_access_check(s)) {
1025         unsigned vsz = vec_full_reg_size(s);
1026         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027                            vec_full_reg_offset(s, a->rn),
1028                            vsz, vsz, 0, fns[a->esz]);
1029     }
1030     return true;
1031 }
1032
1033 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034 {
1035     static gen_helper_gvec_3 * const fns[4] = {
1036         NULL,
1037         gen_helper_sve_ftssel_h,
1038         gen_helper_sve_ftssel_s,
1039         gen_helper_sve_ftssel_d,
1040     };
1041     if (a->esz == 0) {
1042         return false;
1043     }
1044     if (sve_access_check(s)) {
1045         unsigned vsz = vec_full_reg_size(s);
1046         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047                            vec_full_reg_offset(s, a->rn),
1048                            vec_full_reg_offset(s, a->rm),
1049                            vsz, vsz, 0, fns[a->esz]);
1050     }
1051     return true;
1052 }
1053
1054 /*
1055  *** SVE Predicate Logical Operations Group
1056  */
1057
1058 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059                           const GVecGen4 *gvec_op)
1060 {
1061     if (!sve_access_check(s)) {
1062         return true;
1063     }
1064
1065     unsigned psz = pred_gvec_reg_size(s);
1066     int dofs = pred_full_reg_offset(s, a->rd);
1067     int nofs = pred_full_reg_offset(s, a->rn);
1068     int mofs = pred_full_reg_offset(s, a->rm);
1069     int gofs = pred_full_reg_offset(s, a->pg);
1070
1071     if (psz == 8) {
1072         /* Do the operation and the flags generation in temps.  */
1073         TCGv_i64 pd = tcg_temp_new_i64();
1074         TCGv_i64 pn = tcg_temp_new_i64();
1075         TCGv_i64 pm = tcg_temp_new_i64();
1076         TCGv_i64 pg = tcg_temp_new_i64();
1077
1078         tcg_gen_ld_i64(pn, cpu_env, nofs);
1079         tcg_gen_ld_i64(pm, cpu_env, mofs);
1080         tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082         gvec_op->fni8(pd, pn, pm, pg);
1083         tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085         do_predtest1(pd, pg);
1086
1087         tcg_temp_free_i64(pd);
1088         tcg_temp_free_i64(pn);
1089         tcg_temp_free_i64(pm);
1090         tcg_temp_free_i64(pg);
1091     } else {
1092         /* The operation and flags generation is large.  The computation
1093          * of the flags depends on the original contents of the guarding
1094          * predicate.  If the destination overwrites the guarding predicate,
1095          * then the easiest way to get this right is to save a copy.
1096           */
1097         int tofs = gofs;
1098         if (a->rd == a->pg) {
1099             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101         }
1102
1103         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104         do_predtest(s, dofs, tofs, psz / 8);
1105     }
1106     return true;
1107 }
1108
/* 64-bit integer expansion of predicated AND: pd = (pn & pm) & pg. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1114
/* Host-vector expansion of predicated AND: pd = (pn & pm) & pg. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1121
1122 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123 {
1124     static const GVecGen4 op = {
1125         .fni8 = gen_and_pg_i64,
1126         .fniv = gen_and_pg_vec,
1127         .fno = gen_helper_sve_and_pppp,
1128         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129     };
1130     if (a->s) {
1131         return do_pppp_flags(s, a, &op);
1132     } else if (a->rn == a->rm) {
1133         if (a->pg == a->rn) {
1134             return do_mov_p(s, a->rd, a->rn);
1135         } else {
1136             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137         }
1138     } else if (a->pg == a->rn || a->pg == a->rm) {
1139         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140     } else {
1141         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142     }
1143 }
1144
/* 64-bit integer expansion of predicated BIC: pd = (pn & ~pm) & pg. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1150
/* Host-vector expansion of predicated BIC: pd = (pn & ~pm) & pg. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1157
1158 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159 {
1160     static const GVecGen4 op = {
1161         .fni8 = gen_bic_pg_i64,
1162         .fniv = gen_bic_pg_vec,
1163         .fno = gen_helper_sve_bic_pppp,
1164         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165     };
1166     if (a->s) {
1167         return do_pppp_flags(s, a, &op);
1168     } else if (a->pg == a->rn) {
1169         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170     } else {
1171         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172     }
1173 }
1174
/* 64-bit integer expansion of predicated EOR: pd = (pn ^ pm) & pg. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1180
/* Host-vector expansion of predicated EOR: pd = (pn ^ pm) & pg. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1187
1188 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189 {
1190     static const GVecGen4 op = {
1191         .fni8 = gen_eor_pg_i64,
1192         .fniv = gen_eor_pg_vec,
1193         .fno = gen_helper_sve_eor_pppp,
1194         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195     };
1196     if (a->s) {
1197         return do_pppp_flags(s, a, &op);
1198     } else {
1199         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200     }
1201 }
1202
/* 64-bit integer expansion of predicate select:
 * pd = (pn & pg) | (pm & ~pg).
 * Note that PN and PM are overwritten with the masked intermediates.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}
1209
/* Host-vector expansion of predicate select:
 * pd = (pn & pg) | (pm & ~pg).
 * Note that PN and PM are overwritten with the masked intermediates.
 */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}
1217
1218 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219 {
1220     static const GVecGen4 op = {
1221         .fni8 = gen_sel_pg_i64,
1222         .fniv = gen_sel_pg_vec,
1223         .fno = gen_helper_sve_sel_pppp,
1224         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225     };
1226     if (a->s) {
1227         return false;
1228     } else {
1229         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230     }
1231 }
1232
/* 64-bit integer expansion of predicated ORR: pd = (pn | pm) & pg. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1238
/* Host-vector expansion of predicated ORR: pd = (pn | pm) & pg. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1245
1246 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247 {
1248     static const GVecGen4 op = {
1249         .fni8 = gen_orr_pg_i64,
1250         .fniv = gen_orr_pg_vec,
1251         .fno = gen_helper_sve_orr_pppp,
1252         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253     };
1254     if (a->s) {
1255         return do_pppp_flags(s, a, &op);
1256     } else if (a->pg == a->rn && a->rn == a->rm) {
1257         return do_mov_p(s, a->rd, a->rn);
1258     } else {
1259         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260     }
1261 }
1262
/* 64-bit integer expansion of predicated ORN: pd = (pn | ~pm) & pg. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1268
/* Host-vector expansion of predicated ORN: pd = (pn | ~pm) & pg. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1275
1276 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277 {
1278     static const GVecGen4 op = {
1279         .fni8 = gen_orn_pg_i64,
1280         .fniv = gen_orn_pg_vec,
1281         .fno = gen_helper_sve_orn_pppp,
1282         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283     };
1284     if (a->s) {
1285         return do_pppp_flags(s, a, &op);
1286     } else {
1287         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288     }
1289 }
1290
/* 64-bit integer expansion of predicated NOR: pd = pg & ~(pn | pm). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1296
/* Host-vector expansion of predicated NOR: pd = pg & ~(pn | pm). */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1303
1304 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305 {
1306     static const GVecGen4 op = {
1307         .fni8 = gen_nor_pg_i64,
1308         .fniv = gen_nor_pg_vec,
1309         .fno = gen_helper_sve_nor_pppp,
1310         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311     };
1312     if (a->s) {
1313         return do_pppp_flags(s, a, &op);
1314     } else {
1315         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316     }
1317 }
1318
/* 64-bit integer expansion of predicated NAND: pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1324
/* Host-vector expansion of predicated NAND: pd = pg & ~(pn & pm). */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1331
1332 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333 {
1334     static const GVecGen4 op = {
1335         .fni8 = gen_nand_pg_i64,
1336         .fniv = gen_nand_pg_vec,
1337         .fno = gen_helper_sve_nand_pppp,
1338         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339     };
1340     if (a->s) {
1341         return do_pppp_flags(s, a, &op);
1342     } else {
1343         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344     }
1345 }
1346
1347 /*
1348  *** SVE Predicate Misc Group
1349  */
1350
1351 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352 {
1353     if (sve_access_check(s)) {
1354         int nofs = pred_full_reg_offset(s, a->rn);
1355         int gofs = pred_full_reg_offset(s, a->pg);
1356         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358         if (words == 1) {
1359             TCGv_i64 pn = tcg_temp_new_i64();
1360             TCGv_i64 pg = tcg_temp_new_i64();
1361
1362             tcg_gen_ld_i64(pn, cpu_env, nofs);
1363             tcg_gen_ld_i64(pg, cpu_env, gofs);
1364             do_predtest1(pn, pg);
1365
1366             tcg_temp_free_i64(pn);
1367             tcg_temp_free_i64(pg);
1368         } else {
1369             do_predtest(s, nofs, gofs, words);
1370         }
1371     }
1372     return true;
1373 }
1374
/* See the ARM pseudocode DecodePredCount.
 *
 * Given the size FULLSZ, the 5-bit PATTERN and the log2 element size
 * ESZ, return the number of elements the pattern selects.  Fixed-count
 * patterns yield 0 when fewer than that many elements are available,
 * as does any pattern value not listed below.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    const unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {                           /* POW2 */
        return pow2floor(elements);
    }
    if (pattern == 0x1f) {                          /* ALL */
        return elements;
    }
    if (pattern == 0x1e) {                          /* MUL3 */
        return elements - elements % 3;
    }
    if (pattern == 0x1d) {                          /* MUL4 */
        return elements - elements % 4;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {         /* VL1 ... VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {  /* VL16 ... VL256 */
        bound = 16 << (pattern - 9);
    } else {                                        /* #uimm5 */
        return 0;
    }

    if (elements < bound) {
        return 0;
    }
    return bound;
}
1412
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 *
 * ESZ is the log2 element size, RD the destination predicate number,
 * PAT the element-count pattern handed to decode_pred_count, and
 * SETFLAG requests that NZCV also be written (PTRUES).  All stored
 * values are translation-time constants.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        /* Nothing selected: every word of the predicate becomes zero. */
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        /* WORD is the repeating per-element mask; LASTWORD is the same
         * mask truncated to the bits that remain in a partial final word.
         */
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is covered by a single 64-bit store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial final word.  Use one gvec dup, but only when
         * size_for_gvec leaves the byte count of the set region
         * unchanged (oprsz * 8 == setsz).
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* From here on SETSZ and FULLSZ are used as byte offsets. */
    setsz /= 8;
    fullsz /= 8;

    /* Full words of the repeating mask. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    /* The truncated final word, if there is one. */
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    /* Zero whatever remains up to FULLSZ. */
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* WORD != 0 exactly when at least one element was selected. */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1492
1493 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494 {
1495     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496 }
1497
1498 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499 {
1500     /* Note pat == 31 is #all, to set all elements.  */
1501     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502 }
1503
1504 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505 {
1506     /* Note pat == 32 is #unimp, to set no elements.  */
1507     return do_predset(s, 0, a->rd, 32, false);
1508 }
1509
1510 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511 {
1512     /* The path through do_pppp_flags is complicated enough to want to avoid
1513      * duplication.  Frob the arguments into the form of a predicated AND.
1514      */
1515     arg_rprr_s alt_a = {
1516         .rd = a->rd, .pg = a->pg, .s = a->s,
1517         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518     };
1519     return trans_AND_pppp(s, &alt_a);
1520 }
1521
1522 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523 {
1524     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525 }
1526
1527 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528 {
1529     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530 }
1531
1532 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534                                            TCGv_ptr, TCGv_i32))
1535 {
1536     if (!sve_access_check(s)) {
1537         return true;
1538     }
1539
1540     TCGv_ptr t_pd = tcg_temp_new_ptr();
1541     TCGv_ptr t_pg = tcg_temp_new_ptr();
1542     TCGv_i32 t;
1543     unsigned desc;
1544
1545     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550     t = tcg_const_i32(desc);
1551
1552     gen_fn(t, t_pd, t_pg, t);
1553     tcg_temp_free_ptr(t_pd);
1554     tcg_temp_free_ptr(t_pg);
1555
1556     do_pred_flags(t);
1557     tcg_temp_free_i32(t);
1558     return true;
1559 }
1560
1561 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562 {
1563     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564 }
1565
1566 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567 {
1568     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569 }
1570
1571 /*
1572  *** SVE Element Count Group
1573  */
1574
1575 /* Perform an inline saturating addition of a 32-bit value within
1576  * a 64-bit register.  The second operand is known to be positive,
1577  * which halves the comparisions we must perform to bound the result.
1578  */
1579 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580 {
1581     int64_t ibound;
1582     TCGv_i64 bound;
1583     TCGCond cond;
1584
1585     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1586     if (u) {
1587         tcg_gen_ext32u_i64(reg, reg);
1588     } else {
1589         tcg_gen_ext32s_i64(reg, reg);
1590     }
1591     if (d) {
1592         tcg_gen_sub_i64(reg, reg, val);
1593         ibound = (u ? 0 : INT32_MIN);
1594         cond = TCG_COND_LT;
1595     } else {
1596         tcg_gen_add_i64(reg, reg, val);
1597         ibound = (u ? UINT32_MAX : INT32_MAX);
1598         cond = TCG_COND_GT;
1599     }
1600     bound = tcg_const_i64(ibound);
1601     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602     tcg_temp_free_i64(bound);
1603 }
1604
/* Similarly with 64-bit values.
 *
 * REG is updated in place with the saturated result of REG + VAL
 * (or REG - VAL when D is set); U selects unsigned saturation.
 * No wider type is available, so overflow is detected from the
 * operands and result themselves.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* reg = (reg <u val) ? 0 : reg - val */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* The sum wrapped if it is below the original value:
             * reg = (reg + val <u reg) ? UINT64_MAX : reg + val
             */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            /* t0 ends with its sign bit set when the operands differ in
             * sign and the difference (t1) differs in sign from REG.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            /* reg = (t0 < 0) ? INT64_MIN : difference */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            /* t0 ends with its sign bit set when the operands agree in
             * sign and the sum (now in REG) differs in sign from VAL.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            /* reg = (t0 < 0) ? INT64_MAX : sum */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        /* t2 is only created on the signed paths. */
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1651
1652 /* Similarly with a vector and a scalar operand.  */
1653 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654                               TCGv_i64 val, bool u, bool d)
1655 {
1656     unsigned vsz = vec_full_reg_size(s);
1657     TCGv_ptr dptr, nptr;
1658     TCGv_i32 t32, desc;
1659     TCGv_i64 t64;
1660
1661     dptr = tcg_temp_new_ptr();
1662     nptr = tcg_temp_new_ptr();
1663     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667     switch (esz) {
1668     case MO_8:
1669         t32 = tcg_temp_new_i32();
1670         tcg_gen_extrl_i64_i32(t32, val);
1671         if (d) {
1672             tcg_gen_neg_i32(t32, t32);
1673         }
1674         if (u) {
1675             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676         } else {
1677             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678         }
1679         tcg_temp_free_i32(t32);
1680         break;
1681
1682     case MO_16:
1683         t32 = tcg_temp_new_i32();
1684         tcg_gen_extrl_i64_i32(t32, val);
1685         if (d) {
1686             tcg_gen_neg_i32(t32, t32);
1687         }
1688         if (u) {
1689             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690         } else {
1691             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692         }
1693         tcg_temp_free_i32(t32);
1694         break;
1695
1696     case MO_32:
1697         t64 = tcg_temp_new_i64();
1698         if (d) {
1699             tcg_gen_neg_i64(t64, val);
1700         } else {
1701             tcg_gen_mov_i64(t64, val);
1702         }
1703         if (u) {
1704             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705         } else {
1706             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707         }
1708         tcg_temp_free_i64(t64);
1709         break;
1710
1711     case MO_64:
1712         if (u) {
1713             if (d) {
1714                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715             } else {
1716                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717             }
1718         } else if (d) {
1719             t64 = tcg_temp_new_i64();
1720             tcg_gen_neg_i64(t64, val);
1721             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722             tcg_temp_free_i64(t64);
1723         } else {
1724             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725         }
1726         break;
1727
1728     default:
1729         g_assert_not_reached();
1730     }
1731
1732     tcg_temp_free_ptr(dptr);
1733     tcg_temp_free_ptr(nptr);
1734     tcg_temp_free_i32(desc);
1735 }
1736
1737 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738 {
1739     if (sve_access_check(s)) {
1740         unsigned fullsz = vec_full_reg_size(s);
1741         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743     }
1744     return true;
1745 }
1746
1747 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748 {
1749     if (sve_access_check(s)) {
1750         unsigned fullsz = vec_full_reg_size(s);
1751         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752         int inc = numelem * a->imm * (a->d ? -1 : 1);
1753         TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755         tcg_gen_addi_i64(reg, reg, inc);
1756     }
1757     return true;
1758 }
1759
1760 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761 {
1762     if (!sve_access_check(s)) {
1763         return true;
1764     }
1765
1766     unsigned fullsz = vec_full_reg_size(s);
1767     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768     int inc = numelem * a->imm;
1769     TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1772     if (inc == 0) {
1773         if (a->u) {
1774             tcg_gen_ext32u_i64(reg, reg);
1775         } else {
1776             tcg_gen_ext32s_i64(reg, reg);
1777         }
1778     } else {
1779         TCGv_i64 t = tcg_const_i64(inc);
1780         do_sat_addsub_32(reg, t, a->u, a->d);
1781         tcg_temp_free_i64(t);
1782     }
1783     return true;
1784 }
1785
1786 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787 {
1788     if (!sve_access_check(s)) {
1789         return true;
1790     }
1791
1792     unsigned fullsz = vec_full_reg_size(s);
1793     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794     int inc = numelem * a->imm;
1795     TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797     if (inc != 0) {
1798         TCGv_i64 t = tcg_const_i64(inc);
1799         do_sat_addsub_64(reg, t, a->u, a->d);
1800         tcg_temp_free_i64(t);
1801     }
1802     return true;
1803 }
1804
1805 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806 {
1807     if (a->esz == 0) {
1808         return false;
1809     }
1810
1811     unsigned fullsz = vec_full_reg_size(s);
1812     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813     int inc = numelem * a->imm;
1814
1815     if (inc != 0) {
1816         if (sve_access_check(s)) {
1817             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819                               vec_full_reg_offset(s, a->rn),
1820                               t, fullsz, fullsz);
1821             tcg_temp_free_i64(t);
1822         }
1823     } else {
1824         do_mov_z(s, a->rd, a->rn);
1825     }
1826     return true;
1827 }
1828
1829 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830 {
1831     if (a->esz == 0) {
1832         return false;
1833     }
1834
1835     unsigned fullsz = vec_full_reg_size(s);
1836     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837     int inc = numelem * a->imm;
1838
1839     if (inc != 0) {
1840         if (sve_access_check(s)) {
1841             TCGv_i64 t = tcg_const_i64(inc);
1842             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843             tcg_temp_free_i64(t);
1844         }
1845     } else {
1846         do_mov_z(s, a->rd, a->rn);
1847     }
1848     return true;
1849 }
1850
1851 /*
1852  *** SVE Bitwise Immediate Group
1853  */
1854
1855 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856 {
1857     uint64_t imm;
1858     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859                                 extract32(a->dbm, 0, 6),
1860                                 extract32(a->dbm, 6, 6))) {
1861         return false;
1862     }
1863     if (sve_access_check(s)) {
1864         unsigned vsz = vec_full_reg_size(s);
1865         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867     }
1868     return true;
1869 }
1870
1871 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872 {
1873     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874 }
1875
1876 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877 {
1878     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879 }
1880
1881 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884 }
1885
1886 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887 {
1888     uint64_t imm;
1889     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890                                 extract32(a->dbm, 0, 6),
1891                                 extract32(a->dbm, 6, 6))) {
1892         return false;
1893     }
1894     if (sve_access_check(s)) {
1895         do_dupi_z(s, a->rd, imm);
1896     }
1897     return true;
1898 }
1899
1900 /*
1901  *** SVE Integer Wide Immediate - Predicated Group
1902  */
1903
1904 /* Implement all merging copies.  This is used for CPY (immediate),
1905  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906  */
1907 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908                      TCGv_i64 val)
1909 {
1910     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911     static gen_cpy * const fns[4] = {
1912         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914     };
1915     unsigned vsz = vec_full_reg_size(s);
1916     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917     TCGv_ptr t_zd = tcg_temp_new_ptr();
1918     TCGv_ptr t_zn = tcg_temp_new_ptr();
1919     TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925     fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927     tcg_temp_free_ptr(t_zd);
1928     tcg_temp_free_ptr(t_zn);
1929     tcg_temp_free_ptr(t_pg);
1930     tcg_temp_free_i32(desc);
1931 }
1932
1933 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934 {
1935     if (a->esz == 0) {
1936         return false;
1937     }
1938     if (sve_access_check(s)) {
1939         /* Decode the VFP immediate.  */
1940         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941         TCGv_i64 t_imm = tcg_const_i64(imm);
1942         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943         tcg_temp_free_i64(t_imm);
1944     }
1945     return true;
1946 }
1947
1948 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949 {
1950     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951         return false;
1952     }
1953     if (sve_access_check(s)) {
1954         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956         tcg_temp_free_i64(t_imm);
1957     }
1958     return true;
1959 }
1960
1961 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962 {
1963     static gen_helper_gvec_2i * const fns[4] = {
1964         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966     };
1967
1968     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969         return false;
1970     }
1971     if (sve_access_check(s)) {
1972         unsigned vsz = vec_full_reg_size(s);
1973         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975                             pred_full_reg_offset(s, a->pg),
1976                             t_imm, vsz, vsz, 0, fns[a->esz]);
1977         tcg_temp_free_i64(t_imm);
1978     }
1979     return true;
1980 }
1981
1982 /*
1983  *** SVE Permute Extract Group
1984  */
1985
1986 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1987 {
1988     if (!sve_access_check(s)) {
1989         return true;
1990     }
1991
1992     unsigned vsz = vec_full_reg_size(s);
1993     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994     unsigned n_siz = vsz - n_ofs;
1995     unsigned d = vec_full_reg_offset(s, a->rd);
1996     unsigned n = vec_full_reg_offset(s, a->rn);
1997     unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999     /* Use host vector move insns if we have appropriate sizes
2000      * and no unfortunate overlap.
2001      */
2002     if (m != d
2003         && n_ofs == size_for_gvec(n_ofs)
2004         && n_siz == size_for_gvec(n_siz)
2005         && (d != n || n_siz <= n_ofs)) {
2006         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007         if (n_ofs != 0) {
2008             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009         }
2010     } else {
2011         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012     }
2013     return true;
2014 }
2015
2016 /*
2017  *** SVE Permute - Unpredicated Group
2018  */
2019
2020 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021 {
2022     if (sve_access_check(s)) {
2023         unsigned vsz = vec_full_reg_size(s);
2024         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                              vsz, vsz, cpu_reg_sp(s, a->rn));
2026     }
2027     return true;
2028 }
2029
2030 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2031 {
2032     if ((a->imm & 0x1f) == 0) {
2033         return false;
2034     }
2035     if (sve_access_check(s)) {
2036         unsigned vsz = vec_full_reg_size(s);
2037         unsigned dofs = vec_full_reg_offset(s, a->rd);
2038         unsigned esz, index;
2039
2040         esz = ctz32(a->imm);
2041         index = a->imm >> (esz + 1);
2042
2043         if ((index << esz) < vsz) {
2044             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046         } else {
2047             /*
2048              * While dup_mem handles 128-bit elements, dup_imm does not.
2049              * Thankfully element size doesn't matter for splatting zero.
2050              */
2051             tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2052         }
2053     }
2054     return true;
2055 }
2056
2057 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058 {
2059     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060     static gen_insr * const fns[4] = {
2061         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063     };
2064     unsigned vsz = vec_full_reg_size(s);
2065     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066     TCGv_ptr t_zd = tcg_temp_new_ptr();
2067     TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072     fns[a->esz](t_zd, t_zn, val, desc);
2073
2074     tcg_temp_free_ptr(t_zd);
2075     tcg_temp_free_ptr(t_zn);
2076     tcg_temp_free_i32(desc);
2077 }
2078
2079 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2080 {
2081     if (sve_access_check(s)) {
2082         TCGv_i64 t = tcg_temp_new_i64();
2083         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084         do_insr_i64(s, a, t);
2085         tcg_temp_free_i64(t);
2086     }
2087     return true;
2088 }
2089
2090 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2091 {
2092     if (sve_access_check(s)) {
2093         do_insr_i64(s, a, cpu_reg(s, a->rm));
2094     }
2095     return true;
2096 }
2097
2098 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2099 {
2100     static gen_helper_gvec_2 * const fns[4] = {
2101         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103     };
2104
2105     if (sve_access_check(s)) {
2106         unsigned vsz = vec_full_reg_size(s);
2107         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108                            vec_full_reg_offset(s, a->rn),
2109                            vsz, vsz, 0, fns[a->esz]);
2110     }
2111     return true;
2112 }
2113
2114 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2115 {
2116     static gen_helper_gvec_3 * const fns[4] = {
2117         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119     };
2120
2121     if (sve_access_check(s)) {
2122         unsigned vsz = vec_full_reg_size(s);
2123         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124                            vec_full_reg_offset(s, a->rn),
2125                            vec_full_reg_offset(s, a->rm),
2126                            vsz, vsz, 0, fns[a->esz]);
2127     }
2128     return true;
2129 }
2130
2131 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2132 {
2133     static gen_helper_gvec_2 * const fns[4][2] = {
2134         { NULL, NULL },
2135         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138     };
2139
2140     if (a->esz == 0) {
2141         return false;
2142     }
2143     if (sve_access_check(s)) {
2144         unsigned vsz = vec_full_reg_size(s);
2145         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146                            vec_full_reg_offset(s, a->rn)
2147                            + (a->h ? vsz / 2 : 0),
2148                            vsz, vsz, 0, fns[a->esz][a->u]);
2149     }
2150     return true;
2151 }
2152
2153 /*
2154  *** SVE Permute - Predicates Group
2155  */
2156
2157 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158                           gen_helper_gvec_3 *fn)
2159 {
2160     if (!sve_access_check(s)) {
2161         return true;
2162     }
2163
2164     unsigned vsz = pred_full_reg_size(s);
2165
2166     /* Predicate sizes may be smaller and cannot use simd_desc.
2167        We cannot round up, as we do elsewhere, because we need
2168        the exact size for ZIP2 and REV.  We retain the style for
2169        the other helpers for consistency.  */
2170     TCGv_ptr t_d = tcg_temp_new_ptr();
2171     TCGv_ptr t_n = tcg_temp_new_ptr();
2172     TCGv_ptr t_m = tcg_temp_new_ptr();
2173     TCGv_i32 t_desc;
2174     int desc;
2175
2176     desc = vsz - 2;
2177     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183     t_desc = tcg_const_i32(desc);
2184
2185     fn(t_d, t_n, t_m, t_desc);
2186
2187     tcg_temp_free_ptr(t_d);
2188     tcg_temp_free_ptr(t_n);
2189     tcg_temp_free_ptr(t_m);
2190     tcg_temp_free_i32(t_desc);
2191     return true;
2192 }
2193
2194 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195                           gen_helper_gvec_2 *fn)
2196 {
2197     if (!sve_access_check(s)) {
2198         return true;
2199     }
2200
2201     unsigned vsz = pred_full_reg_size(s);
2202     TCGv_ptr t_d = tcg_temp_new_ptr();
2203     TCGv_ptr t_n = tcg_temp_new_ptr();
2204     TCGv_i32 t_desc;
2205     int desc;
2206
2207     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210     /* Predicate sizes may be smaller and cannot use simd_desc.
2211        We cannot round up, as we do elsewhere, because we need
2212        the exact size for ZIP2 and REV.  We retain the style for
2213        the other helpers for consistency.  */
2214
2215     desc = vsz - 2;
2216     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218     t_desc = tcg_const_i32(desc);
2219
2220     fn(t_d, t_n, t_desc);
2221
2222     tcg_temp_free_i32(t_desc);
2223     tcg_temp_free_ptr(t_d);
2224     tcg_temp_free_ptr(t_n);
2225     return true;
2226 }
2227
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2229 {
2230     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231 }
2232
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2234 {
2235     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236 }
2237
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2239 {
2240     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241 }
2242
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2244 {
2245     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246 }
2247
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2249 {
2250     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251 }
2252
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2254 {
2255     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256 }
2257
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2259 {
2260     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261 }
2262
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2264 {
2265     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266 }
2267
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2269 {
2270     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271 }
2272
2273 /*
2274  *** SVE Permute - Interleaving Group
2275  */
2276
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278 {
2279     static gen_helper_gvec_3 * const fns[4] = {
2280         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282     };
2283
2284     if (sve_access_check(s)) {
2285         unsigned vsz = vec_full_reg_size(s);
2286         unsigned high_ofs = high ? vsz / 2 : 0;
2287         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288                            vec_full_reg_offset(s, a->rn) + high_ofs,
2289                            vec_full_reg_offset(s, a->rm) + high_ofs,
2290                            vsz, vsz, 0, fns[a->esz]);
2291     }
2292     return true;
2293 }
2294
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296                             gen_helper_gvec_3 *fn)
2297 {
2298     if (sve_access_check(s)) {
2299         unsigned vsz = vec_full_reg_size(s);
2300         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301                            vec_full_reg_offset(s, a->rn),
2302                            vec_full_reg_offset(s, a->rm),
2303                            vsz, vsz, data, fn);
2304     }
2305     return true;
2306 }
2307
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2309 {
2310     return do_zip(s, a, false);
2311 }
2312
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2314 {
2315     return do_zip(s, a, true);
2316 }
2317
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321 };
2322
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2324 {
2325     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326 }
2327
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2329 {
2330     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331 }
2332
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336 };
2337
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2339 {
2340     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341 }
2342
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2344 {
2345     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346 }
2347
2348 /*
2349  *** SVE Permute Vector - Predicated Group
2350  */
2351
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2353 {
2354     static gen_helper_gvec_3 * const fns[4] = {
2355         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356     };
2357     return do_zpz_ool(s, a, fns[a->esz]);
2358 }
2359
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361  * function, scaled by the element size.  This includes the not found
2362  * indication; e.g. not found for esz=3 is -8.
2363  */
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365 {
2366     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2367      * round up, as we do elsewhere, because we need the exact size.
2368      */
2369     TCGv_ptr t_p = tcg_temp_new_ptr();
2370     TCGv_i32 t_desc;
2371     unsigned vsz = pred_full_reg_size(s);
2372     unsigned desc;
2373
2374     desc = vsz - 2;
2375     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2376
2377     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378     t_desc = tcg_const_i32(desc);
2379
2380     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382     tcg_temp_free_i32(t_desc);
2383     tcg_temp_free_ptr(t_p);
2384 }
2385
2386 /* Increment LAST to the offset of the next element in the vector,
2387  * wrapping around to 0.
2388  */
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391     unsigned vsz = vec_full_reg_size(s);
2392
2393     tcg_gen_addi_i32(last, last, 1 << esz);
2394     if (is_power_of_2(vsz)) {
2395         tcg_gen_andi_i32(last, last, vsz - 1);
2396     } else {
2397         TCGv_i32 max = tcg_const_i32(vsz);
2398         TCGv_i32 zero = tcg_const_i32(0);
2399         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400         tcg_temp_free_i32(max);
2401         tcg_temp_free_i32(zero);
2402     }
2403 }
2404
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407 {
2408     unsigned vsz = vec_full_reg_size(s);
2409
2410     if (is_power_of_2(vsz)) {
2411         tcg_gen_andi_i32(last, last, vsz - 1);
2412     } else {
2413         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414         TCGv_i32 zero = tcg_const_i32(0);
2415         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416         tcg_temp_free_i32(max);
2417         tcg_temp_free_i32(zero);
2418     }
2419 }
2420
2421 /* Load an unsigned element of ESZ from BASE+OFS.  */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423 {
2424     TCGv_i64 r = tcg_temp_new_i64();
2425
2426     switch (esz) {
2427     case 0:
2428         tcg_gen_ld8u_i64(r, base, ofs);
2429         break;
2430     case 1:
2431         tcg_gen_ld16u_i64(r, base, ofs);
2432         break;
2433     case 2:
2434         tcg_gen_ld32u_i64(r, base, ofs);
2435         break;
2436     case 3:
2437         tcg_gen_ld_i64(r, base, ofs);
2438         break;
2439     default:
2440         g_assert_not_reached();
2441     }
2442     return r;
2443 }
2444
2445 /* Load an unsigned element of ESZ from RM[LAST].  */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447                                  int rm, int esz)
2448 {
2449     TCGv_ptr p = tcg_temp_new_ptr();
2450     TCGv_i64 r;
2451
2452     /* Convert offset into vector into offset into ENV.
2453      * The final adjustment for the vector register base
2454      * is added via constant offset to the load.
2455      */
2456 #ifdef HOST_WORDS_BIGENDIAN
2457     /* Adjust for element ordering.  See vec_reg_offset.  */
2458     if (esz < 3) {
2459         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460     }
2461 #endif
2462     tcg_gen_ext_i32_ptr(p, last);
2463     tcg_gen_add_ptr(p, p, cpu_env);
2464
2465     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466     tcg_temp_free_ptr(p);
2467
2468     return r;
2469 }
2470
2471 /* Compute CLAST for a Zreg.  */
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473 {
2474     TCGv_i32 last;
2475     TCGLabel *over;
2476     TCGv_i64 ele;
2477     unsigned vsz, esz = a->esz;
2478
2479     if (!sve_access_check(s)) {
2480         return true;
2481     }
2482
2483     last = tcg_temp_local_new_i32();
2484     over = gen_new_label();
2485
2486     find_last_active(s, last, esz, a->pg);
2487
2488     /* There is of course no movcond for a 2048-bit vector,
2489      * so we must branch over the actual store.
2490      */
2491     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493     if (!before) {
2494         incr_last_active(s, last, esz);
2495     }
2496
2497     ele = load_last_active(s, last, a->rm, esz);
2498     tcg_temp_free_i32(last);
2499
2500     vsz = vec_full_reg_size(s);
2501     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502     tcg_temp_free_i64(ele);
2503
2504     /* If this insn used MOVPRFX, we may need a second move.  */
2505     if (a->rd != a->rn) {
2506         TCGLabel *done = gen_new_label();
2507         tcg_gen_br(done);
2508
2509         gen_set_label(over);
2510         do_mov_z(s, a->rd, a->rn);
2511
2512         gen_set_label(done);
2513     } else {
2514         gen_set_label(over);
2515     }
2516     return true;
2517 }
2518
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2520 {
2521     return do_clast_vector(s, a, false);
2522 }
2523
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2525 {
2526     return do_clast_vector(s, a, true);
2527 }
2528
2529 /* Compute CLAST for a scalar.  */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531                             bool before, TCGv_i64 reg_val)
2532 {
2533     TCGv_i32 last = tcg_temp_new_i32();
2534     TCGv_i64 ele, cmp, zero;
2535
2536     find_last_active(s, last, esz, pg);
2537
2538     /* Extend the original value of last prior to incrementing.  */
2539     cmp = tcg_temp_new_i64();
2540     tcg_gen_ext_i32_i64(cmp, last);
2541
2542     if (!before) {
2543         incr_last_active(s, last, esz);
2544     }
2545
2546     /* The conceit here is that while last < 0 indicates not found, after
2547      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548      * from which we can load garbage.  We then discard the garbage with
2549      * a conditional move.
2550      */
2551     ele = load_last_active(s, last, rm, esz);
2552     tcg_temp_free_i32(last);
2553
2554     zero = tcg_const_i64(0);
2555     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557     tcg_temp_free_i64(zero);
2558     tcg_temp_free_i64(cmp);
2559     tcg_temp_free_i64(ele);
2560 }
2561
2562 /* Compute CLAST for a Vreg.  */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564 {
2565     if (sve_access_check(s)) {
2566         int esz = a->esz;
2567         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571         write_fp_dreg(s, a->rd, reg);
2572         tcg_temp_free_i64(reg);
2573     }
2574     return true;
2575 }
2576
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2578 {
2579     return do_clast_fp(s, a, false);
2580 }
2581
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2583 {
2584     return do_clast_fp(s, a, true);
2585 }
2586
2587 /* Compute CLAST for a Xreg.  */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589 {
2590     TCGv_i64 reg;
2591
2592     if (!sve_access_check(s)) {
2593         return true;
2594     }
2595
2596     reg = cpu_reg(s, a->rd);
2597     switch (a->esz) {
2598     case 0:
2599         tcg_gen_ext8u_i64(reg, reg);
2600         break;
2601     case 1:
2602         tcg_gen_ext16u_i64(reg, reg);
2603         break;
2604     case 2:
2605         tcg_gen_ext32u_i64(reg, reg);
2606         break;
2607     case 3:
2608         break;
2609     default:
2610         g_assert_not_reached();
2611     }
2612
2613     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614     return true;
2615 }
2616
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2618 {
2619     return do_clast_general(s, a, false);
2620 }
2621
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2623 {
2624     return do_clast_general(s, a, true);
2625 }
2626
2627 /* Compute LAST for a scalar.  */
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629                                int pg, int rm, bool before)
2630 {
2631     TCGv_i32 last = tcg_temp_new_i32();
2632     TCGv_i64 ret;
2633
2634     find_last_active(s, last, esz, pg);
2635     if (before) {
2636         wrap_last_active(s, last, esz);
2637     } else {
2638         incr_last_active(s, last, esz);
2639     }
2640
2641     ret = load_last_active(s, last, rm, esz);
2642     tcg_temp_free_i32(last);
2643     return ret;
2644 }
2645
2646 /* Compute LAST for a Vreg.  */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648 {
2649     if (sve_access_check(s)) {
2650         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651         write_fp_dreg(s, a->rd, val);
2652         tcg_temp_free_i64(val);
2653     }
2654     return true;
2655 }
2656
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2658 {
2659     return do_last_fp(s, a, false);
2660 }
2661
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2663 {
2664     return do_last_fp(s, a, true);
2665 }
2666
2667 /* Compute LAST for a Xreg.  */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669 {
2670     if (sve_access_check(s)) {
2671         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673         tcg_temp_free_i64(val);
2674     }
2675     return true;
2676 }
2677
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2679 {
2680     return do_last_general(s, a, false);
2681 }
2682
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2684 {
2685     return do_last_general(s, a, true);
2686 }
2687
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2689 {
2690     if (sve_access_check(s)) {
2691         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692     }
2693     return true;
2694 }
2695
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2697 {
2698     if (sve_access_check(s)) {
2699         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702         tcg_temp_free_i64(t);
2703     }
2704     return true;
2705 }
2706
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2708 {
2709     static gen_helper_gvec_3 * const fns[4] = {
2710         NULL,
2711         gen_helper_sve_revb_h,
2712         gen_helper_sve_revb_s,
2713         gen_helper_sve_revb_d,
2714     };
2715     return do_zpz_ool(s, a, fns[a->esz]);
2716 }
2717
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2719 {
2720     static gen_helper_gvec_3 * const fns[4] = {
2721         NULL,
2722         NULL,
2723         gen_helper_sve_revh_s,
2724         gen_helper_sve_revh_d,
2725     };
2726     return do_zpz_ool(s, a, fns[a->esz]);
2727 }
2728
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2730 {
2731     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732 }
2733
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2735 {
2736     static gen_helper_gvec_3 * const fns[4] = {
2737         gen_helper_sve_rbit_b,
2738         gen_helper_sve_rbit_h,
2739         gen_helper_sve_rbit_s,
2740         gen_helper_sve_rbit_d,
2741     };
2742     return do_zpz_ool(s, a, fns[a->esz]);
2743 }
2744
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2746 {
2747     if (sve_access_check(s)) {
2748         unsigned vsz = vec_full_reg_size(s);
2749         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750                            vec_full_reg_offset(s, a->rn),
2751                            vec_full_reg_offset(s, a->rm),
2752                            pred_full_reg_offset(s, a->pg),
2753                            vsz, vsz, a->esz, gen_helper_sve_splice);
2754     }
2755     return true;
2756 }
2757
2758 /*
2759  *** SVE Integer Compare - Vectors Group
2760  */
2761
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763                           gen_helper_gvec_flags_4 *gen_fn)
2764 {
2765     TCGv_ptr pd, zn, zm, pg;
2766     unsigned vsz;
2767     TCGv_i32 t;
2768
2769     if (gen_fn == NULL) {
2770         return false;
2771     }
2772     if (!sve_access_check(s)) {
2773         return true;
2774     }
2775
2776     vsz = vec_full_reg_size(s);
2777     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778     pd = tcg_temp_new_ptr();
2779     zn = tcg_temp_new_ptr();
2780     zm = tcg_temp_new_ptr();
2781     pg = tcg_temp_new_ptr();
2782
2783     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
2788     gen_fn(t, pd, zn, zm, pg, t);
2789
2790     tcg_temp_free_ptr(pd);
2791     tcg_temp_free_ptr(zn);
2792     tcg_temp_free_ptr(zm);
2793     tcg_temp_free_ptr(pg);
2794
2795     do_pred_flags(t);
2796
2797     tcg_temp_free_i32(t);
2798     return true;
2799 }
2800
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2803 {                                                                         \
2804     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2805         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2806         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2807     };                                                                    \
2808     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2809 }
2810
2811 DO_PPZZ(CMPEQ, cmpeq)
2812 DO_PPZZ(CMPNE, cmpne)
2813 DO_PPZZ(CMPGT, cmpgt)
2814 DO_PPZZ(CMPGE, cmpge)
2815 DO_PPZZ(CMPHI, cmphi)
2816 DO_PPZZ(CMPHS, cmphs)
2817
2818 #undef DO_PPZZ
2819
2820 #define DO_PPZW(NAME, name) \
2821 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2822 {                                                                         \
2823     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2824         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2825         gen_helper_sve_##name##_ppzw_s, NULL                              \
2826     };                                                                    \
2827     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2828 }
2829
2830 DO_PPZW(CMPEQ, cmpeq)
2831 DO_PPZW(CMPNE, cmpne)
2832 DO_PPZW(CMPGT, cmpgt)
2833 DO_PPZW(CMPGE, cmpge)
2834 DO_PPZW(CMPHI, cmphi)
2835 DO_PPZW(CMPHS, cmphs)
2836 DO_PPZW(CMPLT, cmplt)
2837 DO_PPZW(CMPLE, cmple)
2838 DO_PPZW(CMPLO, cmplo)
2839 DO_PPZW(CMPLS, cmpls)
2840
2841 #undef DO_PPZW
2842
2843 /*
2844  *** SVE Integer Compare - Immediate Groups
2845  */
2846
2847 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2848                           gen_helper_gvec_flags_3 *gen_fn)
2849 {
2850     TCGv_ptr pd, zn, pg;
2851     unsigned vsz;
2852     TCGv_i32 t;
2853
2854     if (gen_fn == NULL) {
2855         return false;
2856     }
2857     if (!sve_access_check(s)) {
2858         return true;
2859     }
2860
2861     vsz = vec_full_reg_size(s);
2862     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2863     pd = tcg_temp_new_ptr();
2864     zn = tcg_temp_new_ptr();
2865     pg = tcg_temp_new_ptr();
2866
2867     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2868     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2869     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2870
2871     gen_fn(t, pd, zn, pg, t);
2872
2873     tcg_temp_free_ptr(pd);
2874     tcg_temp_free_ptr(zn);
2875     tcg_temp_free_ptr(pg);
2876
2877     do_pred_flags(t);
2878
2879     tcg_temp_free_i32(t);
2880     return true;
2881 }
2882
2883 #define DO_PPZI(NAME, name) \
2884 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2885 {                                                                         \
2886     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2887         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2888         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2889     };                                                                    \
2890     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2891 }
2892
2893 DO_PPZI(CMPEQ, cmpeq)
2894 DO_PPZI(CMPNE, cmpne)
2895 DO_PPZI(CMPGT, cmpgt)
2896 DO_PPZI(CMPGE, cmpge)
2897 DO_PPZI(CMPHI, cmphi)
2898 DO_PPZI(CMPHS, cmphs)
2899 DO_PPZI(CMPLT, cmplt)
2900 DO_PPZI(CMPLE, cmple)
2901 DO_PPZI(CMPLO, cmplo)
2902 DO_PPZI(CMPLS, cmpls)
2903
2904 #undef DO_PPZI
2905
2906 /*
2907  *** SVE Partition Break Group
2908  */
2909
2910 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2911                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2912 {
2913     if (!sve_access_check(s)) {
2914         return true;
2915     }
2916
2917     unsigned vsz = pred_full_reg_size(s);
2918
2919     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2920     TCGv_ptr d = tcg_temp_new_ptr();
2921     TCGv_ptr n = tcg_temp_new_ptr();
2922     TCGv_ptr m = tcg_temp_new_ptr();
2923     TCGv_ptr g = tcg_temp_new_ptr();
2924     TCGv_i32 t = tcg_const_i32(vsz - 2);
2925
2926     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2927     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2928     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2929     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2930
2931     if (a->s) {
2932         fn_s(t, d, n, m, g, t);
2933         do_pred_flags(t);
2934     } else {
2935         fn(d, n, m, g, t);
2936     }
2937     tcg_temp_free_ptr(d);
2938     tcg_temp_free_ptr(n);
2939     tcg_temp_free_ptr(m);
2940     tcg_temp_free_ptr(g);
2941     tcg_temp_free_i32(t);
2942     return true;
2943 }
2944
2945 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2946                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2947 {
2948     if (!sve_access_check(s)) {
2949         return true;
2950     }
2951
2952     unsigned vsz = pred_full_reg_size(s);
2953
2954     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2955     TCGv_ptr d = tcg_temp_new_ptr();
2956     TCGv_ptr n = tcg_temp_new_ptr();
2957     TCGv_ptr g = tcg_temp_new_ptr();
2958     TCGv_i32 t = tcg_const_i32(vsz - 2);
2959
2960     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2961     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2962     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2963
2964     if (a->s) {
2965         fn_s(t, d, n, g, t);
2966         do_pred_flags(t);
2967     } else {
2968         fn(d, n, g, t);
2969     }
2970     tcg_temp_free_ptr(d);
2971     tcg_temp_free_ptr(n);
2972     tcg_temp_free_ptr(g);
2973     tcg_temp_free_i32(t);
2974     return true;
2975 }
2976
2977 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2978 {
2979     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2980 }
2981
2982 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2983 {
2984     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2985 }
2986
2987 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2988 {
2989     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2990 }
2991
2992 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2993 {
2994     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2995 }
2996
2997 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2998 {
2999     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3000 }
3001
3002 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
3003 {
3004     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3005 }
3006
3007 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3008 {
3009     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3010 }
3011
3012 /*
3013  *** SVE Predicate Count Group
3014  */
3015
3016 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3017 {
3018     unsigned psz = pred_full_reg_size(s);
3019
3020     if (psz <= 8) {
3021         uint64_t psz_mask;
3022
3023         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3024         if (pn != pg) {
3025             TCGv_i64 g = tcg_temp_new_i64();
3026             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3027             tcg_gen_and_i64(val, val, g);
3028             tcg_temp_free_i64(g);
3029         }
3030
3031         /* Reduce the pred_esz_masks value simply to reduce the
3032          * size of the code generated here.
3033          */
3034         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3035         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3036
3037         tcg_gen_ctpop_i64(val, val);
3038     } else {
3039         TCGv_ptr t_pn = tcg_temp_new_ptr();
3040         TCGv_ptr t_pg = tcg_temp_new_ptr();
3041         unsigned desc;
3042         TCGv_i32 t_desc;
3043
3044         desc = psz - 2;
3045         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3046
3047         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3048         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3049         t_desc = tcg_const_i32(desc);
3050
3051         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3052         tcg_temp_free_ptr(t_pn);
3053         tcg_temp_free_ptr(t_pg);
3054         tcg_temp_free_i32(t_desc);
3055     }
3056 }
3057
3058 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3059 {
3060     if (sve_access_check(s)) {
3061         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3062     }
3063     return true;
3064 }
3065
3066 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3067 {
3068     if (sve_access_check(s)) {
3069         TCGv_i64 reg = cpu_reg(s, a->rd);
3070         TCGv_i64 val = tcg_temp_new_i64();
3071
3072         do_cntp(s, val, a->esz, a->pg, a->pg);
3073         if (a->d) {
3074             tcg_gen_sub_i64(reg, reg, val);
3075         } else {
3076             tcg_gen_add_i64(reg, reg, val);
3077         }
3078         tcg_temp_free_i64(val);
3079     }
3080     return true;
3081 }
3082
3083 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3084 {
3085     if (a->esz == 0) {
3086         return false;
3087     }
3088     if (sve_access_check(s)) {
3089         unsigned vsz = vec_full_reg_size(s);
3090         TCGv_i64 val = tcg_temp_new_i64();
3091         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3092
3093         do_cntp(s, val, a->esz, a->pg, a->pg);
3094         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3095                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3096     }
3097     return true;
3098 }
3099
3100 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3101 {
3102     if (sve_access_check(s)) {
3103         TCGv_i64 reg = cpu_reg(s, a->rd);
3104         TCGv_i64 val = tcg_temp_new_i64();
3105
3106         do_cntp(s, val, a->esz, a->pg, a->pg);
3107         do_sat_addsub_32(reg, val, a->u, a->d);
3108     }
3109     return true;
3110 }
3111
3112 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3113 {
3114     if (sve_access_check(s)) {
3115         TCGv_i64 reg = cpu_reg(s, a->rd);
3116         TCGv_i64 val = tcg_temp_new_i64();
3117
3118         do_cntp(s, val, a->esz, a->pg, a->pg);
3119         do_sat_addsub_64(reg, val, a->u, a->d);
3120     }
3121     return true;
3122 }
3123
3124 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3125 {
3126     if (a->esz == 0) {
3127         return false;
3128     }
3129     if (sve_access_check(s)) {
3130         TCGv_i64 val = tcg_temp_new_i64();
3131         do_cntp(s, val, a->esz, a->pg, a->pg);
3132         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3133     }
3134     return true;
3135 }
3136
3137 /*
3138  *** SVE Integer Compare Scalars Group
3139  */
3140
3141 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3142 {
3143     if (!sve_access_check(s)) {
3144         return true;
3145     }
3146
3147     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3148     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3149     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3150     TCGv_i64 cmp = tcg_temp_new_i64();
3151
3152     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3153     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3154     tcg_temp_free_i64(cmp);
3155
3156     /* VF = !NF & !CF.  */
3157     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3158     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3159
3160     /* Both NF and VF actually look at bit 31.  */
3161     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3162     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3163     return true;
3164 }
3165
3166 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3167 {
3168     TCGv_i64 op0, op1, t0, t1, tmax;
3169     TCGv_i32 t2, t3;
3170     TCGv_ptr ptr;
3171     unsigned desc, vsz = vec_full_reg_size(s);
3172     TCGCond cond;
3173
3174     if (!sve_access_check(s)) {
3175         return true;
3176     }
3177
3178     op0 = read_cpu_reg(s, a->rn, 1);
3179     op1 = read_cpu_reg(s, a->rm, 1);
3180
3181     if (!a->sf) {
3182         if (a->u) {
3183             tcg_gen_ext32u_i64(op0, op0);
3184             tcg_gen_ext32u_i64(op1, op1);
3185         } else {
3186             tcg_gen_ext32s_i64(op0, op0);
3187             tcg_gen_ext32s_i64(op1, op1);
3188         }
3189     }
3190
3191     /* For the helper, compress the different conditions into a computation
3192      * of how many iterations for which the condition is true.
3193      */
3194     t0 = tcg_temp_new_i64();
3195     t1 = tcg_temp_new_i64();
3196     tcg_gen_sub_i64(t0, op1, op0);
3197
3198     tmax = tcg_const_i64(vsz >> a->esz);
3199     if (a->eq) {
3200         /* Equality means one more iteration.  */
3201         tcg_gen_addi_i64(t0, t0, 1);
3202
3203         /* If op1 is max (un)signed integer (and the only time the addition
3204          * above could overflow), then we produce an all-true predicate by
3205          * setting the count to the vector length.  This is because the
3206          * pseudocode is described as an increment + compare loop, and the
3207          * max integer would always compare true.
3208          */
3209         tcg_gen_movi_i64(t1, (a->sf
3210                               ? (a->u ? UINT64_MAX : INT64_MAX)
3211                               : (a->u ? UINT32_MAX : INT32_MAX)));
3212         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3213     }
3214
3215     /* Bound to the maximum.  */
3216     tcg_gen_umin_i64(t0, t0, tmax);
3217     tcg_temp_free_i64(tmax);
3218
3219     /* Set the count to zero if the condition is false.  */
3220     cond = (a->u
3221             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3222             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3223     tcg_gen_movi_i64(t1, 0);
3224     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3225     tcg_temp_free_i64(t1);
3226
3227     /* Since we're bounded, pass as a 32-bit type.  */
3228     t2 = tcg_temp_new_i32();
3229     tcg_gen_extrl_i64_i32(t2, t0);
3230     tcg_temp_free_i64(t0);
3231
3232     /* Scale elements to bits.  */
3233     tcg_gen_shli_i32(t2, t2, a->esz);
3234
3235     desc = (vsz / 8) - 2;
3236     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3237     t3 = tcg_const_i32(desc);
3238
3239     ptr = tcg_temp_new_ptr();
3240     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3241
3242     gen_helper_sve_while(t2, ptr, t2, t3);
3243     do_pred_flags(t2);
3244
3245     tcg_temp_free_ptr(ptr);
3246     tcg_temp_free_i32(t2);
3247     tcg_temp_free_i32(t3);
3248     return true;
3249 }
3250
3251 /*
3252  *** SVE Integer Wide Immediate - Unpredicated Group
3253  */
3254
3255 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3256 {
3257     if (a->esz == 0) {
3258         return false;
3259     }
3260     if (sve_access_check(s)) {
3261         unsigned vsz = vec_full_reg_size(s);
3262         int dofs = vec_full_reg_offset(s, a->rd);
3263         uint64_t imm;
3264
3265         /* Decode the VFP immediate.  */
3266         imm = vfp_expand_imm(a->esz, a->imm);
3267         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3268     }
3269     return true;
3270 }
3271
3272 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3273 {
3274     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3275         return false;
3276     }
3277     if (sve_access_check(s)) {
3278         unsigned vsz = vec_full_reg_size(s);
3279         int dofs = vec_full_reg_offset(s, a->rd);
3280
3281         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3282     }
3283     return true;
3284 }
3285
3286 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3287 {
3288     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3289         return false;
3290     }
3291     if (sve_access_check(s)) {
3292         unsigned vsz = vec_full_reg_size(s);
3293         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3294                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3295     }
3296     return true;
3297 }
3298
3299 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3300 {
3301     a->imm = -a->imm;
3302     return trans_ADD_zzi(s, a);
3303 }
3304
3305 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3306 {
3307     static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3308     static const GVecGen2s op[4] = {
3309         { .fni8 = tcg_gen_vec_sub8_i64,
3310           .fniv = tcg_gen_sub_vec,
3311           .fno = gen_helper_sve_subri_b,
3312           .opt_opc = vecop_list,
3313           .vece = MO_8,
3314           .scalar_first = true },
3315         { .fni8 = tcg_gen_vec_sub16_i64,
3316           .fniv = tcg_gen_sub_vec,
3317           .fno = gen_helper_sve_subri_h,
3318           .opt_opc = vecop_list,
3319           .vece = MO_16,
3320           .scalar_first = true },
3321         { .fni4 = tcg_gen_sub_i32,
3322           .fniv = tcg_gen_sub_vec,
3323           .fno = gen_helper_sve_subri_s,
3324           .opt_opc = vecop_list,
3325           .vece = MO_32,
3326           .scalar_first = true },
3327         { .fni8 = tcg_gen_sub_i64,
3328           .fniv = tcg_gen_sub_vec,
3329           .fno = gen_helper_sve_subri_d,
3330           .opt_opc = vecop_list,
3331           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332           .vece = MO_64,
3333           .scalar_first = true }
3334     };
3335
3336     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3337         return false;
3338     }
3339     if (sve_access_check(s)) {
3340         unsigned vsz = vec_full_reg_size(s);
3341         TCGv_i64 c = tcg_const_i64(a->imm);
3342         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343                         vec_full_reg_offset(s, a->rn),
3344                         vsz, vsz, c, &op[a->esz]);
3345         tcg_temp_free_i64(c);
3346     }
3347     return true;
3348 }
3349
3350 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3351 {
3352     if (sve_access_check(s)) {
3353         unsigned vsz = vec_full_reg_size(s);
3354         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356     }
3357     return true;
3358 }
3359
3360 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3361 {
3362     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3363         return false;
3364     }
3365     if (sve_access_check(s)) {
3366         TCGv_i64 val = tcg_const_i64(a->imm);
3367         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3368         tcg_temp_free_i64(val);
3369     }
3370     return true;
3371 }
3372
3373 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3374 {
3375     return do_zzi_sat(s, a, false, false);
3376 }
3377
3378 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3379 {
3380     return do_zzi_sat(s, a, true, false);
3381 }
3382
3383 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3384 {
3385     return do_zzi_sat(s, a, false, true);
3386 }
3387
3388 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3389 {
3390     return do_zzi_sat(s, a, true, true);
3391 }
3392
3393 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3394 {
3395     if (sve_access_check(s)) {
3396         unsigned vsz = vec_full_reg_size(s);
3397         TCGv_i64 c = tcg_const_i64(a->imm);
3398
3399         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3400                             vec_full_reg_offset(s, a->rn),
3401                             c, vsz, vsz, 0, fn);
3402         tcg_temp_free_i64(c);
3403     }
3404     return true;
3405 }
3406
3407 #define DO_ZZI(NAME, name) \
3408 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3409 {                                                                       \
3410     static gen_helper_gvec_2i * const fns[4] = {                        \
3411         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3412         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3413     };                                                                  \
3414     return do_zzi_ool(s, a, fns[a->esz]);                               \
3415 }
3416
3417 DO_ZZI(SMAX, smax)
3418 DO_ZZI(UMAX, umax)
3419 DO_ZZI(SMIN, smin)
3420 DO_ZZI(UMIN, umin)
3421
3422 #undef DO_ZZI
3423
3424 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3425 {
3426     static gen_helper_gvec_3 * const fns[2][2] = {
3427         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3428         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3429     };
3430
3431     if (sve_access_check(s)) {
3432         unsigned vsz = vec_full_reg_size(s);
3433         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3434                            vec_full_reg_offset(s, a->rn),
3435                            vec_full_reg_offset(s, a->rm),
3436                            vsz, vsz, 0, fns[a->u][a->sz]);
3437     }
3438     return true;
3439 }
3440
3441 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3442 {
3443     static gen_helper_gvec_3 * const fns[2][2] = {
3444         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3445         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3446     };
3447
3448     if (sve_access_check(s)) {
3449         unsigned vsz = vec_full_reg_size(s);
3450         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3451                            vec_full_reg_offset(s, a->rn),
3452                            vec_full_reg_offset(s, a->rm),
3453                            vsz, vsz, a->index, fns[a->u][a->sz]);
3454     }
3455     return true;
3456 }
3457
3458
3459 /*
3460  *** SVE Floating Point Multiply-Add Indexed Group
3461  */
3462
3463 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3464 {
3465     static gen_helper_gvec_4_ptr * const fns[3] = {
3466         gen_helper_gvec_fmla_idx_h,
3467         gen_helper_gvec_fmla_idx_s,
3468         gen_helper_gvec_fmla_idx_d,
3469     };
3470
3471     if (sve_access_check(s)) {
3472         unsigned vsz = vec_full_reg_size(s);
3473         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3474         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3475                            vec_full_reg_offset(s, a->rn),
3476                            vec_full_reg_offset(s, a->rm),
3477                            vec_full_reg_offset(s, a->ra),
3478                            status, vsz, vsz, (a->index << 1) | a->sub,
3479                            fns[a->esz - 1]);
3480         tcg_temp_free_ptr(status);
3481     }
3482     return true;
3483 }
3484
3485 /*
3486  *** SVE Floating Point Multiply Indexed Group
3487  */
3488
3489 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3490 {
3491     static gen_helper_gvec_3_ptr * const fns[3] = {
3492         gen_helper_gvec_fmul_idx_h,
3493         gen_helper_gvec_fmul_idx_s,
3494         gen_helper_gvec_fmul_idx_d,
3495     };
3496
3497     if (sve_access_check(s)) {
3498         unsigned vsz = vec_full_reg_size(s);
3499         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3500         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3501                            vec_full_reg_offset(s, a->rn),
3502                            vec_full_reg_offset(s, a->rm),
3503                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3504         tcg_temp_free_ptr(status);
3505     }
3506     return true;
3507 }
3508
3509 /*
3510  *** SVE Floating Point Fast Reduction Group
3511  */
3512
3513 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3514                                   TCGv_ptr, TCGv_i32);
3515
3516 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3517                       gen_helper_fp_reduce *fn)
3518 {
3519     unsigned vsz = vec_full_reg_size(s);
3520     unsigned p2vsz = pow2ceil(vsz);
3521     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3522     TCGv_ptr t_zn, t_pg, status;
3523     TCGv_i64 temp;
3524
3525     temp = tcg_temp_new_i64();
3526     t_zn = tcg_temp_new_ptr();
3527     t_pg = tcg_temp_new_ptr();
3528
3529     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3530     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3531     status = get_fpstatus_ptr(a->esz == MO_16);
3532
3533     fn(temp, t_zn, t_pg, status, t_desc);
3534     tcg_temp_free_ptr(t_zn);
3535     tcg_temp_free_ptr(t_pg);
3536     tcg_temp_free_ptr(status);
3537     tcg_temp_free_i32(t_desc);
3538
3539     write_fp_dreg(s, a->rd, temp);
3540     tcg_temp_free_i64(temp);
3541 }
3542
3543 #define DO_VPZ(NAME, name) \
3544 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3545 {                                                                        \
3546     static gen_helper_fp_reduce * const fns[3] = {                       \
3547         gen_helper_sve_##name##_h,                                       \
3548         gen_helper_sve_##name##_s,                                       \
3549         gen_helper_sve_##name##_d,                                       \
3550     };                                                                   \
3551     if (a->esz == 0) {                                                   \
3552         return false;                                                    \
3553     }                                                                    \
3554     if (sve_access_check(s)) {                                           \
3555         do_reduce(s, a, fns[a->esz - 1]);                                \
3556     }                                                                    \
3557     return true;                                                         \
3558 }
3559
3560 DO_VPZ(FADDV, faddv)
3561 DO_VPZ(FMINNMV, fminnmv)
3562 DO_VPZ(FMAXNMV, fmaxnmv)
3563 DO_VPZ(FMINV, fminv)
3564 DO_VPZ(FMAXV, fmaxv)
3565
3566 /*
3567  *** SVE Floating Point Unary Operations - Unpredicated Group
3568  */
3569
3570 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3571 {
3572     unsigned vsz = vec_full_reg_size(s);
3573     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3574
3575     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3576                        vec_full_reg_offset(s, a->rn),
3577                        status, vsz, vsz, 0, fn);
3578     tcg_temp_free_ptr(status);
3579 }
3580
3581 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3582 {
3583     static gen_helper_gvec_2_ptr * const fns[3] = {
3584         gen_helper_gvec_frecpe_h,
3585         gen_helper_gvec_frecpe_s,
3586         gen_helper_gvec_frecpe_d,
3587     };
3588     if (a->esz == 0) {
3589         return false;
3590     }
3591     if (sve_access_check(s)) {
3592         do_zz_fp(s, a, fns[a->esz - 1]);
3593     }
3594     return true;
3595 }
3596
3597 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3598 {
3599     static gen_helper_gvec_2_ptr * const fns[3] = {
3600         gen_helper_gvec_frsqrte_h,
3601         gen_helper_gvec_frsqrte_s,
3602         gen_helper_gvec_frsqrte_d,
3603     };
3604     if (a->esz == 0) {
3605         return false;
3606     }
3607     if (sve_access_check(s)) {
3608         do_zz_fp(s, a, fns[a->esz - 1]);
3609     }
3610     return true;
3611 }
3612
3613 /*
3614  *** SVE Floating Point Compare with Zero Group
3615  */
3616
3617 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3618                       gen_helper_gvec_3_ptr *fn)
3619 {
3620     unsigned vsz = vec_full_reg_size(s);
3621     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3622
3623     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3624                        vec_full_reg_offset(s, a->rn),
3625                        pred_full_reg_offset(s, a->pg),
3626                        status, vsz, vsz, 0, fn);
3627     tcg_temp_free_ptr(status);
3628 }
3629
3630 #define DO_PPZ(NAME, name) \
3631 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3632 {                                                                 \
3633     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3634         gen_helper_sve_##name##_h,                                \
3635         gen_helper_sve_##name##_s,                                \
3636         gen_helper_sve_##name##_d,                                \
3637     };                                                            \
3638     if (a->esz == 0) {                                            \
3639         return false;                                             \
3640     }                                                             \
3641     if (sve_access_check(s)) {                                    \
3642         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3643     }                                                             \
3644     return true;                                                  \
3645 }
3646
3647 DO_PPZ(FCMGE_ppz0, fcmge0)
3648 DO_PPZ(FCMGT_ppz0, fcmgt0)
3649 DO_PPZ(FCMLE_ppz0, fcmle0)
3650 DO_PPZ(FCMLT_ppz0, fcmlt0)
3651 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3652 DO_PPZ(FCMNE_ppz0, fcmne0)
3653
3654 #undef DO_PPZ
3655
3656 /*
3657  *** SVE floating-point trig multiply-add coefficient
3658  */
3659
3660 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3661 {
3662     static gen_helper_gvec_3_ptr * const fns[3] = {
3663         gen_helper_sve_ftmad_h,
3664         gen_helper_sve_ftmad_s,
3665         gen_helper_sve_ftmad_d,
3666     };
3667
3668     if (a->esz == 0) {
3669         return false;
3670     }
3671     if (sve_access_check(s)) {
3672         unsigned vsz = vec_full_reg_size(s);
3673         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3674         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3675                            vec_full_reg_offset(s, a->rn),
3676                            vec_full_reg_offset(s, a->rm),
3677                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3678         tcg_temp_free_ptr(status);
3679     }
3680     return true;
3681 }
3682
3683 /*
3684  *** SVE Floating Point Accumulating Reduction Group
3685  */
3686
3687 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3688 {
3689     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3690                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3691     static fadda_fn * const fns[3] = {
3692         gen_helper_sve_fadda_h,
3693         gen_helper_sve_fadda_s,
3694         gen_helper_sve_fadda_d,
3695     };
3696     unsigned vsz = vec_full_reg_size(s);
3697     TCGv_ptr t_rm, t_pg, t_fpst;
3698     TCGv_i64 t_val;
3699     TCGv_i32 t_desc;
3700
3701     if (a->esz == 0) {
3702         return false;
3703     }
3704     if (!sve_access_check(s)) {
3705         return true;
3706     }
3707
3708     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3709     t_rm = tcg_temp_new_ptr();
3710     t_pg = tcg_temp_new_ptr();
3711     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3712     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3713     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3714     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3715
3716     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3717
3718     tcg_temp_free_i32(t_desc);
3719     tcg_temp_free_ptr(t_fpst);
3720     tcg_temp_free_ptr(t_pg);
3721     tcg_temp_free_ptr(t_rm);
3722
3723     write_fp_dreg(s, a->rd, t_val);
3724     tcg_temp_free_i64(t_val);
3725     return true;
3726 }
3727
3728 /*
3729  *** SVE Floating Point Arithmetic - Unpredicated Group
3730  */
3731
3732 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3733                       gen_helper_gvec_3_ptr *fn)
3734 {
3735     if (fn == NULL) {
3736         return false;
3737     }
3738     if (sve_access_check(s)) {
3739         unsigned vsz = vec_full_reg_size(s);
3740         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3741         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3742                            vec_full_reg_offset(s, a->rn),
3743                            vec_full_reg_offset(s, a->rm),
3744                            status, vsz, vsz, 0, fn);
3745         tcg_temp_free_ptr(status);
3746     }
3747     return true;
3748 }
3749
3750
3751 #define DO_FP3(NAME, name) \
3752 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3753 {                                                                   \
3754     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3755         NULL, gen_helper_gvec_##name##_h,                           \
3756         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3757     };                                                              \
3758     return do_zzz_fp(s, a, fns[a->esz]);                            \
3759 }
3760
3761 DO_FP3(FADD_zzz, fadd)
3762 DO_FP3(FSUB_zzz, fsub)
3763 DO_FP3(FMUL_zzz, fmul)
3764 DO_FP3(FTSMUL, ftsmul)
3765 DO_FP3(FRECPS, recps)
3766 DO_FP3(FRSQRTS, rsqrts)
3767
3768 #undef DO_FP3
3769
3770 /*
3771  *** SVE Floating Point Arithmetic - Predicated Group
3772  */
3773
3774 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3775                        gen_helper_gvec_4_ptr *fn)
3776 {
3777     if (fn == NULL) {
3778         return false;
3779     }
3780     if (sve_access_check(s)) {
3781         unsigned vsz = vec_full_reg_size(s);
3782         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3783         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3784                            vec_full_reg_offset(s, a->rn),
3785                            vec_full_reg_offset(s, a->rm),
3786                            pred_full_reg_offset(s, a->pg),
3787                            status, vsz, vsz, 0, fn);
3788         tcg_temp_free_ptr(status);
3789     }
3790     return true;
3791 }
3792
3793 #define DO_FP3(NAME, name) \
3794 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3795 {                                                                   \
3796     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3797         NULL, gen_helper_sve_##name##_h,                            \
3798         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3799     };                                                              \
3800     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3801 }
3802
3803 DO_FP3(FADD_zpzz, fadd)
3804 DO_FP3(FSUB_zpzz, fsub)
3805 DO_FP3(FMUL_zpzz, fmul)
3806 DO_FP3(FMIN_zpzz, fmin)
3807 DO_FP3(FMAX_zpzz, fmax)
3808 DO_FP3(FMINNM_zpzz, fminnum)
3809 DO_FP3(FMAXNM_zpzz, fmaxnum)
3810 DO_FP3(FABD, fabd)
3811 DO_FP3(FSCALE, fscalbn)
3812 DO_FP3(FDIV, fdiv)
3813 DO_FP3(FMULX, fmulx)
3814
3815 #undef DO_FP3
3816
3817 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3818                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3819
3820 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3821                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3822 {
3823     unsigned vsz = vec_full_reg_size(s);
3824     TCGv_ptr t_zd, t_zn, t_pg, status;
3825     TCGv_i32 desc;
3826
3827     t_zd = tcg_temp_new_ptr();
3828     t_zn = tcg_temp_new_ptr();
3829     t_pg = tcg_temp_new_ptr();
3830     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3831     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3832     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3833
3834     status = get_fpstatus_ptr(is_fp16);
3835     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3836     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3837
3838     tcg_temp_free_i32(desc);
3839     tcg_temp_free_ptr(status);
3840     tcg_temp_free_ptr(t_pg);
3841     tcg_temp_free_ptr(t_zn);
3842     tcg_temp_free_ptr(t_zd);
3843 }
3844
3845 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3846                       gen_helper_sve_fp2scalar *fn)
3847 {
3848     TCGv_i64 temp = tcg_const_i64(imm);
3849     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3850     tcg_temp_free_i64(temp);
3851 }
3852
3853 #define DO_FP_IMM(NAME, name, const0, const1) \
3854 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3855 {                                                                         \
3856     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3857         gen_helper_sve_##name##_h,                                        \
3858         gen_helper_sve_##name##_s,                                        \
3859         gen_helper_sve_##name##_d                                         \
3860     };                                                                    \
3861     static uint64_t const val[3][2] = {                                   \
3862         { float16_##const0, float16_##const1 },                           \
3863         { float32_##const0, float32_##const1 },                           \
3864         { float64_##const0, float64_##const1 },                           \
3865     };                                                                    \
3866     if (a->esz == 0) {                                                    \
3867         return false;                                                     \
3868     }                                                                     \
3869     if (sve_access_check(s)) {                                            \
3870         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3871     }                                                                     \
3872     return true;                                                          \
3873 }
3874
3875 #define float16_two  make_float16(0x4000)
3876 #define float32_two  make_float32(0x40000000)
3877 #define float64_two  make_float64(0x4000000000000000ULL)
3878
3879 DO_FP_IMM(FADD, fadds, half, one)
3880 DO_FP_IMM(FSUB, fsubs, half, one)
3881 DO_FP_IMM(FMUL, fmuls, half, two)
3882 DO_FP_IMM(FSUBR, fsubrs, half, one)
3883 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3884 DO_FP_IMM(FMINNM, fminnms, zero, one)
3885 DO_FP_IMM(FMAX, fmaxs, zero, one)
3886 DO_FP_IMM(FMIN, fmins, zero, one)
3887
3888 #undef DO_FP_IMM
3889
3890 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3891                       gen_helper_gvec_4_ptr *fn)
3892 {
3893     if (fn == NULL) {
3894         return false;
3895     }
3896     if (sve_access_check(s)) {
3897         unsigned vsz = vec_full_reg_size(s);
3898         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3899         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3900                            vec_full_reg_offset(s, a->rn),
3901                            vec_full_reg_offset(s, a->rm),
3902                            pred_full_reg_offset(s, a->pg),
3903                            status, vsz, vsz, 0, fn);
3904         tcg_temp_free_ptr(status);
3905     }
3906     return true;
3907 }
3908
3909 #define DO_FPCMP(NAME, name) \
3910 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3911 {                                                                     \
3912     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3913         NULL, gen_helper_sve_##name##_h,                              \
3914         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3915     };                                                                \
3916     return do_fp_cmp(s, a, fns[a->esz]);                              \
3917 }
3918
3919 DO_FPCMP(FCMGE, fcmge)
3920 DO_FPCMP(FCMGT, fcmgt)
3921 DO_FPCMP(FCMEQ, fcmeq)
3922 DO_FPCMP(FCMNE, fcmne)
3923 DO_FPCMP(FCMUO, fcmuo)
3924 DO_FPCMP(FACGE, facge)
3925 DO_FPCMP(FACGT, facgt)
3926
3927 #undef DO_FPCMP
3928
3929 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3930 {
3931     static gen_helper_gvec_4_ptr * const fns[3] = {
3932         gen_helper_sve_fcadd_h,
3933         gen_helper_sve_fcadd_s,
3934         gen_helper_sve_fcadd_d
3935     };
3936
3937     if (a->esz == 0) {
3938         return false;
3939     }
3940     if (sve_access_check(s)) {
3941         unsigned vsz = vec_full_reg_size(s);
3942         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3943         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3944                            vec_full_reg_offset(s, a->rn),
3945                            vec_full_reg_offset(s, a->rm),
3946                            pred_full_reg_offset(s, a->pg),
3947                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3948         tcg_temp_free_ptr(status);
3949     }
3950     return true;
3951 }
3952
3953 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3954                     gen_helper_gvec_5_ptr *fn)
3955 {
3956     if (a->esz == 0) {
3957         return false;
3958     }
3959     if (sve_access_check(s)) {
3960         unsigned vsz = vec_full_reg_size(s);
3961         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3962         tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3963                            vec_full_reg_offset(s, a->rn),
3964                            vec_full_reg_offset(s, a->rm),
3965                            vec_full_reg_offset(s, a->ra),
3966                            pred_full_reg_offset(s, a->pg),
3967                            status, vsz, vsz, 0, fn);
3968         tcg_temp_free_ptr(status);
3969     }
3970     return true;
3971 }
3972
3973 #define DO_FMLA(NAME, name) \
3974 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3975 {                                                                    \
3976     static gen_helper_gvec_5_ptr * const fns[4] = {                  \
3977         NULL, gen_helper_sve_##name##_h,                             \
3978         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3979     };                                                               \
3980     return do_fmla(s, a, fns[a->esz]);                               \
3981 }
3982
3983 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3984 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3985 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3986 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3987
3988 #undef DO_FMLA
3989
3990 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3991 {
3992     static gen_helper_gvec_5_ptr * const fns[4] = {
3993         NULL,
3994         gen_helper_sve_fcmla_zpzzz_h,
3995         gen_helper_sve_fcmla_zpzzz_s,
3996         gen_helper_sve_fcmla_zpzzz_d,
3997     };
3998
3999     if (a->esz == 0) {
4000         return false;
4001     }
4002     if (sve_access_check(s)) {
4003         unsigned vsz = vec_full_reg_size(s);
4004         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4005         tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4006                            vec_full_reg_offset(s, a->rn),
4007                            vec_full_reg_offset(s, a->rm),
4008                            vec_full_reg_offset(s, a->ra),
4009                            pred_full_reg_offset(s, a->pg),
4010                            status, vsz, vsz, a->rot, fns[a->esz]);
4011         tcg_temp_free_ptr(status);
4012     }
4013     return true;
4014 }
4015
4016 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4017 {
4018     static gen_helper_gvec_3_ptr * const fns[2] = {
4019         gen_helper_gvec_fcmlah_idx,
4020         gen_helper_gvec_fcmlas_idx,
4021     };
4022
4023     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4024     tcg_debug_assert(a->rd == a->ra);
4025     if (sve_access_check(s)) {
4026         unsigned vsz = vec_full_reg_size(s);
4027         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4028         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4029                            vec_full_reg_offset(s, a->rn),
4030                            vec_full_reg_offset(s, a->rm),
4031                            status, vsz, vsz,
4032                            a->index * 4 + a->rot,
4033                            fns[a->esz - 1]);
4034         tcg_temp_free_ptr(status);
4035     }
4036     return true;
4037 }
4038
4039 /*
4040  *** SVE Floating Point Unary Operations Predicated Group
4041  */
4042
4043 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4044                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4045 {
4046     if (sve_access_check(s)) {
4047         unsigned vsz = vec_full_reg_size(s);
4048         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4049         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4050                            vec_full_reg_offset(s, rn),
4051                            pred_full_reg_offset(s, pg),
4052                            status, vsz, vsz, 0, fn);
4053         tcg_temp_free_ptr(status);
4054     }
4055     return true;
4056 }
4057
4058 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4059 {
4060     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4061 }
4062
4063 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4064 {
4065     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4066 }
4067
4068 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4069 {
4070     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4071 }
4072
4073 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4074 {
4075     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4076 }
4077
4078 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4079 {
4080     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4081 }
4082
4083 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4084 {
4085     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4086 }
4087
4088 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4089 {
4090     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4091 }
4092
4093 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4094 {
4095     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4096 }
4097
4098 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4099 {
4100     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4101 }
4102
4103 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4104 {
4105     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4106 }
4107
4108 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4109 {
4110     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4111 }
4112
4113 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4114 {
4115     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4116 }
4117
4118 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4119 {
4120     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4121 }
4122
4123 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4124 {
4125     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4126 }
4127
4128 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4129 {
4130     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4131 }
4132
4133 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4134 {
4135     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4136 }
4137
4138 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4139 {
4140     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4141 }
4142
4143 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4144 {
4145     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4146 }
4147
4148 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4149 {
4150     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4151 }
4152
4153 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4154 {
4155     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4156 }
4157
4158 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4159     gen_helper_sve_frint_h,
4160     gen_helper_sve_frint_s,
4161     gen_helper_sve_frint_d
4162 };
4163
4164 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4165 {
4166     if (a->esz == 0) {
4167         return false;
4168     }
4169     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4170                       frint_fns[a->esz - 1]);
4171 }
4172
4173 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4174 {
4175     static gen_helper_gvec_3_ptr * const fns[3] = {
4176         gen_helper_sve_frintx_h,
4177         gen_helper_sve_frintx_s,
4178         gen_helper_sve_frintx_d
4179     };
4180     if (a->esz == 0) {
4181         return false;
4182     }
4183     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4184 }
4185
4186 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4187 {
4188     if (a->esz == 0) {
4189         return false;
4190     }
4191     if (sve_access_check(s)) {
4192         unsigned vsz = vec_full_reg_size(s);
4193         TCGv_i32 tmode = tcg_const_i32(mode);
4194         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4195
4196         gen_helper_set_rmode(tmode, tmode, status);
4197
4198         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4199                            vec_full_reg_offset(s, a->rn),
4200                            pred_full_reg_offset(s, a->pg),
4201                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4202
4203         gen_helper_set_rmode(tmode, tmode, status);
4204         tcg_temp_free_i32(tmode);
4205         tcg_temp_free_ptr(status);
4206     }
4207     return true;
4208 }
4209
4210 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4211 {
4212     return do_frint_mode(s, a, float_round_nearest_even);
4213 }
4214
4215 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4216 {
4217     return do_frint_mode(s, a, float_round_up);
4218 }
4219
4220 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4221 {
4222     return do_frint_mode(s, a, float_round_down);
4223 }
4224
4225 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4226 {
4227     return do_frint_mode(s, a, float_round_to_zero);
4228 }
4229
4230 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4231 {
4232     return do_frint_mode(s, a, float_round_ties_away);
4233 }
4234
4235 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4236 {
4237     static gen_helper_gvec_3_ptr * const fns[3] = {
4238         gen_helper_sve_frecpx_h,
4239         gen_helper_sve_frecpx_s,
4240         gen_helper_sve_frecpx_d
4241     };
4242     if (a->esz == 0) {
4243         return false;
4244     }
4245     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4246 }
4247
4248 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4249 {
4250     static gen_helper_gvec_3_ptr * const fns[3] = {
4251         gen_helper_sve_fsqrt_h,
4252         gen_helper_sve_fsqrt_s,
4253         gen_helper_sve_fsqrt_d
4254     };
4255     if (a->esz == 0) {
4256         return false;
4257     }
4258     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4259 }
4260
4261 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4262 {
4263     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4264 }
4265
4266 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4267 {
4268     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4269 }
4270
4271 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4272 {
4273     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4274 }
4275
4276 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4277 {
4278     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4279 }
4280
4281 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4282 {
4283     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4284 }
4285
4286 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4287 {
4288     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4289 }
4290
4291 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4292 {
4293     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4294 }
4295
4296 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4297 {
4298     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4299 }
4300
4301 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4302 {
4303     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4304 }
4305
4306 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4307 {
4308     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4309 }
4310
4311 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4312 {
4313     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4314 }
4315
4316 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4317 {
4318     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4319 }
4320
4321 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4322 {
4323     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4324 }
4325
4326 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4327 {
4328     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4329 }
4330
4331 /*
4332  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4333  */
4334
4335 /* Subroutine loading a vector register at VOFS of LEN bytes.
4336  * The load should begin at the address Rn + IMM.
4337  */
4338
4339 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4340 {
4341     int len_align = QEMU_ALIGN_DOWN(len, 8);
4342     int len_remain = len % 8;
4343     int nparts = len / 8 + ctpop8(len_remain);
4344     int midx = get_mem_index(s);
4345     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4346
4347     dirty_addr = tcg_temp_new_i64();
4348     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4349     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4350     tcg_temp_free_i64(dirty_addr);
4351
4352     /*
4353      * Note that unpredicated load/store of vector/predicate registers
4354      * are defined as a stream of bytes, which equates to little-endian
4355      * operations on larger quantities.
4356      * Attempt to keep code expansion to a minimum by limiting the
4357      * amount of unrolling done.
4358      */
4359     if (nparts <= 4) {
4360         int i;
4361
4362         t0 = tcg_temp_new_i64();
4363         for (i = 0; i < len_align; i += 8) {
4364             tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4365             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4366             tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8);
4367         }
4368         tcg_temp_free_i64(t0);
4369     } else {
4370         TCGLabel *loop = gen_new_label();
4371         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4372
4373         /* Copy the clean address into a local temp, live across the loop. */
4374         t0 = clean_addr;
4375         clean_addr = tcg_temp_local_new_i64();
4376         tcg_gen_mov_i64(clean_addr, t0);
4377         tcg_temp_free_i64(t0);
4378
4379         gen_set_label(loop);
4380
4381         t0 = tcg_temp_new_i64();
4382         tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4383         tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4384
4385         tp = tcg_temp_new_ptr();
4386         tcg_gen_add_ptr(tp, cpu_env, i);
4387         tcg_gen_addi_ptr(i, i, 8);
4388         tcg_gen_st_i64(t0, tp, vofs);
4389         tcg_temp_free_ptr(tp);
4390         tcg_temp_free_i64(t0);
4391
4392         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4393         tcg_temp_free_ptr(i);
4394     }
4395
4396     /*
4397      * Predicate register loads can be any multiple of 2.
4398      * Note that we still store the entire 64-bit unit into cpu_env.
4399      */
4400     if (len_remain) {
4401         t0 = tcg_temp_new_i64();
4402         switch (len_remain) {
4403         case 2:
4404         case 4:
4405         case 8:
4406             tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4407                                 MO_LE | ctz32(len_remain));
4408             break;
4409
4410         case 6:
4411             t1 = tcg_temp_new_i64();
4412             tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4413             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4414             tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4415             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4416             tcg_temp_free_i64(t1);
4417             break;
4418
4419         default:
4420             g_assert_not_reached();
4421         }
4422         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4423         tcg_temp_free_i64(t0);
4424     }
4425     tcg_temp_free_i64(clean_addr);
4426 }
4427
4428 /* Similarly for stores.  */
4429 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4430 {
4431     int len_align = QEMU_ALIGN_DOWN(len, 8);
4432     int len_remain = len % 8;
4433     int nparts = len / 8 + ctpop8(len_remain);
4434     int midx = get_mem_index(s);
4435     TCGv_i64 dirty_addr, clean_addr, t0;
4436
4437     dirty_addr = tcg_temp_new_i64();
4438     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4439     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4440     tcg_temp_free_i64(dirty_addr);
4441
4442     /* Note that unpredicated load/store of vector/predicate registers
4443      * are defined as a stream of bytes, which equates to little-endian
4444      * operations on larger quantities.  There is no nice way to force
4445      * a little-endian store for aarch64_be-linux-user out of line.
4446      *
4447      * Attempt to keep code expansion to a minimum by limiting the
4448      * amount of unrolling done.
4449      */
4450     if (nparts <= 4) {
4451         int i;
4452
4453         t0 = tcg_temp_new_i64();
4454         for (i = 0; i < len_align; i += 8) {
4455             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4456             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4457             tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8);
4458         }
4459         tcg_temp_free_i64(t0);
4460     } else {
4461         TCGLabel *loop = gen_new_label();
4462         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4463
4464         /* Copy the clean address into a local temp, live across the loop. */
4465         t0 = clean_addr;
4466         clean_addr = tcg_temp_local_new_i64();
4467         tcg_gen_mov_i64(clean_addr, t0);
4468         tcg_temp_free_i64(t0);
4469
4470         gen_set_label(loop);
4471
4472         t0 = tcg_temp_new_i64();
4473         tp = tcg_temp_new_ptr();
4474         tcg_gen_add_ptr(tp, cpu_env, i);
4475         tcg_gen_ld_i64(t0, tp, vofs);
4476         tcg_gen_addi_ptr(i, i, 8);
4477         tcg_temp_free_ptr(tp);
4478
4479         tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4480         tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4481         tcg_temp_free_i64(t0);
4482
4483         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4484         tcg_temp_free_ptr(i);
4485     }
4486
4487     /* Predicate register stores can be any multiple of 2.  */
4488     if (len_remain) {
4489         t0 = tcg_temp_new_i64();
4490         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4491
4492         switch (len_remain) {
4493         case 2:
4494         case 4:
4495         case 8:
4496             tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4497                                 MO_LE | ctz32(len_remain));
4498             break;
4499
4500         case 6:
4501             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4502             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4503             tcg_gen_shri_i64(t0, t0, 32);
4504             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4505             break;
4506
4507         default:
4508             g_assert_not_reached();
4509         }
4510         tcg_temp_free_i64(t0);
4511     }
4512     tcg_temp_free_i64(clean_addr);
4513 }
4514
4515 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4516 {
4517     if (sve_access_check(s)) {
4518         int size = vec_full_reg_size(s);
4519         int off = vec_full_reg_offset(s, a->rd);
4520         do_ldr(s, off, size, a->rn, a->imm * size);
4521     }
4522     return true;
4523 }
4524
4525 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4526 {
4527     if (sve_access_check(s)) {
4528         int size = pred_full_reg_size(s);
4529         int off = pred_full_reg_offset(s, a->rd);
4530         do_ldr(s, off, size, a->rn, a->imm * size);
4531     }
4532     return true;
4533 }
4534
4535 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4536 {
4537     if (sve_access_check(s)) {
4538         int size = vec_full_reg_size(s);
4539         int off = vec_full_reg_offset(s, a->rd);
4540         do_str(s, off, size, a->rn, a->imm * size);
4541     }
4542     return true;
4543 }
4544
4545 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4546 {
4547     if (sve_access_check(s)) {
4548         int size = pred_full_reg_size(s);
4549         int off = pred_full_reg_offset(s, a->rd);
4550         do_str(s, off, size, a->rn, a->imm * size);
4551     }
4552     return true;
4553 }
4554
4555 /*
4556  *** SVE Memory - Contiguous Load Group
4557  */
4558
/*
 * The memory mode of the dtype.
 * Indexed by the dtype field of the load instructions (16 entries).
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The dtype_mop[] entry for dtype X, masked down to its MO_SIZE bits. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4568
/*
 * The vector element size of dtype.
 * Indexed like dtype_mop[] (16 entries).  trans_LD_zpri() uses the value
 * as a shift count on the vector size to get the number of elements.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4576
4577 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4578                        int dtype, gen_helper_gvec_mem *fn)
4579 {
4580     unsigned vsz = vec_full_reg_size(s);
4581     TCGv_ptr t_pg;
4582     TCGv_i32 t_desc;
4583     int desc;
4584
4585     /* For e.g. LD4, there are not enough arguments to pass all 4
4586      * registers as pointers, so encode the regno into the data field.
4587      * For consistency, do this even for LD1.
4588      */
4589     desc = simd_desc(vsz, vsz, zt);
4590     t_desc = tcg_const_i32(desc);
4591     t_pg = tcg_temp_new_ptr();
4592
4593     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4594     fn(cpu_env, t_pg, addr, t_desc);
4595
4596     tcg_temp_free_ptr(t_pg);
4597     tcg_temp_free_i32(t_desc);
4598 }
4599
4600 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4601                       TCGv_i64 addr, int dtype, int nreg)
4602 {
4603     static gen_helper_gvec_mem * const fns[2][16][4] = {
4604         /* Little-endian */
4605         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4606             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4607           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4608           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4609           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4610
4611           { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4612           { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4613             gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4614           { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4615           { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4616
4617           { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4618           { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4619           { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4620             gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4621           { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4622
4623           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4624           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4625           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4626           { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4627             gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4628
4629         /* Big-endian */
4630         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4631             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4632           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4633           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4634           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4635
4636           { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4637           { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4638             gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4639           { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4640           { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4641
4642           { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4643           { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4644           { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4645             gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4646           { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4647
4648           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4649           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4650           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4651           { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4652             gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4653     };
4654     gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4655
4656     /* While there are holes in the table, they are not
4657      * accessible via the instruction encoding.
4658      */
4659     assert(fn != NULL);
4660     do_mem_zpa(s, zt, pg, addr, dtype, fn);
4661 }
4662
4663 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4664 {
4665     if (a->rm == 31) {
4666         return false;
4667     }
4668     if (sve_access_check(s)) {
4669         TCGv_i64 addr = new_tmp_a64(s);
4670         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4671         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4672         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4673     }
4674     return true;
4675 }
4676
4677 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4678 {
4679     if (sve_access_check(s)) {
4680         int vsz = vec_full_reg_size(s);
4681         int elements = vsz >> dtype_esz[a->dtype];
4682         TCGv_i64 addr = new_tmp_a64(s);
4683
4684         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4685                          (a->imm * elements * (a->nreg + 1))
4686                          << dtype_msz(a->dtype));
4687         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4688     }
4689     return true;
4690 }
4691
4692 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4693 {
4694     static gen_helper_gvec_mem * const fns[2][16] = {
4695         /* Little-endian */
4696         { gen_helper_sve_ldff1bb_r,
4697           gen_helper_sve_ldff1bhu_r,
4698           gen_helper_sve_ldff1bsu_r,
4699           gen_helper_sve_ldff1bdu_r,
4700
4701           gen_helper_sve_ldff1sds_le_r,
4702           gen_helper_sve_ldff1hh_le_r,
4703           gen_helper_sve_ldff1hsu_le_r,
4704           gen_helper_sve_ldff1hdu_le_r,
4705
4706           gen_helper_sve_ldff1hds_le_r,
4707           gen_helper_sve_ldff1hss_le_r,
4708           gen_helper_sve_ldff1ss_le_r,
4709           gen_helper_sve_ldff1sdu_le_r,
4710
4711           gen_helper_sve_ldff1bds_r,
4712           gen_helper_sve_ldff1bss_r,
4713           gen_helper_sve_ldff1bhs_r,
4714           gen_helper_sve_ldff1dd_le_r },
4715
4716         /* Big-endian */
4717         { gen_helper_sve_ldff1bb_r,
4718           gen_helper_sve_ldff1bhu_r,
4719           gen_helper_sve_ldff1bsu_r,
4720           gen_helper_sve_ldff1bdu_r,
4721
4722           gen_helper_sve_ldff1sds_be_r,
4723           gen_helper_sve_ldff1hh_be_r,
4724           gen_helper_sve_ldff1hsu_be_r,
4725           gen_helper_sve_ldff1hdu_be_r,
4726
4727           gen_helper_sve_ldff1hds_be_r,
4728           gen_helper_sve_ldff1hss_be_r,
4729           gen_helper_sve_ldff1ss_be_r,
4730           gen_helper_sve_ldff1sdu_be_r,
4731
4732           gen_helper_sve_ldff1bds_r,
4733           gen_helper_sve_ldff1bss_r,
4734           gen_helper_sve_ldff1bhs_r,
4735           gen_helper_sve_ldff1dd_be_r },
4736     };
4737
4738     if (sve_access_check(s)) {
4739         TCGv_i64 addr = new_tmp_a64(s);
4740         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4741         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4742         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4743                    fns[s->be_data == MO_BE][a->dtype]);
4744     }
4745     return true;
4746 }
4747
4748 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4749 {
4750     static gen_helper_gvec_mem * const fns[2][16] = {
4751         /* Little-endian */
4752         { gen_helper_sve_ldnf1bb_r,
4753           gen_helper_sve_ldnf1bhu_r,
4754           gen_helper_sve_ldnf1bsu_r,
4755           gen_helper_sve_ldnf1bdu_r,
4756
4757           gen_helper_sve_ldnf1sds_le_r,
4758           gen_helper_sve_ldnf1hh_le_r,
4759           gen_helper_sve_ldnf1hsu_le_r,
4760           gen_helper_sve_ldnf1hdu_le_r,
4761
4762           gen_helper_sve_ldnf1hds_le_r,
4763           gen_helper_sve_ldnf1hss_le_r,
4764           gen_helper_sve_ldnf1ss_le_r,
4765           gen_helper_sve_ldnf1sdu_le_r,
4766
4767           gen_helper_sve_ldnf1bds_r,
4768           gen_helper_sve_ldnf1bss_r,
4769           gen_helper_sve_ldnf1bhs_r,
4770           gen_helper_sve_ldnf1dd_le_r },
4771
4772         /* Big-endian */
4773         { gen_helper_sve_ldnf1bb_r,
4774           gen_helper_sve_ldnf1bhu_r,
4775           gen_helper_sve_ldnf1bsu_r,
4776           gen_helper_sve_ldnf1bdu_r,
4777
4778           gen_helper_sve_ldnf1sds_be_r,
4779           gen_helper_sve_ldnf1hh_be_r,
4780           gen_helper_sve_ldnf1hsu_be_r,
4781           gen_helper_sve_ldnf1hdu_be_r,
4782
4783           gen_helper_sve_ldnf1hds_be_r,
4784           gen_helper_sve_ldnf1hss_be_r,
4785           gen_helper_sve_ldnf1ss_be_r,
4786           gen_helper_sve_ldnf1sdu_be_r,
4787
4788           gen_helper_sve_ldnf1bds_r,
4789           gen_helper_sve_ldnf1bss_r,
4790           gen_helper_sve_ldnf1bhs_r,
4791           gen_helper_sve_ldnf1dd_be_r },
4792     };
4793
4794     if (sve_access_check(s)) {
4795         int vsz = vec_full_reg_size(s);
4796         int elements = vsz >> dtype_esz[a->dtype];
4797         int off = (a->imm * elements) << dtype_msz(a->dtype);
4798         TCGv_i64 addr = new_tmp_a64(s);
4799
4800         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4801         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4802                    fns[s->be_data == MO_BE][a->dtype]);
4803     }
4804     return true;
4805 }
4806
4807 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4808 {
4809     static gen_helper_gvec_mem * const fns[2][4] = {
4810         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4811           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4812         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4813           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4814     };
4815     unsigned vsz = vec_full_reg_size(s);
4816     TCGv_ptr t_pg;
4817     TCGv_i32 t_desc;
4818     int desc, poff;
4819
4820     /* Load the first quadword using the normal predicated load helpers.  */
4821     desc = simd_desc(16, 16, zt);
4822     t_desc = tcg_const_i32(desc);
4823
4824     poff = pred_full_reg_offset(s, pg);
4825     if (vsz > 16) {
4826         /*
4827          * Zero-extend the first 16 bits of the predicate into a temporary.
4828          * This avoids triggering an assert making sure we don't have bits
4829          * set within a predicate beyond VQ, but we have lowered VQ to 1
4830          * for this load operation.
4831          */
4832         TCGv_i64 tmp = tcg_temp_new_i64();
4833 #ifdef HOST_WORDS_BIGENDIAN
4834         poff += 6;
4835 #endif
4836         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4837
4838         poff = offsetof(CPUARMState, vfp.preg_tmp);
4839         tcg_gen_st_i64(tmp, cpu_env, poff);
4840         tcg_temp_free_i64(tmp);
4841     }
4842
4843     t_pg = tcg_temp_new_ptr();
4844     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4845
4846     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4847
4848     tcg_temp_free_ptr(t_pg);
4849     tcg_temp_free_i32(t_desc);
4850
4851     /* Replicate that first quadword.  */
4852     if (vsz > 16) {
4853         unsigned dofs = vec_full_reg_offset(s, zt);
4854         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4855     }
4856 }
4857
4858 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4859 {
4860     if (a->rm == 31) {
4861         return false;
4862     }
4863     if (sve_access_check(s)) {
4864         int msz = dtype_msz(a->dtype);
4865         TCGv_i64 addr = new_tmp_a64(s);
4866         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4867         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4868         do_ldrq(s, a->rd, a->pg, addr, msz);
4869     }
4870     return true;
4871 }
4872
4873 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4874 {
4875     if (sve_access_check(s)) {
4876         TCGv_i64 addr = new_tmp_a64(s);
4877         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4878         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4879     }
4880     return true;
4881 }
4882
4883 /* Load and broadcast element.  */
4884 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4885 {
4886     unsigned vsz = vec_full_reg_size(s);
4887     unsigned psz = pred_full_reg_size(s);
4888     unsigned esz = dtype_esz[a->dtype];
4889     unsigned msz = dtype_msz(a->dtype);
4890     TCGLabel *over;
4891     TCGv_i64 temp, clean_addr;
4892
4893     if (!sve_access_check(s)) {
4894         return true;
4895     }
4896
4897     over = gen_new_label();
4898
4899     /* If the guarding predicate has no bits set, no load occurs.  */
4900     if (psz <= 8) {
4901         /* Reduce the pred_esz_masks value simply to reduce the
4902          * size of the code generated here.
4903          */
4904         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4905         temp = tcg_temp_new_i64();
4906         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4907         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4908         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4909         tcg_temp_free_i64(temp);
4910     } else {
4911         TCGv_i32 t32 = tcg_temp_new_i32();
4912         find_last_active(s, t32, esz, a->pg);
4913         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4914         tcg_temp_free_i32(t32);
4915     }
4916
4917     /* Load the data.  */
4918     temp = tcg_temp_new_i64();
4919     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4920     clean_addr = gen_mte_check1(s, temp, false, true, msz);
4921
4922     tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
4923                         s->be_data | dtype_mop[a->dtype]);
4924
4925     /* Broadcast to *all* elements.  */
4926     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4927                          vsz, vsz, temp);
4928     tcg_temp_free_i64(temp);
4929
4930     /* Zero the inactive elements.  */
4931     gen_set_label(over);
4932     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4933     return true;
4934 }
4935
4936 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4937                       int msz, int esz, int nreg)
4938 {
4939     static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4940         { { gen_helper_sve_st1bb_r,
4941             gen_helper_sve_st1bh_r,
4942             gen_helper_sve_st1bs_r,
4943             gen_helper_sve_st1bd_r },
4944           { NULL,
4945             gen_helper_sve_st1hh_le_r,
4946             gen_helper_sve_st1hs_le_r,
4947             gen_helper_sve_st1hd_le_r },
4948           { NULL, NULL,
4949             gen_helper_sve_st1ss_le_r,
4950             gen_helper_sve_st1sd_le_r },
4951           { NULL, NULL, NULL,
4952             gen_helper_sve_st1dd_le_r } },
4953         { { gen_helper_sve_st1bb_r,
4954             gen_helper_sve_st1bh_r,
4955             gen_helper_sve_st1bs_r,
4956             gen_helper_sve_st1bd_r },
4957           { NULL,
4958             gen_helper_sve_st1hh_be_r,
4959             gen_helper_sve_st1hs_be_r,
4960             gen_helper_sve_st1hd_be_r },
4961           { NULL, NULL,
4962             gen_helper_sve_st1ss_be_r,
4963             gen_helper_sve_st1sd_be_r },
4964           { NULL, NULL, NULL,
4965             gen_helper_sve_st1dd_be_r } },
4966     };
4967     static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4968         { { gen_helper_sve_st2bb_r,
4969             gen_helper_sve_st2hh_le_r,
4970             gen_helper_sve_st2ss_le_r,
4971             gen_helper_sve_st2dd_le_r },
4972           { gen_helper_sve_st3bb_r,
4973             gen_helper_sve_st3hh_le_r,
4974             gen_helper_sve_st3ss_le_r,
4975             gen_helper_sve_st3dd_le_r },
4976           { gen_helper_sve_st4bb_r,
4977             gen_helper_sve_st4hh_le_r,
4978             gen_helper_sve_st4ss_le_r,
4979             gen_helper_sve_st4dd_le_r } },
4980         { { gen_helper_sve_st2bb_r,
4981             gen_helper_sve_st2hh_be_r,
4982             gen_helper_sve_st2ss_be_r,
4983             gen_helper_sve_st2dd_be_r },
4984           { gen_helper_sve_st3bb_r,
4985             gen_helper_sve_st3hh_be_r,
4986             gen_helper_sve_st3ss_be_r,
4987             gen_helper_sve_st3dd_be_r },
4988           { gen_helper_sve_st4bb_r,
4989             gen_helper_sve_st4hh_be_r,
4990             gen_helper_sve_st4ss_be_r,
4991             gen_helper_sve_st4dd_be_r } },
4992     };
4993     gen_helper_gvec_mem *fn;
4994     int be = s->be_data == MO_BE;
4995
4996     if (nreg == 0) {
4997         /* ST1 */
4998         fn = fn_single[be][msz][esz];
4999     } else {
5000         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5001         assert(msz == esz);
5002         fn = fn_multiple[be][nreg - 1][msz];
5003     }
5004     assert(fn != NULL);
5005     do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
5006 }
5007
5008 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5009 {
5010     if (a->rm == 31 || a->msz > a->esz) {
5011         return false;
5012     }
5013     if (sve_access_check(s)) {
5014         TCGv_i64 addr = new_tmp_a64(s);
5015         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5016         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5017         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5018     }
5019     return true;
5020 }
5021
5022 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5023 {
5024     if (a->msz > a->esz) {
5025         return false;
5026     }
5027     if (sve_access_check(s)) {
5028         int vsz = vec_full_reg_size(s);
5029         int elements = vsz >> a->esz;
5030         TCGv_i64 addr = new_tmp_a64(s);
5031
5032         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5033                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5034         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5035     }
5036     return true;
5037 }
5038
5039 /*
5040  *** SVE gather loads / scatter stores
5041  */
5042
5043 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5044                        int scale, TCGv_i64 scalar, int msz,
5045                        gen_helper_gvec_mem_scatter *fn)
5046 {
5047     unsigned vsz = vec_full_reg_size(s);
5048     TCGv_ptr t_zm = tcg_temp_new_ptr();
5049     TCGv_ptr t_pg = tcg_temp_new_ptr();
5050     TCGv_ptr t_zt = tcg_temp_new_ptr();
5051     TCGv_i32 t_desc;
5052     int desc;
5053
5054     desc = simd_desc(vsz, vsz, scale);
5055     t_desc = tcg_const_i32(desc);
5056
5057     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5058     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5059     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5060     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5061
5062     tcg_temp_free_ptr(t_zt);
5063     tcg_temp_free_ptr(t_zm);
5064     tcg_temp_free_ptr(t_pg);
5065     tcg_temp_free_i32(t_desc);
5066 }
5067
/*
 * Gather-load helpers for 32-bit vector elements.
 *
 * Indexed by [be][ff][xs][u][msz]:
 *   be  - 0 for little-endian data, 1 for big-endian data
 *   ff  - 0 for the normal load, 1 for the first-fault variant
 *   xs  - 0 selects the "_zsu" helpers, 1 the "_zss" helpers
 *         (presumably unsigned vs signed 32-bit offsets -- confirm
 *         against the helper definitions)
 *   u   - 0 selects the "...ss" helpers, 1 the "...su"/"ss" helpers
 *         (presumably sign- vs zero-extending loads -- confirm)
 *   msz - memory element size: 0 byte, 1 halfword, 2 word
 *
 * Byte-sized helpers have no endian variant, so the same helper appears
 * in both halves.  The u=0, msz=2 slot is NULL in every row: no such
 * helper exists, and users of this table assert that the entry they
 * fetch is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5126
/*
 * Gather-load helpers for 64-bit vector elements.
 *
 * Indexed by [be][ff][xs][u][msz], with the same meaning as for
 * gather_load_fn32 except that:
 *   xs  - has three values; note that we overload xs=2 to indicate
 *         64-bit offset (the "_zd" helpers), alongside 0 ("_zsu") and
 *         1 ("_zss")
 *   msz - memory element size: 0 byte, 1 halfword, 2 word, 3 doubleword
 *
 * Byte-sized helpers have no endian variant, so the same helper appears
 * in both halves.  The u=0, msz=3 slot is NULL in every row: no such
 * helper exists, and users of this table assert that the entry they
 * fetch is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5233
5234 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5235 {
5236     gen_helper_gvec_mem_scatter *fn = NULL;
5237     int be = s->be_data == MO_BE;
5238
5239     if (!sve_access_check(s)) {
5240         return true;
5241     }
5242
5243     switch (a->esz) {
5244     case MO_32:
5245         fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5246         break;
5247     case MO_64:
5248         fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5249         break;
5250     }
5251     assert(fn != NULL);
5252
5253     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5254                cpu_reg_sp(s, a->rn), a->msz, fn);
5255     return true;
5256 }
5257
5258 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5259 {
5260     gen_helper_gvec_mem_scatter *fn = NULL;
5261     int be = s->be_data == MO_BE;
5262     TCGv_i64 imm;
5263
5264     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5265         return false;
5266     }
5267     if (!sve_access_check(s)) {
5268         return true;
5269     }
5270
5271     switch (a->esz) {
5272     case MO_32:
5273         fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5274         break;
5275     case MO_64:
5276         fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5277         break;
5278     }
5279     assert(fn != NULL);
5280
5281     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5282      * by loading the immediate into the scalar parameter.
5283      */
5284     imm = tcg_const_i64(a->imm << a->msz);
5285     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5286     tcg_temp_free_i64(imm);
5287     return true;
5288 }
5289
/*
 * Scatter-store helpers for 32-bit vector elements.
 *
 * Indexed by [be][xs][msz]:
 *   be  - 0 for little-endian data, 1 for big-endian data
 *   xs  - 0 selects the "_zsu" helpers, 1 the "_zss" helpers
 *         (presumably unsigned vs signed 32-bit offsets -- confirm
 *         against the helper definitions)
 *   msz - memory element size: 0 byte, 1 halfword, 2 word
 *
 * Byte-sized helpers have no endian variant, so the same helper appears
 * in both halves.  Every slot is populated.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5307
/*
 * Scatter-store helpers for 64-bit vector elements.
 *
 * Indexed by [be][xs][msz]:
 *   be  - 0 for little-endian data, 1 for big-endian data
 *   xs  - 0 selects the "_zsu" helpers, 1 the "_zss" helpers; note that
 *         we overload xs=2 to indicate 64-bit offset ("_zd" helpers)
 *   msz - memory element size: 0 byte, 1 halfword, 2 word, 3 doubleword
 *
 * Byte-sized helpers have no endian variant, so the same helper appears
 * in both halves.  Every slot is populated.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5337
5338 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5339 {
5340     gen_helper_gvec_mem_scatter *fn;
5341     int be = s->be_data == MO_BE;
5342
5343     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5344         return false;
5345     }
5346     if (!sve_access_check(s)) {
5347         return true;
5348     }
5349     switch (a->esz) {
5350     case MO_32:
5351         fn = scatter_store_fn32[be][a->xs][a->msz];
5352         break;
5353     case MO_64:
5354         fn = scatter_store_fn64[be][a->xs][a->msz];
5355         break;
5356     default:
5357         g_assert_not_reached();
5358     }
5359     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5360                cpu_reg_sp(s, a->rn), a->msz, fn);
5361     return true;
5362 }
5363
5364 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5365 {
5366     gen_helper_gvec_mem_scatter *fn = NULL;
5367     int be = s->be_data == MO_BE;
5368     TCGv_i64 imm;
5369
5370     if (a->esz < a->msz) {
5371         return false;
5372     }
5373     if (!sve_access_check(s)) {
5374         return true;
5375     }
5376
5377     switch (a->esz) {
5378     case MO_32:
5379         fn = scatter_store_fn32[be][0][a->msz];
5380         break;
5381     case MO_64:
5382         fn = scatter_store_fn64[be][2][a->msz];
5383         break;
5384     }
5385     assert(fn != NULL);
5386
5387     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5388      * by loading the immediate into the scalar parameter.
5389      */
5390     imm = tcg_const_i64(a->imm << a->msz);
5391     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5392     tcg_temp_free_i64(imm);
5393     return true;
5394 }
5395
5396 /*
5397  * Prefetches
5398  */
5399
/*
 * SVE prefetch.  No memory operation is emitted: the function only runs
 * sve_access_check() and deliberately discards its result.  The decoded
 * arguments in @a are not used.  Always returns true.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
5406
5407 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5408 {
5409     if (a->rm == 31) {
5410         return false;
5411     }
5412     /* Prefetch is a nop within QEMU.  */
5413     (void)sve_access_check(s);
5414     return true;
5415 }
5416
5417 /*
5418  * Move Prefix
5419  *
5420  * TODO: The implementation so far could handle predicated merging movprfx.
5421  * The helper functions as written take an extra source register to
5422  * use in the operation, but the result is only written when predication
5423  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5424  * to allow the final write back to the destination to be unconditional.
5425  * For predicated zeroing movprfx, we need to rearrange the helpers to
5426  * allow the final write back to zero inactives.
5427  *
5428  * In the meantime, just emit the moves.
5429  */
5430
/*
 * MOVPRFX without a predicate: delegated to do_mov_z() with the
 * destination and source register numbers; its result is returned
 * unchanged.
 */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
5435
5436 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5437 {
5438     if (sve_access_check(s)) {
5439         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5440     }
5441     return true;
5442 }
5443
5444 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5445 {
5446     if (sve_access_check(s)) {
5447         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5448     }
5449     return true;
5450 }