target/arm: Implement SVE Predicate Logical Operations Group
target/arm/translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

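/* Note: sve_access_check() (in translate-a64.c) generates the SVE or FP
 * access trap itself when access is denied and returns false.  The
 * trans_* functions below therefore still return true in that case,
 * since the instruction has been handled; returning false from a
 * trans_* function means the encoding is treated as unallocated.
 */
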
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
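
/* For example, with a 256-bit vector length s->sve_len is 32: the Z
 * registers are then 32 bytes and each predicate register (and the FFR)
 * is 4 bytes; at the architectural maximum of 2048 bits the predicate
 * registers are 32 bytes.
 */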

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
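
/* Predicate registers are 2 to 32 bytes (vector lengths of 128 to 2048
 * bits), so for example size_for_gvec(2) == size_for_gvec(6) == 8,
 * while size_for_gvec(18) == 32.
 */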

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
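
/* The predtest helpers return a word with the PredTest result packed as:
 * bit 31 = N (first active element of the result was true),
 * bit 1 = !Z (some active element was true),
 * bit 0 = C (last active element was false).
 * The assignments above map that directly onto QEMU's NF/ZF/CF/VF
 * flag representation.
 */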

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Predicate Logical Operations Group
 */

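/* Each of the operations below also has a flag-setting form (ANDS, BICS,
 * EORS, ORRS, ORNS, NORS, NANDS), selected by the S bit in the encoding
 * (a->s).  Those forms set NZCV from PredTest of the result against the
 * governing predicate, which is handled by do_pppp_flags() below.  SEL is
 * the exception: it has no flag-setting form.
 */
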
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

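/* AND with a predicate result is Pd = Pn & Pm & Pg.  The cases below fold
 * away redundant operands: with Pn == Pm this is Pd = Pn & Pg (or a plain
 * move when Pg is also Pn), and when Pg equals one of the operands the
 * guard is absorbed and Pd = Pn & Pm suffices.
 */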
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

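/* SEL copies Pn bits where the guard is true and Pm bits where it is false:
 * Pd = (Pn & Pg) | (Pm & ~Pg).  There is no flag-setting form, so a set
 * S bit is an unallocated encoding.
 */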
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
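
/* For vector lengths up to 512 bits the whole predicate register fits in
 * one 64-bit word, so PTEST above takes the do_predtest1() path; longer
 * vectors go through the out-of-line do_predtest() helper.
 */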

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated loads/stores of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
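
/* An illustrative walk-through for a predicate load: with a 384-bit vector
 * length the predicate registers are 6 bytes, so LDR <Pt>, [<Xn>, #2, MUL VL]
 * reaches do_ldr() with len == 6 and imm == 12.  len_align is 0 and
 * len_remain is 6, so the whole transfer is done by the "case 6" path above:
 * a 4-byte load followed by a 2-byte load, merged with a deposit and stored
 * as a single 64-bit unit.
 */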

static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}