2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 /* We only support generating code for 64-bit mode. */
28 #if TCG_TARGET_REG_BITS != 64
29 #error "unsupported code generation mode"
32 #include "../tcg-ldst.c.inc"
33 #include "../tcg-pool.c.inc"
36 #define TCG_CT_CONST_S16 (1 << 8)
37 #define TCG_CT_CONST_S32 (1 << 9)
38 #define TCG_CT_CONST_S33 (1 << 10)
39 #define TCG_CT_CONST_ZERO (1 << 11)
40 #define TCG_CT_CONST_P32 (1 << 12)
41 #define TCG_CT_CONST_INV (1 << 13)
42 #define TCG_CT_CONST_INVRISBG (1 << 14)
44 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
45 #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
48 * For softmmu, we need to avoid conflicts with the first 3
49 * argument registers to perform the tlb lookup, and to call
50 * the helper function.
53 #define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
55 #define SOFTMMU_RESERVE_REGS 0
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0. */
61 #define TCG_REG_NONE 0
/* A scratch register that may be used throughout the backend. */
64 #define TCG_TMP0 TCG_REG_R1
66 #ifndef CONFIG_SOFTMMU
67 #define TCG_GUEST_BASE_REG TCG_REG_R13
70 /* All of the following instructions are prefixed with their instruction
71 format, and are defined as 8- or 16-bit quantities, even when the two
72 halves of the 16-bit quantity may appear 32 bits apart in the insn.
73 This makes it easy to copy the values from the tables in Appendix B. */
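/* For example (illustrative): RISBG appears in the tables as opcode
   EC...55, so it is defined as RIEf_RISBG = 0xec55; tcg_out_risbg
   below splits that value back into its first byte (0xec) and last
   byte (0x55) around the operand fields. */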
74 typedef enum S390Opcode {
146 RIEg_LOCGHI = 0xec46,
183 RRFa_MSGRKC = 0xb9ed,
205 RRFam_SELGR = 0xb9e3,
209 RRFc_POPCNT = 0xb9e1,
291 VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
292 VRRc_VCH = 0xe7fb, /* " */
293 VRRc_VCHL = 0xe7f9, /* " */
294 VRRc_VERLLV = 0xe773,
296 VRRc_VESRAV = 0xe77a,
297 VRRc_VESRLV = 0xe778,
310 VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
335 #ifdef CONFIG_DEBUG_TCG
336 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
337 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
338 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
341 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
342 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
343 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
347 /* Since R6 is a potential argument register, choose it last of the
348 call-saved registers. Likewise prefer the call-clobbered registers
349 in reverse order to maximize the chance of avoiding the arguments. */
350 static const int tcg_target_reg_alloc_order[] = {
351 /* Call saved registers. */
360 /* Call clobbered registers. */
364 /* Argument registers, in reverse order of allocation. */
370 /* V8-V15 are call saved, and omitted. */
397 static const int tcg_target_call_iarg_regs[] = {
405 static const int tcg_target_call_oarg_regs[] = {
413 #define S390_CC_NE (S390_CC_LT | S390_CC_GT)
414 #define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
415 #define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
416 #define S390_CC_NEVER 0
417 #define S390_CC_ALWAYS 15
419 /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
420 static const uint8_t tcg_cond_to_s390_cond[] = {
421 [TCG_COND_EQ] = S390_CC_EQ,
422 [TCG_COND_NE] = S390_CC_NE,
423 [TCG_COND_LT] = S390_CC_LT,
424 [TCG_COND_LE] = S390_CC_LE,
425 [TCG_COND_GT] = S390_CC_GT,
426 [TCG_COND_GE] = S390_CC_GE,
427 [TCG_COND_LTU] = S390_CC_LT,
428 [TCG_COND_LEU] = S390_CC_LE,
429 [TCG_COND_GTU] = S390_CC_GT,
430 [TCG_COND_GEU] = S390_CC_GE,
/* Condition codes that result from a LOAD AND TEST.  Here we have no
   unsigned instruction variation; however, since the test is against
   zero we can remap the outcomes appropriately. */
436 static const uint8_t tcg_cond_to_ltr_cond[] = {
437 [TCG_COND_EQ] = S390_CC_EQ,
438 [TCG_COND_NE] = S390_CC_NE,
439 [TCG_COND_LT] = S390_CC_LT,
440 [TCG_COND_LE] = S390_CC_LE,
441 [TCG_COND_GT] = S390_CC_GT,
442 [TCG_COND_GE] = S390_CC_GE,
443 [TCG_COND_LTU] = S390_CC_NEVER,
444 [TCG_COND_LEU] = S390_CC_EQ,
445 [TCG_COND_GTU] = S390_CC_NE,
446 [TCG_COND_GEU] = S390_CC_ALWAYS,
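/* For example: with an unsigned compare against zero, "x <u 0" is never
   true, "x <=u 0" holds exactly when x == 0, "x >u 0" exactly when
   x != 0, and "x >=u 0" is always true, which is what the four
   unsigned entries above encode. */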
449 #ifdef CONFIG_SOFTMMU
450 static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
451 [MO_UB] = helper_ret_ldub_mmu,
452 [MO_SB] = helper_ret_ldsb_mmu,
453 [MO_LEUW] = helper_le_lduw_mmu,
454 [MO_LESW] = helper_le_ldsw_mmu,
455 [MO_LEUL] = helper_le_ldul_mmu,
456 [MO_LESL] = helper_le_ldsl_mmu,
457 [MO_LEUQ] = helper_le_ldq_mmu,
458 [MO_BEUW] = helper_be_lduw_mmu,
459 [MO_BESW] = helper_be_ldsw_mmu,
460 [MO_BEUL] = helper_be_ldul_mmu,
461 [MO_BESL] = helper_be_ldsl_mmu,
462 [MO_BEUQ] = helper_be_ldq_mmu,
465 static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
466 [MO_UB] = helper_ret_stb_mmu,
467 [MO_LEUW] = helper_le_stw_mmu,
468 [MO_LEUL] = helper_le_stl_mmu,
469 [MO_LEUQ] = helper_le_stq_mmu,
470 [MO_BEUW] = helper_be_stw_mmu,
471 [MO_BEUL] = helper_be_stl_mmu,
472 [MO_BEUQ] = helper_be_stq_mmu,
476 static const tcg_insn_unit *tb_ret_addr;
477 uint64_t s390_facilities[3];
479 static inline bool is_general_reg(TCGReg r)
481 return r <= TCG_REG_R15;
484 static inline bool is_vector_reg(TCGReg r)
486 return r >= TCG_REG_V0 && r <= TCG_REG_V31;
489 static bool patch_reloc(tcg_insn_unit *src_rw, int type,
490 intptr_t value, intptr_t addend)
492 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
497 pcrel2 = (tcg_insn_unit *)value - src_rx;
501 if (pcrel2 == (int16_t)pcrel2) {
502 tcg_patch16(src_rw, pcrel2);
507 if (pcrel2 == (int32_t)pcrel2) {
508 tcg_patch32(src_rw, pcrel2);
513 if (value == sextract64(value, 0, 20)) {
514 old = *(uint32_t *)src_rw & 0xf00000ff;
515 old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
516 tcg_patch32(src_rw, old);
521 g_assert_not_reached();
526 static int is_const_p16(uint64_t val)
528 for (int i = 0; i < 4; ++i) {
529 uint64_t mask = 0xffffull << (i * 16);
530 if ((val & ~mask) == 0) {
537 static int is_const_p32(uint64_t val)
539 if ((val & 0xffffffff00000000ull) == 0) {
542 if ((val & 0x00000000ffffffffull) == 0) {
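/* Example usage (illustrative): is_const_p16(0x12340000) returns 1,
   since the value is confined to halfword 1, while is_const_p32 of
   the same value returns 0, since it is confined to the low 32 bits;
   both return -1 when no single field contains the value. */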
549 * Accept bit patterns like these:
554 * Copied from gcc sources.
556 static bool risbg_mask(uint64_t c)
559 /* We don't change the number of transitions by inverting,
560 so make sure we start with the LSB zero. */
564 /* Reject all zeros or all ones. */
568 /* Find the first transition. */
570 /* Invert to look for a second transition. */
572 /* Erase the first transition. */
574 /* Find the second transition, if any. */
576 /* Match if all the bits are 1's, or if c is zero. */
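/* Worked example (illustrative): 0x0000000000fff000 is a single run
   of ones and is accepted; 0xff000000000000ff has the LSB set, so it
   is first inverted to 0x00ffffffffffff00, again a single run, and is
   accepted as a wrap-around mask; 0x0f0f0f0f0f0f0f0f has multiple
   runs and is rejected. */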
580 /* Test if a constant matches the constraint. */
581 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
583 if (ct & TCG_CT_CONST) {
587 if (type == TCG_TYPE_I32) {
591 /* The following are mutually exclusive. */
592 if (ct & TCG_CT_CONST_S16) {
593 return val == (int16_t)val;
594 } else if (ct & TCG_CT_CONST_S32) {
595 return val == (int32_t)val;
596 } else if (ct & TCG_CT_CONST_S33) {
597 return val >= -0xffffffffll && val <= 0xffffffffll;
598 } else if (ct & TCG_CT_CONST_ZERO) {
602 if (ct & TCG_CT_CONST_INV) {
606 * Note that is_const_p16 is a subset of is_const_p32,
607 * so we don't need both constraints.
609 if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
612 if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
619 /* Emit instructions according to the given instruction format. */
621 static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
623 tcg_out16(s, (op << 8) | (r1 << 4) | r2);
626 static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
627 TCGReg r1, TCGReg r2)
629 tcg_out32(s, (op << 16) | (r1 << 4) | r2);
632 /* RRF-a without the m4 field */
633 static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
634 TCGReg r1, TCGReg r2, TCGReg r3)
636 tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
639 /* RRF-a with the m4 field */
640 static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
641 TCGReg r1, TCGReg r2, TCGReg r3, int m4)
643 tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
646 static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
647 TCGReg r1, TCGReg r2, int m3)
649 tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
652 static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
654 tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
657 static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
660 tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
661 tcg_out32(s, (i2 << 16) | (op & 0xff));
664 static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
666 tcg_out16(s, op | (r1 << 4));
670 static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
671 TCGReg b2, TCGReg r3, int disp)
673 tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
677 static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
678 TCGReg b2, TCGReg r3, int disp)
680 tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
681 tcg_out32(s, (op & 0xff) | (b2 << 28)
682 | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
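/* The 20-bit RSY/RXY displacement is split into DL (low 12 bits) and
   DH (high 8 bits); e.g. disp = 0x12345 is emitted with DL = 0x345
   and DH = 0x12, which is what the two masks above extract. */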
685 #define tcg_out_insn_RX tcg_out_insn_RS
686 #define tcg_out_insn_RXY tcg_out_insn_RSY
688 static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
691 * Shift bit 4 of each regno to its corresponding bit of RXB.
692 * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
693 * is the left-shift of the 4th operand.
695 return ((v1 & 0x10) << (4 + 3))
696 | ((v2 & 0x10) << (4 + 2))
697 | ((v3 & 0x10) << (4 + 1))
698 | ((v4 & 0x10) << (4 + 0));
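/* Example (illustrative): RXB(v1 = %v17, v2 = %v2, 0, 0) yields 0x0800;
   only v1 lies in the high bank %v16-%v31, so only its RXB bit is set
   in the final halfword of the insn. */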
701 static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
702 TCGReg v1, uint16_t i2, int m3)
704 tcg_debug_assert(is_vector_reg(v1));
705 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
707 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
710 static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
711 TCGReg v1, uint8_t i2, uint8_t i3, int m4)
713 tcg_debug_assert(is_vector_reg(v1));
714 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
715 tcg_out16(s, (i2 << 8) | (i3 & 0xff));
716 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
719 static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
720 TCGReg v1, uint16_t i2, TCGReg v3, int m4)
722 tcg_debug_assert(is_vector_reg(v1));
723 tcg_debug_assert(is_vector_reg(v3));
724 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
726 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
729 static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
730 TCGReg v1, TCGReg v2, int m3)
732 tcg_debug_assert(is_vector_reg(v1));
733 tcg_debug_assert(is_vector_reg(v2));
734 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
735 tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
738 static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
739 TCGReg v1, TCGReg v2, TCGReg v3, int m4)
741 tcg_debug_assert(is_vector_reg(v1));
742 tcg_debug_assert(is_vector_reg(v2));
743 tcg_debug_assert(is_vector_reg(v3));
744 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
745 tcg_out16(s, v3 << 12);
746 tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
749 static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
750 TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
752 tcg_debug_assert(is_vector_reg(v1));
753 tcg_debug_assert(is_vector_reg(v2));
754 tcg_debug_assert(is_vector_reg(v3));
755 tcg_debug_assert(is_vector_reg(v4));
756 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
757 tcg_out16(s, v3 << 12);
tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | ((v4 & 0xf) << 12));
761 static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
762 TCGReg v1, TCGReg r2, TCGReg r3)
764 tcg_debug_assert(is_vector_reg(v1));
765 tcg_debug_assert(is_general_reg(r2));
766 tcg_debug_assert(is_general_reg(r3));
767 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
768 tcg_out16(s, r3 << 12);
769 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
772 static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
773 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
775 tcg_debug_assert(is_vector_reg(v1));
776 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
777 tcg_debug_assert(is_general_reg(b2));
778 tcg_debug_assert(is_vector_reg(v3));
779 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
780 tcg_out16(s, b2 << 12 | d2);
781 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
784 static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
785 intptr_t d2, TCGReg b2, TCGReg r3, int m4)
787 tcg_debug_assert(is_vector_reg(v1));
788 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
789 tcg_debug_assert(is_general_reg(b2));
790 tcg_debug_assert(is_general_reg(r3));
791 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
792 tcg_out16(s, b2 << 12 | d2);
793 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
796 static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
797 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
799 tcg_debug_assert(is_general_reg(r1));
800 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
801 tcg_debug_assert(is_general_reg(b2));
802 tcg_debug_assert(is_vector_reg(v3));
803 tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
804 tcg_out16(s, b2 << 12 | d2);
805 tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
808 static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
809 TCGReg b2, TCGReg x2, intptr_t d2, int m3)
811 tcg_debug_assert(is_vector_reg(v1));
812 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
813 tcg_debug_assert(is_general_reg(x2));
814 tcg_debug_assert(is_general_reg(b2));
815 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
816 tcg_out16(s, (b2 << 12) | d2);
817 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
820 /* Emit an opcode with "type-checking" of the format. */
821 #define tcg_out_insn(S, FMT, OP, ...) \
822 glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
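/* E.g. tcg_out_insn(s, RIL, LARL, ret, off) expands to
   tcg_out_insn_RIL(s, RIL_LARL, ret, off): the format name selects
   both the emitter function and the opcode enumerator prefix. */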
825 /* emit 64-bit shifts */
826 static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
827 TCGReg src, TCGReg sh_reg, int sh_imm)
829 tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
832 /* emit 32-bit shifts */
833 static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
834 TCGReg sh_reg, int sh_imm)
836 tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
839 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
846 if (likely(is_general_reg(dst) && is_general_reg(src))) {
847 tcg_out_insn(s, RR, LR, dst, src);
853 if (likely(is_general_reg(dst))) {
854 if (likely(is_general_reg(src))) {
855 tcg_out_insn(s, RRE, LGR, dst, src);
857 tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
860 } else if (is_general_reg(src)) {
861 tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
868 tcg_out_insn(s, VRRa, VLR, dst, src, 0);
872 g_assert_not_reached();
877 static const S390Opcode li_insns[4] = {
878 RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
880 static const S390Opcode oi_insns[4] = {
881 RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
883 static const S390Opcode lif_insns[2] = {
884 RIL_LLILF, RIL_LLIHF,
887 /* load a register with an immediate value */
888 static void tcg_out_movi(TCGContext *s, TCGType type,
889 TCGReg ret, tcg_target_long sval)
891 tcg_target_ulong uval = sval;
895 if (type == TCG_TYPE_I32) {
896 uval = (uint32_t)sval;
897 sval = (int32_t)sval;
900 /* Try all 32-bit insns that can load it in one go. */
901 if (sval >= -0x8000 && sval < 0x8000) {
902 tcg_out_insn(s, RI, LGHI, ret, sval);
906 i = is_const_p16(uval);
908 tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
912 /* Try all 48-bit insns that can load it in one go. */
913 if (sval == (int32_t)sval) {
914 tcg_out_insn(s, RIL, LGFI, ret, sval);
918 i = is_const_p32(uval);
920 tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
924 /* Try for PC-relative address load. For odd addresses, add one. */
925 pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
926 if (pc_off == (int32_t)pc_off) {
927 tcg_out_insn(s, RIL, LARL, ret, pc_off);
929 tcg_out_insn(s, RI, AGHI, ret, 1);
934 /* Otherwise, load it by parts. */
935 i = is_const_p16((uint32_t)uval);
937 tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
939 tcg_out_insn(s, RIL, LLILF, ret, uval);
942 i = is_const_p16(uval);
944 tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
946 tcg_out_insn(s, RIL, OIHF, ret, uval);
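/* Worked example (illustrative): assuming the LARL path above does not
   apply, 0x0001234500006789 matches none of the single-insn cases, so
   it is built as LLILL 0x6789 (the low half fits one halfword) followed
   by OIHF 0x00012345 (the high half spans two halfwords). */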
950 /* Emit a load/store type instruction. Inputs are:
951 DATA: The register to be loaded or stored.
952 BASE+OFS: The effective address.
953 OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0.
954 OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
956 static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
957 TCGReg data, TCGReg base, TCGReg index,
960 if (ofs < -0x80000 || ofs >= 0x80000) {
961 /* Combine the low 20 bits of the offset with the actual load insn;
962 the high 44 bits must come from an immediate load. */
963 tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
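/* The xor/subtract pair sign-extends the low 20 bits; e.g. (illustrative)
   ofs = 0xfff80 gives low = -0x80, leaving ofs - low = 0x100000 for the
   immediate load and -0x80 for the displacement field. */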
964 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
967 /* If we were already given an index register, add it in. */
968 if (index != TCG_REG_NONE) {
969 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
974 if (opc_rx && ofs >= 0 && ofs < 0x1000) {
975 tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
977 tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
981 static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
982 TCGReg data, TCGReg base, TCGReg index,
983 tcg_target_long ofs, int m3)
985 if (ofs < 0 || ofs >= 0x1000) {
986 if (ofs >= -0x80000 && ofs < 0x80000) {
987 tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
989 index = TCG_REG_NONE;
992 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
993 if (index != TCG_REG_NONE) {
994 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
1000 tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1003 /* load data without address translation or endianness conversion */
1004 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1005 TCGReg base, intptr_t ofs)
1009 if (likely(is_general_reg(data))) {
1010 tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1013 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1017 if (likely(is_general_reg(data))) {
1018 tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1024 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1028 /* Hint quadword aligned. */
1029 tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1033 g_assert_not_reached();
1037 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1038 TCGReg base, intptr_t ofs)
1042 if (likely(is_general_reg(data))) {
1043 tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1045 tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1050 if (likely(is_general_reg(data))) {
1051 tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1057 tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1061 /* Hint quadword aligned. */
1062 tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1066 g_assert_not_reached();
1070 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1071 TCGReg base, intptr_t ofs)
1076 static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1077 int msb, int lsb, int ofs, int z)
1080 tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1081 tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1082 tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1085 static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1087 tcg_out_insn(s, RRE, LGBR, dest, src);
1090 static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1092 tcg_out_insn(s, RRE, LLGCR, dest, src);
1095 static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1097 tcg_out_insn(s, RRE, LGHR, dest, src);
1100 static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1102 tcg_out_insn(s, RRE, LLGHR, dest, src);
1105 static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1107 tcg_out_insn(s, RRE, LGFR, dest, src);
1110 static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1112 tcg_out_insn(s, RRE, LLGFR, dest, src);
1115 static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1118 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1119 /* Achieve wraparound by swapping msb and lsb. */
1120 msb = 64 - ctz64(~val);
1121 lsb = clz64(~val) - 1;
1124 lsb = 63 - ctz64(val);
1126 tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
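/* Worked example (illustrative): val = 0xff000000000000ff has both the
   MSB and LSB set, so msb = 64 - ctz64(~val) = 56 and
   lsb = clz64(~val) - 1 = 7; the RISBG range 56..7 wraps around zero
   and selects exactly the top and bottom bytes. */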
1129 static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1131 static const S390Opcode ni_insns[4] = {
1132 RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1134 static const S390Opcode nif_insns[2] = {
1137 uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1140 /* Look for the zero-extensions. */
1141 if ((val & valid) == 0xffffffff) {
1142 tgen_ext32u(s, dest, dest);
1145 if ((val & valid) == 0xff) {
1146 tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1149 if ((val & valid) == 0xffff) {
1150 tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1154 i = is_const_p16(~val & valid);
1156 tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1160 i = is_const_p32(~val & valid);
1161 tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1163 tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1167 if (risbg_mask(val)) {
1168 tgen_andi_risbg(s, dest, dest, val);
1172 g_assert_not_reached();
1175 static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1177 static const S390Opcode oif_insns[2] = {
1183 i = is_const_p16(val);
1185 tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1189 i = is_const_p32(val);
1191 tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1195 g_assert_not_reached();
1198 static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1200 switch (is_const_p32(val)) {
1202 tcg_out_insn(s, RIL, XILF, dest, val);
1205 tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1208 g_assert_not_reached();
1212 static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1213 TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
1215 bool is_unsigned = is_unsigned_cond(c);
1216 TCGCond inv_c = tcg_invert_cond(c);
1221 if (!(is_unsigned && need_carry)) {
1222 if (type == TCG_TYPE_I32) {
1223 tcg_out_insn(s, RR, LTR, r1, r1);
1225 tcg_out_insn(s, RRE, LTGR, r1, r1);
1227 *inv_cc = tcg_cond_to_ltr_cond[inv_c];
1228 return tcg_cond_to_ltr_cond[c];
1232 if (!is_unsigned && c2 == (int16_t)c2) {
1233 op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1234 tcg_out_insn_RI(s, op, r1, c2);
1238 if (type == TCG_TYPE_I32) {
1239 op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1240 tcg_out_insn_RIL(s, op, r1, c2);
1245 * Constraints are for a signed 33-bit operand, which is a
1246 * convenient superset of this signed/unsigned test.
1248 if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1249 op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1250 tcg_out_insn_RIL(s, op, r1, c2);
1254 /* Load everything else into a register. */
1255 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
1259 if (type == TCG_TYPE_I32) {
1260 op = (is_unsigned ? RR_CLR : RR_CR);
1261 tcg_out_insn_RR(s, op, r1, c2);
1263 op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1264 tcg_out_insn_RRE(s, op, r1, c2);
1268 *inv_cc = tcg_cond_to_s390_cond[inv_c];
1269 return tcg_cond_to_s390_cond[c];
1272 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1273 TCGArg c2, bool c2const, bool need_carry)
1276 return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1279 static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1280 TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1284 /* With LOC2, we can always emit the minimum 3 insns. */
1285 if (HAVE_FACILITY(LOAD_ON_COND2)) {
1286 /* Emit: d = 0, d = (cc ? 1 : d). */
1287 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1288 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1289 tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
1296 /* X != 0 is X > 0. */
1297 if (c2const && c2 == 0) {
1298 cond = TCG_COND_GTU;
1306 /* The result of a compare has CC=2 for GT and CC=3 unused.
1307 ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
1308 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1309 tcg_out_movi(s, type, dest, 0);
1310 tcg_out_insn(s, RRE, ALCGR, dest, dest);
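/* E.g. for GTU: the compare leaves CC = 2 exactly when the condition
   holds, so ALCGR computes dest = 0 + 0 + carry, i.e. 1 for true and
   0 for false, without a branch. */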
1314 /* X == 0 is X <= 0. */
1315 if (c2const && c2 == 0) {
1316 cond = TCG_COND_LEU;
1324 /* As above, but we're looking for borrow, or !carry.
1325 The second insn computes d - d - borrow, or -1 for true
1326 and 0 for false. So we must mask to 1 bit afterward. */
1327 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1328 tcg_out_insn(s, RRE, SLBGR, dest, dest);
1329 tgen_andi(s, type, dest, 1);
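/* E.g. for X == 0 (remapped to LEU above): the compare leaves no carry
   exactly when the condition holds, so SLBGR produces -1 for true and
   0 for false, which the final AND reduces to 0/1. */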
1336 /* Swap operands so that we can use LEU/GTU/GT/LE. */
1341 cond = tcg_swap_cond(cond);
1347 g_assert_not_reached();
1350 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1351 /* Emit: d = 0, t = 1, d = (cc ? t : d). */
1352 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1353 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1354 tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1357 static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1358 TCGArg v3, int v3const, TCGReg v4,
1365 if (HAVE_FACILITY(LOAD_ON_COND2)) {
1366 /* Emit: if (cc) dest = v3. */
1367 tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1370 tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1373 /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1374 tcg_out_insn(s, RI, LGHI, dest, v3);
1379 if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1380 /* Emit: dest = cc ? v3 : v4. */
1381 tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1387 tcg_out_mov(s, type, dest, v3);
1393 /* Emit: if (cc) dest = src. */
1394 tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1397 static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1398 TCGReg c1, TCGArg c2, int c2const,
1399 TCGArg v3, int v3const, TCGReg v4)
1403 cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1404 tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1407 static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1408 TCGArg a2, int a2const)
1410 /* Since this sets both R and R+1, we have no choice but to store the
1411 result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */
1412 QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1413 tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1415 if (a2const && a2 == 64) {
1416 tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1421 * Conditions from FLOGR are:
1422 * 2 -> one bit found
1423 * 8 -> no one bit found
1425 tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1428 static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1430 /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1431 if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1432 if (type == TCG_TYPE_I32) {
1433 tgen_ext32u(s, dest, src);
1436 tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1440 /* Without MIE3, each byte gets the count of bits for the byte. */
1441 tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1443 /* Multiply to sum each byte at the top of the word. */
1444 if (type == TCG_TYPE_I32) {
1445 tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1446 tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1448 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1449 tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1450 tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
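/* Worked example (illustrative): src = 0x0102010201020102 gives
   per-byte counts 0x0101010101010101; multiplying by 0x0101010101010101
   accumulates the sum of all eight bytes (8) into the top byte, which
   the shift by 56 extracts. */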
1454 static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1455 int ofs, int len, int z)
1457 int lsb = (63 - ofs);
1458 int msb = lsb - (len - 1);
1459 tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
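/* E.g. deposit at ofs = 8, len = 16 computes lsb = 55, msb = 40 in
   IBM bit numbering (bit 0 is the MSB), i.e. bits 8..23 in LSB-first
   numbering. */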
1462 static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1465 tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1468 static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1470 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1471 if (off == (int16_t)off) {
1472 tcg_out_insn(s, RI, BRC, cc, off);
1473 } else if (off == (int32_t)off) {
1474 tcg_out_insn(s, RIL, BRCL, cc, off);
1476 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1477 tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1481 static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1484 tgen_gotoi(s, cc, l->u.value_ptr);
1486 tcg_out16(s, RI_BRC | (cc << 4));
1487 tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1492 static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1493 TCGReg r1, TCGReg r2, TCGLabel *l)
1495 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1497 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1499 tcg_out16(s, cc << 12 | (opc & 0xff));
1502 static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1503 TCGReg r1, int i2, TCGLabel *l)
1505 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1507 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1509 tcg_out16(s, (i2 << 8) | (opc & 0xff));
1512 static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1513 TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1516 bool is_unsigned = is_unsigned_cond(c);
1520 cc = tcg_cond_to_s390_cond[c];
1523 opc = (type == TCG_TYPE_I32
1524 ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1525 : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1526 tgen_compare_branch(s, opc, cc, r1, c2, l);
1531 * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1532 * If the immediate we've been given does not fit that range, we'll
1533 * fall back to separate compare and branch instructions using the
1534 * larger comparison range afforded by COMPARE IMMEDIATE.
1536 if (type == TCG_TYPE_I32) {
1539 in_range = (uint32_t)c2 == (uint8_t)c2;
1542 in_range = (int32_t)c2 == (int8_t)c2;
1547 in_range = (uint64_t)c2 == (uint8_t)c2;
1550 in_range = (int64_t)c2 == (int8_t)c2;
1554 tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1558 cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1559 tgen_branch(s, cc, l);
1562 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1564 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1565 if (off == (int32_t)off) {
1566 tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1568 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1569 tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1573 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1574 const TCGHelperInfo *info)
1576 tcg_out_call_int(s, dest);
1579 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1580 TCGReg base, TCGReg index, int disp)
1582 switch (opc & (MO_SSIZE | MO_BSWAP)) {
1584 tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1587 tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1590 case MO_UW | MO_BSWAP:
1591 /* swapped unsigned halfword load with upper bits zeroed */
1592 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1593 tgen_ext16u(s, TCG_TYPE_I64, data, data);
1596 tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1599 case MO_SW | MO_BSWAP:
1600 /* swapped sign-extended halfword load */
1601 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1602 tgen_ext16s(s, TCG_TYPE_I64, data, data);
1605 tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1608 case MO_UL | MO_BSWAP:
1609 /* swapped unsigned int load with upper bits zeroed */
1610 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1611 tgen_ext32u(s, data, data);
1614 tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1617 case MO_SL | MO_BSWAP:
1618 /* swapped sign-extended int load */
1619 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1620 tgen_ext32s(s, data, data);
1623 tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1626 case MO_UQ | MO_BSWAP:
1627 tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1630 tcg_out_insn(s, RXY, LG, data, base, index, disp);
1638 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1639 TCGReg base, TCGReg index, int disp)
1641 switch (opc & (MO_SIZE | MO_BSWAP)) {
1643 if (disp >= 0 && disp < 0x1000) {
1644 tcg_out_insn(s, RX, STC, data, base, index, disp);
1646 tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1650 case MO_UW | MO_BSWAP:
1651 tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1654 if (disp >= 0 && disp < 0x1000) {
1655 tcg_out_insn(s, RX, STH, data, base, index, disp);
1657 tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1661 case MO_UL | MO_BSWAP:
1662 tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1665 if (disp >= 0 && disp < 0x1000) {
1666 tcg_out_insn(s, RX, ST, data, base, index, disp);
1668 tcg_out_insn(s, RXY, STY, data, base, index, disp);
1672 case MO_UQ | MO_BSWAP:
1673 tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1676 tcg_out_insn(s, RXY, STG, data, base, index, disp);
1684 #if defined(CONFIG_SOFTMMU)
1685 /* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
1686 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1687 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
   addend into R2.  Returns a register with the sanitized guest address. */
1691 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1692 int mem_index, bool is_ld)
1694 unsigned s_bits = opc & MO_SIZE;
1695 unsigned a_bits = get_alignment_bits(opc);
1696 unsigned s_mask = (1 << s_bits) - 1;
1697 unsigned a_mask = (1 << a_bits) - 1;
1698 int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1699 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1700 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1704 tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1705 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1706 tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1707 tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1709 /* For aligned accesses, we check the first byte and include the alignment
1710 bits within the address. For unaligned access, we check that we don't
1711 cross pages using the address of the last byte of the access. */
1712 a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1713 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1715 tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1717 tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1718 tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
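/* E.g. (illustrative) an 8-byte access with 4-byte alignment has
   s_mask = 7 and a_mask = 3, so a_off = 4 and tlb_mask keeps the page
   bits plus the two alignment bits: the comparison below then fails
   for a misaligned address or when addr + 4 falls in a different
   page. */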
1722 ofs = offsetof(CPUTLBEntry, addr_read);
1724 ofs = offsetof(CPUTLBEntry, addr_write);
1726 if (TARGET_LONG_BITS == 32) {
1727 tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1729 tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1732 tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1733 offsetof(CPUTLBEntry, addend));
1735 if (TARGET_LONG_BITS == 32) {
1736 tgen_ext32u(s, TCG_REG_R3, addr_reg);
1742 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1743 TCGReg data, TCGReg addr,
1744 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1746 TCGLabelQemuLdst *label = new_ldst_label(s);
1748 label->is_ld = is_ld;
1750 label->datalo_reg = data;
1751 label->addrlo_reg = addr;
1752 label->raddr = tcg_splitwx_to_rx(raddr);
1753 label->label_ptr[0] = label_ptr;
1756 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1758 TCGReg addr_reg = lb->addrlo_reg;
1759 TCGReg data_reg = lb->datalo_reg;
1760 MemOpIdx oi = lb->oi;
1761 MemOp opc = get_memop(oi);
1763 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1764 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1768 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1769 if (TARGET_LONG_BITS == 64) {
1770 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1772 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1773 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1774 tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1775 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1777 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1781 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1783 TCGReg addr_reg = lb->addrlo_reg;
1784 TCGReg data_reg = lb->datalo_reg;
1785 MemOpIdx oi = lb->oi;
1786 MemOp opc = get_memop(oi);
1788 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1789 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1793 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1794 if (TARGET_LONG_BITS == 64) {
1795 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1797 switch (opc & MO_SIZE) {
1799 tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1802 tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1805 tgen_ext32u(s, TCG_REG_R4, data_reg);
1808 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1813 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1814 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1815 tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1817 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1821 static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1822 TCGReg addrlo, unsigned a_bits)
1824 unsigned a_mask = (1 << a_bits) - 1;
1825 TCGLabelQemuLdst *l = new_ldst_label(s);
1828 l->addrlo_reg = addrlo;
/* We are expecting a_bits to max out at 7, well within the 16 bits
   tested by TMLL. */
1831 tcg_debug_assert(a_bits < 16);
1832 tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1834 tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
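/* TMLL sets CC 0 only when all tested bits are zero, so the BRC mask
   of 7 (CC 1, 2 or 3) branches exactly when the address is misaligned;
   e.g. a_bits = 2 gives a_mask = 3 and tests the low two bits. */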
1835 l->label_ptr[0] = s->code_ptr;
1838 l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1841 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1843 if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1844 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1848 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1849 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1851 /* "Tail call" to the helper, with the return address back inline. */
1852 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1853 tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1854 : helper_unaligned_st));
1858 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1860 return tcg_out_fail_alignment(s, l);
1863 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1865 return tcg_out_fail_alignment(s, l);
1868 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1869 TCGReg *index_reg, tcg_target_long *disp)
1871 if (TARGET_LONG_BITS == 32) {
1872 tgen_ext32u(s, TCG_TMP0, *addr_reg);
1873 *addr_reg = TCG_TMP0;
1875 if (guest_base < 0x80000) {
1876 *index_reg = TCG_REG_NONE;
1879 *index_reg = TCG_GUEST_BASE_REG;
1883 #endif /* CONFIG_SOFTMMU */
1885 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1888 MemOp opc = get_memop(oi);
1889 #ifdef CONFIG_SOFTMMU
1890 unsigned mem_index = get_mmuidx(oi);
1891 tcg_insn_unit *label_ptr;
1894 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1896 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1897 label_ptr = s->code_ptr;
1900 tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1902 add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1905 tcg_target_long disp;
1906 unsigned a_bits = get_alignment_bits(opc);
1909 tcg_out_test_alignment(s, true, addr_reg, a_bits);
1911 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1912 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1916 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1919 MemOp opc = get_memop(oi);
1920 #ifdef CONFIG_SOFTMMU
1921 unsigned mem_index = get_mmuidx(oi);
1922 tcg_insn_unit *label_ptr;
1925 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1927 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1928 label_ptr = s->code_ptr;
1931 tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1933 add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1936 tcg_target_long disp;
1937 unsigned a_bits = get_alignment_bits(opc);
1940 tcg_out_test_alignment(s, false, addr_reg, a_bits);
1942 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1943 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1947 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1949 /* Reuse the zeroing that exists for goto_ptr. */
1951 tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1953 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1954 tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1958 static void tcg_out_goto_tb(TCGContext *s, int which)
1961 * Branch displacement must be aligned for atomic patching;
* see if we need to add an extra nop before the branch.
1964 if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1967 tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1968 set_jmp_insn_offset(s, which);
1970 set_jmp_reset_offset(s, which);
1973 # define OP_32_64(x) \
1974 case glue(glue(INDEX_op_,x),_i32): \
1975 case glue(glue(INDEX_op_,x),_i64)
1977 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1978 const TCGArg args[TCG_MAX_OP_ARGS],
1979 const int const_args[TCG_MAX_OP_ARGS])
1985 case INDEX_op_goto_ptr:
1987 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
1991 /* ??? LLC (RXY format) is only present with the extended-immediate
1992 facility, whereas LLGC is always present. */
1993 tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
/* ??? LB is no smaller than LGB, so there is no point in using it. */
1998 tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2002 /* ??? LLH (RXY format) is only present with the extended-immediate
2003 facility, whereas LLGH is always present. */
2004 tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2007 case INDEX_op_ld16s_i32:
2008 tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2011 case INDEX_op_ld_i32:
2012 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2016 tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2017 TCG_REG_NONE, args[2]);
2021 tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2022 TCG_REG_NONE, args[2]);
2025 case INDEX_op_st_i32:
2026 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2029 case INDEX_op_add_i32:
2030 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2031 if (const_args[2]) {
2034 if (a2 == (int16_t)a2) {
2035 tcg_out_insn(s, RI, AHI, a0, a2);
2038 tcg_out_insn(s, RIL, AFI, a0, a2);
2041 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2042 } else if (a0 == a1) {
2043 tcg_out_insn(s, RR, AR, a0, a2);
2045 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2048 case INDEX_op_sub_i32:
2049 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2050 if (const_args[2]) {
2053 } else if (a0 == a1) {
2054 tcg_out_insn(s, RR, SR, a0, a2);
2056 tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2060 case INDEX_op_and_i32:
2061 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2062 if (const_args[2]) {
2063 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2064 tgen_andi(s, TCG_TYPE_I32, a0, a2);
2065 } else if (a0 == a1) {
2066 tcg_out_insn(s, RR, NR, a0, a2);
2068 tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2071 case INDEX_op_or_i32:
2072 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2073 if (const_args[2]) {
2074 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2075 tgen_ori(s, a0, a2);
2076 } else if (a0 == a1) {
2077 tcg_out_insn(s, RR, OR, a0, a2);
2079 tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2082 case INDEX_op_xor_i32:
2083 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2084 if (const_args[2]) {
2085 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2086 tcg_out_insn(s, RIL, XILF, a0, a2);
2087 } else if (a0 == a1) {
2088 tcg_out_insn(s, RR, XR, args[0], args[2]);
2090 tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2094 case INDEX_op_andc_i32:
2095 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2096 if (const_args[2]) {
2097 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2098 tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2100 tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2103 case INDEX_op_orc_i32:
2104 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2105 if (const_args[2]) {
2106 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2107 tgen_ori(s, a0, (uint32_t)~a2);
2109 tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2112 case INDEX_op_eqv_i32:
2113 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2114 if (const_args[2]) {
2115 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2116 tcg_out_insn(s, RIL, XILF, a0, ~a2);
2118 tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2121 case INDEX_op_nand_i32:
2122 tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2124 case INDEX_op_nor_i32:
2125 tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2128 case INDEX_op_neg_i32:
2129 tcg_out_insn(s, RR, LCR, args[0], args[1]);
2131 case INDEX_op_not_i32:
2132 tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2135 case INDEX_op_mul_i32:
2136 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2137 if (const_args[2]) {
2138 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2139 if (a2 == (int16_t)a2) {
2140 tcg_out_insn(s, RI, MHI, a0, a2);
2142 tcg_out_insn(s, RIL, MSFI, a0, a2);
2144 } else if (a0 == a1) {
2145 tcg_out_insn(s, RRE, MSR, a0, a2);
2147 tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2151 case INDEX_op_div2_i32:
2152 tcg_debug_assert(args[0] == args[2]);
2153 tcg_debug_assert(args[1] == args[3]);
2154 tcg_debug_assert((args[1] & 1) == 0);
2155 tcg_debug_assert(args[0] == args[1] + 1);
2156 tcg_out_insn(s, RR, DR, args[1], args[4]);
2158 case INDEX_op_divu2_i32:
2159 tcg_debug_assert(args[0] == args[2]);
2160 tcg_debug_assert(args[1] == args[3]);
2161 tcg_debug_assert((args[1] & 1) == 0);
2162 tcg_debug_assert(args[0] == args[1] + 1);
2163 tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2166 case INDEX_op_shl_i32:
2170 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2172 if (const_args[2]) {
2173 tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2175 tcg_out_sh32(s, op, a0, a2, 0);
2178 /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */
2179 if (const_args[2]) {
2180 tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2182 tcg_out_sh64(s, op2, a0, a1, a2, 0);
2186 case INDEX_op_shr_i32:
2190 case INDEX_op_sar_i32:
2195 case INDEX_op_rotl_i32:
2196 /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
2197 if (const_args[2]) {
2198 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2200 tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2203 case INDEX_op_rotr_i32:
2204 if (const_args[2]) {
2205 tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2206 TCG_REG_NONE, (32 - args[2]) & 31);
2208 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2209 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2213 case INDEX_op_ext8s_i32:
2214 tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2216 case INDEX_op_ext16s_i32:
2217 tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2219 case INDEX_op_ext8u_i32:
2220 tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2222 case INDEX_op_ext16u_i32:
2223 tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2226 case INDEX_op_bswap16_i32:
2227 a0 = args[0], a1 = args[1], a2 = args[2];
2228 tcg_out_insn(s, RRE, LRVR, a0, a1);
2229 if (a2 & TCG_BSWAP_OS) {
2230 tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2232 tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2235 case INDEX_op_bswap16_i64:
2236 a0 = args[0], a1 = args[1], a2 = args[2];
2237 tcg_out_insn(s, RRE, LRVGR, a0, a1);
2238 if (a2 & TCG_BSWAP_OS) {
2239 tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2241 tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2245 case INDEX_op_bswap32_i32:
2246 tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2248 case INDEX_op_bswap32_i64:
2249 a0 = args[0], a1 = args[1], a2 = args[2];
2250 tcg_out_insn(s, RRE, LRVR, a0, a1);
2251 if (a2 & TCG_BSWAP_OS) {
2252 tgen_ext32s(s, a0, a0);
2253 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2254 tgen_ext32u(s, a0, a0);
2258 case INDEX_op_add2_i32:
2259 if (const_args[4]) {
2260 tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2262 tcg_out_insn(s, RR, ALR, args[0], args[4]);
2264 tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2266 case INDEX_op_sub2_i32:
2267 if (const_args[4]) {
2268 tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2270 tcg_out_insn(s, RR, SLR, args[0], args[4]);
2272 tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2276 tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2279 case INDEX_op_brcond_i32:
2280 tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2281 args[1], const_args[1], arg_label(args[3]));
2283 case INDEX_op_setcond_i32:
2284 tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2285 args[2], const_args[2]);
2287 case INDEX_op_movcond_i32:
2288 tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2289 args[2], const_args[2], args[3], const_args[3], args[4]);
2292 case INDEX_op_qemu_ld_i32:
2293 /* ??? Technically we can use a non-extending instruction. */
2294 case INDEX_op_qemu_ld_i64:
2295 tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2297 case INDEX_op_qemu_st_i32:
2298 case INDEX_op_qemu_st_i64:
2299 tcg_out_qemu_st(s, args[0], args[1], args[2]);
2302 case INDEX_op_ld16s_i64:
2303 tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2305 case INDEX_op_ld32u_i64:
2306 tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2308 case INDEX_op_ld32s_i64:
2309 tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2311 case INDEX_op_ld_i64:
2312 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2315 case INDEX_op_st32_i64:
2316 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2318 case INDEX_op_st_i64:
2319 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2322 case INDEX_op_add_i64:
2323 a0 = args[0], a1 = args[1], a2 = args[2];
2324 if (const_args[2]) {
2327 if (a2 == (int16_t)a2) {
2328 tcg_out_insn(s, RI, AGHI, a0, a2);
2331 if (a2 == (int32_t)a2) {
2332 tcg_out_insn(s, RIL, AGFI, a0, a2);
2335 if (a2 == (uint32_t)a2) {
2336 tcg_out_insn(s, RIL, ALGFI, a0, a2);
2339 if (-a2 == (uint32_t)-a2) {
2340 tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2344 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2345 } else if (a0 == a1) {
2346 tcg_out_insn(s, RRE, AGR, a0, a2);
2348 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2351 case INDEX_op_sub_i64:
2352 a0 = args[0], a1 = args[1], a2 = args[2];
2353 if (const_args[2]) {
2357 tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2361 case INDEX_op_and_i64:
2362 a0 = args[0], a1 = args[1], a2 = args[2];
2363 if (const_args[2]) {
2364 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2365 tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2367 tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2370 case INDEX_op_or_i64:
2371 a0 = args[0], a1 = args[1], a2 = args[2];
2372 if (const_args[2]) {
2373 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2374 tgen_ori(s, a0, a2);
2376 tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2379 case INDEX_op_xor_i64:
2380 a0 = args[0], a1 = args[1], a2 = args[2];
2381 if (const_args[2]) {
2382 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2383 tgen_xori(s, a0, a2);
2385 tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2389 case INDEX_op_andc_i64:
2390 a0 = args[0], a1 = args[1], a2 = args[2];
2391 if (const_args[2]) {
2392 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2393 tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2395 tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2398 case INDEX_op_orc_i64:
2399 a0 = args[0], a1 = args[1], a2 = args[2];
2400 if (const_args[2]) {
2401 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2402 tgen_ori(s, a0, ~a2);
2404 tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2407 case INDEX_op_eqv_i64:
2408 a0 = args[0], a1 = args[1], a2 = args[2];
2409 if (const_args[2]) {
2410 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2411 tgen_xori(s, a0, ~a2);
2413 tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2416 case INDEX_op_nand_i64:
2417 tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2419 case INDEX_op_nor_i64:
2420 tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2423 case INDEX_op_neg_i64:
2424 tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2426 case INDEX_op_not_i64:
2427 tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2429 case INDEX_op_bswap64_i64:
2430 tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2433 case INDEX_op_mul_i64:
2434 a0 = args[0], a1 = args[1], a2 = args[2];
2435 if (const_args[2]) {
2436 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2437 if (a2 == (int16_t)a2) {
2438 tcg_out_insn(s, RI, MGHI, a0, a2);
2440 tcg_out_insn(s, RIL, MSGFI, a0, a2);
2442 } else if (a0 == a1) {
2443 tcg_out_insn(s, RRE, MSGR, a0, a2);
2445 tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2449 case INDEX_op_div2_i64:
* ??? We get an unnecessary sign-extension of the dividend
* into op0 with this definition, but since we do in fact always
* produce both quotient and remainder, using INDEX_op_div_i64
* instead would require jumping through even more hoops.
2456 tcg_debug_assert(args[0] == args[2]);
2457 tcg_debug_assert(args[1] == args[3]);
2458 tcg_debug_assert((args[1] & 1) == 0);
2459 tcg_debug_assert(args[0] == args[1] + 1);
2460 tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2462 case INDEX_op_divu2_i64:
2463 tcg_debug_assert(args[0] == args[2]);
2464 tcg_debug_assert(args[1] == args[3]);
2465 tcg_debug_assert((args[1] & 1) == 0);
2466 tcg_debug_assert(args[0] == args[1] + 1);
2467 tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2469 case INDEX_op_mulu2_i64:
2470 tcg_debug_assert(args[0] == args[2]);
2471 tcg_debug_assert((args[1] & 1) == 0);
2472 tcg_debug_assert(args[0] == args[1] + 1);
2473 tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2475 case INDEX_op_muls2_i64:
2476 tcg_debug_assert((args[1] & 1) == 0);
2477 tcg_debug_assert(args[0] == args[1] + 1);
2478 tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2481 case INDEX_op_shl_i64:
2484 if (const_args[2]) {
2485 tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2487 tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2490 case INDEX_op_shr_i64:
2493 case INDEX_op_sar_i64:
2497 case INDEX_op_rotl_i64:
2498 if (const_args[2]) {
2499 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2500 TCG_REG_NONE, args[2]);
2502 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2505 case INDEX_op_rotr_i64:
2506 if (const_args[2]) {
2507 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2508 TCG_REG_NONE, (64 - args[2]) & 63);
2510 /* We can use the smaller 32-bit negate because only the
2511 low 6 bits are examined for the rotate. */
2512 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2513 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2517 case INDEX_op_ext8s_i64:
2518 tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2520 case INDEX_op_ext16s_i64:
2521 tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2523 case INDEX_op_ext_i32_i64:
2524 case INDEX_op_ext32s_i64:
2525 tgen_ext32s(s, args[0], args[1]);
2527 case INDEX_op_ext8u_i64:
2528 tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2530 case INDEX_op_ext16u_i64:
2531 tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2533 case INDEX_op_extu_i32_i64:
2534 case INDEX_op_ext32u_i64:
2535 tgen_ext32u(s, args[0], args[1]);
2538 case INDEX_op_add2_i64:
2539 if (const_args[4]) {
2540 if ((int64_t)args[4] >= 0) {
2541 tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2543 tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2546 tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2548 tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2550 case INDEX_op_sub2_i64:
2551 if (const_args[4]) {
2552 if ((int64_t)args[4] >= 0) {
2553 tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2555 tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2558 tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2560 tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
    case INDEX_op_brcond_i64:
        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
                    args[1], const_args[1], arg_label(args[3]));
        break;
    case INDEX_op_setcond_i64:
        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
                     args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
                     args[2], const_args[2], args[3], const_args[3], args[4]);
        break;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[1]) {
            tgen_deposit(s, a0, a2, args[3], args[4], 1);
        } else {
            /* Since we can't support "0Z" as a constraint, we allow a1 in
               any register.  Fix things up as if a matching constraint.  */
            if (a0 != a1) {
                TCGType type = (opc == INDEX_op_deposit_i64
                                ? TCG_TYPE_I64 : TCG_TYPE_I32);
                if (a0 == a2) {
                    tcg_out_mov(s, type, TCG_TMP0, a2);
                    a2 = TCG_TMP0;
                }
                tcg_out_mov(s, type, a0, a1);
            }
            tgen_deposit(s, a0, a2, args[3], args[4], 0);
        }
        break;

    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
        tgen_extract(s, args[0], args[1], args[2], args[3]);
        break;

    case INDEX_op_clz_i64:
        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
        break;

    case INDEX_op_ctpop_i32:
        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    case INDEX_op_ctpop_i64:
        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
        break;

    case INDEX_op_mb:
        /* The host memory model is quite strong, we simply need to
           serialize the instruction stream.  */
        if (args[0] & TCG_MO_ST_LD) {
            /* fast-bcr-serialization facility (45) is present */
            tcg_out_insn(s, RR, BCR, 14, 0);
        }
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    if (is_general_reg(src)) {
        /* Replicate general register into two MO_64. */
        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
        if (vece == MO_64) {
            return true;
        }
        src = dst;
    }

    /*
     * Recall that the "standard" integer, within a vector, is the
     * rightmost element of the leftmost doubleword, a-la VLLEZ.
     */
    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset)
{
    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
    return true;
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t val)
{
    int i, mask, msb, lsb;

    /* Look for int16_t elements.  */
    if (vece <= MO_16 ||
        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
        return;
    }

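    /*
     * VECTOR GENERATE MASK sets element bits msb through lsb inclusive
     * (big-endian bit numbering, bit 0 = MSB), wrapping around when
     * msb > lsb.  E.g. for val = 0xc0000003, the swap below yields
     * msb = 30, lsb = 1, and VGM sets bits 30..31 and 0..1.
     */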
    /* Look for bit masks.  */
    if (vece == MO_32) {
        if (risbg_mask((int32_t)val)) {
            /* Handle wraparound by swapping msb and lsb.  */
            if ((val & 0x80000001u) == 0x80000001u) {
                msb = 32 - ctz32(~val);
                lsb = clz32(~val) - 1;
            } else {
                msb = clz32(val);
                lsb = 31 - ctz32(val);
            }
            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
            return;
        }
    } else {
        if (risbg_mask(val)) {
            /* Handle wraparound by swapping msb and lsb.  */
            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
                msb = 64 - ctz64(~val);
                lsb = clz64(~val) - 1;
            } else {
                msb = clz64(val);
                lsb = 63 - ctz64(val);
            }
            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
            return;
        }
    }

    /* Look for all bytes 0x00 or 0xff.  */
    for (i = mask = 0; i < 8; i++) {
        uint8_t byte = val >> (i * 8);
        if (byte == 0xff) {
            mask |= 1 << i;
        } else if (byte != 0) {
            break;
        }
    }
    if (i == 8) {
        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  */
    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;

    case INDEX_op_abs_vec:
        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
        break;
    case INDEX_op_neg_vec:
        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
        break;
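    /* There is no plain vector NOT; use VECTOR NOR with both inputs equal. */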
    case INDEX_op_not_vec:
        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
        break;

    case INDEX_op_add_vec:
        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
        break;
    case INDEX_op_sub_vec:
        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
        break;
    case INDEX_op_and_vec:
        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
        break;
    case INDEX_op_andc_vec:
        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
        break;
    case INDEX_op_or_vec:
        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
        break;
    case INDEX_op_orc_vec:
        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
        break;
    case INDEX_op_nand_vec:
        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
        break;
    case INDEX_op_nor_vec:
        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
        break;
    case INDEX_op_eqv_vec:
        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
        break;

    case INDEX_op_shli_vec:
        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
        break;
    case INDEX_op_shri_vec:
        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
        break;
    case INDEX_op_sari_vec:
        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
        break;
    case INDEX_op_rotli_vec:
        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
        break;
    case INDEX_op_shls_vec:
        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
        break;
    case INDEX_op_shrs_vec:
        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
        break;
    case INDEX_op_sars_vec:
        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
        break;
    case INDEX_op_rotls_vec:
        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
        break;
    case INDEX_op_shlv_vec:
        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
        break;
    case INDEX_op_shrv_vec:
        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
        break;
    case INDEX_op_sarv_vec:
        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
        break;
    case INDEX_op_rotlv_vec:
        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
        break;

    case INDEX_op_smin_vec:
        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
        break;

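    /*
     * VSEL computes (a2 & a1) | (args[3] & ~a1) bit-wise, so the TCG
     * selector operand a1 is passed as the last (mask) operand.
     */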
    case INDEX_op_bitsel_vec:
        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
        break;

    case INDEX_op_cmp_vec:
        switch ((TCGCond)args[3]) {
        case TCG_COND_EQ:
            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
            break;
        case TCG_COND_GT:
            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
            break;
        case TCG_COND_GTU:
            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_s390_vuph_vec:
        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
        break;
    case INDEX_op_s390_vupl_vec:
        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
        break;
    case INDEX_op_s390_vpks_vec:
        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
        break;

    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}

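/*
 * Report whether we can emit a given vector opcode directly (1), can
 * expand it via tcg_expand_vec_op (-1), or do not support it at all
 * for this element size (0).
 */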
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_abs_vec:
    case INDEX_op_add_vec:
    case INDEX_op_and_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_bitsel_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_not_vec:
    case INDEX_op_or_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_rotli_vec:
    case INDEX_op_rotls_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_sars_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shls_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_xor_vec:
        return 1;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
        return vece < MO_64;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
        return vece < MO_64 ? -1 : 0;
    default:
        return 0;
    }
}

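/*
 * The hardware provides only equal, greater-than and greater-than-logical
 * comparisons (VCEQ, VCH, VCHL).  All other conditions are derived by
 * swapping the operands (LT becomes a swapped GT) and/or inverting the
 * result (NE is NOT EQ); the return value tells the caller whether the
 * result still needs that final inversion.
 */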
static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    bool need_swap = false, need_inv = false;

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        break;
    case TCG_COND_NE:
    case TCG_COND_LE:
    case TCG_COND_LEU:
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGv_vec t1;
        t1 = v1, v1 = v2, v2 = t1;
        cond = tcg_swap_cond(cond);
    }

    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);

    return need_inv;
}

static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
        tcg_gen_not_vec(vece, v0, v0);
    }
}

static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
                              TCGv_vec c1, TCGv_vec c2,
                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
{
    TCGv_vec t = tcg_temp_new_vec(type);

    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
        /* Invert the sense of the compare by swapping arguments.  */
        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
    } else {
        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
    }
    tcg_temp_free_vec(t);
}

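/*
 * Expand signed saturating add/sub for MO_8..MO_32: unpack both inputs
 * to the next wider element size, perform the operation there (where it
 * cannot overflow), then pack the two halves back with signed saturation.
 */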
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
{
    TCGv_vec h1 = tcg_temp_new_vec(type);
    TCGv_vec h2 = tcg_temp_new_vec(type);
    TCGv_vec l1 = tcg_temp_new_vec(type);
    TCGv_vec l2 = tcg_temp_new_vec(type);

    tcg_debug_assert(vece < MO_64);

    /* Unpack with sign-extension. */
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(l2), tcgv_vec_arg(v2));

    /* Arithmetic on a wider element size. */
    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
              tcgv_vec_arg(l1), tcgv_vec_arg(l2));

    /* Pack with saturation. */
    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));

    tcg_temp_free_vec(h1);
    tcg_temp_free_vec(h2);
    tcg_temp_free_vec(l1);
    tcg_temp_free_vec(l2);
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, v3, v4, t0;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));

    switch (opc) {
    case INDEX_op_cmp_vec:
        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
        break;

    case INDEX_op_cmpsel_vec:
        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
        break;

    case INDEX_op_rotrv_vec:
        t0 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t0, v2);
        tcg_gen_rotlv_vec(vece, v0, v1, t0);
        tcg_temp_free_vec(t0);
        break;

    case INDEX_op_ssadd_vec:
        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
        break;
    case INDEX_op_sssub_vec:
        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
        break;

    default:
        g_assert_not_reached();
    }
    va_end(va);
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
    case INDEX_op_setcond_i32:
        return C_O1_I2(r, r, ri);
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_clz_i64:
        return C_O1_I2(r, r, rI);

    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
        return C_O1_I2(r, r, ri);
    case INDEX_op_and_i64:
        return C_O1_I2(r, r, rNKR);
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
        return C_O1_I2(r, r, rK);

    case INDEX_op_andc_i32:
    case INDEX_op_orc_i32:
    case INDEX_op_eqv_i32:
        return C_O1_I2(r, r, ri);
    case INDEX_op_andc_i64:
        return C_O1_I2(r, r, rKR);
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rNK);

    case INDEX_op_nand_i32:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_mul_i32:
        return (HAVE_FACILITY(MISC_INSN_EXT2)
                ? C_O1_I2(r, r, ri)
                : C_O1_I2(r, 0, ri));
    case INDEX_op_mul_i64:
        return (HAVE_FACILITY(MISC_INSN_EXT2)
                ? C_O1_I2(r, r, rJ)
                : C_O1_I2(r, 0, rJ));

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return C_O1_I2(r, r, ri);

    case INDEX_op_brcond_i32:
        return C_O0_I2(r, ri);
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return C_O1_I1(r, r);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, L);
    case INDEX_op_qemu_st_i64:
    case INDEX_op_qemu_st_i32:
        return C_O0_I2(L, L);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, rZ, r);

    case INDEX_op_movcond_i32:
        return C_O1_I4(r, r, ri, rI, r);
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rI, r);

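    /*
     * The 'o' constraint below is intended to request the odd register
     * of the even/odd pair used by DSGR/DLGR/MLGR/MGRK (see the matching
     * asserts in tcg_out_op, and tcg-target-con-str.h for the letter
     * definitions).
     */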
    case INDEX_op_div2_i32:
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i32:
    case INDEX_op_divu2_i64:
        return C_O2_I3(o, m, 0, 1, r);

    case INDEX_op_mulu2_i64:
        return C_O2_I2(o, m, 0, r);
    case INDEX_op_muls2_i64:
        return C_O2_I2(o, m, r, r);

    case INDEX_op_add2_i32:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, 0, 1, ri, r);

    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, 0, 1, rA, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(v, vr);
    case INDEX_op_abs_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_not_vec:
    case INDEX_op_rotli_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_s390_vuph_vec:
    case INDEX_op_s390_vupl_vec:
        return C_O1_I1(v, v);
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_or_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_s390_vpks_vec:
        return C_O1_I2(v, v, v);
    case INDEX_op_rotls_vec:
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return C_O1_I2(v, v, r);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(v, v, v, v);

    default:
        g_assert_not_reached();
    }
}

/*
 * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
 * Some distros have fixed this up locally, others have not.
 */
#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048
#endif

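/*
 * STFLE stores the facility list MSB-first: facility bit N lives in
 * doubleword N / 64 at bit position 63 - (N % 64), which is what the
 * HAVE_FACILITY() macro accounts for when indexing s390_facilities[].
 */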
static void query_s390_facilities(void)
{
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
    const char *which;

    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
       is present on all 64-bit systems, but let's check for it anyway.  */
    if (hwcap & HWCAP_S390_STFLE) {
        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
        register void *r1 __asm__("1") = s390_facilities;

        /* stfle 0(%r1) */
        asm volatile(".word 0xb2b0,0x1000"
                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
    }

    /*
     * Use of vector registers requires os support beyond the facility bit.
     * If the kernel does not advertise support, disable the facility bits.
     * There is nothing else we currently care about in the 3rd word, so
     * disable VECTOR with one store.
     */
    if (!(hwcap & HWCAP_S390_VXRS)) {
        s390_facilities[2] = 0;
    }

    /*
     * Minimum supported cpu revision is z196.
     * Check for all required facilities.
     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
     */
    if (!HAVE_FACILITY(LONG_DISP)) {
        which = "long-displacement";
        goto fail;
    }
    if (!HAVE_FACILITY(EXT_IMM)) {
        which = "extended-immediate";
        goto fail;
    }
    if (!HAVE_FACILITY(GEN_INST_EXT)) {
        which = "general-instructions-extension";
        goto fail;
    }
    /*
     * Facility 45 is a big bin that contains: distinct-operands,
     * fast-BCR-serialization, high-word, population-count,
     * interlocked-access-1, and load/store-on-condition-1
     */
    if (!HAVE_FACILITY(45)) {
        which = "45";
        goto fail;
    }
    return;

 fail:
    error_report("%s: missing required facility %s", __func__, which);
    exit(EXIT_FAILURE);
}

static void tcg_target_init(TCGContext *s)
{
    query_s390_facilities();

    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
    if (HAVE_FACILITY(VECTOR)) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    /* The r6 register is technically call-saved, but it's also a parameter
       register, so it can get killed by setup for the qemu_st helper. */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    /* The return register can be considered call-clobbered. */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
    /* XXX many insns can't be used with R0, so we better avoid it for now */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
}

#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
                           + TCG_STATIC_CALL_ARGS_SIZE           \
                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))

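/*
 * FRAME_SIZE covers the register save/parameter area that the s390x ELF
 * ABI reserves for callees (TCG_TARGET_CALL_STACK_OFFSET), the static
 * area for helper-call arguments, and the TCG temporary buffer; the
 * prologue below carves it out with a single aghi on %r15.
 */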
static void tcg_target_qemu_prologue(TCGContext *s)
{
    /* stmg %r6,%r15,48(%r15) (save registers) */
    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);

    /* aghi %r15,-frame_size */
    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);

    tcg_set_frame(s, TCG_REG_CALL_STACK,
                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#ifndef CONFIG_SOFTMMU
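    /*
     * A guest_base smaller than 0x80000 fits in the signed 20-bit
     * displacement of a long-displacement memory operand and so can be
     * folded into each access; only larger values need a reserved
     * register.
     */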
    if (guest_base >= 0x80000) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    /* br %r3 (go to TB) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
                 FRAME_SIZE + 48);

    /* br %r14 (return) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
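    /* Fill with 0x07 bytes: each halfword 0x0707 decodes as bcr 0,%r7,
       a branch that is never taken, i.e. a nop. */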
    memset(p, 0x07, count * sizeof(tcg_insn_unit));
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
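/*
 * Each uleb128 byte encodes 7 value bits, so two bytes cover values up
 * to (1 << 14) - 1; the check above guarantees FRAME_SIZE fits.
 */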
#define ELF_HOST_MACHINE  EM_S390

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 8,                /* sleb128 8 */
    .h.cie.return_column = TCG_REG_R14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}