]> git.proxmox.com Git - mirror_qemu.git/blob - tcg/sparc64/tcg-target.c.inc
tcg: Split out tcg_out_goto_tb
[mirror_qemu.git] / tcg / sparc64 / tcg-target.c.inc
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
/*
 * We only support generating code for 64-bit mode: the build is
 * rejected unless the compiler defines __arch64__.
 */
#ifndef __arch64__
#error "unsupported code generation mode"
#endif
29
30 #include "../tcg-pool.c.inc"
31
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable names for the host registers, one entry per register:
 * the eight %g registers, then %o, %l and %i.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
    "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%o6", "%o7",
    "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
    "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%i6", "%i7",
};
#endif
68
/*
 * Target-specific constant constraint bits, tested by
 * tcg_target_const_match(): value fits in a signed 11-bit field,
 * fits in a signed 13-bit field, or is exactly zero.
 */
#define TCG_CT_CONST_S11  0x100
#define TCG_CT_CONST_S13  0x200
#define TCG_CT_CONST_ZERO 0x400

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
/* Register sets: all 32 registers, and those usable for qemu_ld/st. */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 32)
#define ALL_QLDST_REGS       (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)

/* Define some temporary registers.  T2 is used for constant generation.  */
#define TCG_REG_T1  TCG_REG_G1
#define TCG_REG_T2  TCG_REG_O7

/* Only defined for user-only builds (no CONFIG_SOFTMMU). */
#ifndef CONFIG_SOFTMMU
# define TCG_GUEST_BASE_REG TCG_REG_I5
#endif

/*
 * Register used as the base for TB-relative addressing; it is only
 * used when host pointers are wider than 4 bytes.
 */
#define TCG_REG_TB  TCG_REG_I1
#define USE_REG_TB  (sizeof(void *) > 4)
96
97 static const int tcg_target_reg_alloc_order[] = {
98 TCG_REG_L0,
99 TCG_REG_L1,
100 TCG_REG_L2,
101 TCG_REG_L3,
102 TCG_REG_L4,
103 TCG_REG_L5,
104 TCG_REG_L6,
105 TCG_REG_L7,
106
107 TCG_REG_I0,
108 TCG_REG_I1,
109 TCG_REG_I2,
110 TCG_REG_I3,
111 TCG_REG_I4,
112 TCG_REG_I5,
113
114 TCG_REG_G2,
115 TCG_REG_G3,
116 TCG_REG_G4,
117 TCG_REG_G5,
118
119 TCG_REG_O0,
120 TCG_REG_O1,
121 TCG_REG_O2,
122 TCG_REG_O3,
123 TCG_REG_O4,
124 TCG_REG_O5,
125 };
126
127 static const int tcg_target_call_iarg_regs[6] = {
128 TCG_REG_O0,
129 TCG_REG_O1,
130 TCG_REG_O2,
131 TCG_REG_O3,
132 TCG_REG_O4,
133 TCG_REG_O5,
134 };
135
136 static const int tcg_target_call_oarg_regs[] = {
137 TCG_REG_O0,
138 TCG_REG_O1,
139 TCG_REG_O2,
140 TCG_REG_O3,
141 };
142
/* Shift an instruction field value into its position in the 32-bit word. */
#define INSN_OP(x)  ((x) << 30)
#define INSN_OP2(x) ((x) << 22)
#define INSN_OP3(x) ((x) << 19)
#define INSN_OPF(x) ((x) << 5)
#define INSN_RD(x)  ((x) << 25)
#define INSN_RS1(x) ((x) << 14)
#define INSN_RS2(x) (x)
#define INSN_ASI(x) ((x) << 5)

/* Immediate operands: bit 13 is set and X is truncated to the field width. */
#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
/*
 * Branch displacements take a byte offset X and encode it in words.
 * OFF16 is split: the low 14 bits go in bits [13:0] and the next
 * two bits in bits [21:20].
 */
#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
#define INSN_COND(x) ((x) << 25)
158
/* Condition codes, as used via tcg_cond_to_bcond[] for branches and MOVcc. */
#define COND_N     0x0
#define COND_E     0x1
#define COND_LE    0x2
#define COND_L     0x3
#define COND_LEU   0x4
#define COND_CS    0x5
#define COND_NEG   0x6
#define COND_VS    0x7
#define COND_A     0x8
#define COND_NE    0x9
#define COND_G     0xa
#define COND_GE    0xb
#define COND_GU    0xc
#define COND_CC    0xd
#define COND_POS   0xe
#define COND_VC    0xf
/* Branch with condition COND_A. */
#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))

/* Register-vs-zero conditions, as used via tcg_cond_to_rcond[]. */
#define RCOND_Z    1
#define RCOND_LEZ  2
#define RCOND_LZ   3
#define RCOND_NZ   5
#define RCOND_GZ   6
#define RCOND_GEZ  7

/* Condition-code selectors for tcg_out_movcc(). */
#define MOVCC_ICC  (1 << 18)
#define MOVCC_XCC  (1 << 18 | 1 << 12)

/* Flag bits for tcg_out_bpcc0(): condition-code selector, PT/PN, and A. */
#define BPCC_ICC   0
#define BPCC_XCC   (2 << 20)
#define BPCC_PT    (1 << 19)
#define BPCC_PN    0
#define BPCC_A     (1 << 29)

/* The register-compare branch uses the same PT bit. */
#define BPR_PT     BPCC_PT
/* Opcode templates for the arithmetic and logical instructions (op = 2). */
#define ARITH_ADD   (INSN_OP(2) | INSN_OP3(0x00))
#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
#define ARITH_AND   (INSN_OP(2) | INSN_OP3(0x01))
#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11))
#define ARITH_ANDN  (INSN_OP(2) | INSN_OP3(0x05))
#define ARITH_OR    (INSN_OP(2) | INSN_OP3(0x02))
#define ARITH_ORCC  (INSN_OP(2) | INSN_OP3(0x12))
#define ARITH_ORN   (INSN_OP(2) | INSN_OP3(0x06))
#define ARITH_XOR   (INSN_OP(2) | INSN_OP3(0x03))
#define ARITH_SUB   (INSN_OP(2) | INSN_OP3(0x04))
#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
#define ARITH_ADDC  (INSN_OP(2) | INSN_OP3(0x08))
#define ARITH_SUBC  (INSN_OP(2) | INSN_OP3(0x0c))
#define ARITH_UMUL  (INSN_OP(2) | INSN_OP3(0x0a))
#define ARITH_SMUL  (INSN_OP(2) | INSN_OP3(0x0b))
#define ARITH_UDIV  (INSN_OP(2) | INSN_OP3(0x0e))
#define ARITH_SDIV  (INSN_OP(2) | INSN_OP3(0x0f))
#define ARITH_MULX  (INSN_OP(2) | INSN_OP3(0x09))
#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
#define ARITH_MOVR  (INSN_OP(2) | INSN_OP3(0x2f))

/* These two are only emitted when use_vis3_instructions is set. */
#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))

/* Shifts; the X variants differ only in having bit 12 set. */
#define SHIFT_SLL   (INSN_OP(2) | INSN_OP3(0x25))
#define SHIFT_SRL   (INSN_OP(2) | INSN_OP3(0x26))
#define SHIFT_SRA   (INSN_OP(2) | INSN_OP3(0x27))

#define SHIFT_SLLX  (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
#define SHIFT_SRLX  (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
#define SHIFT_SRAX  (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
228
/* Y register access, control transfer and register-window instructions. */
#define RDY         (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
#define WRY         (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
#define JMPL        (INSN_OP(2) | INSN_OP3(0x38))
#define RETURN      (INSN_OP(2) | INSN_OP3(0x39))
#define SAVE        (INSN_OP(2) | INSN_OP3(0x3c))
#define RESTORE     (INSN_OP(2) | INSN_OP3(0x3d))
#define SETHI       (INSN_OP(0) | INSN_OP2(0x4))
#define CALL        INSN_OP(1)
/* Loads and stores (op = 3). */
#define LDUB        (INSN_OP(3) | INSN_OP3(0x01))
#define LDSB        (INSN_OP(3) | INSN_OP3(0x09))
#define LDUH        (INSN_OP(3) | INSN_OP3(0x02))
#define LDSH        (INSN_OP(3) | INSN_OP3(0x0a))
#define LDUW        (INSN_OP(3) | INSN_OP3(0x00))
#define LDSW        (INSN_OP(3) | INSN_OP3(0x08))
#define LDX         (INSN_OP(3) | INSN_OP3(0x0b))
#define STB         (INSN_OP(3) | INSN_OP3(0x05))
#define STH         (INSN_OP(3) | INSN_OP3(0x06))
#define STW         (INSN_OP(3) | INSN_OP3(0x04))
#define STX         (INSN_OP(3) | INSN_OP3(0x0e))
/* Alternate-space variants; combined with INSN_ASI() below. */
#define LDUBA       (INSN_OP(3) | INSN_OP3(0x11))
#define LDSBA       (INSN_OP(3) | INSN_OP3(0x19))
#define LDUHA       (INSN_OP(3) | INSN_OP3(0x12))
#define LDSHA       (INSN_OP(3) | INSN_OP3(0x1a))
#define LDUWA       (INSN_OP(3) | INSN_OP3(0x10))
#define LDSWA       (INSN_OP(3) | INSN_OP3(0x18))
#define LDXA        (INSN_OP(3) | INSN_OP3(0x1b))
#define STBA        (INSN_OP(3) | INSN_OP3(0x15))
#define STHA        (INSN_OP(3) | INSN_OP3(0x16))
#define STWA        (INSN_OP(3) | INSN_OP3(0x14))
#define STXA        (INSN_OP(3) | INSN_OP3(0x1e))

/* Memory barrier; tcg_out_mb() ORs the ordering bits into the low bits. */
#define MEMBAR      (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))

/* A SETHI of zero into %g0 serves as the no-op. */
#define NOP         (SETHI | INSN_RD(TCG_REG_G0) | 0)

#ifndef ASI_PRIMARY_LITTLE
#define ASI_PRIMARY_LITTLE 0x88
#endif

/* Little-endian accesses, built from the alternate-space forms. */
#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))

#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
277
/*
 * Flag selecting the ADDXC-based code paths below.  The variable is only
 * defined here when use_vis3_instructions is not already a macro.
 */
#ifndef use_vis3_instructions
bool use_vis3_instructions;
#endif
281
/* Return true if VAL survives sign-extension from its low BITS bits. */
static bool check_fit_i64(int64_t val, unsigned int bits)
{
    int64_t narrowed = sextract64(val, 0, bits);

    return narrowed == val;
}
286
/* Return true if VAL survives sign-extension from its low BITS bits. */
static bool check_fit_i32(int32_t val, unsigned int bits)
{
    int32_t narrowed = sextract32(val, 0, bits);

    return narrowed == val;
}

/* Target-long and pointer values are both checked as 64-bit quantities. */
#define check_fit_tl  check_fit_i64
#define check_fit_ptr check_fit_i64
294
295 static bool patch_reloc(tcg_insn_unit *src_rw, int type,
296 intptr_t value, intptr_t addend)
297 {
298 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
299 uint32_t insn = *src_rw;
300 intptr_t pcrel;
301
302 value += addend;
303 pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx);
304
305 switch (type) {
306 case R_SPARC_WDISP16:
307 if (!check_fit_ptr(pcrel >> 2, 16)) {
308 return false;
309 }
310 insn &= ~INSN_OFF16(-1);
311 insn |= INSN_OFF16(pcrel);
312 break;
313 case R_SPARC_WDISP19:
314 if (!check_fit_ptr(pcrel >> 2, 19)) {
315 return false;
316 }
317 insn &= ~INSN_OFF19(-1);
318 insn |= INSN_OFF19(pcrel);
319 break;
320 case R_SPARC_13:
321 if (!check_fit_ptr(value, 13)) {
322 return false;
323 }
324 insn &= ~INSN_IMM13(-1);
325 insn |= INSN_IMM13(value);
326 break;
327 default:
328 g_assert_not_reached();
329 }
330
331 *src_rw = insn;
332 return true;
333 }
334
335 /* test if a constant matches the constraint */
336 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
337 {
338 if (ct & TCG_CT_CONST) {
339 return 1;
340 }
341
342 if (type == TCG_TYPE_I32) {
343 val = (int32_t)val;
344 }
345
346 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
347 return 1;
348 } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
349 return 1;
350 } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
351 return 1;
352 } else {
353 return 0;
354 }
355 }
356
357 static void tcg_out_nop(TCGContext *s)
358 {
359 tcg_out32(s, NOP);
360 }
361
362 static void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
363 TCGReg rs2, int op)
364 {
365 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
366 }
367
368 static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
369 int32_t offset, int op)
370 {
371 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
372 }
373
374 static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
375 int32_t val2, int val2const, int op)
376 {
377 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
378 | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
379 }
380
381 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
382 {
383 if (ret != arg) {
384 tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
385 }
386 return true;
387 }
388
389 static void tcg_out_mov_delay(TCGContext *s, TCGReg ret, TCGReg arg)
390 {
391 if (ret != arg) {
392 tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
393 } else {
394 tcg_out_nop(s);
395 }
396 }
397
398 static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
399 {
400 tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
401 }
402
403 static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
404 {
405 tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
406 }
407
408 static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
409 {
410 if (check_fit_i32(arg, 13)) {
411 /* A 13-bit constant sign-extended to 64-bits. */
412 tcg_out_movi_imm13(s, ret, arg);
413 } else {
414 /* A 32-bit constant zero-extended to 64 bits. */
415 tcg_out_sethi(s, ret, arg);
416 if (arg & 0x3ff) {
417 tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
418 }
419 }
420 }
421
422 static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
423 tcg_target_long arg, bool in_prologue,
424 TCGReg scratch)
425 {
426 tcg_target_long hi, lo = (int32_t)arg;
427 tcg_target_long test, lsb;
428
429 /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
430 if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
431 tcg_out_movi_imm32(s, ret, arg);
432 return;
433 }
434
435 /* A 13-bit constant sign-extended to 64-bits. */
436 if (check_fit_tl(arg, 13)) {
437 tcg_out_movi_imm13(s, ret, arg);
438 return;
439 }
440
441 /* A 13-bit constant relative to the TB. */
442 if (!in_prologue && USE_REG_TB) {
443 test = tcg_tbrel_diff(s, (void *)arg);
444 if (check_fit_ptr(test, 13)) {
445 tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD);
446 return;
447 }
448 }
449
450 /* A 32-bit constant sign-extended to 64-bits. */
451 if (arg == lo) {
452 tcg_out_sethi(s, ret, ~arg);
453 tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
454 return;
455 }
456
457 /* A 32-bit constant, shifted. */
458 lsb = ctz64(arg);
459 test = (tcg_target_long)arg >> lsb;
460 if (lsb > 10 && test == extract64(test, 0, 21)) {
461 tcg_out_sethi(s, ret, test << 10);
462 tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX);
463 return;
464 } else if (test == (uint32_t)test || test == (int32_t)test) {
465 tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch);
466 tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX);
467 return;
468 }
469
470 /* Use the constant pool, if possible. */
471 if (!in_prologue && USE_REG_TB) {
472 new_pool_label(s, arg, R_SPARC_13, s->code_ptr,
473 tcg_tbrel_diff(s, NULL));
474 tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB));
475 return;
476 }
477
478 /* A 64-bit constant decomposed into 2 32-bit pieces. */
479 if (check_fit_i32(lo, 13)) {
480 hi = (arg - lo) >> 32;
481 tcg_out_movi_imm32(s, ret, hi);
482 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
483 tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
484 } else {
485 hi = arg >> 32;
486 tcg_out_movi_imm32(s, ret, hi);
487 tcg_out_movi_imm32(s, scratch, lo);
488 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
489 tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
490 }
491 }
492
493 static void tcg_out_movi(TCGContext *s, TCGType type,
494 TCGReg ret, tcg_target_long arg)
495 {
496 tcg_debug_assert(ret != TCG_REG_T2);
497 tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
498 }
499
500 static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
501 TCGReg a2, int op)
502 {
503 tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
504 }
505
506 static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
507 intptr_t offset, int op)
508 {
509 if (check_fit_ptr(offset, 13)) {
510 tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
511 INSN_IMM13(offset));
512 } else {
513 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
514 tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
515 }
516 }
517
518 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
519 TCGReg arg1, intptr_t arg2)
520 {
521 tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
522 }
523
524 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
525 TCGReg arg1, intptr_t arg2)
526 {
527 tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
528 }
529
530 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
531 TCGReg base, intptr_t ofs)
532 {
533 if (val == 0) {
534 tcg_out_st(s, type, TCG_REG_G0, base, ofs);
535 return true;
536 }
537 return false;
538 }
539
540 static void tcg_out_sety(TCGContext *s, TCGReg rs)
541 {
542 tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
543 }
544
545 static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
546 int32_t val2, int val2const, int uns)
547 {
548 /* Load Y with the sign/zero extension of RS1 to 64-bits. */
549 if (uns) {
550 tcg_out_sety(s, TCG_REG_G0);
551 } else {
552 tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
553 tcg_out_sety(s, TCG_REG_T1);
554 }
555
556 tcg_out_arithc(s, rd, rs1, val2, val2const,
557 uns ? ARITH_UDIV : ARITH_SDIV);
558 }
559
560 static const uint8_t tcg_cond_to_bcond[] = {
561 [TCG_COND_EQ] = COND_E,
562 [TCG_COND_NE] = COND_NE,
563 [TCG_COND_LT] = COND_L,
564 [TCG_COND_GE] = COND_GE,
565 [TCG_COND_LE] = COND_LE,
566 [TCG_COND_GT] = COND_G,
567 [TCG_COND_LTU] = COND_CS,
568 [TCG_COND_GEU] = COND_CC,
569 [TCG_COND_LEU] = COND_LEU,
570 [TCG_COND_GTU] = COND_GU,
571 };
572
573 static const uint8_t tcg_cond_to_rcond[] = {
574 [TCG_COND_EQ] = RCOND_Z,
575 [TCG_COND_NE] = RCOND_NZ,
576 [TCG_COND_LT] = RCOND_LZ,
577 [TCG_COND_GT] = RCOND_GZ,
578 [TCG_COND_LE] = RCOND_LEZ,
579 [TCG_COND_GE] = RCOND_GEZ
580 };
581
582 static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
583 {
584 tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
585 }
586
587 static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
588 {
589 int off19 = 0;
590
591 if (l->has_value) {
592 off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
593 } else {
594 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
595 }
596 tcg_out_bpcc0(s, scond, flags, off19);
597 }
598
599 static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
600 {
601 tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
602 }
603
604 static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
605 int32_t arg2, int const_arg2, TCGLabel *l)
606 {
607 tcg_out_cmp(s, arg1, arg2, const_arg2);
608 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
609 tcg_out_nop(s);
610 }
611
612 static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
613 int32_t v1, int v1const)
614 {
615 tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
616 | INSN_RS1(tcg_cond_to_bcond[cond])
617 | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
618 }
619
620 static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
621 TCGReg c1, int32_t c2, int c2const,
622 int32_t v1, int v1const)
623 {
624 tcg_out_cmp(s, c1, c2, c2const);
625 tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
626 }
627
628 static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
629 int32_t arg2, int const_arg2, TCGLabel *l)
630 {
631 /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
632 if (arg2 == 0 && !is_unsigned_cond(cond)) {
633 int off16 = 0;
634
635 if (l->has_value) {
636 off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
637 } else {
638 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
639 }
640 tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
641 | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
642 } else {
643 tcg_out_cmp(s, arg1, arg2, const_arg2);
644 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
645 }
646 tcg_out_nop(s);
647 }
648
649 static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
650 int32_t v1, int v1const)
651 {
652 tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
653 | (tcg_cond_to_rcond[cond] << 10)
654 | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
655 }
656
657 static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
658 TCGReg c1, int32_t c2, int c2const,
659 int32_t v1, int v1const)
660 {
661 /* For 64-bit signed comparisons vs zero, we can avoid the compare.
662 Note that the immediate range is one bit smaller, so we must check
663 for that as well. */
664 if (c2 == 0 && !is_unsigned_cond(cond)
665 && (!v1const || check_fit_i32(v1, 10))) {
666 tcg_out_movr(s, cond, ret, c1, v1, v1const);
667 } else {
668 tcg_out_cmp(s, c1, c2, c2const);
669 tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
670 }
671 }
672
673 static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
674 TCGReg c1, int32_t c2, int c2const)
675 {
676 /* For 32-bit comparisons, we can play games with ADDC/SUBC. */
677 switch (cond) {
678 case TCG_COND_LTU:
679 case TCG_COND_GEU:
680 /* The result of the comparison is in the carry bit. */
681 break;
682
683 case TCG_COND_EQ:
684 case TCG_COND_NE:
685 /* For equality, we can transform to inequality vs zero. */
686 if (c2 != 0) {
687 tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
688 c2 = TCG_REG_T1;
689 } else {
690 c2 = c1;
691 }
692 c1 = TCG_REG_G0, c2const = 0;
693 cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
694 break;
695
696 case TCG_COND_GTU:
697 case TCG_COND_LEU:
698 /* If we don't need to load a constant into a register, we can
699 swap the operands on GTU/LEU. There's no benefit to loading
700 the constant into a temporary register. */
701 if (!c2const || c2 == 0) {
702 TCGReg t = c1;
703 c1 = c2;
704 c2 = t;
705 c2const = 0;
706 cond = tcg_swap_cond(cond);
707 break;
708 }
709 /* FALLTHRU */
710
711 default:
712 tcg_out_cmp(s, c1, c2, c2const);
713 tcg_out_movi_imm13(s, ret, 0);
714 tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
715 return;
716 }
717
718 tcg_out_cmp(s, c1, c2, c2const);
719 if (cond == TCG_COND_LTU) {
720 tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
721 } else {
722 tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
723 }
724 }
725
726 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
727 TCGReg c1, int32_t c2, int c2const)
728 {
729 if (use_vis3_instructions) {
730 switch (cond) {
731 case TCG_COND_NE:
732 if (c2 != 0) {
733 break;
734 }
735 c2 = c1, c2const = 0, c1 = TCG_REG_G0;
736 /* FALLTHRU */
737 case TCG_COND_LTU:
738 tcg_out_cmp(s, c1, c2, c2const);
739 tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
740 return;
741 default:
742 break;
743 }
744 }
745
746 /* For 64-bit signed comparisons vs zero, we can avoid the compare
747 if the input does not overlap the output. */
748 if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
749 tcg_out_movi_imm13(s, ret, 0);
750 tcg_out_movr(s, cond, ret, c1, 1, 1);
751 } else {
752 tcg_out_cmp(s, c1, c2, c2const);
753 tcg_out_movi_imm13(s, ret, 0);
754 tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
755 }
756 }
757
758 static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
759 TCGReg al, TCGReg ah, int32_t bl, int blconst,
760 int32_t bh, int bhconst, int opl, int oph)
761 {
762 TCGReg tmp = TCG_REG_T1;
763
764 /* Note that the low parts are fully consumed before tmp is set. */
765 if (rl != ah && (bhconst || rl != bh)) {
766 tmp = rl;
767 }
768
769 tcg_out_arithc(s, tmp, al, bl, blconst, opl);
770 tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
771 tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
772 }
773
774 static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
775 TCGReg al, TCGReg ah, int32_t bl, int blconst,
776 int32_t bh, int bhconst, bool is_sub)
777 {
778 TCGReg tmp = TCG_REG_T1;
779
780 /* Note that the low parts are fully consumed before tmp is set. */
781 if (rl != ah && (bhconst || rl != bh)) {
782 tmp = rl;
783 }
784
785 tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
786
787 if (use_vis3_instructions && !is_sub) {
788 /* Note that ADDXC doesn't accept immediates. */
789 if (bhconst && bh != 0) {
790 tcg_out_movi_imm13(s, TCG_REG_T2, bh);
791 bh = TCG_REG_T2;
792 }
793 tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
794 } else if (bh == TCG_REG_G0) {
795 /* If we have a zero, we can perform the operation in two insns,
796 with the arithmetic first, and a conditional move into place. */
797 if (rh == ah) {
798 tcg_out_arithi(s, TCG_REG_T2, ah, 1,
799 is_sub ? ARITH_SUB : ARITH_ADD);
800 tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
801 } else {
802 tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
803 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
804 }
805 } else {
806 /*
807 * Otherwise adjust BH as if there is carry into T2.
808 * Note that constant BH is constrained to 11 bits for the MOVCC,
809 * so the adjustment fits 12 bits.
810 */
811 if (bhconst) {
812 tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
813 } else {
814 tcg_out_arithi(s, TCG_REG_T2, bh, 1,
815 is_sub ? ARITH_SUB : ARITH_ADD);
816 }
817 /* ... smoosh T2 back to original BH if carry is clear ... */
818 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
819 /* ... and finally perform the arithmetic with the new operand. */
820 tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
821 }
822
823 tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
824 }
825
826 static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
827 bool in_prologue, bool tail_call)
828 {
829 uintptr_t desti = (uintptr_t)dest;
830
831 /* Be careful not to clobber %o7 for a tail call. */
832 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
833 desti & ~0xfff, in_prologue,
834 tail_call ? TCG_REG_G2 : TCG_REG_O7);
835 tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
836 TCG_REG_T1, desti & 0xfff, JMPL);
837 }
838
839 static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest,
840 bool in_prologue)
841 {
842 ptrdiff_t disp = tcg_pcrel_diff(s, dest);
843
844 if (disp == (int32_t)disp) {
845 tcg_out32(s, CALL | (uint32_t)disp >> 2);
846 } else {
847 tcg_out_jmpl_const(s, dest, in_prologue, false);
848 }
849 }
850
851 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
852 const TCGHelperInfo *info)
853 {
854 tcg_out_call_nodelay(s, dest, false);
855 tcg_out_nop(s);
856 }
857
858 static void tcg_out_mb(TCGContext *s, TCGArg a0)
859 {
860 /* Note that the TCG memory order constants mirror the Sparc MEMBAR. */
861 tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
862 }
863
864 #ifdef CONFIG_SOFTMMU
/*
 * Executable addresses of the load/store helper trampolines emitted by
 * build_trampolines(), indexed by memory operation.
 */
static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
867
868 static void emit_extend(TCGContext *s, TCGReg r, int op)
869 {
870 /* Emit zero extend of 8, 16 or 32 bit data as
871 * required by the MO_* value op; do nothing for 64 bit.
872 */
873 switch (op & MO_SIZE) {
874 case MO_8:
875 tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
876 break;
877 case MO_16:
878 tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
879 tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
880 break;
881 case MO_32:
882 tcg_out_arith(s, r, r, 0, SHIFT_SRL);
883 break;
884 case MO_64:
885 break;
886 }
887 }
888
889 static void build_trampolines(TCGContext *s)
890 {
891 static void * const qemu_ld_helpers[] = {
892 [MO_UB] = helper_ret_ldub_mmu,
893 [MO_SB] = helper_ret_ldsb_mmu,
894 [MO_LEUW] = helper_le_lduw_mmu,
895 [MO_LESW] = helper_le_ldsw_mmu,
896 [MO_LEUL] = helper_le_ldul_mmu,
897 [MO_LEUQ] = helper_le_ldq_mmu,
898 [MO_BEUW] = helper_be_lduw_mmu,
899 [MO_BESW] = helper_be_ldsw_mmu,
900 [MO_BEUL] = helper_be_ldul_mmu,
901 [MO_BEUQ] = helper_be_ldq_mmu,
902 };
903 static void * const qemu_st_helpers[] = {
904 [MO_UB] = helper_ret_stb_mmu,
905 [MO_LEUW] = helper_le_stw_mmu,
906 [MO_LEUL] = helper_le_stl_mmu,
907 [MO_LEUQ] = helper_le_stq_mmu,
908 [MO_BEUW] = helper_be_stw_mmu,
909 [MO_BEUL] = helper_be_stl_mmu,
910 [MO_BEUQ] = helper_be_stq_mmu,
911 };
912
913 int i;
914
915 for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
916 if (qemu_ld_helpers[i] == NULL) {
917 continue;
918 }
919
920 /* May as well align the trampoline. */
921 while ((uintptr_t)s->code_ptr & 15) {
922 tcg_out_nop(s);
923 }
924 qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
925
926 /* Set the retaddr operand. */
927 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
928 /* Tail call. */
929 tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
930 /* delay slot -- set the env argument */
931 tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
932 }
933
934 for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
935 if (qemu_st_helpers[i] == NULL) {
936 continue;
937 }
938
939 /* May as well align the trampoline. */
940 while ((uintptr_t)s->code_ptr & 15) {
941 tcg_out_nop(s);
942 }
943 qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
944
945 emit_extend(s, TCG_REG_O2, i);
946
947 /* Set the retaddr operand. */
948 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
949
950 /* Tail call. */
951 tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
952 /* delay slot -- set the env argument */
953 tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
954 }
955 }
956 #else
/*
 * Executable addresses of the trampolines to helper_unaligned_ld and
 * helper_unaligned_st, set by build_trampolines().
 */
static const tcg_insn_unit *qemu_unalign_ld_trampoline;
static const tcg_insn_unit *qemu_unalign_st_trampoline;
959
960 static void build_trampolines(TCGContext *s)
961 {
962 for (int ld = 0; ld < 2; ++ld) {
963 void *helper;
964
965 while ((uintptr_t)s->code_ptr & 15) {
966 tcg_out_nop(s);
967 }
968
969 if (ld) {
970 helper = helper_unaligned_ld;
971 qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
972 } else {
973 helper = helper_unaligned_st;
974 qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
975 }
976
977 /* Tail call. */
978 tcg_out_jmpl_const(s, helper, true, true);
979 /* delay slot -- set the env argument */
980 tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
981 }
982 }
983 #endif
984
985 /* Generate global QEMU prologue and epilogue code */
986 static void tcg_target_qemu_prologue(TCGContext *s)
987 {
988 int tmp_buf_size, frame_size;
989
990 /*
991 * The TCG temp buffer is at the top of the frame, immediately
992 * below the frame pointer. Use the logical (aligned) offset here;
993 * the stack bias is applied in temp_allocate_frame().
994 */
995 tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
996 tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size);
997
998 /*
999 * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
1000 * otherwise the minimal frame usable by callees.
1001 */
1002 frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
1003 frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
1004 frame_size += TCG_TARGET_STACK_ALIGN - 1;
1005 frame_size &= -TCG_TARGET_STACK_ALIGN;
1006 tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
1007 INSN_IMM13(-frame_size));
1008
1009 #ifndef CONFIG_SOFTMMU
1010 if (guest_base != 0) {
1011 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG,
1012 guest_base, true, TCG_REG_T1);
1013 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
1014 }
1015 #endif
1016
1017 /* We choose TCG_REG_TB such that no move is required. */
1018 if (USE_REG_TB) {
1019 QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1);
1020 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
1021 }
1022
1023 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
1024 /* delay slot */
1025 tcg_out_nop(s);
1026
1027 /* Epilogue for goto_ptr. */
1028 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
1029 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1030 /* delay slot */
1031 tcg_out_movi_imm13(s, TCG_REG_O0, 0);
1032
1033 build_trampolines(s);
1034 }
1035
1036 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
1037 {
1038 int i;
1039 for (i = 0; i < count; ++i) {
1040 p[i] = NOP;
1041 }
1042 }
1043
1044 #if defined(CONFIG_SOFTMMU)
1045
/*
 * We expect to use a 13-bit negative offset from ENV: the build fails
 * unless TLB_MASK_TABLE_OFS(0) lies in the range [-4096, 0].
 */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
1049
1050 /* Perform the TLB load and compare.
1051
1052 Inputs:
1053 ADDRLO and ADDRHI contain the possible two parts of the address.
1054
1055 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1056
1057 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1058 This should be offsetof addr_read or addr_write.
1059
1060 The result of the TLB comparison is in %[ix]cc. The sanitized address
1061 is in the returned register, maybe %o0. The TLB addend is in %o1. */
1062
1063 static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
1064 MemOp opc, int which)
1065 {
1066 int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1067 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1068 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1069 const TCGReg r0 = TCG_REG_O0;
1070 const TCGReg r1 = TCG_REG_O1;
1071 const TCGReg r2 = TCG_REG_O2;
1072 unsigned s_bits = opc & MO_SIZE;
1073 unsigned a_bits = get_alignment_bits(opc);
1074 tcg_target_long compare_mask;
1075
1076 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1077 tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
1078 tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
1079
1080 /* Extract the page index, shifted into place for tlb index. */
1081 tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
1082 SHIFT_SRL);
1083 tcg_out_arith(s, r2, r2, r0, ARITH_AND);
1084
1085 /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
1086 tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
1087
1088 /* Load the tlb comparator and the addend. */
1089 tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
1090 tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
1091
1092 /* Mask out the page offset, except for the required alignment.
1093 We don't support unaligned accesses. */
1094 if (a_bits < s_bits) {
1095 a_bits = s_bits;
1096 }
1097 compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
1098 if (check_fit_tl(compare_mask, 13)) {
1099 tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
1100 } else {
1101 tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
1102 tcg_out_arith(s, r2, addr, r2, ARITH_AND);
1103 }
1104 tcg_out_cmp(s, r0, r2, 0);
1105
1106 /* If the guest address must be zero-extended, do so now. */
1107 if (TARGET_LONG_BITS == 32) {
1108 tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
1109 return r0;
1110 }
1111 return addr;
1112 }
1113 #endif /* CONFIG_SOFTMMU */
1114
1115 static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
1116 [MO_UB] = LDUB,
1117 [MO_SB] = LDSB,
1118 [MO_UB | MO_LE] = LDUB,
1119 [MO_SB | MO_LE] = LDSB,
1120
1121 [MO_BEUW] = LDUH,
1122 [MO_BESW] = LDSH,
1123 [MO_BEUL] = LDUW,
1124 [MO_BESL] = LDSW,
1125 [MO_BEUQ] = LDX,
1126 [MO_BESQ] = LDX,
1127
1128 [MO_LEUW] = LDUH_LE,
1129 [MO_LESW] = LDSH_LE,
1130 [MO_LEUL] = LDUW_LE,
1131 [MO_LESL] = LDSW_LE,
1132 [MO_LEUQ] = LDX_LE,
1133 [MO_LESQ] = LDX_LE,
1134 };
1135
1136 static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
1137 [MO_UB] = STB,
1138
1139 [MO_BEUW] = STH,
1140 [MO_BEUL] = STW,
1141 [MO_BEUQ] = STX,
1142
1143 [MO_LEUW] = STH_LE,
1144 [MO_LEUL] = STW_LE,
1145 [MO_LEUQ] = STX_LE,
1146 };
1147
1148 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1149 MemOpIdx oi, bool is_64)
1150 {
1151 MemOp memop = get_memop(oi);
1152 tcg_insn_unit *label_ptr;
1153
1154 #ifdef CONFIG_SOFTMMU
1155 unsigned memi = get_mmuidx(oi);
1156 TCGReg addrz;
1157 const tcg_insn_unit *func;
1158
1159 addrz = tcg_out_tlb_load(s, addr, memi, memop,
1160 offsetof(CPUTLBEntry, addr_read));
1161
1162 /* The fast path is exactly one insn. Thus we can perform the
1163 entire TLB Hit in the (annulled) delay slot of the branch
1164 over the TLB Miss case. */
1165
1166 /* beq,a,pt %[xi]cc, label0 */
1167 label_ptr = s->code_ptr;
1168 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1169 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1170 /* delay slot */
1171 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1172 qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1173
1174 /* TLB Miss. */
1175
1176 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
1177
1178 /* We use the helpers to extend SB and SW data, leaving the case
1179 of SL needing explicit extending below. */
1180 if ((memop & MO_SSIZE) == MO_SL) {
1181 func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1182 } else {
1183 func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1184 }
1185 tcg_debug_assert(func != NULL);
1186 tcg_out_call_nodelay(s, func, false);
1187 /* delay slot */
1188 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
1189
1190 /* We let the helper sign-extend SB and SW, but leave SL for here. */
1191 if (is_64 && (memop & MO_SSIZE) == MO_SL) {
1192 tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
1193 } else {
1194 tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
1195 }
1196
1197 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1198 #else
1199 TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
1200 unsigned a_bits = get_alignment_bits(memop);
1201 unsigned s_bits = memop & MO_SIZE;
1202 unsigned t_bits;
1203
1204 if (TARGET_LONG_BITS == 32) {
1205 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1206 addr = TCG_REG_T1;
1207 }
1208
1209 /*
1210 * Normal case: alignment equal to access size.
1211 */
1212 if (a_bits == s_bits) {
1213 tcg_out_ldst_rr(s, data, addr, index,
1214 qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1215 return;
1216 }
1217
1218 /*
1219 * Test for at least natural alignment, and assume most accesses
1220 * will be aligned -- perform a straight load in the delay slot.
1221 * This is required to preserve atomicity for aligned accesses.
1222 */
1223 t_bits = MAX(a_bits, s_bits);
1224 tcg_debug_assert(t_bits < 13);
1225 tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
1226
1227 /* beq,a,pt %icc, label */
1228 label_ptr = s->code_ptr;
1229 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
1230 /* delay slot */
1231 tcg_out_ldst_rr(s, data, addr, index,
1232 qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1233
1234 if (a_bits >= s_bits) {
1235 /*
1236 * Overalignment: A successful alignment test will perform the memory
1237 * operation in the delay slot, and failure need only invoke the
1238 * handler for SIGBUS.
1239 */
1240 tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
1241 /* delay slot -- move to low part of argument reg */
1242 tcg_out_mov_delay(s, TCG_REG_O1, addr);
1243 } else {
1244 /* Underalignment: load by pieces of minimum alignment. */
1245 int ld_opc, a_size, s_size, i;
1246
1247 /*
1248 * Force full address into T1 early; avoids problems with
1249 * overlap between @addr and @data.
1250 */
1251 tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
1252
1253 a_size = 1 << a_bits;
1254 s_size = 1 << s_bits;
1255 if ((memop & MO_BSWAP) == MO_BE) {
1256 ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
1257 tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
1258 ld_opc = qemu_ld_opc[a_bits | MO_BE];
1259 for (i = a_size; i < s_size; i += a_size) {
1260 tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
1261 tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
1262 tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
1263 }
1264 } else if (a_bits == 0) {
1265 ld_opc = LDUB;
1266 tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
1267 for (i = a_size; i < s_size; i += a_size) {
1268 if ((memop & MO_SIGN) && i == s_size - a_size) {
1269 ld_opc = LDSB;
1270 }
1271 tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
1272 tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
1273 tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
1274 }
1275 } else {
1276 ld_opc = qemu_ld_opc[a_bits | MO_LE];
1277 tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
1278 for (i = a_size; i < s_size; i += a_size) {
1279 tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
1280 if ((memop & MO_SIGN) && i == s_size - a_size) {
1281 ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
1282 }
1283 tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
1284 tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
1285 tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
1286 }
1287 }
1288 }
1289
1290 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1291 #endif /* CONFIG_SOFTMMU */
1292 }
1293
1294 static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1295 MemOpIdx oi)
1296 {
1297 MemOp memop = get_memop(oi);
1298 tcg_insn_unit *label_ptr;
1299
1300 #ifdef CONFIG_SOFTMMU
1301 unsigned memi = get_mmuidx(oi);
1302 TCGReg addrz;
1303 const tcg_insn_unit *func;
1304
1305 addrz = tcg_out_tlb_load(s, addr, memi, memop,
1306 offsetof(CPUTLBEntry, addr_write));
1307
1308 /* The fast path is exactly one insn. Thus we can perform the entire
1309 TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
1310 /* beq,a,pt %[xi]cc, label0 */
1311 label_ptr = s->code_ptr;
1312 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1313 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1314 /* delay slot */
1315 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1316 qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1317
1318 /* TLB Miss. */
1319
1320 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
1321 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data);
1322
1323 func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1324 tcg_debug_assert(func != NULL);
1325 tcg_out_call_nodelay(s, func, false);
1326 /* delay slot */
1327 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
1328
1329 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1330 #else
1331 TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
1332 unsigned a_bits = get_alignment_bits(memop);
1333 unsigned s_bits = memop & MO_SIZE;
1334 unsigned t_bits;
1335
1336 if (TARGET_LONG_BITS == 32) {
1337 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1338 addr = TCG_REG_T1;
1339 }
1340
1341 /*
1342 * Normal case: alignment equal to access size.
1343 */
1344 if (a_bits == s_bits) {
1345 tcg_out_ldst_rr(s, data, addr, index,
1346 qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1347 return;
1348 }
1349
1350 /*
1351 * Test for at least natural alignment, and assume most accesses
1352 * will be aligned -- perform a straight store in the delay slot.
1353 * This is required to preserve atomicity for aligned accesses.
1354 */
1355 t_bits = MAX(a_bits, s_bits);
1356 tcg_debug_assert(t_bits < 13);
1357 tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
1358
1359 /* beq,a,pt %icc, label */
1360 label_ptr = s->code_ptr;
1361 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
1362 /* delay slot */
1363 tcg_out_ldst_rr(s, data, addr, index,
1364 qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1365
1366 if (a_bits >= s_bits) {
1367 /*
1368 * Overalignment: A successful alignment test will perform the memory
1369 * operation in the delay slot, and failure need only invoke the
1370 * handler for SIGBUS.
1371 */
1372 tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
1373 /* delay slot -- move to low part of argument reg */
1374 tcg_out_mov_delay(s, TCG_REG_O1, addr);
1375 } else {
1376 /* Underalignment: store by pieces of minimum alignment. */
1377 int st_opc, a_size, s_size, i;
1378
1379 /*
1380 * Force full address into T1 early; avoids problems with
1381 * overlap between @addr and @data.
1382 */
1383 tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
1384
1385 a_size = 1 << a_bits;
1386 s_size = 1 << s_bits;
1387 if ((memop & MO_BSWAP) == MO_BE) {
1388 st_opc = qemu_st_opc[a_bits | MO_BE];
1389 for (i = 0; i < s_size; i += a_size) {
1390 TCGReg d = data;
1391 int shift = (s_size - a_size - i) * 8;
1392 if (shift) {
1393 d = TCG_REG_T2;
1394 tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
1395 }
1396 tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
1397 }
1398 } else if (a_bits == 0) {
1399 tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
1400 for (i = 1; i < s_size; i++) {
1401 tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
1402 tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
1403 }
1404 } else {
1405 /* Note that ST*A with immediate asi must use indexed address. */
1406 st_opc = qemu_st_opc[a_bits + MO_LE];
1407 tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
1408 for (i = a_size; i < s_size; i += a_size) {
1409 tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
1410 tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
1411 tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
1412 }
1413 }
1414 }
1415
1416 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1417 #endif /* CONFIG_SOFTMMU */
1418 }
1419
1420 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1421 {
1422 if (check_fit_ptr(a0, 13)) {
1423 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1424 tcg_out_movi_imm13(s, TCG_REG_O0, a0);
1425 return;
1426 } else if (USE_REG_TB) {
1427 intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
1428 if (check_fit_ptr(tb_diff, 13)) {
1429 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1430 /* Note that TCG_REG_TB has been unwound to O1. */
1431 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD);
1432 return;
1433 }
1434 }
1435 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
1436 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1437 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
1438 }
1439
1440 static void tcg_out_goto_tb(TCGContext *s, int which)
1441 {
1442 /* Direct jump. */
1443 if (USE_REG_TB) {
1444 /* make sure the patch is 8-byte aligned. */
1445 if ((intptr_t)s->code_ptr & 4) {
1446 tcg_out_nop(s);
1447 }
1448 set_jmp_insn_offset(s, which);
1449 tcg_out_sethi(s, TCG_REG_T1, 0);
1450 tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
1451 tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
1452 tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
1453 } else {
1454 set_jmp_insn_offset(s, which);
1455 tcg_out32(s, CALL);
1456 tcg_out_nop(s);
1457 }
1458 set_jmp_reset_offset(s, which);
1459
1460 /*
1461 * For the unlinked path of goto_tb, we need to reset TCG_REG_TB
1462 * to the beginning of this TB.
1463 */
1464 if (USE_REG_TB) {
1465 int c = -tcg_current_code_size(s);
1466 if (check_fit_i32(c, 13)) {
1467 tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
1468 } else {
1469 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
1470 tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
1471 }
1472 }
1473 }
1474
1475 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1476 const TCGArg args[TCG_MAX_OP_ARGS],
1477 const int const_args[TCG_MAX_OP_ARGS])
1478 {
1479 TCGArg a0, a1, a2;
1480 int c, c2;
1481
1482 /* Hoist the loads of the most common arguments. */
1483 a0 = args[0];
1484 a1 = args[1];
1485 a2 = args[2];
1486 c2 = const_args[2];
1487
1488 switch (opc) {
1489 case INDEX_op_goto_ptr:
1490 tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
1491 if (USE_REG_TB) {
1492 tcg_out_mov_delay(s, TCG_REG_TB, a0);
1493 } else {
1494 tcg_out_nop(s);
1495 }
1496 break;
1497 case INDEX_op_br:
1498 tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
1499 tcg_out_nop(s);
1500 break;
1501
1502 #define OP_32_64(x) \
1503 glue(glue(case INDEX_op_, x), _i32): \
1504 glue(glue(case INDEX_op_, x), _i64)
1505
1506 OP_32_64(ld8u):
1507 tcg_out_ldst(s, a0, a1, a2, LDUB);
1508 break;
1509 OP_32_64(ld8s):
1510 tcg_out_ldst(s, a0, a1, a2, LDSB);
1511 break;
1512 OP_32_64(ld16u):
1513 tcg_out_ldst(s, a0, a1, a2, LDUH);
1514 break;
1515 OP_32_64(ld16s):
1516 tcg_out_ldst(s, a0, a1, a2, LDSH);
1517 break;
1518 case INDEX_op_ld_i32:
1519 case INDEX_op_ld32u_i64:
1520 tcg_out_ldst(s, a0, a1, a2, LDUW);
1521 break;
1522 OP_32_64(st8):
1523 tcg_out_ldst(s, a0, a1, a2, STB);
1524 break;
1525 OP_32_64(st16):
1526 tcg_out_ldst(s, a0, a1, a2, STH);
1527 break;
1528 case INDEX_op_st_i32:
1529 case INDEX_op_st32_i64:
1530 tcg_out_ldst(s, a0, a1, a2, STW);
1531 break;
1532 OP_32_64(add):
1533 c = ARITH_ADD;
1534 goto gen_arith;
1535 OP_32_64(sub):
1536 c = ARITH_SUB;
1537 goto gen_arith;
1538 OP_32_64(and):
1539 c = ARITH_AND;
1540 goto gen_arith;
1541 OP_32_64(andc):
1542 c = ARITH_ANDN;
1543 goto gen_arith;
1544 OP_32_64(or):
1545 c = ARITH_OR;
1546 goto gen_arith;
1547 OP_32_64(orc):
1548 c = ARITH_ORN;
1549 goto gen_arith;
1550 OP_32_64(xor):
1551 c = ARITH_XOR;
1552 goto gen_arith;
1553 case INDEX_op_shl_i32:
1554 c = SHIFT_SLL;
1555 do_shift32:
1556 /* Limit immediate shift count lest we create an illegal insn. */
1557 tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
1558 break;
1559 case INDEX_op_shr_i32:
1560 c = SHIFT_SRL;
1561 goto do_shift32;
1562 case INDEX_op_sar_i32:
1563 c = SHIFT_SRA;
1564 goto do_shift32;
1565 case INDEX_op_mul_i32:
1566 c = ARITH_UMUL;
1567 goto gen_arith;
1568
1569 OP_32_64(neg):
1570 c = ARITH_SUB;
1571 goto gen_arith1;
1572 OP_32_64(not):
1573 c = ARITH_ORN;
1574 goto gen_arith1;
1575
1576 case INDEX_op_div_i32:
1577 tcg_out_div32(s, a0, a1, a2, c2, 0);
1578 break;
1579 case INDEX_op_divu_i32:
1580 tcg_out_div32(s, a0, a1, a2, c2, 1);
1581 break;
1582
1583 case INDEX_op_brcond_i32:
1584 tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1585 break;
1586 case INDEX_op_setcond_i32:
1587 tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
1588 break;
1589 case INDEX_op_movcond_i32:
1590 tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1591 break;
1592
1593 case INDEX_op_add2_i32:
1594 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1595 args[4], const_args[4], args[5], const_args[5],
1596 ARITH_ADDCC, ARITH_ADDC);
1597 break;
1598 case INDEX_op_sub2_i32:
1599 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1600 args[4], const_args[4], args[5], const_args[5],
1601 ARITH_SUBCC, ARITH_SUBC);
1602 break;
1603 case INDEX_op_mulu2_i32:
1604 c = ARITH_UMUL;
1605 goto do_mul2;
1606 case INDEX_op_muls2_i32:
1607 c = ARITH_SMUL;
1608 do_mul2:
1609 /* The 32-bit multiply insns produce a full 64-bit result. */
1610 tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
1611 tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1612 break;
1613
1614 case INDEX_op_qemu_ld_i32:
1615 tcg_out_qemu_ld(s, a0, a1, a2, false);
1616 break;
1617 case INDEX_op_qemu_ld_i64:
1618 tcg_out_qemu_ld(s, a0, a1, a2, true);
1619 break;
1620 case INDEX_op_qemu_st_i32:
1621 case INDEX_op_qemu_st_i64:
1622 tcg_out_qemu_st(s, a0, a1, a2);
1623 break;
1624
1625 case INDEX_op_ld32s_i64:
1626 tcg_out_ldst(s, a0, a1, a2, LDSW);
1627 break;
1628 case INDEX_op_ld_i64:
1629 tcg_out_ldst(s, a0, a1, a2, LDX);
1630 break;
1631 case INDEX_op_st_i64:
1632 tcg_out_ldst(s, a0, a1, a2, STX);
1633 break;
1634 case INDEX_op_shl_i64:
1635 c = SHIFT_SLLX;
1636 do_shift64:
1637 /* Limit immediate shift count lest we create an illegal insn. */
1638 tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
1639 break;
1640 case INDEX_op_shr_i64:
1641 c = SHIFT_SRLX;
1642 goto do_shift64;
1643 case INDEX_op_sar_i64:
1644 c = SHIFT_SRAX;
1645 goto do_shift64;
1646 case INDEX_op_mul_i64:
1647 c = ARITH_MULX;
1648 goto gen_arith;
1649 case INDEX_op_div_i64:
1650 c = ARITH_SDIVX;
1651 goto gen_arith;
1652 case INDEX_op_divu_i64:
1653 c = ARITH_UDIVX;
1654 goto gen_arith;
1655 case INDEX_op_ext_i32_i64:
1656 case INDEX_op_ext32s_i64:
1657 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
1658 break;
1659 case INDEX_op_extu_i32_i64:
1660 case INDEX_op_ext32u_i64:
1661 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
1662 break;
1663 case INDEX_op_extrl_i64_i32:
1664 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1665 break;
1666 case INDEX_op_extrh_i64_i32:
1667 tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
1668 break;
1669
1670 case INDEX_op_brcond_i64:
1671 tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1672 break;
1673 case INDEX_op_setcond_i64:
1674 tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
1675 break;
1676 case INDEX_op_movcond_i64:
1677 tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1678 break;
1679 case INDEX_op_add2_i64:
1680 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1681 const_args[4], args[5], const_args[5], false);
1682 break;
1683 case INDEX_op_sub2_i64:
1684 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1685 const_args[4], args[5], const_args[5], true);
1686 break;
1687 case INDEX_op_muluh_i64:
1688 tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
1689 break;
1690
1691 gen_arith:
1692 tcg_out_arithc(s, a0, a1, a2, c2, c);
1693 break;
1694
1695 gen_arith1:
1696 tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
1697 break;
1698
1699 case INDEX_op_mb:
1700 tcg_out_mb(s, a0);
1701 break;
1702
1703 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1704 case INDEX_op_mov_i64:
1705 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1706 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
1707 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
1708 default:
1709 tcg_abort();
1710 }
1711 }
1712
1713 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1714 {
1715 switch (op) {
1716 case INDEX_op_goto_ptr:
1717 return C_O0_I1(r);
1718
1719 case INDEX_op_ld8u_i32:
1720 case INDEX_op_ld8u_i64:
1721 case INDEX_op_ld8s_i32:
1722 case INDEX_op_ld8s_i64:
1723 case INDEX_op_ld16u_i32:
1724 case INDEX_op_ld16u_i64:
1725 case INDEX_op_ld16s_i32:
1726 case INDEX_op_ld16s_i64:
1727 case INDEX_op_ld_i32:
1728 case INDEX_op_ld32u_i64:
1729 case INDEX_op_ld32s_i64:
1730 case INDEX_op_ld_i64:
1731 case INDEX_op_neg_i32:
1732 case INDEX_op_neg_i64:
1733 case INDEX_op_not_i32:
1734 case INDEX_op_not_i64:
1735 case INDEX_op_ext32s_i64:
1736 case INDEX_op_ext32u_i64:
1737 case INDEX_op_ext_i32_i64:
1738 case INDEX_op_extu_i32_i64:
1739 case INDEX_op_extrl_i64_i32:
1740 case INDEX_op_extrh_i64_i32:
1741 return C_O1_I1(r, r);
1742
1743 case INDEX_op_st8_i32:
1744 case INDEX_op_st8_i64:
1745 case INDEX_op_st16_i32:
1746 case INDEX_op_st16_i64:
1747 case INDEX_op_st_i32:
1748 case INDEX_op_st32_i64:
1749 case INDEX_op_st_i64:
1750 return C_O0_I2(rZ, r);
1751
1752 case INDEX_op_add_i32:
1753 case INDEX_op_add_i64:
1754 case INDEX_op_mul_i32:
1755 case INDEX_op_mul_i64:
1756 case INDEX_op_div_i32:
1757 case INDEX_op_div_i64:
1758 case INDEX_op_divu_i32:
1759 case INDEX_op_divu_i64:
1760 case INDEX_op_sub_i32:
1761 case INDEX_op_sub_i64:
1762 case INDEX_op_and_i32:
1763 case INDEX_op_and_i64:
1764 case INDEX_op_andc_i32:
1765 case INDEX_op_andc_i64:
1766 case INDEX_op_or_i32:
1767 case INDEX_op_or_i64:
1768 case INDEX_op_orc_i32:
1769 case INDEX_op_orc_i64:
1770 case INDEX_op_xor_i32:
1771 case INDEX_op_xor_i64:
1772 case INDEX_op_shl_i32:
1773 case INDEX_op_shl_i64:
1774 case INDEX_op_shr_i32:
1775 case INDEX_op_shr_i64:
1776 case INDEX_op_sar_i32:
1777 case INDEX_op_sar_i64:
1778 case INDEX_op_setcond_i32:
1779 case INDEX_op_setcond_i64:
1780 return C_O1_I2(r, rZ, rJ);
1781
1782 case INDEX_op_brcond_i32:
1783 case INDEX_op_brcond_i64:
1784 return C_O0_I2(rZ, rJ);
1785 case INDEX_op_movcond_i32:
1786 case INDEX_op_movcond_i64:
1787 return C_O1_I4(r, rZ, rJ, rI, 0);
1788 case INDEX_op_add2_i32:
1789 case INDEX_op_add2_i64:
1790 case INDEX_op_sub2_i32:
1791 case INDEX_op_sub2_i64:
1792 return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
1793 case INDEX_op_mulu2_i32:
1794 case INDEX_op_muls2_i32:
1795 return C_O2_I2(r, r, rZ, rJ);
1796 case INDEX_op_muluh_i64:
1797 return C_O1_I2(r, r, r);
1798
1799 case INDEX_op_qemu_ld_i32:
1800 case INDEX_op_qemu_ld_i64:
1801 return C_O1_I1(r, s);
1802 case INDEX_op_qemu_st_i32:
1803 case INDEX_op_qemu_st_i64:
1804 return C_O0_I2(sZ, s);
1805
1806 default:
1807 g_assert_not_reached();
1808 }
1809 }
1810
1811 static void tcg_target_init(TCGContext *s)
1812 {
1813 /*
1814 * Only probe for the platform and capabilities if we haven't already
1815 * determined maximum values at compile time.
1816 */
1817 #ifndef use_vis3_instructions
1818 {
1819 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1820 use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
1821 }
1822 #endif
1823
1824 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
1825 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
1826
1827 tcg_target_call_clobber_regs = 0;
1828 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
1829 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2);
1830 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3);
1831 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4);
1832 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5);
1833 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6);
1834 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7);
1835 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0);
1836 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1);
1837 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2);
1838 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3);
1839 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4);
1840 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5);
1841 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6);
1842 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7);
1843
1844 s->reserved_regs = 0;
1845 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
1846 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
1847 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
1848 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
1849 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
1850 tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
1851 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
1852 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
1853 }
1854
1855 #define ELF_HOST_MACHINE EM_SPARCV9
1856
1857 typedef struct {
1858 DebugFrameHeader h;
1859 uint8_t fde_def_cfa[4];
1860 uint8_t fde_win_save;
1861 uint8_t fde_ret_save[3];
1862 } DebugFrame;
1863
1864 static const DebugFrame debug_frame = {
1865 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1866 .h.cie.id = -1,
1867 .h.cie.version = 1,
1868 .h.cie.code_align = 1,
1869 .h.cie.data_align = -sizeof(void *) & 0x7f,
1870 .h.cie.return_column = 15, /* o7 */
1871
1872 /* Total FDE size does not include the "len" member. */
1873 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1874
1875 .fde_def_cfa = {
1876 12, 30, /* DW_CFA_def_cfa i6, 2047 */
1877 (2047 & 0x7f) | 0x80, (2047 >> 7)
1878 },
1879 .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
1880 .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
1881 };
1882
1883 void tcg_register_jit(const void *buf, size_t buf_size)
1884 {
1885 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1886 }
1887
1888 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1889 uintptr_t jmp_rw, uintptr_t addr)
1890 {
1891 intptr_t tb_disp = addr - tc_ptr;
1892 intptr_t br_disp = addr - jmp_rx;
1893 tcg_insn_unit i1, i2;
1894
1895 /* We can reach the entire address space for ILP32.
1896 For LP64, the code_gen_buffer can't be larger than 2GB. */
1897 tcg_debug_assert(tb_disp == (int32_t)tb_disp);
1898 tcg_debug_assert(br_disp == (int32_t)br_disp);
1899
1900 if (!USE_REG_TB) {
1901 qatomic_set((uint32_t *)jmp_rw,
1902 deposit32(CALL, 0, 30, br_disp >> 2));
1903 flush_idcache_range(jmp_rx, jmp_rw, 4);
1904 return;
1905 }
1906
1907 /* This does not exercise the range of the branch, but we do
1908 still need to be able to load the new value of TCG_REG_TB.
1909 But this does still happen quite often. */
1910 if (check_fit_ptr(tb_disp, 13)) {
1911 /* ba,pt %icc, addr */
1912 i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
1913 | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
1914 i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
1915 | INSN_IMM13(tb_disp));
1916 } else if (tb_disp >= 0) {
1917 i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
1918 i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1919 | INSN_IMM13(tb_disp & 0x3ff));
1920 } else {
1921 i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
1922 i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1923 | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
1924 }
1925
1926 qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1));
1927 flush_idcache_range(jmp_rx, jmp_rw, 8);
1928 }