+ /* Test all bytes equal first. */
+ if (v64 == dup_const(MO_8, v64)) {
+ imm8 = (uint8_t)v64;
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
+ return;
+ }
+
+ /*
+ * Test all bytes 0x00 or 0xff second. This can match cases that
+ * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
+ */
+ for (i = imm8 = 0; i < 8; i++) {
+ uint8_t byte = v64 >> (i * 8);
+ if (byte == 0xff) {
+ imm8 |= 1 << i;
+ } else if (byte != 0) {
+ goto fail_bytes;
+ }
+ }
+ tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
+ return;
+ fail_bytes:
+
+ /*
+ * Tests for various replications. For each element width, if we
+ * cannot find an expansion there's no point checking a larger
+ * width because we already know by replication it cannot match.
+ */
+ if (v64 == dup_const(MO_16, v64)) {
+ uint16_t v16 = v64;
+
+ if (is_shimm16(v16, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm16(~v16, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Otherwise, all remaining constants can be loaded in two insns:
+ * rd = v16 & 0xff, rd |= v16 & 0xff00.
+ */
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
+ tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
+ return;
+ } else if (v64 == dup_const(MO_32, v64)) {
+ uint32_t v32 = v64;
+ uint32_t n32 = ~v32;
+
+ if (is_shimm32(v32, &cmode, &imm8) ||
+ is_soimm32(v32, &cmode, &imm8) ||
+ is_fimm32(v32, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm32(n32, &cmode, &imm8) ||
+ is_soimm32(n32, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Restrict the set of constants to those we can load with
+ * two instructions. Others we load from the pool.
+ */
+ i = is_shimm32_pair(v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
+ return;
+ }
+ i = is_shimm32_pair(n32, &cmode, &imm8);
+ if (i) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
+ return;
+ }
+ } else if (is_fimm64(v64, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * As a last resort, load from the constant pool. Sadly there
+ * is no LD1R (literal), so store the full 16-byte vector.
+ */
+ if (type == TCG_TYPE_V128) {