Merge tag 'pull-ppc-20230918' of https://gitlab.com/danielhb/qemu into staging
author      Stefan Hajnoczi <stefanha@redhat.com>
            Tue, 19 Sep 2023 17:22:02 +0000 (13:22 -0400)
committer   Stefan Hajnoczi <stefanha@redhat.com>
            Tue, 19 Sep 2023 17:22:02 +0000 (13:22 -0400)
ppc patch queue for 2023-09-18:

In this short queue we're making two important changes:

- Nicholas Piggin is now the qemu-ppc maintainer. Cédric Le Goater and
Daniel Barboza will act as backup during Nick's transition to this new
role.

- Support for NVIDIA V100 GPU with NVLink2 is dropped from qemu-ppc.
Linux removed the same support back in 5.13; we're following suit now.

A xive Coverity fix is also included.

# -----BEGIN PGP SIGNATURE-----
#
# iIwEABYKADQWIQQX6/+ZI9AYAK8oOBk82cqW3gMxZAUCZQhPnBYcZGFuaWVsaGI0
# MTNAZ21haWwuY29tAAoJEDzZypbeAzFk5QUBAJJNnCtv/SPP6bQVNGMgtfI9sz2z
# MEttDa7SINyLCiVxAP0Y9z8ZHEj6vhztTX0AAv2QubCKWIVbJZbPV5RWrHCEBQ==
# =y3nh
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 18 Sep 2023 09:24:44 EDT
# gpg:                using EDDSA key 17EBFF9923D01800AF2838193CD9CA96DE033164
# gpg:                issuer "danielhb413@gmail.com"
# gpg: Good signature from "Daniel Henrique Barboza <danielhb413@gmail.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 17EB FF99 23D0 1800 AF28  3819 3CD9 CA96 DE03 3164

* tag 'pull-ppc-20230918' of https://gitlab.com/danielhb/qemu:
  spapr: Remove support for NVIDIA V100 GPU with NVLink2
  ppc/xive: Fix uint32_t overflow
  MAINTAINERS: Nick Piggin PPC maintainer, other PPC changes

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
105 files changed:
MAINTAINERS
accel/tcg/cputlb.c
accel/tcg/tcg-accel-ops-mttcg.c
accel/tcg/tcg-runtime-gvec.c
accel/tcg/tcg-runtime.h
crypto/clmul.c [new file with mode: 0644]
crypto/meson.build
fpu/softfloat-parts.c.inc
fpu/softfloat.c
hmp-commands.hx
host/include/aarch64/host/cpuinfo.h
host/include/aarch64/host/crypto/clmul.h [new file with mode: 0644]
host/include/generic/host/crypto/clmul.h [new file with mode: 0644]
host/include/i386/host/cpuinfo.h
host/include/i386/host/crypto/clmul.h [new file with mode: 0644]
host/include/x86_64/host/crypto/clmul.h [new file with mode: 0644]
hw/core/machine.c
hw/net/e1000e_core.c
hw/net/fsl_etsec/rings.c
hw/net/igb_core.c
hw/net/igb_regs.h
hw/net/rocker/rocker_of_dpa.c
hw/net/trace-events
hw/net/vhost_net.c
hw/net/virtio-net.c
hw/net/vmxnet3.c
include/crypto/clmul.h [new file with mode: 0644]
include/exec/cpu-defs.h
include/exec/user/thunk.h
include/fpu/softfloat.h
include/hw/core/cpu.h
include/net/net.h
include/qemu/cpuid.h
include/qemu/plugin-memory.h
include/qemu/typedefs.h
include/tcg/tcg-op-gvec-common.h
meson.build
meson_options.txt
net/af-xdp.c [new file with mode: 0644]
net/clients.h
net/dump.c
net/meson.build
net/net.c
net/netmap.c
net/tap-bsd.c
net/tap-linux.c
net/tap-linux.h
net/tap-solaris.c
net/tap-stub.c
net/tap-win32.c
net/tap.c
net/tap_int.h
net/vhost-vdpa.c
plugins/api.c
qapi/net.json
qemu-options.hx
scripts/ci/org.centos/stream/8/x86_64/configure
scripts/meson-buildoptions.sh
softmmu/async-teardown.c
target/arm/tcg/mve_helper.c
target/arm/tcg/translate.c
target/arm/tcg/vec_helper.c
target/arm/tcg/vec_internal.h
target/i386/ops_sse.h
target/ppc/int_helper.c
target/s390x/tcg/vec_int_helper.c
tcg/aarch64/tcg-target.c.inc
tcg/arm/tcg-target.c.inc
tcg/i386/tcg-target.c.inc
tcg/loongarch64/tcg-insn-defs.c.inc
tcg/loongarch64/tcg-target-con-set.h
tcg/loongarch64/tcg-target-con-str.h
tcg/loongarch64/tcg-target.c.inc
tcg/loongarch64/tcg-target.h
tcg/loongarch64/tcg-target.opc.h [new file with mode: 0644]
tcg/mips/tcg-target.c.inc
tcg/ppc/tcg-target.c.inc
tcg/region.c
tcg/riscv/tcg-target.c.inc
tcg/s390x/tcg-target.c.inc
tcg/sparc64/tcg-target.c.inc
tcg/tcg-op-gvec.c
tcg/tcg.c
tcg/tci/tcg-target.c.inc
tests/docker/dockerfiles/alpine.docker
tests/docker/dockerfiles/centos8.docker
tests/docker/dockerfiles/debian-amd64-cross.docker
tests/docker/dockerfiles/debian-amd64.docker
tests/docker/dockerfiles/debian-arm64-cross.docker
tests/docker/dockerfiles/debian-armel-cross.docker
tests/docker/dockerfiles/debian-armhf-cross.docker
tests/docker/dockerfiles/debian-ppc64el-cross.docker
tests/docker/dockerfiles/debian-s390x-cross.docker
tests/docker/dockerfiles/fedora.docker
tests/docker/dockerfiles/opensuse-leap.docker
tests/docker/dockerfiles/ubuntu2004.docker
tests/docker/dockerfiles/ubuntu2204.docker
tests/lcitool/libvirt-ci
tests/lcitool/projects/qemu.yml
tests/qtest/libqos/igb.c
tests/tcg/m68k/Makefile.target
tests/tcg/m68k/denormal.c [new file with mode: 0644]
util/cpuinfo-aarch64.c
util/cpuinfo-i386.c
util/oslib-posix.c

index c4aa1c1c9f4c61e3b8b216609b06b5a97eaa7e05..d5773b1cbd70e4e14831e613eb0054029aa72dd3 100644 (file)
@@ -2951,6 +2951,10 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c
 
+AF_XDP network backend
+R: Ilya Maximets <i.maximets@ovn.org>
+F: net/af-xdp.c
+
 Host Memory Backends
 M: David Hildenbrand <david@redhat.com>
 M: Igor Mammedov <imammedo@redhat.com>
index c643d661909ec38f1c2d682f441b5ed080e7776d..3270f65c20e0dd5c037cd2dee79d64d24060a562 100644 (file)
@@ -1193,6 +1193,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     write_flags = read_flags;
     if (is_ram) {
         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
+        assert(!(iotlb & ~TARGET_PAGE_MASK));
         /*
          * Computing is_clean is expensive; avoid all that unless
          * the page is actually writable.
@@ -1255,16 +1256,18 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
 
     /* refill the tlb */
     /*
-     * At this point iotlb contains a physical section number in the lower
-     * TARGET_PAGE_BITS, and either
-     *  + the ram_addr_t of the page base of the target RAM (RAM)
-     *  + the offset within section->mr of the page base (I/O, ROMD)
+     * When memory region is ram, iotlb contains a TARGET_PAGE_BITS
+     * aligned ram_addr_t of the page base of the target RAM.
+     * Otherwise, iotlb contains
+     *  - a physical section number in the lower TARGET_PAGE_BITS
+     *  - the offset within section->mr of the page base (I/O, ROMD) with the
+     *    TARGET_PAGE_BITS masked off.
      * We subtract addr_page (which is page aligned and thus won't
      * disturb the low bits) to give an offset which can be added to the
      * (non-page-aligned) vaddr of the eventual memory access to get
      * the MemoryRegion offset for the access. Note that the vaddr we
      * subtract here is that of the page base, and not the same as the
-     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+     * vaddr we add back in io_prepare()/get_page_addr_code().
      */
     desc->fulltlb[index] = *full;
     full = &desc->fulltlb[index];
@@ -1347,116 +1350,41 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
                                           mmu_idx, retaddr);
 }
 
-static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
-                                          vaddr addr, unsigned size,
-                                          MMUAccessType access_type,
-                                          int mmu_idx, MemTxAttrs attrs,
-                                          MemTxResult response,
-                                          uintptr_t retaddr)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cpu->ignore_memory_transaction_failures &&
-        cc->tcg_ops->do_transaction_failed) {
-        cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
-                                           access_type, mmu_idx, attrs,
-                                           response, retaddr);
-    }
-}
-
-/*
- * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
- * because of the side effect of io_writex changing memory layout.
- */
-static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
-                            hwaddr mr_offset)
-{
-#ifdef CONFIG_PLUGIN
-    SavedIOTLB *saved = &cs->saved_iotlb;
-    saved->section = section;
-    saved->mr_offset = mr_offset;
-#endif
-}
-
-static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
-                         int mmu_idx, vaddr addr, uintptr_t retaddr,
-                         MMUAccessType access_type, MemOp op)
+static MemoryRegionSection *
+io_prepare(hwaddr *out_offset, CPUArchState *env, hwaddr xlat,
+           MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
 {
     CPUState *cpu = env_cpu(env);
-    hwaddr mr_offset;
     MemoryRegionSection *section;
-    MemoryRegion *mr;
-    uint64_t val;
-    MemTxResult r;
+    hwaddr mr_offset;
 
-    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
-    mr = section->mr;
-    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, xlat, attrs);
+    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
     if (!cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
 
-    /*
-     * The memory_region_dispatch may trigger a flush/resize
-     * so for plugins we save the iotlb_data just in case.
-     */
-    save_iotlb_data(cpu, section, mr_offset);
-
-    {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
-    }
-
-    if (r != MEMTX_OK) {
-        hwaddr physaddr = mr_offset +
-            section->offset_within_address_space -
-            section->offset_within_region;
-
-        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
-                               mmu_idx, full->attrs, r, retaddr);
-    }
-    return val;
+    *out_offset = mr_offset;
+    return section;
 }
 
-static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
-                      int mmu_idx, uint64_t val, vaddr addr,
-                      uintptr_t retaddr, MemOp op)
+static void io_failed(CPUArchState *env, CPUTLBEntryFull *full, vaddr addr,
+                      unsigned size, MMUAccessType access_type, int mmu_idx,
+                      MemTxResult response, uintptr_t retaddr)
 {
     CPUState *cpu = env_cpu(env);
-    hwaddr mr_offset;
-    MemoryRegionSection *section;
-    MemoryRegion *mr;
-    MemTxResult r;
-
-    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
-    mr = section->mr;
-    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
-    if (!cpu->can_do_io) {
-        cpu_io_recompile(cpu, retaddr);
-    }
-    cpu->mem_io_pc = retaddr;
 
-    /*
-     * The memory_region_dispatch may trigger a flush/resize
-     * so for plugins we save the iotlb_data just in case.
-     */
-    save_iotlb_data(cpu, section, mr_offset);
-
-    {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
-    }
+    if (!cpu->ignore_memory_transaction_failures) {
+        CPUClass *cc = CPU_GET_CLASS(cpu);
 
-    if (r != MEMTX_OK) {
-        hwaddr physaddr = mr_offset +
-            section->offset_within_address_space -
-            section->offset_within_region;
+        if (cc->tcg_ops->do_transaction_failed) {
+            hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
 
-        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
-                               MMU_DATA_STORE, mmu_idx, full->attrs, r,
-                               retaddr);
+            cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
+                                               access_type, mmu_idx,
+                                               full->attrs, response, retaddr);
+        }
     }
 }
 
@@ -1726,45 +1654,41 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
  * in the softmmu lookup code (or helper). We don't handle re-fills or
  * checking the victim table. This is purely informational.
  *
- * This almost never fails as the memory access being instrumented
- * should have just filled the TLB. The one corner case is io_writex
- * which can cause TLB flushes and potential resizing of the TLBs
- * losing the information we need. In those cases we need to recover
- * data from a copy of the CPUTLBEntryFull. As long as this always occurs
- * from the same thread (which a mem callback will be) this is safe.
+ * The one corner case is i/o write, which can cause changes to the
+ * address space.  Those changes, and the corresponding tlb flush,
+ * should be delayed until the next TB, so even then this ought not fail.
+ * But check, Just in Case.
  */
-
 bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
                        bool is_store, struct qemu_plugin_hwaddr *data)
 {
     CPUArchState *env = cpu->env_ptr;
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
-    uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
-
-    if (likely(tlb_hit(tlb_addr, addr))) {
-        /* We must have an iotlb entry for MMIO */
-        if (tlb_addr & TLB_MMIO) {
-            CPUTLBEntryFull *full;
-            full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-            data->is_io = true;
-            data->v.io.section =
-                iotlb_to_section(cpu, full->xlat_section, full->attrs);
-            data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
-        } else {
-            data->is_io = false;
-            data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
-        }
-        return true;
-    } else {
-        SavedIOTLB *saved = &cpu->saved_iotlb;
+    MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
+    uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
+    CPUTLBEntryFull *full;
+
+    if (unlikely(!tlb_hit(tlb_addr, addr))) {
+        return false;
+    }
+
+    full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+    data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+
+    /* We must have an iotlb entry for MMIO */
+    if (tlb_addr & TLB_MMIO) {
+        MemoryRegionSection *section =
+            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
+                             full->attrs);
         data->is_io = true;
-        data->v.io.section = saved->section;
-        data->v.io.offset = saved->mr_offset;
-        return true;
+        data->mr = section->mr;
+    } else {
+        data->is_io = false;
+        data->mr = NULL;
     }
+    return true;
 }
-
 #endif
 
 /*
@@ -2084,47 +2008,90 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
  * Load @size bytes from @addr, which is memory-mapped i/o.
  * The bytes are concatenated in big-endian order with @ret_be.
  */
-static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
-                               uint64_t ret_be, vaddr addr, int size,
-                               int mmu_idx, MMUAccessType type, uintptr_t ra)
+static uint64_t int_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+                                uint64_t ret_be, vaddr addr, int size,
+                                int mmu_idx, MMUAccessType type, uintptr_t ra,
+                                MemoryRegion *mr, hwaddr mr_offset)
 {
-    uint64_t t;
-
-    tcg_debug_assert(size > 0 && size <= 8);
     do {
+        MemOp this_mop;
+        unsigned this_size;
+        uint64_t val;
+        MemTxResult r;
+
         /* Read aligned pieces up to 8 bytes. */
-        switch ((size | (int)addr) & 7) {
-        case 1:
-        case 3:
-        case 5:
-        case 7:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB);
-            ret_be = (ret_be << 8) | t;
-            size -= 1;
-            addr += 1;
-            break;
-        case 2:
-        case 6:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW);
-            ret_be = (ret_be << 16) | t;
-            size -= 2;
-            addr += 2;
-            break;
-        case 4:
-            t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL);
-            ret_be = (ret_be << 32) | t;
-            size -= 4;
-            addr += 4;
-            break;
-        case 0:
-            return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ);
-        default:
-            qemu_build_not_reached();
+        this_mop = ctz32(size | (int)addr | 8);
+        this_size = 1 << this_mop;
+        this_mop |= MO_BE;
+
+        r = memory_region_dispatch_read(mr, mr_offset, &val,
+                                        this_mop, full->attrs);
+        if (unlikely(r != MEMTX_OK)) {
+            io_failed(env, full, addr, this_size, type, mmu_idx, r, ra);
         }
+        if (this_size == 8) {
+            return val;
+        }
+
+        ret_be = (ret_be << (this_size * 8)) | val;
+        addr += this_size;
+        mr_offset += this_size;
+        size -= this_size;
     } while (size);
+
     return ret_be;
 }
 
+static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+                               uint64_t ret_be, vaddr addr, int size,
+                               int mmu_idx, MMUAccessType type, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t ret;
+
+    tcg_debug_assert(size > 0 && size <= 8);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    ret = int_ld_mmio_beN(env, full, ret_be, addr, size, mmu_idx,
+                          type, ra, mr, mr_offset);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
+static Int128 do_ld16_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full,
+                               uint64_t ret_be, vaddr addr, int size,
+                               int mmu_idx, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t a, b;
+
+    tcg_debug_assert(size > 8 && size <= 16);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    a = int_ld_mmio_beN(env, full, ret_be, addr, size - 8, mmu_idx,
+                        MMU_DATA_LOAD, ra, mr, mr_offset);
+    b = int_ld_mmio_beN(env, full, ret_be, addr + size - 8, 8, mmu_idx,
+                        MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
+    qemu_mutex_unlock_iothread();
+
+    return int128_make128(b, a);
+}
+
 /**
  * do_ld_bytes_beN
  * @p: translation parameters
@@ -2267,7 +2234,6 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
     unsigned tmp, half_size;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size,
                               mmu_idx, type, ra);
     }
@@ -2318,12 +2284,7 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
     MemOp atom;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8,
-                           mmu_idx, MMU_DATA_LOAD, ra);
-        b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8,
-                           mmu_idx, MMU_DATA_LOAD, ra);
-        return int128_make128(b, a);
+        return do_ld16_mmio_beN(env, p->full, a, p->addr, size, mmu_idx, ra);
     }
 
     /*
@@ -2368,7 +2329,7 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
                        MMUAccessType type, uintptr_t ra)
 {
     if (unlikely(p->flags & TLB_MMIO)) {
-        return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+        return do_ld_mmio_beN(env, p->full, 0, p->addr, 1, mmu_idx, type, ra);
     } else {
         return *(uint8_t *)p->haddr;
     }
@@ -2380,7 +2341,6 @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint16_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap16(ret);
@@ -2401,7 +2361,6 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint32_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap32(ret);
@@ -2422,7 +2381,6 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
     uint64_t ret;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra);
         if ((memop & MO_BSWAP) == MO_LE) {
             ret = bswap64(ret);
@@ -2581,12 +2539,8 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
     if (likely(!crosspage)) {
         if (unlikely(l.page[0].flags & TLB_MMIO)) {
-            QEMU_IOTHREAD_LOCK_GUARD();
-            a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8,
-                               l.mmu_idx, MMU_DATA_LOAD, ra);
-            b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8,
-                               l.mmu_idx, MMU_DATA_LOAD, ra);
-            ret = int128_make128(b, a);
+            ret = do_ld16_mmio_beN(env, l.page[0].full, 0, addr, 16,
+                                   l.mmu_idx, ra);
             if ((l.memop & MO_BSWAP) == MO_LE) {
                 ret = bswap128(ret);
             }
@@ -2727,48 +2681,90 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
  * The bytes to store are extracted in little-endian order from @val_le;
  * return the bytes of @val_le beyond @p->size that have not been stored.
  */
-static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
-                               uint64_t val_le, vaddr addr, int size,
-                               int mmu_idx, uintptr_t ra)
+static uint64_t int_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+                                uint64_t val_le, vaddr addr, int size,
+                                int mmu_idx, uintptr_t ra,
+                                MemoryRegion *mr, hwaddr mr_offset)
 {
-    tcg_debug_assert(size > 0 && size <= 8);
-
     do {
+        MemOp this_mop;
+        unsigned this_size;
+        MemTxResult r;
+
         /* Store aligned pieces up to 8 bytes. */
-        switch ((size | (int)addr) & 7) {
-        case 1:
-        case 3:
-        case 5:
-        case 7:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB);
-            val_le >>= 8;
-            size -= 1;
-            addr += 1;
-            break;
-        case 2:
-        case 6:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW);
-            val_le >>= 16;
-            size -= 2;
-            addr += 2;
-            break;
-        case 4:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL);
-            val_le >>= 32;
-            size -= 4;
-            addr += 4;
-            break;
-        case 0:
-            io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ);
+        this_mop = ctz32(size | (int)addr | 8);
+        this_size = 1 << this_mop;
+        this_mop |= MO_LE;
+
+        r = memory_region_dispatch_write(mr, mr_offset, val_le,
+                                         this_mop, full->attrs);
+        if (unlikely(r != MEMTX_OK)) {
+            io_failed(env, full, addr, this_size, MMU_DATA_STORE,
+                      mmu_idx, r, ra);
+        }
+        if (this_size == 8) {
             return 0;
-        default:
-            qemu_build_not_reached();
         }
+
+        val_le >>= this_size * 8;
+        addr += this_size;
+        mr_offset += this_size;
+        size -= this_size;
     } while (size);
 
     return val_le;
 }
 
+static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+                               uint64_t val_le, vaddr addr, int size,
+                               int mmu_idx, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    hwaddr mr_offset;
+    MemoryRegion *mr;
+    MemTxAttrs attrs;
+    uint64_t ret;
+
+    tcg_debug_assert(size > 0 && size <= 8);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    ret = int_st_mmio_leN(env, full, val_le, addr, size, mmu_idx,
+                          ra, mr, mr_offset);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
+static uint64_t do_st16_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full,
+                                 Int128 val_le, vaddr addr, int size,
+                                 int mmu_idx, uintptr_t ra)
+{
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr mr_offset;
+    MemTxAttrs attrs;
+    uint64_t ret;
+
+    tcg_debug_assert(size > 8 && size <= 16);
+
+    attrs = full->attrs;
+    section = io_prepare(&mr_offset, env, full->xlat_section, attrs, addr, ra);
+    mr = section->mr;
+
+    qemu_mutex_lock_iothread();
+    int_st_mmio_leN(env, full, int128_getlo(val_le), addr, 8,
+                    mmu_idx, ra, mr, mr_offset);
+    ret = int_st_mmio_leN(env, full, int128_gethi(val_le), addr + 8,
+                          size - 8, mmu_idx, ra, mr, mr_offset + 8);
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
 /*
  * Wrapper for the above.
  */
@@ -2780,7 +2776,6 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
     unsigned tmp, half_size;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
         return do_st_mmio_leN(env, p->full, val_le, p->addr,
                               p->size, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
@@ -2835,11 +2830,8 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
     MemOp atom;
 
     if (unlikely(p->flags & TLB_MMIO)) {
-        QEMU_IOTHREAD_LOCK_GUARD();
-        do_st_mmio_leN(env, p->full, int128_getlo(val_le),
-                       p->addr, 8, mmu_idx, ra);
-        return do_st_mmio_leN(env, p->full, int128_gethi(val_le),
-                              p->addr + 8, size - 8, mmu_idx, ra);
+        return do_st16_mmio_leN(env, p->full, val_le, p->addr,
+                                size, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         return int128_gethi(val_le) >> ((size - 8) * 8);
     }
@@ -2883,7 +2875,7 @@ static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
                     int mmu_idx, uintptr_t ra)
 {
     if (unlikely(p->flags & TLB_MMIO)) {
-        io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+        do_st_mmio_leN(env, p->full, val, p->addr, 1, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
     } else {
@@ -2898,7 +2890,6 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap16(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -2918,7 +2909,6 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap32(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -2938,7 +2928,6 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
         if ((memop & MO_BSWAP) != MO_LE) {
             val = bswap64(val);
         }
-        QEMU_IOTHREAD_LOCK_GUARD();
         do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra);
     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
         /* nothing */
@@ -3066,11 +3055,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
             if ((l.memop & MO_BSWAP) != MO_LE) {
                 val = bswap128(val);
             }
-            a = int128_getlo(val);
-            b = int128_gethi(val);
-            QEMU_IOTHREAD_LOCK_GUARD();
-            do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra);
-            do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra);
+            do_st16_mmio_leN(env, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
         } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
             /* nothing */
         } else {
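
The replacement helpers int_ld_mmio_beN() and int_st_mmio_leN() above drop the old byte/word/long switch in favour of ctz32(size | (int)addr | 8), which selects the largest naturally aligned power-of-two access (at most 8 bytes) that still fits the remaining length. The following is a minimal standalone sketch of that chunking rule, not part of the patch, assuming a GCC/Clang host for __builtin_ctz (the builtin QEMU's ctz32() wraps):

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone illustration (not QEMU code) of the chunking rule above:
 * ctz32(size | addr | 8) is the log2 of the largest power-of-two access
 * that is <= 8 bytes, <= the remaining size, and naturally aligned at addr.
 */
int main(void)
{
    unsigned size = 7;          /* a 7-byte MMIO access ... */
    uintptr_t addr = 0x1001;    /* ... starting at an odd address */

    while (size) {
        unsigned lg = __builtin_ctz(size | (unsigned)addr | 8);
        unsigned this_size = 1u << lg;

        printf("access %u byte(s) at 0x%lx\n", this_size, (unsigned long)addr);
        addr += this_size;
        size -= this_size;
    }
    /* Prints pieces of 1, 2 and 4 bytes: each piece is aligned to its size. */
    return 0;
}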
index b2762620079ddd1670ea11fb6c0e78e971c97029..4b0dfb4be732c614770235e3187b66060d32525d 100644 (file)
@@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
                 break;
             case EXCP_HALTED:
                 /*
-                 * during start-up the vCPU is reset and the thread is
-                 * kicked several times. If we don't ensure we go back
-                 * to sleep in the halted state we won't cleanly
-                 * start-up when the vCPU is enabled.
-                 *
-                 * cpu->halted should ensure we sleep in wait_io_event
+                 * Usually cpu->halted is set, but may have already been
+                 * reset by another thread by the time we arrive here.
                  */
-                g_assert(cpu->halted);
                 break;
             case EXCP_ATOMIC:
                 qemu_mutex_unlock_iothread();
index 6c99f952caa750a87355a54607600b8bf3e4493b..afca89baa1cbc32c5e0cd4e6b1ba7bf751774f1a 100644 (file)
@@ -1042,6 +1042,32 @@ DO_CMP2(64)
 #undef DO_CMP1
 #undef DO_CMP2
 
+#define DO_CMP1(NAME, TYPE, OP)                                            \
+void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc)           \
+{                                                                          \
+    intptr_t oprsz = simd_oprsz(desc);                                     \
+    TYPE inv = simd_data(desc), b = b64;                                   \
+    for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) {                   \
+        *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv);               \
+    }                                                                      \
+    clear_high(d, oprsz, desc);                                            \
+}
+
+#define DO_CMP2(SZ) \
+    DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==)    \
+    DO_CMP1(gvec_lts##SZ, int##SZ##_t, <)      \
+    DO_CMP1(gvec_les##SZ, int##SZ##_t, <=)     \
+    DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <)    \
+    DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
+
+DO_CMP2(8)
+DO_CMP2(16)
+DO_CMP2(32)
+DO_CMP2(64)
+
+#undef DO_CMP1
+#undef DO_CMP2
+
 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
 {
     intptr_t oprsz = simd_oprsz(desc);
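
Each DO_CMP2(N) above instantiates DO_CMP1 once per comparison operator. For reference, within this same file DO_CMP1(gvec_eqs8, uint8_t, ==) expands to roughly the helper below: it compares every byte lane against the scalar passed in b64 and writes all-ones or all-zeros per lane, with the inv flag from simd_data() flipping the sense of the comparison.

void HELPER(gvec_eqs8)(void *d, void *a, uint64_t b64, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    uint8_t inv = simd_data(desc), b = b64;
    for (intptr_t i = 0; i < oprsz; i += sizeof(uint8_t)) {
        /* -(true ^ inv) is 0xff or 0x00, i.e. a per-lane mask. */
        *(uint8_t *)(d + i) = -((*(uint8_t *)(a + i) == b) ^ inv);
    }
    clear_high(d, oprsz, desc);
}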
index 186899a2c7ec5eb3206001001047eef9e018b1d2..c23b5e66c4631c36aba703f160e021896bf025c9 100644 (file)
@@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/crypto/clmul.c b/crypto/clmul.c
new file mode 100644 (file)
index 0000000..9e3e61a
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/clmul.h"
+
+uint64_t clmul_8x8_low(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0101010101010101ull) * 0xff;
+        r ^= m & mask;
+        m = (m << 1) & 0xfefefefefefefefeull;
+        n >>= 1;
+    }
+    return r;
+}
+
+static uint64_t clmul_8x4_even_int(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0001000100010001ull) * 0xffff;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_8x4_even(uint64_t n, uint64_t m)
+{
+    n &= 0x00ff00ff00ff00ffull;
+    m &= 0x00ff00ff00ff00ffull;
+    return clmul_8x4_even_int(n, m);
+}
+
+uint64_t clmul_8x4_odd(uint64_t n, uint64_t m)
+{
+    return clmul_8x4_even(n >> 8, m >> 8);
+}
+
+static uint64_t unpack_8_to_16(uint64_t x)
+{
+    return  (x & 0x000000ff)
+         | ((x & 0x0000ff00) << 8)
+         | ((x & 0x00ff0000) << 16)
+         | ((x & 0xff000000) << 24);
+}
+
+uint64_t clmul_8x4_packed(uint32_t n, uint32_t m)
+{
+    return clmul_8x4_even_int(unpack_8_to_16(n), unpack_8_to_16(m));
+}
+
+uint64_t clmul_16x2_even(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    n &= 0x0000ffff0000ffffull;
+    m &= 0x0000ffff0000ffffull;
+
+    for (int i = 0; i < 16; ++i) {
+        uint64_t mask = (n & 0x0000000100000001ull) * 0xffffffffull;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_16x2_odd(uint64_t n, uint64_t m)
+{
+    return clmul_16x2_even(n >> 16, m >> 16);
+}
+
+uint64_t clmul_32(uint32_t n, uint32_t m32)
+{
+    uint64_t r = 0;
+    uint64_t m = m32;
+
+    for (int i = 0; i < 32; ++i) {
+        r ^= n & 1 ? m : 0;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+Int128 clmul_64_gen(uint64_t n, uint64_t m)
+{
+    uint64_t rl = 0, rh = 0;
+
+    /* Bit 0 can only influence the low 64-bit result.  */
+    if (n & 1) {
+        rl = m;
+    }
+
+    for (int i = 1; i < 64; ++i) {
+        uint64_t mask = -((n >> i) & 1);
+        rl ^= (m << i) & mask;
+        rh ^= (m >> (64 - i)) & mask;
+    }
+    return int128_make128(rl, rh);
+}
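
Carry-less multiplication XORs shifted copies of one operand instead of adding them, so for example 0b11 ⊗ 0b11 = 0b11 ^ (0b11 << 1) = 0b101 = 5 rather than 9. The following is a minimal standalone check of that property, mirroring the clmul_32() loop above (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Same bit-by-bit loop as clmul_32() in the patch, kept standalone here. */
static uint64_t clmul32_ref(uint32_t n, uint32_t m32)
{
    uint64_t r = 0, m = m32;

    for (int i = 0; i < 32; ++i) {
        r ^= (n & 1) ? m : 0;
        n >>= 1;
        m <<= 1;
    }
    return r;
}

int main(void)
{
    /* XOR of shifted copies: 0b11 ^ (0b11 << 1) = 0b101. */
    assert(clmul32_ref(3, 3) == 0x5);
    /* When no shifted copies overlap, the result equals ordinary multiply. */
    assert(clmul32_ref(5, 3) == 15);
    printf("clmul checks passed\n");
    return 0;
}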
index 5f03a30d342d982ac3b58d1f96fd43f2a2e790cb..9ac1a898027a209bae638d3ecae861713cab4f58 100644 (file)
@@ -48,9 +48,12 @@ if have_afalg
 endif
 crypto_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c'))
 
-util_ss.add(files('sm4.c'))
-util_ss.add(files('aes.c'))
-util_ss.add(files('init.c'))
+util_ss.add(files(
+  'aes.c',
+  'clmul.c',
+  'init.c',
+  'sm4.c',
+))
 if gnutls.found()
   util_ss.add(gnutls)
 endif
index 527e15e6abbf7993b926a3ec000a4cd0f138cbe5..a44649f4f4a92a128a32776a2b0ec7db430722a1 100644 (file)
@@ -118,7 +118,8 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
         } else {
             int shift = frac_normalize(p);
             p->cls = float_class_normal;
-            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+            p->exp = fmt->frac_shift - fmt->exp_bias
+                   - shift + !fmt->m68k_denormal;
         }
     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
         p->cls = float_class_normal;
@@ -256,7 +257,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             is_tiny = !frac_addi(&discard, p, inc);
         }
 
-        frac_shrjam(p, 1 - exp);
+        frac_shrjam(p, !fmt->m68k_denormal - exp);
 
         if (p->frac_lo & round_mask) {
             /* Need to recompute round-to-even/round-to-odd. */
@@ -287,7 +288,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
             p->frac_lo &= ~round_mask;
         }
 
-        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
+        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
         frac_shr(p, frac_shift);
 
         if (is_tiny && (flags & float_flag_inexact)) {
index 0cc130ae9b27584a090a4f9b508048349743c2f8..027a8e576d368223235c77b67de650faf836f9ec 100644 (file)
@@ -517,6 +517,7 @@ typedef struct {
  *   round_mask: bits below lsb which must be rounded
  * The following optional modifiers are available:
  *   arm_althp: handle ARM Alternative Half Precision
+ *   m68k_denormal: explicit integer bit for extended precision may be 1
  */
 typedef struct {
     int exp_size;
@@ -526,6 +527,7 @@ typedef struct {
     int frac_size;
     int frac_shift;
     bool arm_althp;
+    bool m68k_denormal;
     uint64_t round_mask;
 } FloatFmt;
 
@@ -576,7 +578,12 @@ static const FloatFmt float128_params = {
 static const FloatFmt floatx80_params[3] = {
     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
-    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
+    [floatx80_precision_x] = {
+        FLOATX80_PARAMS(64),
+#ifdef TARGET_M68K
+        .m68k_denormal = true,
+#endif
+    },
 };
 
 /* Unpack a float to parts, but do not canonicalize.  */
@@ -3126,6 +3133,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
 }
 
+int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+                               float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
+}
+
 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
 {
@@ -3392,6 +3408,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
 }
 
+int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
 {
     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
@@ -3407,6 +3428,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
 }
 
+int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
+}
+
 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
 {
     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@@ -3534,6 +3560,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
 }
 
+uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
+                                 int scale, float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
+}
+
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                    int scale, float_status *s)
 {
@@ -3759,6 +3794,11 @@ Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
 }
 
+uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
 {
     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
@@ -3774,6 +3814,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
 }
 
+uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
+}
+
 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
 {
     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
@@ -3929,6 +3974,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
     return int64_to_bfloat16_scalbn(a, scale, status);
 }
 
+bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, scale, status);
+}
+
 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
 {
     return int64_to_bfloat16_scalbn(a, 0, status);
@@ -3944,6 +3994,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
     return int64_to_bfloat16_scalbn(a, 0, status);
 }
 
+bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, 0, status);
+}
+
 float128 int128_to_float128(Int128 a, float_status *status)
 {
     FloatParts128 p = { };
@@ -4139,6 +4194,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
     return uint64_to_bfloat16_scalbn(a, scale, status);
 }
 
+bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, scale, status);
+}
+
 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
 {
     return uint64_to_bfloat16_scalbn(a, 0, status);
@@ -4154,6 +4214,11 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
     return uint64_to_bfloat16_scalbn(a, 0, status);
 }
 
+bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, 0, status);
+}
+
 float128 uint64_to_float128(uint64_t a, float_status *status)
 {
     FloatParts128 p;
index 2cbd0f77a0257dda5cf97e7c1245ccc67cd93ad3..63eac22734c803c21b735b1f75b164d40947817e 100644 (file)
@@ -1296,6 +1296,9 @@ ERST
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user"
+#ifdef CONFIG_AF_XDP
+                      "|af-xdp"
+#endif
 #ifdef CONFIG_VMNET
                       "|vmnet-host|vmnet-shared|vmnet-bridged"
 #endif
index 769626b0988156e016cca8fc694ba9fbf3ae624c..fe671534e40fa0198a42a08e69af4c6054f09be3 100644 (file)
@@ -10,6 +10,8 @@
 #define CPUINFO_LSE             (1u << 1)
 #define CPUINFO_LSE2            (1u << 2)
 #define CPUINFO_AES             (1u << 3)
+#define CPUINFO_PMULL           (1u << 4)
+#define CPUINFO_BTI             (1u << 5)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/aarch64/host/crypto/clmul.h b/host/include/aarch64/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..bb516d8
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * AArch64 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef AARCH64_HOST_CRYPTO_CLMUL_H
+#define AARCH64_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
+/*
+ * 64x64->128 pmull is available with FEAT_PMULL.
+ * Both FEAT_AES and FEAT_PMULL are covered under the same macro.
+ */
+#ifdef __ARM_FEATURE_AES
+# define HAVE_CLMUL_ACCEL  true
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PMULL)
+#endif
+#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("+crypto")))
+#else
+# define ATTR_CLMUL_ACCEL
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { poly128_t v; Int128 s; } u;
+
+#ifdef CONFIG_ARM_AES_BUILTIN
+    u.v = vmull_p64((poly64_t)n, (poly64_t)m);
+#else
+    asm(".arch_extension aes\n\t"
+        "pmull %0.1q, %1.1d, %2.1d" : "=w"(u.v) : "w"(n), "w"(m));
+#endif
+    return u.s;
+}
+
+#endif /* AARCH64_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/generic/host/crypto/clmul.h b/host/include/generic/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..915bfb8
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * No host specific carry-less multiply acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GENERIC_HOST_CRYPTO_CLMUL_H
+#define GENERIC_HOST_CRYPTO_CLMUL_H
+
+#define HAVE_CLMUL_ACCEL  false
+#define ATTR_CLMUL_ACCEL
+
+Int128 clmul_64_accel(uint64_t, uint64_t)
+    QEMU_ERROR("unsupported accel");
+
+#endif /* GENERIC_HOST_CRYPTO_CLMUL_H */
index 073d0a426f31487dc6ed0e5f95fc8ed43bdbd0e7..7ae21568f76ab152d13cb103e29560411a6c87a1 100644 (file)
@@ -27,6 +27,7 @@
 #define CPUINFO_ATOMIC_VMOVDQA  (1u << 16)
 #define CPUINFO_ATOMIC_VMOVDQU  (1u << 17)
 #define CPUINFO_AES             (1u << 18)
+#define CPUINFO_PCLMUL          (1u << 19)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/i386/host/crypto/clmul.h b/host/include/i386/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..dc3c814
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * x86 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef X86_HOST_CRYPTO_CLMUL_H
+#define X86_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <immintrin.h>
+
+#if defined(__PCLMUL__)
+# define HAVE_CLMUL_ACCEL  true
+# define ATTR_CLMUL_ACCEL
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PCLMUL)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("pclmul")))
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { __m128i v; Int128 s; } u;
+
+    u.v = _mm_clmulepi64_si128(_mm_set_epi64x(0, n), _mm_set_epi64x(0, m), 0);
+    return u.s;
+}
+
+#endif /* X86_HOST_CRYPTO_CLMUL_H */
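
On x86 the accelerated path boils down to a single PCLMULQDQ issued through _mm_clmulepi64_si128(). Below is a standalone sanity check of that intrinsic against a portable bit-loop; illustration only (not part of the patch), and it needs a PCLMUL-capable host plus compilation with -mpclmul:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <immintrin.h>

/* Portable reference for the low 64 bits of a 64x64 carry-less multiply. */
static uint64_t clmul64_lo_ref(uint64_t n, uint64_t m)
{
    uint64_t r = 0;
    for (int i = 0; i < 64; ++i) {
        r ^= ((n >> i) & 1) ? (m << i) : 0;
    }
    return r;
}

int main(void)
{
    uint64_t n = 0x123456789abcdef0ull, m = 0xfedcba9876543210ull;
    __m128i v = _mm_clmulepi64_si128(_mm_set_epi64x(0, n),
                                     _mm_set_epi64x(0, m), 0);
    uint64_t lo = (uint64_t)_mm_cvtsi128_si64(v);   /* low 64 bits only */

    assert(lo == clmul64_lo_ref(n, m));
    printf("pclmul low 64 bits match the reference\n");
    return 0;
}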
diff --git a/host/include/x86_64/host/crypto/clmul.h b/host/include/x86_64/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..f25eced
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/i386/host/crypto/clmul.h"
index da699cf4e14772f6a2843b58fe6663d3709e759a..230aab819c04d45f87559f2136081dd263589df0 100644 (file)
@@ -38,6 +38,7 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-net.h"
 
 GlobalProperty hw_compat_8_1[] = {};
 const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
@@ -45,6 +46,9 @@ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
 GlobalProperty hw_compat_8_0[] = {
     { "migration", "multifd-flush-after-each-section", "on"},
     { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+    { TYPE_VIRTIO_NET, "host_uso", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso6", "off"},
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
 
index f8aeafa16be9ebc2ebafe06c1018f3d324a6df2f..e324c02dd5898f68a2f5f8d48fb7ded09bb9ef1c 100644 (file)
@@ -810,24 +810,24 @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base,
     return e1000e_tx_wb_interrupt_cause(core, queue_idx);
 }
 
-typedef struct E1000E_RingInfo_st {
+typedef struct E1000ERingInfo {
     int dbah;
     int dbal;
     int dlen;
     int dh;
     int dt;
     int idx;
-} E1000E_RingInfo;
+} E1000ERingInfo;
 
 static inline bool
-e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
                 core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -836,13 +836,13 @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r)
 {
     return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
+e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -852,7 +852,7 @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh],  core->mac[r->dt]);
@@ -871,19 +871,19 @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 static inline uint32_t
-e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen];
 }
 
 typedef struct E1000E_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct e1000e_tx *tx;
 } E1000E_TxRing;
 
@@ -896,7 +896,7 @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { TDBAH,  TDBAL,  TDLEN,  TDH,  TDT, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
     };
@@ -908,13 +908,13 @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
     };
@@ -930,7 +930,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
     dma_addr_t base;
     struct e1000_tx_desc desc;
     bool ide = false;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t cause = E1000_ICS_TXQE;
 
     if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
@@ -960,7 +960,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
 }
 
 static bool
-e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
+e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r,
                   size_t total_size)
 {
     uint32_t bufs = e1000e_ring_free_descr_num(core, r);
@@ -1397,17 +1397,17 @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr,
     }
 }
 
-typedef struct e1000e_ba_state_st {
+typedef struct E1000EBAState {
     uint16_t written[MAX_PS_BUFFERS];
     uint8_t cur_idx;
-} e1000e_ba_state;
+} E1000EBAState;
 
 static inline void
-e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
-                               hwaddr ba[MAX_PS_BUFFERS],
-                               e1000e_ba_state *bastate,
-                               const char *data,
-                               dma_addr_t data_len)
+e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core,
+                                    hwaddr ba[MAX_PS_BUFFERS],
+                                    E1000EBAState *bastate,
+                                    const char *data,
+                                    dma_addr_t data_len)
 {
     assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]);
 
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
 }
 
 static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
-                           hwaddr ba[MAX_PS_BUFFERS],
-                           e1000e_ba_state *bastate,
-                           const char *data,
-                           dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+                                        hwaddr ba[MAX_PS_BUFFERS],
+                                        E1000EBAState *bastate,
+                                        const char *data,
+                                        dma_addr_t data_len)
 {
     while (data_len > 0) {
         uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1460,7 +1460,7 @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
 }
 
 static inline bool
-e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
+e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi)
 {
     return e1000e_ring_free_descr_num(core, rxi) ==
            e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
@@ -1521,7 +1521,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
     struct iovec *iov = net_rx_pkt_get_iovec(pkt);
     size_t size = net_rx_pkt_get_total_len(pkt);
     size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi;
+    const E1000ERingInfo *rxi;
     size_t ps_hdr_len = 0;
     bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len);
     bool is_first = true;
@@ -1530,7 +1530,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
     do {
         hwaddr ba[MAX_PS_BUFFERS];
-        e1000e_ba_state bastate = { { 0 } };
+        E1000EBAState bastate = { { 0 } };
         bool is_last = false;
 
         desc_size = total_size - desc_offset;
@@ -1568,8 +1568,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                             iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
                                            iov->iov_len - iov_ofs);
 
-                            e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
-                                                      iov->iov_base, iov_copy);
+                            e1000e_write_hdr_frag_to_rx_buffers(core, ba,
+                                                                &bastate,
+                                                                iov->iov_base,
+                                                                iov_copy);
 
                             copy_size -= iov_copy;
                             ps_hdr_copied += iov_copy;
@@ -1585,8 +1587,8 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                     } else {
                         /* Leave buffer 0 of each descriptor except first */
                         /* empty as per spec 7.1.5.1                      */
-                        e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
-                                                       NULL, 0);
+                        e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate,
+                                                            NULL, 0);
                     }
                 }
 
@@ -1594,8 +1596,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                 while (copy_size) {
                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
-                                            iov->iov_base + iov_ofs, iov_copy);
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+                                                            iov->iov_base +
+                                                            iov_ofs,
+                                                            iov_copy);
 
                     copy_size -= iov_copy;
                     iov_ofs += iov_copy;
@@ -1607,7 +1611,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
                 if (desc_offset + desc_size >= total_size) {
                     /* Simulate FCS checksum presence in the last descriptor */
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
                           (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
                 }
             }
@@ -2852,7 +2856,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
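
The qemu_set_offload() call above now follows the widened offload interface declared in include/net/net.h elsewhere in this pull. As a reminder (a sketch, not part of the patch), the new prototype is:

void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
                      int ecn, int ufo, int uso4, int uso6);

The two trailing arguments select UDP segmentation offload (USO) for IPv4 and IPv6; e1000e has no USO support, so it passes 0 for both.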
 
index 788463f1b62b2ee59610b5d63dca33660620b1e4..2f2f359f7a53ba28ea3c4b946abe6d1ecc3fd92e 100644 (file)
@@ -372,6 +372,12 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
     etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr);
 }
 
+/*
+ * rx_init_frame() ensures we never do more padding than this
+ * (checksum plus minimum data packet size)
+ */
+#define MAX_RX_PADDING 64
+
 static void fill_rx_bd(eTSEC          *etsec,
                        eTSEC_rxtx_bd  *bd,
                        const uint8_t **buf,
@@ -380,9 +386,11 @@ static void fill_rx_bd(eTSEC          *etsec,
     uint16_t to_write;
     hwaddr   bufptr = bd->bufptr +
         ((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32);
-    uint8_t  padd[etsec->rx_padding];
+    uint8_t  padd[MAX_RX_PADDING];
     uint8_t  rem;
 
+    assert(etsec->rx_padding <= MAX_RX_PADDING);
+
     RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx
                " size:%zu(padding + crc:%u) + fcb:%u\n",
                bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size);
@@ -426,7 +434,7 @@ static void fill_rx_bd(eTSEC          *etsec,
         rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding);
 
         if (rem > 0) {
-            memset(padd, 0x0, sizeof(padd));
+            memset(padd, 0x0, rem);
             etsec->rx_padding -= rem;
             *size             -= rem;
             bd->length        += rem;
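
The hunk above replaces a variable-length array whose bound came from guest-influenced state with a fixed-size buffer plus an assertion, and clears only the bytes actually used. A minimal sketch of the same pattern (hypothetical names, assuming the caller guarantees the bound):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define MAX_PAD 64                      /* upper bound established elsewhere */

static void pad_out(uint8_t *dst, size_t pad_len)
{
    uint8_t pad[MAX_PAD];

    assert(pad_len <= MAX_PAD);         /* invariant, not guest-controlled */
    memset(pad, 0, pad_len);            /* clear only what will be copied */
    memcpy(dst, pad, pad_len);
}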
index 8b6b75c522f6d02cb27c4345b6fe76fb19afa488..f6a5e2327b5abe775e025ebe490f2e26302e1ccd 100644 (file)
@@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
     return false;
 }
 
+typedef struct E1000ERingInfo {
+    int dbah;
+    int dbal;
+    int dlen;
+    int dh;
+    int dt;
+    int idx;
+} E1000ERingInfo;
+
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+    return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -694,24 +717,15 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
     return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
 }
 
-typedef struct E1000E_RingInfo_st {
-    int dbah;
-    int dbal;
-    int dlen;
-    int dh;
-    int dt;
-    int idx;
-} E1000E_RingInfo;
-
 static inline bool
-igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
                 core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -720,13 +734,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
 {
     return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
+igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -736,7 +750,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh],  core->mac[r->dt]);
@@ -755,13 +769,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 typedef struct IGB_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct igb_tx *tx;
 } IGB_TxRing;
 
@@ -774,7 +788,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
         { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
@@ -800,13 +814,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
         { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
@@ -833,7 +847,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 static uint32_t
 igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
                      union e1000_adv_tx_desc *tx_desc,
-                     const E1000E_RingInfo *txi)
+                     const E1000ERingInfo *txi)
 {
     PCIDevice *d;
     uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
@@ -866,7 +880,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 }
 
 static inline bool
-igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
 {
     bool vmdq = core->mac[MRQC] & 1;
     uint16_t qn = txi->idx;
@@ -883,7 +897,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     PCIDevice *d;
     dma_addr_t base;
     union e1000_adv_tx_desc desc;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t eic = 0;
 
     if (!igb_tx_enabled(core, txi)) {
@@ -918,7 +932,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 }
 
 static uint32_t
-igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
+igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
 {
     uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
     uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
@@ -930,7 +944,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static bool
-igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
+igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
 {
     uint32_t bufs = igb_ring_free_descr_num(core, r);
     uint32_t bufsize = igb_rxbufsize(core, r);
@@ -941,6 +955,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
                          bufsize;
 }
 
+static uint32_t
+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+    return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+           E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
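
A sketch (not part of the patch) of what igb_rxhdrbufsize() above computes: the mask and right shift read SRRCTL bits 11:8 as a count of 64-byte units (>> 8 to extract the field, << 6 to scale, folded into a single >> 2), so the result is the header buffer size in bytes:

uint32_t srrctl = 0x0A000200;   /* hypothetical: header-split-always, field value 2 */
uint32_t hdr_bytes = (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
                     E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;     /* 0x200 >> 2 == 128 */
uint32_t desctyp = srrctl & E1000_SRRCTL_DESCTYPE_MASK;   /* == HDR_SPLIT_ALWAYS */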
+
 void
 igb_start_recv(IGBCore *core)
 {
@@ -1225,21 +1247,77 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
 }
 
 static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                      hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                 hwaddr *buff_addr)
 {
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
 static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                  hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
 {
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGBBAState {
+    uint16_t written[IGB_MAX_PS_BUFFERS];
+    uint8_t cur_idx;
+} IGBBAState;
+
+typedef struct IGBSplitDescriptorData {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGBSplitDescriptorData;
+
+typedef struct IGBPacketRxDMAState {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool do_ps;
+    bool is_first;
+    IGBBAState bastate;
+    hwaddr ba[IGB_MAX_PS_BUFFERS];
+    IGBSplitDescriptorData ps_desc_data;
+} IGBPacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+                  union e1000_rx_desc_union *desc,
+                  IGBPacketRxDMAState *pdma_st,
+                  const E1000ERingInfo *r)
+{
+    uint32_t desc_type;
+
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-    } else {
-        igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+        igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+        pdma_st->ba[0] = 0;
+        return;
     }
+
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+        return;
+    }
+
+    /* descriptor replication modes not supported */
+    desc_type = igb_rx_queue_desctyp_get(core, r);
+    if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+        trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+    }
+
+    /* advanced single buffer descriptor */
+    igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+    pdma_st->ba[0] = 0;
 }
 
 static void
@@ -1281,15 +1359,11 @@ igb_verify_csum_in_sw(IGBCore *core,
 }
 
 static void
-igb_build_rx_metadata(IGBCore *core,
-                      struct NetRxPkt *pkt,
-                      bool is_eop,
-                      const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
-                      uint16_t *pkt_info, uint16_t *hdr_info,
-                      uint32_t *rss,
-                      uint32_t *status_flags,
-                      uint16_t *ip_id,
-                      uint16_t *vlan_tag)
+igb_build_rx_metadata_common(IGBCore *core,
+                             struct NetRxPkt *pkt,
+                             bool is_eop,
+                             uint32_t *status_flags,
+                             uint16_t *vlan_tag)
 {
     struct virtio_net_hdr *vhdr;
     bool hasip4, hasip6, csum_valid;
@@ -1298,7 +1372,6 @@ igb_build_rx_metadata(IGBCore *core,
     *status_flags = E1000_RXD_STAT_DD;
 
     /* No additional metadata needed for non-EOP descriptors */
-    /* TODO: EOP apply only to status so don't skip whole function. */
     if (!is_eop) {
         goto func_exit;
     }
@@ -1315,64 +1388,6 @@ igb_build_rx_metadata(IGBCore *core,
         trace_e1000e_rx_metadata_vlan(*vlan_tag);
     }
 
-    /* Packet parsing results */
-    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
-        if (rss_info->enabled) {
-            *rss = cpu_to_le32(rss_info->hash);
-            trace_igb_rx_metadata_rss(*rss);
-        }
-    } else if (hasip4) {
-            *status_flags |= E1000_RXD_STAT_IPIDV;
-            *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
-            trace_e1000e_rx_metadata_ip_id(*ip_id);
-    }
-
-    if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
-        *status_flags |= E1000_RXD_STAT_ACK;
-        trace_e1000e_rx_metadata_ack();
-    }
-
-    if (pkt_info) {
-        *pkt_info = rss_info->enabled ? rss_info->type : 0;
-
-        if (etqf < 8) {
-            *pkt_info |= (BIT(11) | etqf) << 4;
-        } else {
-            if (hasip4) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP4;
-            }
-
-            if (hasip6) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP6;
-            }
-
-            switch (l4hdr_proto) {
-            case ETH_L4_HDR_PROTO_TCP:
-                *pkt_info |= E1000_ADVRXD_PKT_TCP;
-                break;
-
-            case ETH_L4_HDR_PROTO_UDP:
-                *pkt_info |= E1000_ADVRXD_PKT_UDP;
-                break;
-
-            case ETH_L4_HDR_PROTO_SCTP:
-                *pkt_info |= E1000_ADVRXD_PKT_SCTP;
-                break;
-
-            default:
-                break;
-            }
-        }
-    }
-
-    if (hdr_info) {
-        *hdr_info = 0;
-    }
-
-    if (ts) {
-        *status_flags |= BIT(16);
-    }
-
     /* RX CSO information */
     if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
         trace_e1000e_rx_metadata_ipv6_sum_disabled();
@@ -1428,56 +1443,168 @@ func_exit:
 static inline void
 igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
                         struct NetRxPkt *pkt,
-                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+                        const E1000E_RSSInfo *rss_info,
                         uint16_t length)
 {
-    uint32_t status_flags, rss;
-    uint16_t ip_id;
+    uint32_t status_flags;
 
     assert(!rss_info->enabled);
+
+    memset(desc, 0, sizeof(*desc));
     desc->length = cpu_to_le16(length);
-    desc->csum = 0;
+    igb_build_rx_metadata_common(core, pkt, pkt != NULL,
+                                 &status_flags,
+                                 &desc->special);
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          NULL, NULL, &rss,
-                          &status_flags, &ip_id,
-                          &desc->special);
     desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
+static uint16_t
+igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
+{
+    uint16_t pkt_type;
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+
+    if (etqf < 8) {
+        pkt_type = BIT(11) | etqf;
+        return pkt_type;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+        eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt);
+        pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E :
+                                               E1000_ADVRXD_PKT_IP6;
+    } else if (hasip4) {
+        pkt_type = E1000_ADVRXD_PKT_IP4;
+    } else {
+        pkt_type = 0;
+    }
+
+    switch (l4hdr_proto) {
+    case ETH_L4_HDR_PROTO_TCP:
+        pkt_type |= E1000_ADVRXD_PKT_TCP;
+        break;
+    case ETH_L4_HDR_PROTO_UDP:
+        pkt_type |= E1000_ADVRXD_PKT_UDP;
+        break;
+    case ETH_L4_HDR_PROTO_SCTP:
+        pkt_type |= E1000_ADVRXD_PKT_SCTP;
+        break;
+    default:
+        break;
+    }
+
+    return pkt_type;
+}
+
 static inline void
 igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
                        struct NetRxPkt *pkt,
                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
                        uint16_t length)
 {
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    uint16_t rss_type = 0, pkt_type;
+    bool eop = (pkt != NULL);
+    uint32_t adv_desc_status_error = 0;
     memset(&desc->wb, 0, sizeof(desc->wb));
 
     desc->wb.upper.length = cpu_to_le16(length);
+    igb_build_rx_metadata_common(core, pkt, eop,
+                                 &desc->wb.upper.status_error,
+                                 &desc->wb.upper.vlan);
+
+    if (!eop) {
+        return;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+        if (rss_info->enabled) {
+            desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash);
+            rss_type = rss_info->type;
+            trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type);
+        }
+    } else if (hasip4) {
+            adv_desc_status_error |= E1000_RXD_STAT_IPIDV;
+            desc->wb.lower.hi_dword.csum_ip.ip_id =
+                cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+            trace_e1000e_rx_metadata_ip_id(
+                desc->wb.lower.hi_dword.csum_ip.ip_id);
+    }
+
+    if (ts) {
+        adv_desc_status_error |= BIT(16);
+    }
+
+    pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf);
+    trace_e1000e_rx_metadata_pkt_type(pkt_type);
+    desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
+    desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error);
+}
+
+static inline void
+igb_write_adv_ps_rx_descr(IGBCore *core,
+                          union e1000_adv_rx_desc *desc,
+                          struct NetRxPkt *pkt,
+                          const E1000E_RSSInfo *rss_info,
+                          const E1000ERingInfo *r,
+                          uint16_t etqf,
+                          bool ts,
+                          IGBPacketRxDMAState *pdma_st)
+{
+    size_t pkt_len;
+    uint16_t hdr_info = 0;
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          &desc->wb.lower.lo_dword.pkt_info,
-                          &desc->wb.lower.lo_dword.hdr_info,
-                          &desc->wb.lower.hi_dword.rss,
-                          &desc->wb.upper.status_error,
-                          &desc->wb.lower.hi_dword.csum_ip.ip_id,
-                          &desc->wb.upper.vlan);
+    if (pdma_st->do_ps) {
+        pkt_len = pdma_st->bastate.written[1];
+    } else {
+        pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1];
+    }
+
+    igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len);
+
+    hdr_info = (pdma_st->ps_desc_data.hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+               E1000_ADVRXD_ADV_HDR_LEN_MASK;
+    hdr_info |= pdma_st->ps_desc_data.sph ? E1000_ADVRXD_HDR_SPH : 0;
+    desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info);
+
+    desc->wb.upper.status_error |= cpu_to_le32(
+        pdma_st->ps_desc_data.hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0);
 }
 
 static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                   struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
-                   uint16_t etqf, bool ts, uint16_t length)
+igb_write_rx_descr(IGBCore *core,
+                   union e1000_rx_desc_union *desc,
+                   struct NetRxPkt *pkt,
+                   const E1000E_RSSInfo *rss_info,
+                   uint16_t etqf,
+                   bool ts,
+                   IGBPacketRxDMAState *pdma_st,
+                   const E1000ERingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
         igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
-                                etqf, ts, length);
+                                pdma_st->bastate.written[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts,
+                                  pdma_st);
     } else {
         igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
-                               etqf, ts, length);
+                               etqf, ts, pdma_st->bastate.written[1]);
     }
 }
 
@@ -1514,20 +1641,7 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
 }
 
 static void
-igb_write_to_rx_buffers(IGBCore *core,
-                        PCIDevice *d,
-                        hwaddr ba,
-                        uint16_t *written,
-                        const char *data,
-                        dma_addr_t data_len)
-{
-    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-    pci_dma_write(d, ba + *written, data, data_len);
-    *written += data_len;
-}
-
-static void
-igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
                     size_t pkt_size, size_t pkt_fcs_size)
 {
     eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
@@ -1545,12 +1659,256 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
 }
 
 static inline bool
-igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
 {
     return igb_ring_free_descr_num(core, rxi) ==
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000ERingInfo *r,
+          struct NetRxPkt *pkt,
+          IGBPacketRxDMAState *pdma_st)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
+
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_rxhdrbufsize(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        pdma_st->ps_hdr_len = total_pkt_len;
+        pdma_st->ps_desc_data.hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    pdma_st->ps_desc_data.sph = true;
+    pdma_st->ps_desc_data.hdr_len = pdma_st->ps_hdr_len;
+
+    if (pdma_st->ps_hdr_len > bheader_size) {
+        pdma_st->ps_desc_data.hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        pdma_st->ps_hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
+
+static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
+    }
+}
+
+static inline void
+igb_write_hdr_frag_to_rx_buffers(IGBCore *core,
+                                 PCIDevice *d,
+                                 IGBPacketRxDMAState *pdma_st,
+                                 const char *data,
+                                 dma_addr_t data_len)
+{
+    assert(data_len <= pdma_st->rx_desc_header_buf_size -
+                       pdma_st->bastate.written[0]);
+    pci_dma_write(d,
+                  pdma_st->ba[0] + pdma_st->bastate.written[0],
+                  data, data_len);
+    pdma_st->bastate.written[0] += data_len;
+    pdma_st->bastate.cur_idx = 1;
+}
+
+static void
+igb_write_header_to_rx_buffers(IGBCore *core,
+                               struct NetRxPkt *pkt,
+                               PCIDevice *d,
+                               IGBPacketRxDMAState *pdma_st,
+                               size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty                                          */
+        pdma_st->bastate.cur_idx = 1;
+        return;
+    }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st,
+                                         pdma_st->iov->iov_base,
+                                         iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+                                     PCIDevice *d,
+                                     IGBPacketRxDMAState *pdma_st,
+                                     const char *data,
+                                     dma_addr_t data_len)
+{
+    while (data_len > 0) {
+        assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS);
+
+        uint32_t cur_buf_bytes_left =
+            pdma_st->rx_desc_packet_buf_size -
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx];
+        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+        trace_igb_rx_desc_buff_write(
+            pdma_st->bastate.cur_idx,
+            pdma_st->ba[pdma_st->bastate.cur_idx],
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+            data,
+            bytes_to_write);
+
+        pci_dma_write(d,
+                      pdma_st->ba[pdma_st->bastate.cur_idx] +
+                      pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+                      data, bytes_to_write);
+
+        pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write;
+        data += bytes_to_write;
+        data_len -= bytes_to_write;
+
+        if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] ==
+            pdma_st->rx_desc_packet_buf_size) {
+            pdma_st->bastate.cur_idx++;
+        }
+    }
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+                                struct NetRxPkt *pkt,
+                                PCIDevice *d,
+                                IGBPacketRxDMAState *pdma_st,
+                                size_t *copy_size)
+{
+    static const uint32_t fcs_pad;
+    size_t iov_copy;
+
+    /* Copy packet payload */
+    while (*copy_size) {
+        iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             pdma_st->iov->iov_base +
+                                             pdma_st->iov_ofs,
+                                             iov_copy);
+
+        *copy_size -= iov_copy;
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    }
+
+    if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+        /* Simulate FCS checksum presence in the last descriptor */
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             (const char *) &fcs_pad,
+                                             e1000x_fcs_len(core->mac));
+    }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+                        struct NetRxPkt *pkt,
+                        PCIDevice *d,
+                        IGBPacketRxDMAState *pdma_st)
+{
+    size_t copy_size;
+
+    if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) {
+        /* as per intel docs; skip descriptors with null buf addr */
+        trace_e1000e_rx_null_descriptor();
+        return;
+    }
+
+    if (pdma_st->desc_offset >= pdma_st->size) {
+        return;
+    }
+
+    pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+    copy_size = pdma_st->size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
+    igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+}
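
A worked sketch (not part of the patch) of the per-descriptor accounting these helpers produce in header-split mode, assuming a 1514-byte TCP frame with 54 bytes of headers, a 128-byte header buffer, a 2048-byte packet buffer, and CRC stripping enabled:

uint16_t written[IGB_MAX_PS_BUFFERS] = { 0 };
size_t hdr_len = 54, frame_len = 1514;

written[0] = hdr_len;              /* via igb_write_hdr_frag_to_rx_buffers() */
written[1] = frame_len - hdr_len;  /* via igb_write_payload_frag_to_rx_buffers() */
/* igb_write_adv_ps_rx_descr() then reports written[1] (1460) as the packet length */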
+
 static void
 igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                           const E1000E_RxRing *rxr,
@@ -1560,95 +1918,61 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     PCIDevice *d;
     dma_addr_t base;
     union e1000_rx_desc_union desc;
-    size_t desc_size;
-    size_t desc_offset = 0;
-    size_t iov_ofs = 0;
-
-    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
-    size_t size = net_rx_pkt_get_total_len(pkt);
-    size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi = rxr->i;
-    size_t bufsize = igb_rxbufsize(core, rxi);
-
+    const E1000ERingInfo *rxi;
+    size_t rx_desc_len;
+
+    IGBPacketRxDMAState pdma_st = {0};
+    pdma_st.is_first = true;
+    pdma_st.size = net_rx_pkt_get_total_len(pkt);
+    pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
+
+    rxi = rxr->i;
+    rx_desc_len = core->rx_desc_len;
+    pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi);
+    pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi);
+    pdma_st.iov = net_rx_pkt_get_iovec(pkt);
     d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
     if (!d) {
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt, &pdma_st);
+
     do {
-        hwaddr ba;
-        uint16_t written = 0;
+        memset(&pdma_st.bastate, 0, sizeof(IGBBAState));
         bool is_last = false;
 
-        desc_size = total_size - desc_offset;
-
-        if (desc_size > bufsize) {
-            desc_size = bufsize;
-        }
-
         if (igb_ring_empty(core, rxi)) {
             return;
         }
 
         base = igb_ring_head_descr(core, rxi);
+        pci_dma_read(d, base, &desc, rx_desc_len);
+        trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        pci_dma_read(d, base, &desc, core->rx_desc_len);
-
-        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
-        igb_read_rx_descr(core, &desc, &ba);
-
-        if (ba) {
-            if (desc_offset < size) {
-                static const uint32_t fcs_pad;
-                size_t iov_copy;
-                size_t copy_size = size - desc_offset;
-                if (copy_size > bufsize) {
-                    copy_size = bufsize;
-                }
-
-                /* Copy packet payload */
-                while (copy_size) {
-                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
+        igb_read_rx_descr(core, &desc, &pdma_st, rxi);
 
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                                            iov->iov_base + iov_ofs, iov_copy);
-
-                    copy_size -= iov_copy;
-                    iov_ofs += iov_copy;
-                    if (iov_ofs == iov->iov_len) {
-                        iov++;
-                        iov_ofs = 0;
-                    }
-                }
-
-                if (desc_offset + desc_size >= total_size) {
-                    /* Simulate FCS checksum presence in the last descriptor */
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                          (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
-                }
-            }
-        } else { /* as per intel docs; skip descriptors with null buf addr */
-            trace_e1000e_rx_null_descriptor();
-        }
-        desc_offset += desc_size;
-        if (desc_offset >= total_size) {
+        igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+        pdma_st.desc_offset += pdma_st.desc_size;
+        if (pdma_st.desc_offset >= pdma_st.total_size) {
             is_last = true;
         }
 
-        igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
-                           rss_info, etqf, ts, written);
-        igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
-
-        igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
-    } while (desc_offset < total_size);
+        igb_write_rx_descr(core, &desc,
+                           is_last ? pkt : NULL,
+                           rss_info,
+                           etqf, ts,
+                           &pdma_st,
+                           rxi);
+        igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len);
+        igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN);
+    } while (pdma_st.desc_offset < pdma_st.total_size);
 
-    igb_update_rx_stats(core, rxi, size, total_size);
+    igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
 }
 
 static bool
-igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
 {
     if (core->mac[MRQC] & 1) {
         uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
@@ -2753,7 +3077,7 @@ igb_update_rx_offloads(IGBCore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
 
index 82ff195dfc3cfc5e6f189555f819b076c034f40f..ed7427b8fe7087054f7fe2d0f5ee555bfe84cffd 100644 (file)
@@ -452,6 +452,7 @@ union e1000_adv_rx_desc {
 #define E1000_SRRCTL_BSIZEHDRSIZE_MASK         0x00000F00
 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT        2  /* Shift _left_ */
 #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF       0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT        0x04000000
 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
 #define E1000_SRRCTL_DESCTYPE_MASK             0x0E000000
 #define E1000_SRRCTL_DROP_EN                   0x80000000
@@ -692,11 +693,20 @@ union e1000_adv_rx_desc {
 
 #define E1000_STATUS_NUM_VFS_SHIFT 14
 
-#define E1000_ADVRXD_PKT_IP4 BIT(4)
-#define E1000_ADVRXD_PKT_IP6 BIT(6)
-#define E1000_ADVRXD_PKT_TCP BIT(8)
-#define E1000_ADVRXD_PKT_UDP BIT(9)
-#define E1000_ADVRXD_PKT_SCTP BIT(10)
+#define E1000_ADVRXD_PKT_IP4  BIT(0)
+#define E1000_ADVRXD_PKT_IP6  BIT(2)
+#define E1000_ADVRXD_PKT_IP6E BIT(3)
+#define E1000_ADVRXD_PKT_TCP  BIT(4)
+#define E1000_ADVRXD_PKT_UDP  BIT(5)
+#define E1000_ADVRXD_PKT_SCTP BIT(6)
+
+#define IGB_MAX_PS_BUFFERS 2
+
+#define E1000_ADVRXD_HDR_LEN_OFFSET    (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK  ((BIT(10) - 1) << \
+                                        E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH           BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
 
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
index dfe475446942982d4beed9a1657cb7cee491232a..5e16056be661e54ff0da2ff8bdcf116055fe1e45 100644 (file)
@@ -1043,7 +1043,7 @@ static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id)
 static ssize_t of_dpa_ig(World *world, uint32_t pport,
                          const struct iovec *iov, int iovcnt)
 {
-    struct iovec iov_copy[iovcnt + 2];
+    g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2);
     OfDpaFlowContext fc = {
         .of_dpa = world_private(world),
         .in_pport = pport,
index 6b5ba669a25df167a318f56ec5d4432352a8f2d2..3abfd65e5bf917d692fd524aab0c8d8aff54d5c3 100644 (file)
@@ -278,9 +278,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
 
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
+igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
 
 igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
 igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
@@ -295,6 +295,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
 igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x"
 igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x"
 
+igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d"
+
 # igbvf.c
 igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
 
index 6b958d6363e56a0a9474c57919ac914fd2394f51..57427a3997c8f95ddff62a4f8b00ec9781e19a1c 100644 (file)
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
     VIRTIO_F_RING_RESET,
     VIRTIO_NET_F_RSS,
     VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
+    VIRTIO_NET_F_HOST_USO,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
index 7102ec4817f4c5a01ac62f55b7b7d7e4d82aff95..bd0ead94fe0a521ca1027f6829b4f0830d75fb84 100644 (file)
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
     return n->has_ufo;
 }
 
+static int peer_has_uso(VirtIONet *n)
+{
+    if (!peer_has_vnet_hdr(n)) {
+        return 0;
+    }
+
+    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                        int version_1, int hash_report)
 {
@@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
     }
 
@@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
     }
 
+    if (!peer_has_uso(n)) {
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+    }
+
     if (!get_vhost_net(nc->peer)) {
         return features;
     }
@@ -859,17 +878,21 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
-            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
 {
     static const uint64_t guest_offloads_mask =
         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
-        (1ULL << VIRTIO_NET_F_GUEST_UFO);
+        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
+        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
+        (1ULL << VIRTIO_NET_F_GUEST_USO6);
 
     return guest_offloads_mask & features;
 }
@@ -3922,6 +3945,12 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO4, true),
+    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO6, true),
+    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+                      VIRTIO_NET_F_HOST_USO, true),
     DEFINE_PROP_END_OF_LIST(),
 };
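
One detail worth spelling out: virtio_net_guest_offloads_by_features() changed its parameter from uint32_t to uint64_t because the USO feature bits sit above bit 31 (bits 54/55 in current Linux virtio-net headers). A sketch, not part of the patch:

uint64_t features = 1ULL << VIRTIO_NET_F_GUEST_USO4;   /* a bit number above 31 */
uint32_t truncated = (uint32_t)features;               /* == 0: the offload would be silently lost */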
 
index 3fb108751a2c5cc0f59d8a183bb072ca4916719f..226c0777f0eae8c2de59413b9ba5dcfdff89f6b8 100644 (file)
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
                          s->lro_supported,
                          s->lro_supported,
                          0,
+                         0,
+                         0,
                          0);
     }
 }
diff --git a/include/crypto/clmul.h b/include/crypto/clmul.h
new file mode 100644 (file)
index 0000000..446931f
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef CRYPTO_CLMUL_H
+#define CRYPTO_CLMUL_H
+
+#include "qemu/int128.h"
+#include "host/crypto/clmul.h"
+
+/**
+ * clmul_8x8_low:
+ *
+ * Perform eight 8x8->8 carry-less multiplies.
+ */
+uint64_t clmul_8x8_low(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_even:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The odd bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_even(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_odd:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The even bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_packed:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ */
+uint64_t clmul_8x4_packed(uint32_t, uint32_t);
+
+/**
+ * clmul_16x2_even:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The odd words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_even(uint64_t, uint64_t);
+
+/**
+ * clmul_16x2_odd:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The even words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_32:
+ *
+ * Perform a 32x32->64 carry-less multiply.
+ */
+uint64_t clmul_32(uint32_t, uint32_t);
+
+/**
+ * clmul_64:
+ *
+ * Perform a 64x64->128 carry-less multiply.
+ */
+Int128 clmul_64_gen(uint64_t, uint64_t);
+
+static inline Int128 clmul_64(uint64_t a, uint64_t b)
+{
+    if (HAVE_CLMUL_ACCEL) {
+        return clmul_64_accel(a, b);
+    } else {
+        return clmul_64_gen(a, b);
+    }
+}
+
+#endif /* CRYPTO_CLMUL_H */
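
A self-contained sketch (not part of the patch) of what "carry-less" means for the 8x8->16 primitives declared above: partial products are combined with XOR instead of addition, so no carries propagate between bit positions.

#include <stdint.h>
#include <stdio.h>

static uint16_t clmul_8x8_ref(uint8_t a, uint8_t b)
{
    uint16_t r = 0;

    for (int i = 0; i < 8; i++) {
        if (b & (1 << i)) {
            r ^= (uint16_t)a << i;      /* XOR, not +: carry-less */
        }
    }
    return r;
}

int main(void)
{
    printf("0x%04x\n", clmul_8x8_ref(0x53, 0xca));   /* 0x3f7e, unlike 0x53 * 0xca */
    return 0;
}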
index fb4c8d480ff91a3640aaad8c7abb8e129275fa49..350287852eff8cd66ca01ec57e4da5477108efe2 100644 (file)
 typedef struct CPUTLBEntryFull {
     /*
      * @xlat_section contains:
-     *  - in the lower TARGET_PAGE_BITS, a physical section number
-     *  - with the lower TARGET_PAGE_BITS masked off, an offset which
-     *    must be added to the virtual address to obtain:
-     *     + the ram_addr_t of the target RAM (if the physical section
-     *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
-     *     + the offset within the target MemoryRegion (otherwise)
+     *  - For ram, an offset which must be added to the virtual address
+     *    to obtain the ram_addr_t of the target RAM
+     *  - For other memory regions,
+     *     + in the lower TARGET_PAGE_BITS, the physical section number
+     *     + with the TARGET_PAGE_BITS masked off, the offset within
+     *       the target MemoryRegion
      */
     hwaddr xlat_section;
 
index 6eedef48d8cf8fbe5317efec4595aa25f6e958af..2ebfecf58eb2f62da322a595ea79f7c951565b96 100644 (file)
@@ -111,8 +111,7 @@ static inline int thunk_type_size(const argtype *type_ptr, int is_host)
         if (is_host) {
 #if defined(HOST_X86_64)
             return 8;
-#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \
-      defined(HOST_PARISC) || defined(HOST_SPARC64)
+#elif defined(HOST_MIPS) || defined(HOST_SPARC64)
             return 4;
 #elif defined(HOST_PPC)
             return sizeof(void *);
index cd130564d8b6ca97ae61fec9205e1c0dc876234f..eb64075b9cb4d21ac1a6194af5e8127aa2a5ad26 100644 (file)
@@ -366,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status);
 bfloat16 float64_to_bfloat16(float64 a, float_status *status);
 float64 bfloat16_to_float64(bfloat16 a, float_status *status);
 
+int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode,
+                               int, float_status *status);
 int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
@@ -373,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
 int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 
+int8_t bfloat16_to_int8(bfloat16, float_status *status);
 int16_t bfloat16_to_int16(bfloat16, float_status *status);
 int32_t bfloat16_to_int32(bfloat16, float_status *status);
 int64_t bfloat16_to_int64(bfloat16, float_status *status);
 
+int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status);
 int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
 int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
 int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);
 
+uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode,
+                                 int, float_status *status);
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
@@ -388,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 
+uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);
 
+uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);
 
+bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status);
 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
+bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status);
 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);
 
+bfloat16 int8_to_bfloat16(int8_t a, float_status *status);
 bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
 bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
 bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
+bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status);
 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
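
A minimal usage sketch for the new 8-bit conversions (not part of the patch, assuming the usual QEMU softfloat environment with a default-initialized float_status):

float_status fs = { };                     /* default rounding mode and flags */
bfloat16 h = int8_to_bfloat16(-3, &fs);
int8_t back = bfloat16_to_int8(h, &fs);    /* -3 again: small integers are exact in bfloat16 */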
index 92a4234439a3945de038b8fc37094a078f26972b..648b5b3586b4c98a64eada0f473288961eb34ea7 100644 (file)
@@ -227,17 +227,6 @@ struct CPUWatchpoint {
     QTAILQ_ENTRY(CPUWatchpoint) entry;
 };
 
-#ifdef CONFIG_PLUGIN
-/*
- * For plugins we sometime need to save the resolved iotlb data before
- * the memory regions get moved around  by io_writex.
- */
-typedef struct SavedIOTLB {
-    MemoryRegionSection *section;
-    hwaddr mr_offset;
-} SavedIOTLB;
-#endif
-
 struct KVMState;
 struct kvm_run;
 
@@ -409,8 +398,6 @@ struct CPUState {
 
 #ifdef CONFIG_PLUGIN
     GArray *plugin_mem_cbs;
-    /* saved iotlb data from io_writex */
-    SavedIOTLB saved_iotlb;
 #endif
 
     /* TODO Move common fields from CPUArchState here. */
index 1448d00afbc6a96f50aec64a4dfad958331a4a41..330d2859300ad48e918f28308d11cb9263e390d4 100644 (file)
@@ -54,11 +54,12 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
     HasUfo *has_ufo;
+    HasUso *has_uso;
     HasVnetHdr *has_vnet_hdr;
     HasVnetHdrLen *has_vnet_hdr_len;
     GetUsingVnetHdr *get_using_vnet_hdr;
@@ -187,12 +189,13 @@ void qemu_set_info_str(NetClientState *nc,
                        const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                      int ecn, int ufo);
+                      int ecn, int ufo, int uso4, int uso6);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
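
On the backend side, the counterparts are the widened SetOffload callback and the new HasUso hook. A sketch with hypothetical dummy_* names (not part of the patch) of what a backend without USO support would provide:

static bool dummy_has_uso(NetClientState *nc)
{
    return false;                    /* peers will then mask out the USO features */
}

static void dummy_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
                              int ecn, int ufo, int uso4, int uso6)
{
    /* uso4/uso6 accepted but ignored without backend support */
}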
index 35325f1995f70032731f7df1517822c4cd1bdf49..b11161555b691f3a34d8b4c544e20f5d17a3a7fa 100644 (file)
@@ -25,6 +25,9 @@
 #endif
 
 /* Leaf 1, %ecx */
+#ifndef bit_PCLMUL
+#define bit_PCLMUL      (1 << 1)
+#endif
 #ifndef bit_SSE4_1
 #define bit_SSE4_1      (1 << 19)
 #endif
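
The #ifndef guards exist because newer <cpuid.h> headers already define these bits. A sketch (not part of the patch; have_pclmul() is a hypothetical name) of probing the feature with the compiler's helper:

#include <cpuid.h>
#include <stdbool.h>

static bool have_pclmul(void)
{
    unsigned a, b, c, d;

    return __get_cpuid(1, &a, &b, &c, &d) && (c & bit_PCLMUL);
}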
index 43165f24523b304bdf2ca64e7df290f69d9f3339..71c1123308e0f3b35062262369f1911c9dd078ba 100644 (file)
 struct qemu_plugin_hwaddr {
     bool is_io;
     bool is_store;
-    union {
-        struct {
-            MemoryRegionSection *section;
-            hwaddr    offset;
-        } io;
-        struct {
-            void *hostaddr;
-        } ram;
-    } v;
+    hwaddr phys_addr;
+    MemoryRegion *mr;
 };
 
 /**
index 834b0e47a0c503d70650aef52052788bdd7b1c01..5abdbc3874796bad57e0210d2cdd74f8cda8a611 100644 (file)
@@ -129,7 +129,6 @@ typedef struct QString QString;
 typedef struct RAMBlock RAMBlock;
 typedef struct Range Range;
 typedef struct ReservedRegion ReservedRegion;
-typedef struct SavedIOTLB SavedIOTLB;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
 typedef struct TCGHelperInfo TCGHelperInfo;
index e2683d487f5732b8101618eb9d03760cb16a4758..4db8a58c148ecc1919758c2b0277b61a0fdf36bb 100644 (file)
@@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
 void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, uint32_t bofs,
                       uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, int64_t c,
+                       uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, TCGv_i64 c,
+                       uint32_t oprsz, uint32_t maxsz);
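
A usage sketch for the new immediate form (not part of the patch; dofs, aofs, oprsz and maxsz stand for the usual gvec offsets and sizes): write all-ones into each 32-bit lane of the destination whose source lane equals zero, and zero otherwise.

tcg_gen_gvec_cmpi(TCG_COND_EQ, MO_32, dofs, aofs, 0, oprsz, maxsz);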
 
 /*
  * Perform vector bit select: d = (b & a) | (c & ~a).
index 5150a74831bacdbac02566da6cb9ae593b1ab00a..f426861d9066b70381aa14730ab57356737bb547 100644 (file)
@@ -1873,6 +1873,13 @@ if libbpf.found() and not cc.links('''
   endif
 endif
 
+# libxdp
+libxdp = not_found
+if not get_option('af_xdp').auto() or have_system
+    libxdp = dependency('libxdp', required: get_option('af_xdp'),
+                        version: '>=1.4.0', method: 'pkg-config')
+endif
+
 # libdw
 libdw = not_found
 if not get_option('libdw').auto() or \
@@ -2099,6 +2106,7 @@ config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_pars
 config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
 config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
 config_host_data.set('CONFIG_EBPF', libbpf.found())
+config_host_data.set('CONFIG_AF_XDP', libxdp.found())
 config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found())
 config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
 config_host_data.set('CONFIG_LIBNFS', libnfs.found())
@@ -4270,6 +4278,7 @@ summary_info = {}
 if targetos == 'darwin'
   summary_info += {'vmnet.framework support': vmnet}
 endif
+summary_info += {'AF_XDP support':    libxdp}
 summary_info += {'slirp support':     slirp}
 summary_info += {'vde support':       vde}
 summary_info += {'netmap support':    have_netmap}
index f82d88b7c66222c3a5902ee6f7fd52a133db226b..2ca40f22e96547701877bb9c92124f6053e8240f 100644 (file)
@@ -122,6 +122,8 @@ option('avx512bw', type: 'feature', value: 'auto',
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
+option('af_xdp', type : 'feature', value : 'auto',
+       description: 'AF_XDP network backend support')
 option('attr', type : 'feature', value : 'auto',
        description: 'attr/xattr support')
 option('auth_pam', type : 'feature', value : 'auto',
diff --git a/net/af-xdp.c b/net/af-xdp.c
new file mode 100644 (file)
index 0000000..6c65028
--- /dev/null
@@ -0,0 +1,526 @@
+/*
+ * AF_XDP network backend.
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * Authors:
+ *  Ilya Maximets <i.maximets@ovn.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+
+#include "qemu/osdep.h"
+#include <bpf/bpf.h>
+#include <inttypes.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+#include <net/if.h>
+#include <xdp/xsk.h>
+
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "net/net.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+#include "qemu/memalign.h"
+
+
+typedef struct AFXDPState {
+    NetClientState       nc;
+
+    struct xsk_socket    *xsk;
+    struct xsk_ring_cons rx;
+    struct xsk_ring_prod tx;
+    struct xsk_ring_cons cq;
+    struct xsk_ring_prod fq;
+
+    char                 ifname[IFNAMSIZ];
+    int                  ifindex;
+    bool                 read_poll;
+    bool                 write_poll;
+    uint32_t             outstanding_tx;
+
+    uint64_t             *pool;
+    uint32_t             n_pool;
+    char                 *buffer;
+    struct xsk_umem      *umem;
+
+    uint32_t             n_queues;
+    uint32_t             xdp_flags;
+    bool                 inhibit;
+} AFXDPState;
+
+#define AF_XDP_BATCH_SIZE 64
+
+static void af_xdp_send(void *opaque);
+static void af_xdp_writable(void *opaque);
+
+/* Set the event-loop handlers for the af-xdp backend. */
+static void af_xdp_update_fd_handler(AFXDPState *s)
+{
+    qemu_set_fd_handler(xsk_socket__fd(s->xsk),
+                        s->read_poll ? af_xdp_send : NULL,
+                        s->write_poll ? af_xdp_writable : NULL,
+                        s);
+}
+
+/* Update the read handler. */
+static void af_xdp_read_poll(AFXDPState *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+/* Update the write handler. */
+static void af_xdp_write_poll(AFXDPState *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+static void af_xdp_poll(NetClientState *nc, bool enable)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    if (s->read_poll != enable || s->write_poll != enable) {
+        s->write_poll = enable;
+        s->read_poll  = enable;
+        af_xdp_update_fd_handler(s);
+    }
+}
+
+static void af_xdp_complete_tx(AFXDPState *s)
+{
+    uint32_t idx = 0;
+    uint32_t done, i;
+    uint64_t *addr;
+
+    done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx);
+
+    for (i = 0; i < done; i++) {
+        addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++);
+        s->pool[s->n_pool++] = *addr;
+        s->outstanding_tx--;
+    }
+
+    if (done) {
+        xsk_ring_cons__release(&s->cq, done);
+    }
+}
+
+/*
+ * The fd_write() callback, invoked if the fd is marked as writable
+ * after a poll.
+ */
+static void af_xdp_writable(void *opaque)
+{
+    AFXDPState *s = opaque;
+
+    /* Try to recover buffers that are already sent. */
+    af_xdp_complete_tx(s);
+
+    /*
+     * Unregister the handler, unless we still have packets to transmit
+     * and the kernel needs a wake-up.
+     */
+    if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) {
+        af_xdp_write_poll(s, false);
+    }
+
+    /* Flush any buffered packets. */
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static ssize_t af_xdp_receive(NetClientState *nc,
+                              const uint8_t *buf, size_t size)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+    struct xdp_desc *desc;
+    uint32_t idx;
+    void *data;
+
+    /* Try to recover buffers that are already sent. */
+    af_xdp_complete_tx(s);
+
+    if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) {
+        /* We can't transmit a packet this size, so drop it. */
+        return size;
+    }
+
+    if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) {
+        /*
+         * Out of buffers or space in tx ring.  Poll until we can write.
+         * This will also kick the Tx, if it was waiting on CQ.
+         */
+        af_xdp_write_poll(s, true);
+        return 0;
+    }
+
+    desc = xsk_ring_prod__tx_desc(&s->tx, idx);
+    desc->addr = s->pool[--s->n_pool];
+    desc->len = size;
+
+    data = xsk_umem__get_data(s->buffer, desc->addr);
+    memcpy(data, buf, size);
+
+    xsk_ring_prod__submit(&s->tx, 1);
+    s->outstanding_tx++;
+
+    if (xsk_ring_prod__needs_wakeup(&s->tx)) {
+        af_xdp_write_poll(s, true);
+    }
+
+    return size;
+}
+
+/*
+ * Complete a previous send (backend --> guest) and enable the
+ * fd_read callback.
+ */
+static void af_xdp_send_completed(NetClientState *nc, ssize_t len)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    af_xdp_read_poll(s, true);
+}
+
+static void af_xdp_fq_refill(AFXDPState *s, uint32_t n)
+{
+    uint32_t i, idx = 0;
+
+    /* Leave one packet for Tx, just in case. */
+    if (s->n_pool < n + 1) {
+        n = s->n_pool;
+    }
+
+    if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) {
+        return;
+    }
+
+    for (i = 0; i < n; i++) {
+        *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool];
+    }
+    xsk_ring_prod__submit(&s->fq, n);
+
+    if (xsk_ring_prod__needs_wakeup(&s->fq)) {
+        /* Receive was blocked by not having enough buffers.  Wake it up. */
+        af_xdp_read_poll(s, true);
+    }
+}
+
+static void af_xdp_send(void *opaque)
+{
+    uint32_t i, n_rx, idx = 0;
+    AFXDPState *s = opaque;
+
+    n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx);
+    if (!n_rx) {
+        return;
+    }
+
+    for (i = 0; i < n_rx; i++) {
+        const struct xdp_desc *desc;
+        struct iovec iov;
+
+        desc = xsk_ring_cons__rx_desc(&s->rx, idx++);
+
+        iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr);
+        iov.iov_len = desc->len;
+
+        s->pool[s->n_pool++] = desc->addr;
+
+        if (!qemu_sendv_packet_async(&s->nc, &iov, 1,
+                                     af_xdp_send_completed)) {
+            /*
+             * The peer cannot receive any more packets for now.  This
+             * packet has been queued, so stop reading from the backend
+             * until af_xdp_send_completed() is called.
+             */
+            af_xdp_read_poll(s, false);
+
+            /* Return unused descriptors to not break the ring cache. */
+            xsk_ring_cons__cancel(&s->rx, n_rx - i - 1);
+            n_rx = i + 1;
+            break;
+        }
+    }
+
+    /* Release actually sent descriptors and try to re-fill. */
+    xsk_ring_cons__release(&s->rx, n_rx);
+    af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE);
+}
+
+/* Flush and close. */
+static void af_xdp_cleanup(NetClientState *nc)
+{
+    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
+
+    qemu_purge_queued_packets(nc);
+
+    af_xdp_poll(nc, false);
+
+    xsk_socket__delete(s->xsk);
+    s->xsk = NULL;
+    g_free(s->pool);
+    s->pool = NULL;
+    xsk_umem__delete(s->umem);
+    s->umem = NULL;
+    qemu_vfree(s->buffer);
+    s->buffer = NULL;
+
+    /* Remove the program if it's the last open queue. */
+    if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags
+        && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) {
+        fprintf(stderr,
+                "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n",
+                s->ifname, s->ifindex);
+    }
+}
+
+static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
+{
+    struct xsk_umem_config config = {
+        .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+        .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+        .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+        .frame_headroom = 0,
+    };
+    uint64_t n_descs;
+    uint64_t size;
+    int64_t i;
+    int ret;
+
+    /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */
+    n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS
+               + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2;
+    size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE;
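+    /*
+     * With the default libxdp ring sizes (2048 descriptors each) and the
+     * default 4 KiB frame size, this works out to 8192 frames and a 32 MiB
+     * buffer area; actual values depend on the headers QEMU is built with.
+     */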
+
+    s->buffer = qemu_memalign(qemu_real_host_page_size(), size);
+    memset(s->buffer, 0, size);
+
+    if (sock_fd < 0) {
+        ret = xsk_umem__create(&s->umem, s->buffer, size,
+                               &s->fq, &s->cq, &config);
+    } else {
+        ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size,
+                                       &s->fq, &s->cq, &config);
+    }
+
+    if (ret) {
+        qemu_vfree(s->buffer);
+        error_setg_errno(errp, errno,
+                         "failed to create umem for %s queue_index: %d",
+                         s->ifname, s->nc.queue_index);
+        return -1;
+    }
+
+    s->pool = g_new(uint64_t, n_descs);
+    /* Fill the pool in the opposite order, because it's a LIFO queue. */
+    for (i = n_descs - 1; i >= 0; i--) {
+        s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+    }
+    s->n_pool = n_descs;
+
+    af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+
+    return 0;
+}
+
+static int af_xdp_socket_create(AFXDPState *s,
+                                const NetdevAFXDPOptions *opts, Error **errp)
+{
+    struct xsk_socket_config cfg = {
+        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+        .libxdp_flags = 0,
+        .bind_flags = XDP_USE_NEED_WAKEUP,
+        .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
+    };
+    int queue_id, error = 0;
+
+    s->inhibit = opts->has_inhibit && opts->inhibit;
+    if (s->inhibit) {
+        cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
+    }
+
+    if (opts->has_force_copy && opts->force_copy) {
+        cfg.bind_flags |= XDP_COPY;
+    }
+
+    queue_id = s->nc.queue_index;
+    if (opts->has_start_queue && opts->start_queue > 0) {
+        queue_id += opts->start_queue;
+    }
+
+    if (opts->has_mode) {
+        /* Specific mode requested. */
+        cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE)
+                         ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
+        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                               s->umem, &s->rx, &s->tx, &cfg)) {
+            error = errno;
+        }
+    } else {
+        /* No mode requested, try native first. */
+        cfg.xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                               s->umem, &s->rx, &s->tx, &cfg)) {
+            /* Can't use native mode, try skb. */
+            cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE;
+            cfg.xdp_flags |= XDP_FLAGS_SKB_MODE;
+
+            if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
+                                   s->umem, &s->rx, &s->tx, &cfg)) {
+                error = errno;
+            }
+        }
+    }
+
+    if (error) {
+        error_setg_errno(errp, error,
+                         "failed to create AF_XDP socket for %s queue_id: %d",
+                         s->ifname, queue_id);
+        return -1;
+    }
+
+    s->xdp_flags = cfg.xdp_flags;
+
+    return 0;
+}
+
+/* NetClientInfo methods. */
+static NetClientInfo net_af_xdp_info = {
+    .type = NET_CLIENT_DRIVER_AF_XDP,
+    .size = sizeof(AFXDPState),
+    .receive = af_xdp_receive,
+    .poll = af_xdp_poll,
+    .cleanup = af_xdp_cleanup,
+};
+
+static int *parse_socket_fds(const char *sock_fds_str,
+                             int64_t n_expected, Error **errp)
+{
+    gchar **substrings = g_strsplit(sock_fds_str, ":", -1);
+    int64_t i, n_sock_fds = g_strv_length(substrings);
+    int *sock_fds = NULL;
+
+    if (n_sock_fds != n_expected) {
+        error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64,
+                   n_expected, n_sock_fds);
+        goto exit;
+    }
+
+    sock_fds = g_new(int, n_sock_fds);
+
+    for (i = 0; i < n_sock_fds; i++) {
+        sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp);
+        if (sock_fds[i] < 0) {
+            g_free(sock_fds);
+            sock_fds = NULL;
+            goto exit;
+        }
+    }
+
+exit:
+    g_strfreev(substrings);
+    return sock_fds;
+}
+
+/*
+ * The exported init function.
+ *
+ * ... -netdev af-xdp,ifname="..."
+ */
+int net_init_af_xdp(const Netdev *netdev,
+                    const char *name, NetClientState *peer, Error **errp)
+{
+    const NetdevAFXDPOptions *opts = &netdev->u.af_xdp;
+    NetClientState *nc, *nc0 = NULL;
+    unsigned int ifindex;
+    uint32_t prog_id = 0;
+    int *sock_fds = NULL;
+    int64_t i, queues;
+    Error *err = NULL;
+    AFXDPState *s;
+
+    ifindex = if_nametoindex(opts->ifname);
+    if (!ifindex) {
+        error_setg_errno(errp, errno, "failed to get ifindex for '%s'",
+                         opts->ifname);
+        return -1;
+    }
+
+    queues = opts->has_queues ? opts->queues : 1;
+    if (queues < 1) {
+        error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'",
+                   queues, opts->ifname);
+        return -1;
+    }
+
+    if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
+        error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
+        return -1;
+    }
+
+    if (opts->sock_fds) {
+        sock_fds = parse_socket_fds(opts->sock_fds, queues, errp);
+        if (!sock_fds) {
+            return -1;
+        }
+    }
+
+    for (i = 0; i < queues; i++) {
+        nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name);
+        qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname);
+        nc->queue_index = i;
+
+        if (!nc0) {
+            nc0 = nc;
+        }
+
+        s = DO_UPCAST(AFXDPState, nc, nc);
+
+        pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
+        s->ifindex = ifindex;
+        s->n_queues = queues;
+
+        if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
+            || af_xdp_socket_create(s, opts, errp)) {
+            /* Make sure the XDP program will be removed. */
+            s->n_queues = i;
+            error_propagate(errp, err);
+            goto err;
+        }
+    }
+
+    if (nc0) {
+        s = DO_UPCAST(AFXDPState, nc, nc0);
+        if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) {
+            error_setg_errno(errp, errno,
+                             "no XDP program loaded on '%s', ifindex: %d",
+                             s->ifname, s->ifindex);
+            goto err;
+        }
+    }
+
+    af_xdp_read_poll(s, true); /* Initially only poll for reads. */
+
+    return 0;
+
+err:
+    g_free(sock_fds);
+    if (nc0) {
+        qemu_del_net_client(nc0);
+    }
+
+    return -1;
+}
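
The backend above recycles UMEM frame addresses through a single LIFO pool:
addresses are pushed back when the completion queue (Tx) or the Rx ring hands
a frame back to userspace, and popped again by the Tx path and the fill-queue
refill.  A minimal stand-alone sketch of that pool discipline, independent of
libxdp (the AddrPool type and its helpers below are hypothetical and shown
only to illustrate the idea):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical model of the frame-address pool used by af-xdp.c. */
    typedef struct AddrPool {
        uint64_t *addrs;  /* free frame addresses within the UMEM area */
        uint32_t n;       /* number of frames currently in the pool */
    } AddrPool;

    /* Pop a free frame for Tx or for refilling the fill queue. */
    static inline bool addr_pool_get(AddrPool *p, uint64_t *addr)
    {
        if (p->n == 0) {
            return false;  /* caller backs off, e.g. enables write polling */
        }
        *addr = p->addrs[--p->n];
        return true;
    }

    /* Push a frame back once the kernel has finished with it (CQ or Rx). */
    static inline void addr_pool_put(AddrPool *p, uint64_t addr)
    {
        p->addrs[p->n++] = addr;
    }
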
index ed8bdfff1e7c983b27565128234b2ba4c85a4de7..be53794582cf3e2adf6ae8c350862daca592a309 100644 (file)
@@ -64,6 +64,11 @@ int net_init_netmap(const Netdev *netdev, const char *name,
                     NetClientState *peer, Error **errp);
 #endif
 
+#ifdef CONFIG_AF_XDP
+int net_init_af_xdp(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
+#endif
+
 int net_init_vhost_user(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp);
 
index 7d05f16ca7a39476f9de199ed665242bc752dcaf..16073f245828f9c8083be6690e759cdcef110746 100644 (file)
@@ -68,7 +68,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt,
     int64_t ts;
     int caplen;
     size_t size = iov_size(iov, cnt) - offset;
-    struct iovec dumpiov[cnt + 1];
+    g_autofree struct iovec *dumpiov = g_new(struct iovec, cnt + 1);
 
     /* Early return in case of previous error. */
     if (s->fd < 0) {
index 51caa42c9d21d687b03e20a6338f70e847c623aa..ce99bd4447f484e8842d7a5c47403217343dd0b5 100644 (file)
@@ -36,6 +36,9 @@ system_ss.add(when: vde, if_true: files('vde.c'))
 if have_netmap
   system_ss.add(files('netmap.c'))
 endif
+
+system_ss.add(when: libxdp, if_true: files('af-xdp.c'))
+
 if have_vhost_net_user
   system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
   system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c'))
index 6492ad530e2167f4bf4a135c8834a051f7523f5f..1c0bfdaa6c72d691b618506a8952fce0645cc4be 100644 (file)
--- a/net/net.c
+++ b/net/net.c
@@ -495,6 +495,15 @@ bool qemu_has_ufo(NetClientState *nc)
     return nc->info->has_ufo(nc);
 }
 
+bool qemu_has_uso(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_uso) {
+        return false;
+    }
+
+    return nc->info->has_uso(nc);
+}
+
 bool qemu_has_vnet_hdr(NetClientState *nc)
 {
     if (!nc || !nc->info->has_vnet_hdr) {
@@ -532,13 +541,13 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
 }
 
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                          int ecn, int ufo)
+                          int ecn, int ufo, int uso4, int uso6)
 {
     if (!nc || !nc->info->set_offload) {
         return;
     }
 
-    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 int qemu_get_vnet_hdr_len(NetClientState *nc)
@@ -1082,6 +1091,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_NETMAP
         [NET_CLIENT_DRIVER_NETMAP]    = net_init_netmap,
 #endif
+#ifdef CONFIG_AF_XDP
+        [NET_CLIENT_DRIVER_AF_XDP]    = net_init_af_xdp,
+#endif
 #ifdef CONFIG_NET_BRIDGE
         [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
 #endif
@@ -1186,6 +1198,9 @@ void show_netdevs(void)
 #ifdef CONFIG_NETMAP
         "netmap",
 #endif
+#ifdef CONFIG_AF_XDP
+        "af-xdp",
+#endif
 #ifdef CONFIG_POSIX
         "vhost-user",
 #endif
index 9e0cec58d379ee2eb101087c52fd3449db48f630..241b27c8e97348d5d2f05d209f9af16fa06f70ad 100644 (file)
@@ -371,7 +371,7 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
 }
 
 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                               int ecn, int ufo)
+                               int ecn, int ufo, int uso4, int uso6)
 {
     NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 
index 4c98fdd337e503a026bba620830580601c8d4fb7..274ea7bd2c3c6daf6624f93a0018ee842823eedf 100644 (file)
@@ -212,6 +212,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -232,7 +237,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
index f54f308d359cdaf12b1f27cca45715a6b6bb8053..c7e514ecb04b5240952a5bf8b451d1f1e2139b7e 100644 (file)
@@ -173,6 +173,18 @@ int tap_probe_has_ufo(int fd)
     return 1;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    unsigned offload;
+
+    offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;
+
+    if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) {
+        return 0;
+    }
+    return 1;
+}
+
 /* Verify that we can assign given length */
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
@@ -237,7 +249,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     unsigned int offload = 0;
 
@@ -256,13 +268,22 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
             offload |= TUN_F_TSO_ECN;
         if (ufo)
             offload |= TUN_F_UFO;
+        if (uso4) {
+            offload |= TUN_F_USO4;
+        }
+        if (uso6) {
+            offload |= TUN_F_USO6;
+        }
     }
 
     if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
-        offload &= ~TUN_F_UFO;
+        offload &= ~(TUN_F_USO4 | TUN_F_USO6);
         if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
-            fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
+            offload &= ~TUN_F_UFO;
+            if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
+                fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
                     strerror(errno));
+            }
         }
     }
 }
index bbbb62c2a75ca94813f144fc4c9d75155d2142b4..9a58cecb7f47933a919d3345af0451e88c9a3f9c 100644 (file)
@@ -50,5 +50,7 @@
 #define TUN_F_TSO6    0x04    /* I can handle TSO for IPv6 packets */
 #define TUN_F_TSO_ECN 0x08    /* I can handle TSO with ECN bits. */
 #define TUN_F_UFO     0x10    /* I can handle UFO packets */
+#define TUN_F_USO4    0x20    /* I can handle USO for IPv4 packets */
+#define TUN_F_USO6    0x40    /* I can handle USO for IPv6 packets */
 
 #endif /* QEMU_TAP_LINUX_H */
index 38e15028bf3d302c1a0e0bdbc6491df79d82d27c..08b13af51257e7e730ee3df35af83879ea9987ad 100644 (file)
@@ -216,6 +216,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -236,7 +241,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
index a0fa25804b6f851e4567b0476d84877cc6d9be94..4b24f61e3a6ce03b2bf2d075735008188e2e57d8 100644 (file)
@@ -47,6 +47,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -67,7 +72,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
index f327d62ab07ad707abfb3911d2339460b71c0187..7b8b4be02cff356ca446da6dcd762911f06368a4 100644 (file)
@@ -741,7 +741,7 @@ static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 }
 
 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
+                     int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
 
index 1bf085d42289232f5053666f887f4e4e09bb9b2e..c23d0323c2aefe27003f7d8c6a80529d2c40f25b 100644 (file)
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,6 +57,7 @@ typedef struct TAPState {
     bool write_poll;
     bool using_vnet_hdr;
     bool has_ufo;
+    bool has_uso;
     bool enabled;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
@@ -117,10 +118,11 @@ static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     const struct iovec *iovp = iov;
-    struct iovec iov_copy[iovcnt + 1];
+    g_autofree struct iovec *iov_copy = NULL;
     struct virtio_net_hdr_mrg_rxbuf hdr = { };
 
     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+        iov_copy = g_new(struct iovec, iovcnt + 1);
         iov_copy[0].iov_base = &hdr;
         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
@@ -237,6 +239,15 @@ static bool tap_has_ufo(NetClientState *nc)
     return s->has_ufo;
 }
 
+static bool tap_has_uso(NetClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
+
+    return s->has_uso;
+}
+
 static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -307,14 +318,14 @@ static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 }
 
 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
+                     int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     if (s->fd < 0) {
         return;
     }
 
-    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
+    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 static void tap_exit_notify(Notifier *notifier, void *data)
@@ -384,6 +395,7 @@ static NetClientInfo net_tap_info = {
     .poll = tap_poll,
     .cleanup = tap_cleanup,
     .has_ufo = tap_has_ufo,
+    .has_uso = tap_has_uso,
     .has_vnet_hdr = tap_has_vnet_hdr,
     .has_vnet_hdr_len = tap_has_vnet_hdr_len,
     .get_using_vnet_hdr = tap_get_using_vnet_hdr,
@@ -413,8 +425,9 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
     s->using_vnet_hdr = false;
     s->has_ufo = tap_probe_has_ufo(s->fd);
+    s->has_uso = tap_probe_has_uso(s->fd);
     s->enabled = true;
-    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
+    tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0);
     /*
      * Make sure host header length is set correctly in tap:
      * it might have been modified by another instance of qemu.
index 547f8a5a28f0cad51b7192254e719e33e8858a6c..9a2175655bb0dde22f2b7c6bac53c2706a4d6028 100644 (file)
@@ -37,7 +37,9 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
 int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
-void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
+int tap_probe_has_uso(int fd);
+void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo,
+                        int uso4, int uso6);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
 int tap_fd_set_vnet_le(int fd, int vnet_is_le);
 int tap_fd_set_vnet_be(int fd, int vnet_is_be);
index 34202ca009300c9bb3a329813bb902e83ca0dffc..4e94c50bc7023a3f9b4d57bd2b2051e4da8524c2 100644 (file)
@@ -75,11 +75,14 @@ const int vdpa_feature_bits[] = {
     VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6,
     VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
     VIRTIO_NET_F_HASH_REPORT,
     VIRTIO_NET_F_HOST_ECN,
     VIRTIO_NET_F_HOST_TSO4,
     VIRTIO_NET_F_HOST_TSO6,
     VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_HOST_USO,
     VIRTIO_NET_F_MQ,
     VIRTIO_NET_F_MRG_RXBUF,
     VIRTIO_NET_F_MTU,
index 2078b16edb024efb7733fb5cd8b043fdacc93145..5521b0ad36c74a03d951bb6cca2a381d850484c1 100644 (file)
@@ -316,22 +316,7 @@ uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr)
 {
 #ifdef CONFIG_SOFTMMU
     if (haddr) {
-        if (!haddr->is_io) {
-            RAMBlock *block;
-            ram_addr_t offset;
-            void *hostaddr = haddr->v.ram.hostaddr;
-
-            block = qemu_ram_block_from_host(hostaddr, false, &offset);
-            if (!block) {
-                error_report("Bad host ram pointer %p", haddr->v.ram.hostaddr);
-                abort();
-            }
-
-            return block->offset + offset + block->mr->addr;
-        } else {
-            MemoryRegionSection *mrs = haddr->v.io.section;
-            return mrs->offset_within_address_space + haddr->v.io.offset;
-        }
+        return haddr->phys_addr;
     }
 #endif
     return 0;
@@ -341,13 +326,13 @@ const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
 {
 #ifdef CONFIG_SOFTMMU
     if (h && h->is_io) {
-        MemoryRegionSection *mrs = h->v.io.section;
-        if (!mrs->mr->name) {
-            unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr;
-            g_autofree char *temp = g_strdup_printf("anon%08lx", maddr);
+        MemoryRegion *mr = h->mr;
+        if (!mr->name) {
+            unsigned maddr = (uintptr_t)mr;
+            g_autofree char *temp = g_strdup_printf("anon%08x", maddr);
             return g_intern_string(temp);
         } else {
-            return g_intern_string(mrs->mr->name);
+            return g_intern_string(mr->name);
         }
     } else {
         return g_intern_static_string("RAM");
index 313c8a606e94dc6d6f75e3a525cfe6a0970ac454..8095b68fa831f0f310b515903c9fbe64b028d741 100644 (file)
     'ifname':     'str',
     '*devname':    'str' } }
 
+##
+# @AFXDPMode:
+#
+# Attach mode for a default XDP program
+#
+# @skb: generic mode, no driver support necessary
+#
+# @native: DRV mode, the program is attached to the network driver and
+#     packets are passed to the socket without skb allocation.
+#
+# Since: 8.2
+##
+{ 'enum': 'AFXDPMode',
+  'data': [ 'native', 'skb' ],
+  'if': 'CONFIG_AF_XDP' }
+
+##
+# @NetdevAFXDPOptions:
+#
+# AF_XDP network backend
+#
+# @ifname: The name of an existing network interface.
+#
+# @mode: Attach mode for a default XDP program.  If not specified, then
+#     'native' will be tried first, then 'skb'.
+#
+# @force-copy: Force XDP copy mode even if device supports zero-copy.
+#     (default: false)
+#
+# @queues: number of queues to be used for multiqueue interfaces (default: 1).
+#
+# @start-queue: Use @queues starting from this queue number (default: 0).
+#
+# @inhibit: Don't load a default XDP program, use one already loaded to
+#     the interface (default: false).  Requires @sock-fds.
+#
+# @sock-fds: A colon (:) separated list of file descriptors for already open
+#     but not yet bound AF_XDP sockets, in queue order.  One fd per queue.
+#     These descriptors should already be added into the XDP socket map for
+#     the corresponding queues.  Requires @inhibit.
+#
+# Since: 8.2
+##
+{ 'struct': 'NetdevAFXDPOptions',
+  'data': {
+    'ifname':       'str',
+    '*mode':        'AFXDPMode',
+    '*force-copy':  'bool',
+    '*queues':      'int',
+    '*start-queue': 'int',
+    '*inhibit':     'bool',
+    '*sock-fds':    'str' },
+  'if': 'CONFIG_AF_XDP' }
+
 ##
 # @NetdevVhostUserOptions:
 #
 # @vmnet-bridged: since 7.1
 # @stream: since 7.2
 # @dgram: since 7.2
+# @af-xdp: since 8.2
 #
 # Since: 2.7
 ##
   'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream',
             'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user',
             'vhost-vdpa',
+            { 'name': 'af-xdp', 'if': 'CONFIG_AF_XDP' },
             { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] }
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
     'netmap':   'NetdevNetmapOptions',
+    'af-xdp':   { 'type': 'NetdevAFXDPOptions',
+                  'if': 'CONFIG_AF_XDP' },
     'vhost-user': 'NetdevVhostUserOptions',
     'vhost-vdpa': 'NetdevVhostVDPAOptions',
     'vmnet-host': { 'type': 'NetdevVmnetHostOptions',
index 6be621c23249f397c470fb1ab33fb2b3f6cbf032..2bcf7e4e979c4b16828d6728c0e8396878f9be19 100644 (file)
@@ -2882,6 +2882,19 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
     "                netmap device, defaults to '/dev/netmap')\n"
 #endif
+#ifdef CONFIG_AF_XDP
+    "-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off]\n"
+    "         [,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]\n"
+    "                attach to the existing network interface 'name' with AF_XDP socket\n"
+    "                use 'mode=MODE' to specify an XDP program attach mode\n"
+    "                use 'force-copy=on|off' to force XDP copy mode even if device supports zero-copy (default: off)\n"
+    "                use 'inhibit=on|off' to inhibit loading of a default XDP program (default: off)\n"
+    "                with inhibit=on,\n"
+    "                  use 'sock-fds' to provide file descriptors for already open AF_XDP sockets\n"
+    "                  added to a socket map in XDP program.  One socket per queue.\n"
+    "                use 'queues=n' to specify how many queues of a multiqueue interface should be used\n"
+    "                use 'start-queue=m' to specify the first queue that should be used\n"
+#endif
 #ifdef CONFIG_POSIX
     "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
@@ -2927,6 +2940,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_POSIX
     "vhost-user|"
 #endif
@@ -2955,6 +2971,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_VMNET
     "vmnet-host|vmnet-shared|vmnet-bridged|"
 #endif
@@ -2962,7 +2981,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                old way to initialize a host network interface\n"
     "                (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
 SRST
-``-nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
+``-nic [tap|bridge|user|l2tpv3|vde|netmap|af-xdp|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
     This option is a shortcut for configuring both the on-board
     (default) guest NIC hardware and the host network backend in one go.
     The host backend options are the same as with the corresponding
@@ -3376,6 +3395,55 @@ SRST
         # launch QEMU instance
         |qemu_system| linux.img -nic vde,sock=/tmp/myswitch
 
+``-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off][,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]``
+    Configure an AF_XDP backend that connects to the existing network
+    interface 'name' through an AF_XDP socket.  A specific attach mode for
+    the default XDP program can be forced with 'mode'; if none is given,
+    the most performant available mode is used ('native' is tried first,
+    then 'skb').  The number of queues 'n' should generally match the
+    number of queues in the interface and defaults to 1.  Traffic arriving
+    on non-configured device queues will not be delivered to the network
+    backend.
+
+    .. parsed-literal::
+
+        # set number of queues to 4
+        ethtool -L eth0 combined 4
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=4
+
+    The 'start-queue' option can be specified if a particular range of
+    queues [m, m + n - 1] should be in use.  For example, this may be
+    necessary in order to use certain NICs in native mode.  The kernel
+    allows the driver to create a separate set of XDP queues on top of the
+    regular ones, and only these queues can be used for AF_XDP sockets.
+    NICs that work this way may also require additional traffic
+    redirection with ethtool to these special queues.
+
+    .. parsed-literal::
+
+        # set number of queues to 1
+        ethtool -L eth0 combined 1
+        # redirect all the traffic to the second queue (id: 1)
+        # note: drivers may require non-empty key/mask pair.
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:00 m FF:FF:FF:FF:FF:FE action 1
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:01 m FF:FF:FF:FF:FF:FE action 1
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=1,start-queue=1
+
+    The XDP program can also be loaded externally.  In this case the
+    'inhibit' option should be set to 'on' and 'sock-fds' should provide
+    file descriptors for AF_XDP sockets that are already open (but not yet
+    bound) and already added to a socket map for the corresponding queues.
+    One socket per queue.
+
+    .. parsed-literal::
+
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=3,inhibit=on,sock-fds=15:16:17
+
 ``-netdev vhost-user,chardev=id[,vhostforce=on|off][,queues=n]``
     Establish a vhost-user netdev, backed by a chardev id. The chardev
     should be a unix domain socket backed one. The vhost-user uses a
index 131f8ee5f3f48b7b222f104dfafb192786939328..76781f17f41b42993dd26be1278813ea50f6abf8 100755 (executable)
@@ -35,6 +35,7 @@
 --block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
 --with-coroutine=ucontext \
 --tls-priority=@QEMU,SYSTEM \
+--disable-af-xdp \
 --disable-attr \
 --disable-auth-pam \
 --disable-avx2 \
index e1d178370c221858e3821baa06f1d3a93d2cee4b..230119346abdbd44370b25fb799f8c6e30a3f58f 100644 (file)
@@ -76,6 +76,7 @@ meson_options_help() {
   printf "%s\n" 'disabled with --disable-FEATURE, default is enabled if available'
   printf "%s\n" '(unless built with --without-default-features):'
   printf "%s\n" ''
+  printf "%s\n" '  af-xdp          AF_XDP network backend support'
   printf "%s\n" '  alsa            ALSA sound support'
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
@@ -208,6 +209,8 @@ meson_options_help() {
 }
 _meson_option_parse() {
   case $1 in
+    --enable-af-xdp) printf "%s" -Daf_xdp=enabled ;;
+    --disable-af-xdp) printf "%s" -Daf_xdp=disabled ;;
     --enable-alsa) printf "%s" -Dalsa=enabled ;;
     --disable-alsa) printf "%s" -Dalsa=disabled ;;
     --enable-attr) printf "%s" -Dattr=enabled ;;
index 62cdeb0f20a2500e02e51895e7c90ab9a62170ac..396963c09131e6235773b21d74d660c82d5ad50a 100644 (file)
@@ -121,10 +121,7 @@ static void *new_stack_for_clone(void)
 
     /* Allocate a new stack and get a pointer to its top. */
     stack_ptr = qemu_alloc_stack(&stack_size);
-#if !defined(HOST_HPPA)
-    /* The top is at the end of the area, except on HPPA. */
     stack_ptr += stack_size;
-#endif
 
     return stack_ptr;
 }
index 403b345ea3b9ff1ad06083da8df7e778c9d05125..c666a96ba17d41e5c6d39faf6dd19e33ccf5f9cb 100644 (file)
@@ -26,6 +26,7 @@
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
 #include "fpu/softfloat.h"
+#include "crypto/clmul.h"
 
 static uint16_t mve_eci_mask(CPUARMState *env)
 {
@@ -984,17 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
  * Polynomial multiply. We can always do this generating 64 bits
  * of the result at a time, so we don't need to use DO_2OP_L.
  */
-#define VMULLPH_MASK 0x00ff00ff00ff00ffULL
-#define VMULLPW_MASK 0x0000ffff0000ffffULL
-#define DO_VMULLPBH(N, M) pmull_h((N) & VMULLPH_MASK, (M) & VMULLPH_MASK)
-#define DO_VMULLPTH(N, M) DO_VMULLPBH((N) >> 8, (M) >> 8)
-#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK)
-#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16)
-
-DO_2OP(vmullpbh, 8, uint64_t, DO_VMULLPBH)
-DO_2OP(vmullpth, 8, uint64_t, DO_VMULLPTH)
-DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW)
-DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW)
+DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
+DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
+DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
+DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)
 
 /*
  * Because the computation type is at least twice as large as required,
index 976b7042008b5ea15651ca4f38b65cb36c7f053d..d83a0e772cd953440596ba259299a5f4bd6d8932 100644 (file)
@@ -2943,54 +2943,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
 }
 
-#define GEN_CMP0(NAME, COND)                                            \
-    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
-    {                                                                   \
-        tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0));        \
-    }                                                                   \
-    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
-    {                                                                   \
-        tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0));        \
-    }                                                                   \
-    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
-    {                                                                   \
-        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
-        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
-    }                                                                   \
-    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
-                            uint32_t opr_sz, uint32_t max_sz)           \
-    {                                                                   \
-        const GVecGen2 op[4] = {                                        \
-            { .fno = gen_helper_gvec_##NAME##0_b,                       \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_8 },                                           \
-            { .fno = gen_helper_gvec_##NAME##0_h,                       \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_16 },                                          \
-            { .fni4 = gen_##NAME##0_i32,                                \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .vece = MO_32 },                                          \
-            { .fni8 = gen_##NAME##0_i64,                                \
-              .fniv = gen_##NAME##0_vec,                                \
-              .opt_opc = vecop_list_cmp,                                \
-              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
-              .vece = MO_64 },                                          \
-        };                                                              \
-        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
-    }
-
-static const TCGOpcode vecop_list_cmp[] = {
-    INDEX_op_cmp_vec, 0
-};
-
-GEN_CMP0(ceq, TCG_COND_EQ)
-GEN_CMP0(cle, TCG_COND_LE)
-GEN_CMP0(cge, TCG_COND_GE)
-GEN_CMP0(clt, TCG_COND_LT)
-GEN_CMP0(cgt, TCG_COND_GT)
+#define GEN_CMP0(NAME, COND)                              \
+    void NAME(unsigned vece, uint32_t d, uint32_t m,      \
+              uint32_t opr_sz, uint32_t max_sz)           \
+    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
+
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
 
 #undef GEN_CMP0
 
index 6712a2c790e0ce98985442a3b69209a4d7f8c55f..1f93510b85cd352f093552b2897a87d6f7e98ea3 100644 (file)
@@ -23,6 +23,7 @@
 #include "tcg/tcg-gvec-desc.h"
 #include "fpu/softfloat.h"
 #include "qemu/int128.h"
+#include "crypto/clmul.h"
 #include "vec_internal.h"
 
 /*
@@ -1986,21 +1987,11 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
  */
 void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
 {
-    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    intptr_t i, opr_sz = simd_oprsz(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        uint64_t nn = n[i];
-        uint64_t mm = m[i];
-        uint64_t rr = 0;
-
-        for (j = 0; j < 8; ++j) {
-            uint64_t mask = (nn & 0x0101010101010101ull) * 0xff;
-            rr ^= mm & mask;
-            mm = (mm << 1) & 0xfefefefefefefefeull;
-            nn >>= 1;
-        }
-        d[i] = rr;
+        d[i] = clmul_8x8_low(n[i], m[i]);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
@@ -2012,84 +2003,28 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
  */
 void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
 {
-    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    intptr_t i, opr_sz = simd_oprsz(desc);
     intptr_t hi = simd_data(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; i += 2) {
-        uint64_t nn = n[i + hi];
-        uint64_t mm = m[i + hi];
-        uint64_t rhi = 0;
-        uint64_t rlo = 0;
-
-        /* Bit 0 can only influence the low 64-bit result.  */
-        if (nn & 1) {
-            rlo = mm;
-        }
-
-        for (j = 1; j < 64; ++j) {
-            uint64_t mask = -((nn >> j) & 1);
-            rlo ^= (mm << j) & mask;
-            rhi ^= (mm >> (64 - j)) & mask;
-        }
-        d[i] = rlo;
-        d[i + 1] = rhi;
+        Int128 r = clmul_64(n[i + hi], m[i + hi]);
+        d[i] = int128_getlo(r);
+        d[i + 1] = int128_gethi(r);
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
-/*
- * 8x8->16 polynomial multiply.
- *
- * The byte inputs are expanded to (or extracted from) half-words.
- * Note that neon and sve2 get the inputs from different positions.
- * This allows 4 bytes to be processed in parallel with uint64_t.
- */
-
-static uint64_t expand_byte_to_half(uint64_t x)
-{
-    return  (x & 0x000000ff)
-         | ((x & 0x0000ff00) << 8)
-         | ((x & 0x00ff0000) << 16)
-         | ((x & 0xff000000) << 24);
-}
-
-uint64_t pmull_w(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-    for (i = 0; i < 16; ++i) {
-        uint64_t mask = (op1 & 0x0000000100000001ull) * 0xffffffff;
-        result ^= op2 & mask;
-        op1 >>= 1;
-        op2 <<= 1;
-    }
-    return result;
-}
-
-uint64_t pmull_h(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-    for (i = 0; i < 8; ++i) {
-        uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff;
-        result ^= op2 & mask;
-        op1 >>= 1;
-        op2 <<= 1;
-    }
-    return result;
-}
-
 void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
 {
     int hi = simd_data(desc);
     uint64_t *d = vd, *n = vn, *m = vm;
     uint64_t nn = n[hi], mm = m[hi];
 
-    d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+    d[0] = clmul_8x4_packed(nn, mm);
     nn >>= 32;
     mm >>= 32;
-    d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+    d[1] = clmul_8x4_packed(nn, mm);
 
     clear_tail(d, 16, simd_maxsz(desc));
 }
@@ -2102,23 +2037,8 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
     uint64_t *d = vd, *n = vn, *m = vm;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull;
-        uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull;
-
-        d[i] = pmull_h(nn, mm);
-    }
-}
-
-static uint64_t pmull_d(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-
-    for (i = 0; i < 32; ++i) {
-        uint64_t mask = -((op1 >> i) & 1);
-        result ^= (op2 << i) & mask;
+        d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift);
     }
-    return result;
 }
 
 void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc)
@@ -2129,7 +2049,7 @@ void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc)
     uint64_t *d = vd;
 
     for (i = 0; i < opr_sz / 8; ++i) {
-        d[i] = pmull_d(n[2 * i + sel], m[2 * i + sel]);
+        d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]);
     }
 }
 #endif
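
The Arm helpers above, like the x86, PowerPC and s390x conversions further
down, now defer to the generic carry-less multiply primitives from
crypto/clmul.h instead of open-coded bit loops.  The semantics are unchanged;
as a reference sketch equivalent to the deleted bit-serial code (using QEMU's
existing Int128 helpers, with an illustrative function name), the widest
primitive computes:

    #include "qemu/osdep.h"
    #include "qemu/int128.h"

    /* Bit-serial reference for a 64 x 64 -> 128 carry-less multiply. */
    static Int128 clmul_64_reference(uint64_t a, uint64_t b)
    {
        uint64_t lo = 0, hi = 0;

        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                lo ^= a << i;
                hi ^= i ? a >> (64 - i) : 0;  /* avoid shifting by 64 */
            }
        }
        return int128_make128(lo, hi);
    }

The narrower helpers (clmul_8x4_even/odd, clmul_16x2_even/odd, clmul_32) apply
the same XOR-accumulate idea per lane, which is why each of the removed
pmull_*, galois_multiply* and clmulq loops maps onto a one-line call.
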
index 1f4ed80ff76efe5b00b71c596db54c82fc485d01..3ca1b94ccf989e151ca308337111b63480432033 100644 (file)
@@ -219,17 +219,6 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
 int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
 int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
 
-/*
- * 8 x 8 -> 16 vector polynomial multiply where the inputs are
- * in the low 8 bits of each 16-bit element
-*/
-uint64_t pmull_h(uint64_t op1, uint64_t op2);
-/*
- * 16 x 16 -> 32 vector polynomial multiply where the inputs are
- * in the low 16 bits of each 32-bit element
- */
-uint64_t pmull_w(uint64_t op1, uint64_t op2);
-
 /**
  * bfdotadd:
  * @sum: addend
index a0e425733f109e34848171023bd51dbadc1cd87a..33908c0691fee5ab5af97878849f21a5d70fd5c7 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "crypto/aes.h"
 #include "crypto/aes-round.h"
+#include "crypto/clmul.h"
 
 #if SHIFT == 0
 #define Reg MMXReg
@@ -2122,41 +2123,18 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
 
 #endif
 
-#if SHIFT == 1
-static void clmulq(uint64_t *dest_l, uint64_t *dest_h,
-                          uint64_t a, uint64_t b)
-{
-    uint64_t al, ah, resh, resl;
-
-    ah = 0;
-    al = a;
-    resh = resl = 0;
-
-    while (b) {
-        if (b & 1) {
-            resl ^= al;
-            resh ^= ah;
-        }
-        ah = (ah << 1) | (al >> 63);
-        al <<= 1;
-        b >>= 1;
-    }
-
-    *dest_l = resl;
-    *dest_h = resh;
-}
-#endif
-
 void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
                                     uint32_t ctrl)
 {
-    uint64_t a, b;
-    int i;
+    int a_idx = (ctrl & 1) != 0;
+    int b_idx = (ctrl & 16) != 0;
 
-    for (i = 0; i < 1 << SHIFT; i += 2) {
-        a = v->Q(((ctrl & 1) != 0) + i);
-        b = s->Q(((ctrl & 16) != 0) + i);
-        clmulq(&d->Q(i), &d->Q(i + 1), a, b);
+    for (int i = 0; i < SHIFT; i++) {
+        uint64_t a = v->Q(2 * i + a_idx);
+        uint64_t b = s->Q(2 * i + b_idx);
+        Int128 *r = (Int128 *)&d->ZMM_X(i);
+
+        *r = clmul_64(a, b);
     }
 }
 
index 96cdb3c7e37330135dee7413acb2ee25e6bc7967..6fd00684a5b9b8ce08f15b118a2a47b1c81dad5c 100644 (file)
@@ -25,6 +25,7 @@
 #include "exec/helper-proto.h"
 #include "crypto/aes.h"
 #include "crypto/aes-round.h"
+#include "crypto/clmul.h"
 #include "fpu/softfloat.h"
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
@@ -1424,46 +1425,39 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 #undef VBPERMQ_INDEX
 #undef VBPERMQ_DW
 
-#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
-void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
-{                                                             \
-    int i, j;                                                 \
-    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
-                                                              \
-    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
-        prod[i] = 0;                                          \
-        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
-            if (a->srcfld[i] & (1ull << j)) {                 \
-                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
-            }                                                 \
-        }                                                     \
-    }                                                         \
-                                                              \
-    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
-        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
-    }                                                         \
-}
-
-PMSUM(vpmsumb, u8, u16, uint16_t)
-PMSUM(vpmsumh, u16, u32, uint32_t)
-PMSUM(vpmsumw, u32, u64, uint64_t)
+/*
+ * There is no carry across the two doublewords, so their order does
+ * not matter.  Nor is there partial overlap between registers.
+ */
+void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb);
+    }
+}
 
-void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
-    int i, j;
-    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
-
-    for (j = 0; j < 64; j++) {
-        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
-            if (a->VsrD(i) & (1ull << j)) {
-                tmp = int128_make64(b->VsrD(i));
-                tmp = int128_lshift(tmp, j);
-                prod[i] = int128_xor(prod[i], tmp);
-            }
-        }
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb);
+    }
+}
+
+void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    for (int i = 0; i < 2; ++i) {
+        uint64_t aa = a->u64[i], bb = b->u64[i];
+        r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32);
     }
+}
 
-    r->s128 = int128_xor(prod[0], prod[1]);
+void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    Int128 e = clmul_64(a->u64[0], b->u64[0]);
+    Int128 o = clmul_64(a->u64[1], b->u64[1]);
+    r->s128 = int128_xor(e, o);
 }
 
 #if HOST_BIG_ENDIAN
index 53ab5c5eb31be73ceade1d6d1bcb6597d6a165c4..b18d8a6d16e95f3cbff8313ded8f1bcf438991cf 100644 (file)
 #include "vec.h"
 #include "exec/helper-proto.h"
 #include "tcg/tcg-gvec-desc.h"
+#include "crypto/clmul.h"
 
 static bool s390_vec_is_zero(const S390Vector *v)
 {
     return !v->doubleword[0] && !v->doubleword[1];
 }
 
-static void s390_vec_xor(S390Vector *res, const S390Vector *a,
-                         const S390Vector *b)
-{
-    res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
-    res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
-}
-
 static void s390_vec_and(S390Vector *res, const S390Vector *a,
                          const S390Vector *b)
 {
@@ -164,117 +158,105 @@ DEF_VCTZ(8)
 DEF_VCTZ(16)
 
 /* like binary multiplication, but XOR instead of addition */
-#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
-static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
-                                             uint##TBITS##_t b)                \
-{                                                                              \
-    uint##TBITS##_t res = 0;                                                   \
-                                                                               \
-    while (b) {                                                                \
-        if (b & 0x1) {                                                         \
-            res = res ^ a;                                                     \
-        }                                                                      \
-        a = a << 1;                                                            \
-        b = b >> 1;                                                            \
-    }                                                                          \
-    return res;                                                                \
+
+/*
+ * There is no carry across the two doublewords, so their order does
+ * not matter.  Nor is there partial overlap between registers.
+ */
+static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
+{
+    return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a;
 }
-DEF_GALOIS_MULTIPLY(8, 16)
-DEF_GALOIS_MULTIPLY(16, 32)
-DEF_GALOIS_MULTIPLY(32, 64)
 
-static S390Vector galois_multiply64(uint64_t a, uint64_t b)
+void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
 {
-    S390Vector res = {};
-    S390Vector va = {
-        .doubleword[1] = a,
-    };
-    S390Vector vb = {
-        .doubleword[1] = b,
-    };
-
-    while (!s390_vec_is_zero(&vb)) {
-        if (vb.doubleword[1] & 0x1) {
-            s390_vec_xor(&res, &res, &va);
-        }
-        s390_vec_shl(&va, &va, 1);
-        s390_vec_shr(&vb, &vb, 1);
-    }
-    return res;
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+
+    q1[0] = do_gfma8(q2[0], q3[0], 0);
+    q1[1] = do_gfma8(q2[1], q3[1], 0);
 }
 
-#define DEF_VGFM(BITS, TBITS)                                                  \
-void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
-                             uint32_t desc)                                    \
-{                                                                              \
-    int i;                                                                     \
-                                                                               \
-    for (i = 0; i < (128 / TBITS); i++) {                                      \
-        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
-        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
-        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
-                                                                               \
-        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
-        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
-        d = d ^ galois_multiply32(a, b);                                       \
-        s390_vec_write_element##TBITS(v1, i, d);                               \
-    }                                                                          \
+void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t desc)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
 }
-DEF_VGFM(8, 16)
-DEF_VGFM(16, 32)
-DEF_VGFM(32, 64)
 
-void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
-                         uint32_t desc)
+static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
+{
+    return clmul_16x2_even(n, m) ^ clmul_16x2_odd(n, m) ^ a;
+}
+
+void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
 {
-    S390Vector tmp1, tmp2;
-    uint64_t a, b;
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
 
-    a = s390_vec_read_element64(v2, 0);
-    b = s390_vec_read_element64(v3, 0);
-    tmp1 = galois_multiply64(a, b);
-    a = s390_vec_read_element64(v2, 1);
-    b = s390_vec_read_element64(v3, 1);
-    tmp2 = galois_multiply64(a, b);
-    s390_vec_xor(v1, &tmp1, &tmp2);
+    q1[0] = do_gfma16(q2[0], q3[0], 0);
+    q1[1] = do_gfma16(q2[1], q3[1], 0);
 }
 
-#define DEF_VGFMA(BITS, TBITS)                                                 \
-void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
-                              const void *v4, uint32_t desc)                   \
-{                                                                              \
-    int i;                                                                     \
-                                                                               \
-    for (i = 0; i < (128 / TBITS); i++) {                                      \
-        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
-        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
-        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
-                                                                               \
-        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
-        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
-        d = d ^ galois_multiply32(a, b);                                       \
-        d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
-        s390_vec_write_element##TBITS(v1, i, d);                               \
-    }                                                                          \
+void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
+}
+
+static inline uint64_t do_gfma32(uint64_t n, uint64_t m, uint64_t a)
+{
+    return clmul_32(n, m) ^ clmul_32(n >> 32, m >> 32) ^ a;
+}
+
+void HELPER(gvec_vgfm32)(void *v1, const void *v2, const void *v3, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+
+    q1[0] = do_gfma32(q2[0], q3[0], 0);
+    q1[1] = do_gfma32(q2[1], q3[1], 0);
+}
+
+void HELPER(gvec_vgfma32)(void *v1, const void *v2, const void *v3,
+                         const void *v4, uint32_t d)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+
+    q1[0] = do_gfma32(q2[0], q3[0], q4[0]);
+    q1[1] = do_gfma32(q2[1], q3[1], q4[1]);
+}
+
+void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
+                         uint32_t desc)
+{
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3;
+    Int128 r;
+
+    r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
+    q1[0] = int128_gethi(r);
+    q1[1] = int128_getlo(r);
 }
-DEF_VGFMA(8, 16)
-DEF_VGFMA(16, 32)
-DEF_VGFMA(32, 64)
 
 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
                           const void *v4, uint32_t desc)
 {
-    S390Vector tmp1, tmp2;
-    uint64_t a, b;
-
-    a = s390_vec_read_element64(v2, 0);
-    b = s390_vec_read_element64(v3, 0);
-    tmp1 = galois_multiply64(a, b);
-    a = s390_vec_read_element64(v2, 1);
-    b = s390_vec_read_element64(v3, 1);
-    tmp2 = galois_multiply64(a, b);
-    s390_vec_xor(&tmp1, &tmp1, &tmp2);
-    s390_vec_xor(v1, &tmp1, v4);
+    uint64_t *q1 = v1;
+    const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
+    Int128 r;
+
+    r = int128_xor(clmul_64(q2[0], q3[0]), clmul_64(q2[1], q3[1]));
+    q1[0] = q4[0] ^ int128_gethi(r);
+    q1[1] = q4[1] ^ int128_getlo(r);
 }
 
 #define DEF_VMAL(BITS)                                                         \
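
The rewritten s390x helpers above drop the bit-at-a-time Galois-field loops in favour of the clmul_* carry-less multiply primitives: each doubled-width destination element is the GF(2)[x] product of the even source lane xor'ed with the product of the odd lane, and for VGFMA additionally xor'ed with the accumulator element. As a minimal, self-contained reference model of what one such byte-lane pair computes, here is a hand-rolled sketch; clmul8_ref and vgfm8_elem_ref are illustrative names only, not the clmul_* helpers the patch actually calls.

#include <stdint.h>
#include <stdio.h>

/* Reference carry-less (GF(2)[x]) multiply of two bytes: 8x8 -> 16 bits. */
static uint16_t clmul8_ref(uint8_t a, uint8_t b)
{
    uint16_t r = 0;
    for (int i = 0; i < 8; i++) {
        if (b & (1u << i)) {
            r ^= (uint16_t)a << i;   /* xor instead of add: no carries */
        }
    }
    return r;
}

/*
 * Model of one 16-bit element of VGFM(8): the even byte-lane product
 * xor'ed with the odd byte-lane product, xor'ed with an accumulator
 * element for the VGFMA form (pass acc = 0 for plain VGFM).
 */
static uint16_t vgfm8_elem_ref(uint8_t even_a, uint8_t even_b,
                               uint8_t odd_a, uint8_t odd_b, uint16_t acc)
{
    return clmul8_ref(even_a, even_b) ^ clmul8_ref(odd_a, odd_b) ^ acc;
}

int main(void)
{
    /* 0x53 * 0xca in GF(2)[x] is 0x3f7e; a quick self-check. */
    printf("%04x\n", clmul8_ref(0x53, 0xca));
    printf("%04x\n", vgfm8_elem_ref(0x53, 0xca, 0x01, 0xff, 0));
    return 0;
}

The 16- and 32-bit helpers follow the same pattern on wider lanes, while the 64-bit case collects the two 128-bit products through Int128 and xors them, so no per-lane model is needed there.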
index 0931a69448ce570e739a7932523cdd30e3f21e53..06ea3c76526888bfc3a169f1335555d38fff4ebd 100644 (file)
@@ -272,7 +272,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
     }
 }
 
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -602,6 +602,10 @@ typedef enum {
     DMB_ISH         = 0xd50338bf,
     DMB_LD          = 0x00000100,
     DMB_ST          = 0x00000200,
+
+    BTI_C           = 0xd503245f,
+    BTI_J           = 0xd503249f,
+    BTI_JC          = 0xd50324df,
 } AArch64Insn;
 
 static inline uint32_t tcg_in32(TCGContext *s)
@@ -843,6 +847,17 @@ static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
               | rn << 5 | (rd & 0x1f));
 }
 
+static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
+{
+    /*
+     * While BTI insns are nops on hosts without FEAT_BTI,
+     * there is no point in emitting them in that case either.
+     */
+    if (cpuinfo & CPUINFO_BTI) {
+        tcg_out32(s, insn);
+    }
+}
+
 /* Register to register move using ORR (shifted register with no shift). */
 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
 {
@@ -1351,18 +1366,6 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
     tcg_out_insn(s, 3206, B, offset);
 }
 
-static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
-{
-    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
-    if (offset == sextract64(offset, 0, 26)) {
-        tcg_out_insn(s, 3206, B, offset);
-    } else {
-        /* Choose X9 as a call-clobbered non-LR temporary. */
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
-        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
-    }
-}
-
 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
 {
     ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
@@ -1947,12 +1950,28 @@ static const tcg_insn_unit *tb_ret_addr;
 
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 {
+    const tcg_insn_unit *target;
+    ptrdiff_t offset;
+
     /* Reuse the zeroing that exists for goto_ptr.  */
     if (a0 == 0) {
-        tcg_out_goto_long(s, tcg_code_gen_epilogue);
+        target = tcg_code_gen_epilogue;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-        tcg_out_goto_long(s, tb_ret_addr);
+        target = tb_ret_addr;
+    }
+
+    offset = tcg_pcrel_diff(s, target) >> 2;
+    if (offset == sextract64(offset, 0, 26)) {
+        tcg_out_insn(s, 3206, B, offset);
+    } else {
+        /*
+         * Only x16/x17 generate BTI type Jump (2),
+         * other registers generate BTI type Jump|Call (3).
+         */
+        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
+        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
     }
 }
 
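tcg_out_exit_tb now open-codes the former tcg_out_goto_long: a direct B is emitted when the displacement fits the 26-bit signed word immediate (a reach of about +/-128 MiB), otherwise the target is materialised in TCG_REG_TMP0 (X16) and reached with BR, per the BTI note in the hunk above. The range test relies on sign-extension round-tripping; a stand-alone sketch of that idiom, with sextract64 written out by hand rather than taken from QEMU's bitops helpers:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Hand-rolled equivalent of the sextract64(value, 0, len) use above;
 * assumes arithmetic right shift of signed values, as QEMU does. */
static int64_t sext64(int64_t value, unsigned len)
{
    return (int64_t)((uint64_t)value << (64 - len)) >> (64 - len);
}

/*
 * An AArch64 B instruction encodes a signed 26-bit word offset.  The
 * displacement fits exactly when sign-extending its low 26 bits gives
 * the value back unchanged.
 */
static bool branch_offset_in_range(intptr_t from, intptr_t to)
{
    int64_t offset = (to - from) >> 2;      /* word (4-byte) offset */
    return offset == sext64(offset, 26);
}

int main(void)
{
    /* 0x08000000 bytes (128 MiB) is just out of range; one word less fits. */
    printf("%d %d\n", branch_offset_in_range(0, 0x08000000),
                      branch_offset_in_range(0, 0x07fffffc));
    return 0;
}
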
@@ -1970,6 +1989,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
     tcg_out32(s, I3206_B);
     tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
     set_jmp_reset_offset(s, which);
+    tcg_out_bti(s, BTI_J);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@@ -3074,6 +3094,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 {
     TCGReg r;
 
+    tcg_out_bti(s, BTI_C);
+
     /* Push (FP, LR) and allocate space for all saved registers.  */
     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
@@ -3114,10 +3136,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
      * and fall through to the rest of the epilogue.
      */
     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
 
     /* TB epilogue */
     tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
 
     /* Remove TCG locals stack space.  */
     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@@ -3135,6 +3159,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    tcg_out_bti(s, BTI_J);
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     int i;
index acb5f23b542f604a716cd3091a3ef640f301336a..b1d56362a747d30b92dfedcd60a90042b3ec3167 100644 (file)
@@ -509,7 +509,7 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
  * mov operand2:     values represented with x << (2 * y), x < 0x100
  * add, sub, eor...: ditto
  */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2962,6 +2962,11 @@ static void tcg_out_epilogue(TCGContext *s)
                   (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
index 0c3d1e4cefd049d080e69abcb0728560c823709d..4e47151241cc356402d009a80011bdf279d3b901 100644 (file)
@@ -198,7 +198,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -4191,6 +4191,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     memset(p, 0x90, count);
index b5bb0c5e739d7f3f49620e0a9dbf3226ff8a2997..ee3b483b02313d9b6bd580f8c2c4e2a311ab25c5 100644 (file)
@@ -4,7 +4,7 @@
  *
  * This file is auto-generated by genqemutcgdefs from
  * https://github.com/loongson-community/loongarch-opcodes,
- * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
+ * from commit 8027da9a8157a8b47fc48ff1def292e09c5668bd.
  * DO NOT EDIT.
  */
 
@@ -74,6 +74,60 @@ typedef enum {
     OPC_ANDI = 0x03400000,
     OPC_ORI = 0x03800000,
     OPC_XORI = 0x03c00000,
+    OPC_VFMADD_S = 0x09100000,
+    OPC_VFMADD_D = 0x09200000,
+    OPC_VFMSUB_S = 0x09500000,
+    OPC_VFMSUB_D = 0x09600000,
+    OPC_VFNMADD_S = 0x09900000,
+    OPC_VFNMADD_D = 0x09a00000,
+    OPC_VFNMSUB_S = 0x09d00000,
+    OPC_VFNMSUB_D = 0x09e00000,
+    OPC_VFCMP_CAF_S = 0x0c500000,
+    OPC_VFCMP_SAF_S = 0x0c508000,
+    OPC_VFCMP_CLT_S = 0x0c510000,
+    OPC_VFCMP_SLT_S = 0x0c518000,
+    OPC_VFCMP_CEQ_S = 0x0c520000,
+    OPC_VFCMP_SEQ_S = 0x0c528000,
+    OPC_VFCMP_CLE_S = 0x0c530000,
+    OPC_VFCMP_SLE_S = 0x0c538000,
+    OPC_VFCMP_CUN_S = 0x0c540000,
+    OPC_VFCMP_SUN_S = 0x0c548000,
+    OPC_VFCMP_CULT_S = 0x0c550000,
+    OPC_VFCMP_SULT_S = 0x0c558000,
+    OPC_VFCMP_CUEQ_S = 0x0c560000,
+    OPC_VFCMP_SUEQ_S = 0x0c568000,
+    OPC_VFCMP_CULE_S = 0x0c570000,
+    OPC_VFCMP_SULE_S = 0x0c578000,
+    OPC_VFCMP_CNE_S = 0x0c580000,
+    OPC_VFCMP_SNE_S = 0x0c588000,
+    OPC_VFCMP_COR_S = 0x0c5a0000,
+    OPC_VFCMP_SOR_S = 0x0c5a8000,
+    OPC_VFCMP_CUNE_S = 0x0c5c0000,
+    OPC_VFCMP_SUNE_S = 0x0c5c8000,
+    OPC_VFCMP_CAF_D = 0x0c600000,
+    OPC_VFCMP_SAF_D = 0x0c608000,
+    OPC_VFCMP_CLT_D = 0x0c610000,
+    OPC_VFCMP_SLT_D = 0x0c618000,
+    OPC_VFCMP_CEQ_D = 0x0c620000,
+    OPC_VFCMP_SEQ_D = 0x0c628000,
+    OPC_VFCMP_CLE_D = 0x0c630000,
+    OPC_VFCMP_SLE_D = 0x0c638000,
+    OPC_VFCMP_CUN_D = 0x0c640000,
+    OPC_VFCMP_SUN_D = 0x0c648000,
+    OPC_VFCMP_CULT_D = 0x0c650000,
+    OPC_VFCMP_SULT_D = 0x0c658000,
+    OPC_VFCMP_CUEQ_D = 0x0c660000,
+    OPC_VFCMP_SUEQ_D = 0x0c668000,
+    OPC_VFCMP_CULE_D = 0x0c670000,
+    OPC_VFCMP_SULE_D = 0x0c678000,
+    OPC_VFCMP_CNE_D = 0x0c680000,
+    OPC_VFCMP_SNE_D = 0x0c688000,
+    OPC_VFCMP_COR_D = 0x0c6a0000,
+    OPC_VFCMP_SOR_D = 0x0c6a8000,
+    OPC_VFCMP_CUNE_D = 0x0c6c0000,
+    OPC_VFCMP_SUNE_D = 0x0c6c8000,
+    OPC_VBITSEL_V = 0x0d100000,
+    OPC_VSHUF_B = 0x0d500000,
     OPC_ADDU16I_D = 0x10000000,
     OPC_LU12I_W = 0x14000000,
     OPC_CU32I_D = 0x16000000,
@@ -92,6 +146,16 @@ typedef enum {
     OPC_LD_BU = 0x2a000000,
     OPC_LD_HU = 0x2a400000,
     OPC_LD_WU = 0x2a800000,
+    OPC_VLD = 0x2c000000,
+    OPC_VST = 0x2c400000,
+    OPC_VLDREPL_D = 0x30100000,
+    OPC_VLDREPL_W = 0x30200000,
+    OPC_VLDREPL_H = 0x30400000,
+    OPC_VLDREPL_B = 0x30800000,
+    OPC_VSTELM_D = 0x31100000,
+    OPC_VSTELM_W = 0x31200000,
+    OPC_VSTELM_H = 0x31400000,
+    OPC_VSTELM_B = 0x31800000,
     OPC_LDX_B = 0x38000000,
     OPC_LDX_H = 0x38040000,
     OPC_LDX_W = 0x38080000,
@@ -103,6 +167,8 @@ typedef enum {
     OPC_LDX_BU = 0x38200000,
     OPC_LDX_HU = 0x38240000,
     OPC_LDX_WU = 0x38280000,
+    OPC_VLDX = 0x38400000,
+    OPC_VSTX = 0x38440000,
     OPC_DBAR = 0x38720000,
     OPC_JIRL = 0x4c000000,
     OPC_B = 0x50000000,
@@ -113,6 +179,652 @@ typedef enum {
     OPC_BLE = 0x64000000,
     OPC_BGTU = 0x68000000,
     OPC_BLEU = 0x6c000000,
+    OPC_VSEQ_B = 0x70000000,
+    OPC_VSEQ_H = 0x70008000,
+    OPC_VSEQ_W = 0x70010000,
+    OPC_VSEQ_D = 0x70018000,
+    OPC_VSLE_B = 0x70020000,
+    OPC_VSLE_H = 0x70028000,
+    OPC_VSLE_W = 0x70030000,
+    OPC_VSLE_D = 0x70038000,
+    OPC_VSLE_BU = 0x70040000,
+    OPC_VSLE_HU = 0x70048000,
+    OPC_VSLE_WU = 0x70050000,
+    OPC_VSLE_DU = 0x70058000,
+    OPC_VSLT_B = 0x70060000,
+    OPC_VSLT_H = 0x70068000,
+    OPC_VSLT_W = 0x70070000,
+    OPC_VSLT_D = 0x70078000,
+    OPC_VSLT_BU = 0x70080000,
+    OPC_VSLT_HU = 0x70088000,
+    OPC_VSLT_WU = 0x70090000,
+    OPC_VSLT_DU = 0x70098000,
+    OPC_VADD_B = 0x700a0000,
+    OPC_VADD_H = 0x700a8000,
+    OPC_VADD_W = 0x700b0000,
+    OPC_VADD_D = 0x700b8000,
+    OPC_VSUB_B = 0x700c0000,
+    OPC_VSUB_H = 0x700c8000,
+    OPC_VSUB_W = 0x700d0000,
+    OPC_VSUB_D = 0x700d8000,
+    OPC_VADDWEV_H_B = 0x701e0000,
+    OPC_VADDWEV_W_H = 0x701e8000,
+    OPC_VADDWEV_D_W = 0x701f0000,
+    OPC_VADDWEV_Q_D = 0x701f8000,
+    OPC_VSUBWEV_H_B = 0x70200000,
+    OPC_VSUBWEV_W_H = 0x70208000,
+    OPC_VSUBWEV_D_W = 0x70210000,
+    OPC_VSUBWEV_Q_D = 0x70218000,
+    OPC_VADDWOD_H_B = 0x70220000,
+    OPC_VADDWOD_W_H = 0x70228000,
+    OPC_VADDWOD_D_W = 0x70230000,
+    OPC_VADDWOD_Q_D = 0x70238000,
+    OPC_VSUBWOD_H_B = 0x70240000,
+    OPC_VSUBWOD_W_H = 0x70248000,
+    OPC_VSUBWOD_D_W = 0x70250000,
+    OPC_VSUBWOD_Q_D = 0x70258000,
+    OPC_VADDWEV_H_BU = 0x702e0000,
+    OPC_VADDWEV_W_HU = 0x702e8000,
+    OPC_VADDWEV_D_WU = 0x702f0000,
+    OPC_VADDWEV_Q_DU = 0x702f8000,
+    OPC_VSUBWEV_H_BU = 0x70300000,
+    OPC_VSUBWEV_W_HU = 0x70308000,
+    OPC_VSUBWEV_D_WU = 0x70310000,
+    OPC_VSUBWEV_Q_DU = 0x70318000,
+    OPC_VADDWOD_H_BU = 0x70320000,
+    OPC_VADDWOD_W_HU = 0x70328000,
+    OPC_VADDWOD_D_WU = 0x70330000,
+    OPC_VADDWOD_Q_DU = 0x70338000,
+    OPC_VSUBWOD_H_BU = 0x70340000,
+    OPC_VSUBWOD_W_HU = 0x70348000,
+    OPC_VSUBWOD_D_WU = 0x70350000,
+    OPC_VSUBWOD_Q_DU = 0x70358000,
+    OPC_VADDWEV_H_BU_B = 0x703e0000,
+    OPC_VADDWEV_W_HU_H = 0x703e8000,
+    OPC_VADDWEV_D_WU_W = 0x703f0000,
+    OPC_VADDWEV_Q_DU_D = 0x703f8000,
+    OPC_VADDWOD_H_BU_B = 0x70400000,
+    OPC_VADDWOD_W_HU_H = 0x70408000,
+    OPC_VADDWOD_D_WU_W = 0x70410000,
+    OPC_VADDWOD_Q_DU_D = 0x70418000,
+    OPC_VSADD_B = 0x70460000,
+    OPC_VSADD_H = 0x70468000,
+    OPC_VSADD_W = 0x70470000,
+    OPC_VSADD_D = 0x70478000,
+    OPC_VSSUB_B = 0x70480000,
+    OPC_VSSUB_H = 0x70488000,
+    OPC_VSSUB_W = 0x70490000,
+    OPC_VSSUB_D = 0x70498000,
+    OPC_VSADD_BU = 0x704a0000,
+    OPC_VSADD_HU = 0x704a8000,
+    OPC_VSADD_WU = 0x704b0000,
+    OPC_VSADD_DU = 0x704b8000,
+    OPC_VSSUB_BU = 0x704c0000,
+    OPC_VSSUB_HU = 0x704c8000,
+    OPC_VSSUB_WU = 0x704d0000,
+    OPC_VSSUB_DU = 0x704d8000,
+    OPC_VHADDW_H_B = 0x70540000,
+    OPC_VHADDW_W_H = 0x70548000,
+    OPC_VHADDW_D_W = 0x70550000,
+    OPC_VHADDW_Q_D = 0x70558000,
+    OPC_VHSUBW_H_B = 0x70560000,
+    OPC_VHSUBW_W_H = 0x70568000,
+    OPC_VHSUBW_D_W = 0x70570000,
+    OPC_VHSUBW_Q_D = 0x70578000,
+    OPC_VHADDW_HU_BU = 0x70580000,
+    OPC_VHADDW_WU_HU = 0x70588000,
+    OPC_VHADDW_DU_WU = 0x70590000,
+    OPC_VHADDW_QU_DU = 0x70598000,
+    OPC_VHSUBW_HU_BU = 0x705a0000,
+    OPC_VHSUBW_WU_HU = 0x705a8000,
+    OPC_VHSUBW_DU_WU = 0x705b0000,
+    OPC_VHSUBW_QU_DU = 0x705b8000,
+    OPC_VADDA_B = 0x705c0000,
+    OPC_VADDA_H = 0x705c8000,
+    OPC_VADDA_W = 0x705d0000,
+    OPC_VADDA_D = 0x705d8000,
+    OPC_VABSD_B = 0x70600000,
+    OPC_VABSD_H = 0x70608000,
+    OPC_VABSD_W = 0x70610000,
+    OPC_VABSD_D = 0x70618000,
+    OPC_VABSD_BU = 0x70620000,
+    OPC_VABSD_HU = 0x70628000,
+    OPC_VABSD_WU = 0x70630000,
+    OPC_VABSD_DU = 0x70638000,
+    OPC_VAVG_B = 0x70640000,
+    OPC_VAVG_H = 0x70648000,
+    OPC_VAVG_W = 0x70650000,
+    OPC_VAVG_D = 0x70658000,
+    OPC_VAVG_BU = 0x70660000,
+    OPC_VAVG_HU = 0x70668000,
+    OPC_VAVG_WU = 0x70670000,
+    OPC_VAVG_DU = 0x70678000,
+    OPC_VAVGR_B = 0x70680000,
+    OPC_VAVGR_H = 0x70688000,
+    OPC_VAVGR_W = 0x70690000,
+    OPC_VAVGR_D = 0x70698000,
+    OPC_VAVGR_BU = 0x706a0000,
+    OPC_VAVGR_HU = 0x706a8000,
+    OPC_VAVGR_WU = 0x706b0000,
+    OPC_VAVGR_DU = 0x706b8000,
+    OPC_VMAX_B = 0x70700000,
+    OPC_VMAX_H = 0x70708000,
+    OPC_VMAX_W = 0x70710000,
+    OPC_VMAX_D = 0x70718000,
+    OPC_VMIN_B = 0x70720000,
+    OPC_VMIN_H = 0x70728000,
+    OPC_VMIN_W = 0x70730000,
+    OPC_VMIN_D = 0x70738000,
+    OPC_VMAX_BU = 0x70740000,
+    OPC_VMAX_HU = 0x70748000,
+    OPC_VMAX_WU = 0x70750000,
+    OPC_VMAX_DU = 0x70758000,
+    OPC_VMIN_BU = 0x70760000,
+    OPC_VMIN_HU = 0x70768000,
+    OPC_VMIN_WU = 0x70770000,
+    OPC_VMIN_DU = 0x70778000,
+    OPC_VMUL_B = 0x70840000,
+    OPC_VMUL_H = 0x70848000,
+    OPC_VMUL_W = 0x70850000,
+    OPC_VMUL_D = 0x70858000,
+    OPC_VMUH_B = 0x70860000,
+    OPC_VMUH_H = 0x70868000,
+    OPC_VMUH_W = 0x70870000,
+    OPC_VMUH_D = 0x70878000,
+    OPC_VMUH_BU = 0x70880000,
+    OPC_VMUH_HU = 0x70888000,
+    OPC_VMUH_WU = 0x70890000,
+    OPC_VMUH_DU = 0x70898000,
+    OPC_VMULWEV_H_B = 0x70900000,
+    OPC_VMULWEV_W_H = 0x70908000,
+    OPC_VMULWEV_D_W = 0x70910000,
+    OPC_VMULWEV_Q_D = 0x70918000,
+    OPC_VMULWOD_H_B = 0x70920000,
+    OPC_VMULWOD_W_H = 0x70928000,
+    OPC_VMULWOD_D_W = 0x70930000,
+    OPC_VMULWOD_Q_D = 0x70938000,
+    OPC_VMULWEV_H_BU = 0x70980000,
+    OPC_VMULWEV_W_HU = 0x70988000,
+    OPC_VMULWEV_D_WU = 0x70990000,
+    OPC_VMULWEV_Q_DU = 0x70998000,
+    OPC_VMULWOD_H_BU = 0x709a0000,
+    OPC_VMULWOD_W_HU = 0x709a8000,
+    OPC_VMULWOD_D_WU = 0x709b0000,
+    OPC_VMULWOD_Q_DU = 0x709b8000,
+    OPC_VMULWEV_H_BU_B = 0x70a00000,
+    OPC_VMULWEV_W_HU_H = 0x70a08000,
+    OPC_VMULWEV_D_WU_W = 0x70a10000,
+    OPC_VMULWEV_Q_DU_D = 0x70a18000,
+    OPC_VMULWOD_H_BU_B = 0x70a20000,
+    OPC_VMULWOD_W_HU_H = 0x70a28000,
+    OPC_VMULWOD_D_WU_W = 0x70a30000,
+    OPC_VMULWOD_Q_DU_D = 0x70a38000,
+    OPC_VMADD_B = 0x70a80000,
+    OPC_VMADD_H = 0x70a88000,
+    OPC_VMADD_W = 0x70a90000,
+    OPC_VMADD_D = 0x70a98000,
+    OPC_VMSUB_B = 0x70aa0000,
+    OPC_VMSUB_H = 0x70aa8000,
+    OPC_VMSUB_W = 0x70ab0000,
+    OPC_VMSUB_D = 0x70ab8000,
+    OPC_VMADDWEV_H_B = 0x70ac0000,
+    OPC_VMADDWEV_W_H = 0x70ac8000,
+    OPC_VMADDWEV_D_W = 0x70ad0000,
+    OPC_VMADDWEV_Q_D = 0x70ad8000,
+    OPC_VMADDWOD_H_B = 0x70ae0000,
+    OPC_VMADDWOD_W_H = 0x70ae8000,
+    OPC_VMADDWOD_D_W = 0x70af0000,
+    OPC_VMADDWOD_Q_D = 0x70af8000,
+    OPC_VMADDWEV_H_BU = 0x70b40000,
+    OPC_VMADDWEV_W_HU = 0x70b48000,
+    OPC_VMADDWEV_D_WU = 0x70b50000,
+    OPC_VMADDWEV_Q_DU = 0x70b58000,
+    OPC_VMADDWOD_H_BU = 0x70b60000,
+    OPC_VMADDWOD_W_HU = 0x70b68000,
+    OPC_VMADDWOD_D_WU = 0x70b70000,
+    OPC_VMADDWOD_Q_DU = 0x70b78000,
+    OPC_VMADDWEV_H_BU_B = 0x70bc0000,
+    OPC_VMADDWEV_W_HU_H = 0x70bc8000,
+    OPC_VMADDWEV_D_WU_W = 0x70bd0000,
+    OPC_VMADDWEV_Q_DU_D = 0x70bd8000,
+    OPC_VMADDWOD_H_BU_B = 0x70be0000,
+    OPC_VMADDWOD_W_HU_H = 0x70be8000,
+    OPC_VMADDWOD_D_WU_W = 0x70bf0000,
+    OPC_VMADDWOD_Q_DU_D = 0x70bf8000,
+    OPC_VDIV_B = 0x70e00000,
+    OPC_VDIV_H = 0x70e08000,
+    OPC_VDIV_W = 0x70e10000,
+    OPC_VDIV_D = 0x70e18000,
+    OPC_VMOD_B = 0x70e20000,
+    OPC_VMOD_H = 0x70e28000,
+    OPC_VMOD_W = 0x70e30000,
+    OPC_VMOD_D = 0x70e38000,
+    OPC_VDIV_BU = 0x70e40000,
+    OPC_VDIV_HU = 0x70e48000,
+    OPC_VDIV_WU = 0x70e50000,
+    OPC_VDIV_DU = 0x70e58000,
+    OPC_VMOD_BU = 0x70e60000,
+    OPC_VMOD_HU = 0x70e68000,
+    OPC_VMOD_WU = 0x70e70000,
+    OPC_VMOD_DU = 0x70e78000,
+    OPC_VSLL_B = 0x70e80000,
+    OPC_VSLL_H = 0x70e88000,
+    OPC_VSLL_W = 0x70e90000,
+    OPC_VSLL_D = 0x70e98000,
+    OPC_VSRL_B = 0x70ea0000,
+    OPC_VSRL_H = 0x70ea8000,
+    OPC_VSRL_W = 0x70eb0000,
+    OPC_VSRL_D = 0x70eb8000,
+    OPC_VSRA_B = 0x70ec0000,
+    OPC_VSRA_H = 0x70ec8000,
+    OPC_VSRA_W = 0x70ed0000,
+    OPC_VSRA_D = 0x70ed8000,
+    OPC_VROTR_B = 0x70ee0000,
+    OPC_VROTR_H = 0x70ee8000,
+    OPC_VROTR_W = 0x70ef0000,
+    OPC_VROTR_D = 0x70ef8000,
+    OPC_VSRLR_B = 0x70f00000,
+    OPC_VSRLR_H = 0x70f08000,
+    OPC_VSRLR_W = 0x70f10000,
+    OPC_VSRLR_D = 0x70f18000,
+    OPC_VSRAR_B = 0x70f20000,
+    OPC_VSRAR_H = 0x70f28000,
+    OPC_VSRAR_W = 0x70f30000,
+    OPC_VSRAR_D = 0x70f38000,
+    OPC_VSRLN_B_H = 0x70f48000,
+    OPC_VSRLN_H_W = 0x70f50000,
+    OPC_VSRLN_W_D = 0x70f58000,
+    OPC_VSRAN_B_H = 0x70f68000,
+    OPC_VSRAN_H_W = 0x70f70000,
+    OPC_VSRAN_W_D = 0x70f78000,
+    OPC_VSRLRN_B_H = 0x70f88000,
+    OPC_VSRLRN_H_W = 0x70f90000,
+    OPC_VSRLRN_W_D = 0x70f98000,
+    OPC_VSRARN_B_H = 0x70fa8000,
+    OPC_VSRARN_H_W = 0x70fb0000,
+    OPC_VSRARN_W_D = 0x70fb8000,
+    OPC_VSSRLN_B_H = 0x70fc8000,
+    OPC_VSSRLN_H_W = 0x70fd0000,
+    OPC_VSSRLN_W_D = 0x70fd8000,
+    OPC_VSSRAN_B_H = 0x70fe8000,
+    OPC_VSSRAN_H_W = 0x70ff0000,
+    OPC_VSSRAN_W_D = 0x70ff8000,
+    OPC_VSSRLRN_B_H = 0x71008000,
+    OPC_VSSRLRN_H_W = 0x71010000,
+    OPC_VSSRLRN_W_D = 0x71018000,
+    OPC_VSSRARN_B_H = 0x71028000,
+    OPC_VSSRARN_H_W = 0x71030000,
+    OPC_VSSRARN_W_D = 0x71038000,
+    OPC_VSSRLN_BU_H = 0x71048000,
+    OPC_VSSRLN_HU_W = 0x71050000,
+    OPC_VSSRLN_WU_D = 0x71058000,
+    OPC_VSSRAN_BU_H = 0x71068000,
+    OPC_VSSRAN_HU_W = 0x71070000,
+    OPC_VSSRAN_WU_D = 0x71078000,
+    OPC_VSSRLRN_BU_H = 0x71088000,
+    OPC_VSSRLRN_HU_W = 0x71090000,
+    OPC_VSSRLRN_WU_D = 0x71098000,
+    OPC_VSSRARN_BU_H = 0x710a8000,
+    OPC_VSSRARN_HU_W = 0x710b0000,
+    OPC_VSSRARN_WU_D = 0x710b8000,
+    OPC_VBITCLR_B = 0x710c0000,
+    OPC_VBITCLR_H = 0x710c8000,
+    OPC_VBITCLR_W = 0x710d0000,
+    OPC_VBITCLR_D = 0x710d8000,
+    OPC_VBITSET_B = 0x710e0000,
+    OPC_VBITSET_H = 0x710e8000,
+    OPC_VBITSET_W = 0x710f0000,
+    OPC_VBITSET_D = 0x710f8000,
+    OPC_VBITREV_B = 0x71100000,
+    OPC_VBITREV_H = 0x71108000,
+    OPC_VBITREV_W = 0x71110000,
+    OPC_VBITREV_D = 0x71118000,
+    OPC_VPACKEV_B = 0x71160000,
+    OPC_VPACKEV_H = 0x71168000,
+    OPC_VPACKEV_W = 0x71170000,
+    OPC_VPACKEV_D = 0x71178000,
+    OPC_VPACKOD_B = 0x71180000,
+    OPC_VPACKOD_H = 0x71188000,
+    OPC_VPACKOD_W = 0x71190000,
+    OPC_VPACKOD_D = 0x71198000,
+    OPC_VILVL_B = 0x711a0000,
+    OPC_VILVL_H = 0x711a8000,
+    OPC_VILVL_W = 0x711b0000,
+    OPC_VILVL_D = 0x711b8000,
+    OPC_VILVH_B = 0x711c0000,
+    OPC_VILVH_H = 0x711c8000,
+    OPC_VILVH_W = 0x711d0000,
+    OPC_VILVH_D = 0x711d8000,
+    OPC_VPICKEV_B = 0x711e0000,
+    OPC_VPICKEV_H = 0x711e8000,
+    OPC_VPICKEV_W = 0x711f0000,
+    OPC_VPICKEV_D = 0x711f8000,
+    OPC_VPICKOD_B = 0x71200000,
+    OPC_VPICKOD_H = 0x71208000,
+    OPC_VPICKOD_W = 0x71210000,
+    OPC_VPICKOD_D = 0x71218000,
+    OPC_VREPLVE_B = 0x71220000,
+    OPC_VREPLVE_H = 0x71228000,
+    OPC_VREPLVE_W = 0x71230000,
+    OPC_VREPLVE_D = 0x71238000,
+    OPC_VAND_V = 0x71260000,
+    OPC_VOR_V = 0x71268000,
+    OPC_VXOR_V = 0x71270000,
+    OPC_VNOR_V = 0x71278000,
+    OPC_VANDN_V = 0x71280000,
+    OPC_VORN_V = 0x71288000,
+    OPC_VFRSTP_B = 0x712b0000,
+    OPC_VFRSTP_H = 0x712b8000,
+    OPC_VADD_Q = 0x712d0000,
+    OPC_VSUB_Q = 0x712d8000,
+    OPC_VSIGNCOV_B = 0x712e0000,
+    OPC_VSIGNCOV_H = 0x712e8000,
+    OPC_VSIGNCOV_W = 0x712f0000,
+    OPC_VSIGNCOV_D = 0x712f8000,
+    OPC_VFADD_S = 0x71308000,
+    OPC_VFADD_D = 0x71310000,
+    OPC_VFSUB_S = 0x71328000,
+    OPC_VFSUB_D = 0x71330000,
+    OPC_VFMUL_S = 0x71388000,
+    OPC_VFMUL_D = 0x71390000,
+    OPC_VFDIV_S = 0x713a8000,
+    OPC_VFDIV_D = 0x713b0000,
+    OPC_VFMAX_S = 0x713c8000,
+    OPC_VFMAX_D = 0x713d0000,
+    OPC_VFMIN_S = 0x713e8000,
+    OPC_VFMIN_D = 0x713f0000,
+    OPC_VFMAXA_S = 0x71408000,
+    OPC_VFMAXA_D = 0x71410000,
+    OPC_VFMINA_S = 0x71428000,
+    OPC_VFMINA_D = 0x71430000,
+    OPC_VFCVT_H_S = 0x71460000,
+    OPC_VFCVT_S_D = 0x71468000,
+    OPC_VFFINT_S_L = 0x71480000,
+    OPC_VFTINT_W_D = 0x71498000,
+    OPC_VFTINTRM_W_D = 0x714a0000,
+    OPC_VFTINTRP_W_D = 0x714a8000,
+    OPC_VFTINTRZ_W_D = 0x714b0000,
+    OPC_VFTINTRNE_W_D = 0x714b8000,
+    OPC_VSHUF_H = 0x717a8000,
+    OPC_VSHUF_W = 0x717b0000,
+    OPC_VSHUF_D = 0x717b8000,
+    OPC_VSEQI_B = 0x72800000,
+    OPC_VSEQI_H = 0x72808000,
+    OPC_VSEQI_W = 0x72810000,
+    OPC_VSEQI_D = 0x72818000,
+    OPC_VSLEI_B = 0x72820000,
+    OPC_VSLEI_H = 0x72828000,
+    OPC_VSLEI_W = 0x72830000,
+    OPC_VSLEI_D = 0x72838000,
+    OPC_VSLEI_BU = 0x72840000,
+    OPC_VSLEI_HU = 0x72848000,
+    OPC_VSLEI_WU = 0x72850000,
+    OPC_VSLEI_DU = 0x72858000,
+    OPC_VSLTI_B = 0x72860000,
+    OPC_VSLTI_H = 0x72868000,
+    OPC_VSLTI_W = 0x72870000,
+    OPC_VSLTI_D = 0x72878000,
+    OPC_VSLTI_BU = 0x72880000,
+    OPC_VSLTI_HU = 0x72888000,
+    OPC_VSLTI_WU = 0x72890000,
+    OPC_VSLTI_DU = 0x72898000,
+    OPC_VADDI_BU = 0x728a0000,
+    OPC_VADDI_HU = 0x728a8000,
+    OPC_VADDI_WU = 0x728b0000,
+    OPC_VADDI_DU = 0x728b8000,
+    OPC_VSUBI_BU = 0x728c0000,
+    OPC_VSUBI_HU = 0x728c8000,
+    OPC_VSUBI_WU = 0x728d0000,
+    OPC_VSUBI_DU = 0x728d8000,
+    OPC_VBSLL_V = 0x728e0000,
+    OPC_VBSRL_V = 0x728e8000,
+    OPC_VMAXI_B = 0x72900000,
+    OPC_VMAXI_H = 0x72908000,
+    OPC_VMAXI_W = 0x72910000,
+    OPC_VMAXI_D = 0x72918000,
+    OPC_VMINI_B = 0x72920000,
+    OPC_VMINI_H = 0x72928000,
+    OPC_VMINI_W = 0x72930000,
+    OPC_VMINI_D = 0x72938000,
+    OPC_VMAXI_BU = 0x72940000,
+    OPC_VMAXI_HU = 0x72948000,
+    OPC_VMAXI_WU = 0x72950000,
+    OPC_VMAXI_DU = 0x72958000,
+    OPC_VMINI_BU = 0x72960000,
+    OPC_VMINI_HU = 0x72968000,
+    OPC_VMINI_WU = 0x72970000,
+    OPC_VMINI_DU = 0x72978000,
+    OPC_VFRSTPI_B = 0x729a0000,
+    OPC_VFRSTPI_H = 0x729a8000,
+    OPC_VCLO_B = 0x729c0000,
+    OPC_VCLO_H = 0x729c0400,
+    OPC_VCLO_W = 0x729c0800,
+    OPC_VCLO_D = 0x729c0c00,
+    OPC_VCLZ_B = 0x729c1000,
+    OPC_VCLZ_H = 0x729c1400,
+    OPC_VCLZ_W = 0x729c1800,
+    OPC_VCLZ_D = 0x729c1c00,
+    OPC_VPCNT_B = 0x729c2000,
+    OPC_VPCNT_H = 0x729c2400,
+    OPC_VPCNT_W = 0x729c2800,
+    OPC_VPCNT_D = 0x729c2c00,
+    OPC_VNEG_B = 0x729c3000,
+    OPC_VNEG_H = 0x729c3400,
+    OPC_VNEG_W = 0x729c3800,
+    OPC_VNEG_D = 0x729c3c00,
+    OPC_VMSKLTZ_B = 0x729c4000,
+    OPC_VMSKLTZ_H = 0x729c4400,
+    OPC_VMSKLTZ_W = 0x729c4800,
+    OPC_VMSKLTZ_D = 0x729c4c00,
+    OPC_VMSKGEZ_B = 0x729c5000,
+    OPC_VMSKNZ_B = 0x729c6000,
+    OPC_VSETEQZ_V = 0x729c9800,
+    OPC_VSETNEZ_V = 0x729c9c00,
+    OPC_VSETANYEQZ_B = 0x729ca000,
+    OPC_VSETANYEQZ_H = 0x729ca400,
+    OPC_VSETANYEQZ_W = 0x729ca800,
+    OPC_VSETANYEQZ_D = 0x729cac00,
+    OPC_VSETALLNEZ_B = 0x729cb000,
+    OPC_VSETALLNEZ_H = 0x729cb400,
+    OPC_VSETALLNEZ_W = 0x729cb800,
+    OPC_VSETALLNEZ_D = 0x729cbc00,
+    OPC_VFLOGB_S = 0x729cc400,
+    OPC_VFLOGB_D = 0x729cc800,
+    OPC_VFCLASS_S = 0x729cd400,
+    OPC_VFCLASS_D = 0x729cd800,
+    OPC_VFSQRT_S = 0x729ce400,
+    OPC_VFSQRT_D = 0x729ce800,
+    OPC_VFRECIP_S = 0x729cf400,
+    OPC_VFRECIP_D = 0x729cf800,
+    OPC_VFRSQRT_S = 0x729d0400,
+    OPC_VFRSQRT_D = 0x729d0800,
+    OPC_VFRINT_S = 0x729d3400,
+    OPC_VFRINT_D = 0x729d3800,
+    OPC_VFRINTRM_S = 0x729d4400,
+    OPC_VFRINTRM_D = 0x729d4800,
+    OPC_VFRINTRP_S = 0x729d5400,
+    OPC_VFRINTRP_D = 0x729d5800,
+    OPC_VFRINTRZ_S = 0x729d6400,
+    OPC_VFRINTRZ_D = 0x729d6800,
+    OPC_VFRINTRNE_S = 0x729d7400,
+    OPC_VFRINTRNE_D = 0x729d7800,
+    OPC_VFCVTL_S_H = 0x729de800,
+    OPC_VFCVTH_S_H = 0x729dec00,
+    OPC_VFCVTL_D_S = 0x729df000,
+    OPC_VFCVTH_D_S = 0x729df400,
+    OPC_VFFINT_S_W = 0x729e0000,
+    OPC_VFFINT_S_WU = 0x729e0400,
+    OPC_VFFINT_D_L = 0x729e0800,
+    OPC_VFFINT_D_LU = 0x729e0c00,
+    OPC_VFFINTL_D_W = 0x729e1000,
+    OPC_VFFINTH_D_W = 0x729e1400,
+    OPC_VFTINT_W_S = 0x729e3000,
+    OPC_VFTINT_L_D = 0x729e3400,
+    OPC_VFTINTRM_W_S = 0x729e3800,
+    OPC_VFTINTRM_L_D = 0x729e3c00,
+    OPC_VFTINTRP_W_S = 0x729e4000,
+    OPC_VFTINTRP_L_D = 0x729e4400,
+    OPC_VFTINTRZ_W_S = 0x729e4800,
+    OPC_VFTINTRZ_L_D = 0x729e4c00,
+    OPC_VFTINTRNE_W_S = 0x729e5000,
+    OPC_VFTINTRNE_L_D = 0x729e5400,
+    OPC_VFTINT_WU_S = 0x729e5800,
+    OPC_VFTINT_LU_D = 0x729e5c00,
+    OPC_VFTINTRZ_WU_S = 0x729e7000,
+    OPC_VFTINTRZ_LU_D = 0x729e7400,
+    OPC_VFTINTL_L_S = 0x729e8000,
+    OPC_VFTINTH_L_S = 0x729e8400,
+    OPC_VFTINTRML_L_S = 0x729e8800,
+    OPC_VFTINTRMH_L_S = 0x729e8c00,
+    OPC_VFTINTRPL_L_S = 0x729e9000,
+    OPC_VFTINTRPH_L_S = 0x729e9400,
+    OPC_VFTINTRZL_L_S = 0x729e9800,
+    OPC_VFTINTRZH_L_S = 0x729e9c00,
+    OPC_VFTINTRNEL_L_S = 0x729ea000,
+    OPC_VFTINTRNEH_L_S = 0x729ea400,
+    OPC_VEXTH_H_B = 0x729ee000,
+    OPC_VEXTH_W_H = 0x729ee400,
+    OPC_VEXTH_D_W = 0x729ee800,
+    OPC_VEXTH_Q_D = 0x729eec00,
+    OPC_VEXTH_HU_BU = 0x729ef000,
+    OPC_VEXTH_WU_HU = 0x729ef400,
+    OPC_VEXTH_DU_WU = 0x729ef800,
+    OPC_VEXTH_QU_DU = 0x729efc00,
+    OPC_VREPLGR2VR_B = 0x729f0000,
+    OPC_VREPLGR2VR_H = 0x729f0400,
+    OPC_VREPLGR2VR_W = 0x729f0800,
+    OPC_VREPLGR2VR_D = 0x729f0c00,
+    OPC_VROTRI_B = 0x72a02000,
+    OPC_VROTRI_H = 0x72a04000,
+    OPC_VROTRI_W = 0x72a08000,
+    OPC_VROTRI_D = 0x72a10000,
+    OPC_VSRLRI_B = 0x72a42000,
+    OPC_VSRLRI_H = 0x72a44000,
+    OPC_VSRLRI_W = 0x72a48000,
+    OPC_VSRLRI_D = 0x72a50000,
+    OPC_VSRARI_B = 0x72a82000,
+    OPC_VSRARI_H = 0x72a84000,
+    OPC_VSRARI_W = 0x72a88000,
+    OPC_VSRARI_D = 0x72a90000,
+    OPC_VINSGR2VR_B = 0x72eb8000,
+    OPC_VINSGR2VR_H = 0x72ebc000,
+    OPC_VINSGR2VR_W = 0x72ebe000,
+    OPC_VINSGR2VR_D = 0x72ebf000,
+    OPC_VPICKVE2GR_B = 0x72ef8000,
+    OPC_VPICKVE2GR_H = 0x72efc000,
+    OPC_VPICKVE2GR_W = 0x72efe000,
+    OPC_VPICKVE2GR_D = 0x72eff000,
+    OPC_VPICKVE2GR_BU = 0x72f38000,
+    OPC_VPICKVE2GR_HU = 0x72f3c000,
+    OPC_VPICKVE2GR_WU = 0x72f3e000,
+    OPC_VPICKVE2GR_DU = 0x72f3f000,
+    OPC_VREPLVEI_B = 0x72f78000,
+    OPC_VREPLVEI_H = 0x72f7c000,
+    OPC_VREPLVEI_W = 0x72f7e000,
+    OPC_VREPLVEI_D = 0x72f7f000,
+    OPC_VSLLWIL_H_B = 0x73082000,
+    OPC_VSLLWIL_W_H = 0x73084000,
+    OPC_VSLLWIL_D_W = 0x73088000,
+    OPC_VEXTL_Q_D = 0x73090000,
+    OPC_VSLLWIL_HU_BU = 0x730c2000,
+    OPC_VSLLWIL_WU_HU = 0x730c4000,
+    OPC_VSLLWIL_DU_WU = 0x730c8000,
+    OPC_VEXTL_QU_DU = 0x730d0000,
+    OPC_VBITCLRI_B = 0x73102000,
+    OPC_VBITCLRI_H = 0x73104000,
+    OPC_VBITCLRI_W = 0x73108000,
+    OPC_VBITCLRI_D = 0x73110000,
+    OPC_VBITSETI_B = 0x73142000,
+    OPC_VBITSETI_H = 0x73144000,
+    OPC_VBITSETI_W = 0x73148000,
+    OPC_VBITSETI_D = 0x73150000,
+    OPC_VBITREVI_B = 0x73182000,
+    OPC_VBITREVI_H = 0x73184000,
+    OPC_VBITREVI_W = 0x73188000,
+    OPC_VBITREVI_D = 0x73190000,
+    OPC_VSAT_B = 0x73242000,
+    OPC_VSAT_H = 0x73244000,
+    OPC_VSAT_W = 0x73248000,
+    OPC_VSAT_D = 0x73250000,
+    OPC_VSAT_BU = 0x73282000,
+    OPC_VSAT_HU = 0x73284000,
+    OPC_VSAT_WU = 0x73288000,
+    OPC_VSAT_DU = 0x73290000,
+    OPC_VSLLI_B = 0x732c2000,
+    OPC_VSLLI_H = 0x732c4000,
+    OPC_VSLLI_W = 0x732c8000,
+    OPC_VSLLI_D = 0x732d0000,
+    OPC_VSRLI_B = 0x73302000,
+    OPC_VSRLI_H = 0x73304000,
+    OPC_VSRLI_W = 0x73308000,
+    OPC_VSRLI_D = 0x73310000,
+    OPC_VSRAI_B = 0x73342000,
+    OPC_VSRAI_H = 0x73344000,
+    OPC_VSRAI_W = 0x73348000,
+    OPC_VSRAI_D = 0x73350000,
+    OPC_VSRLNI_B_H = 0x73404000,
+    OPC_VSRLNI_H_W = 0x73408000,
+    OPC_VSRLNI_W_D = 0x73410000,
+    OPC_VSRLNI_D_Q = 0x73420000,
+    OPC_VSRLRNI_B_H = 0x73444000,
+    OPC_VSRLRNI_H_W = 0x73448000,
+    OPC_VSRLRNI_W_D = 0x73450000,
+    OPC_VSRLRNI_D_Q = 0x73460000,
+    OPC_VSSRLNI_B_H = 0x73484000,
+    OPC_VSSRLNI_H_W = 0x73488000,
+    OPC_VSSRLNI_W_D = 0x73490000,
+    OPC_VSSRLNI_D_Q = 0x734a0000,
+    OPC_VSSRLNI_BU_H = 0x734c4000,
+    OPC_VSSRLNI_HU_W = 0x734c8000,
+    OPC_VSSRLNI_WU_D = 0x734d0000,
+    OPC_VSSRLNI_DU_Q = 0x734e0000,
+    OPC_VSSRLRNI_B_H = 0x73504000,
+    OPC_VSSRLRNI_H_W = 0x73508000,
+    OPC_VSSRLRNI_W_D = 0x73510000,
+    OPC_VSSRLRNI_D_Q = 0x73520000,
+    OPC_VSSRLRNI_BU_H = 0x73544000,
+    OPC_VSSRLRNI_HU_W = 0x73548000,
+    OPC_VSSRLRNI_WU_D = 0x73550000,
+    OPC_VSSRLRNI_DU_Q = 0x73560000,
+    OPC_VSRANI_B_H = 0x73584000,
+    OPC_VSRANI_H_W = 0x73588000,
+    OPC_VSRANI_W_D = 0x73590000,
+    OPC_VSRANI_D_Q = 0x735a0000,
+    OPC_VSRARNI_B_H = 0x735c4000,
+    OPC_VSRARNI_H_W = 0x735c8000,
+    OPC_VSRARNI_W_D = 0x735d0000,
+    OPC_VSRARNI_D_Q = 0x735e0000,
+    OPC_VSSRANI_B_H = 0x73604000,
+    OPC_VSSRANI_H_W = 0x73608000,
+    OPC_VSSRANI_W_D = 0x73610000,
+    OPC_VSSRANI_D_Q = 0x73620000,
+    OPC_VSSRANI_BU_H = 0x73644000,
+    OPC_VSSRANI_HU_W = 0x73648000,
+    OPC_VSSRANI_WU_D = 0x73650000,
+    OPC_VSSRANI_DU_Q = 0x73660000,
+    OPC_VSSRARNI_B_H = 0x73684000,
+    OPC_VSSRARNI_H_W = 0x73688000,
+    OPC_VSSRARNI_W_D = 0x73690000,
+    OPC_VSSRARNI_D_Q = 0x736a0000,
+    OPC_VSSRARNI_BU_H = 0x736c4000,
+    OPC_VSSRARNI_HU_W = 0x736c8000,
+    OPC_VSSRARNI_WU_D = 0x736d0000,
+    OPC_VSSRARNI_DU_Q = 0x736e0000,
+    OPC_VEXTRINS_D = 0x73800000,
+    OPC_VEXTRINS_W = 0x73840000,
+    OPC_VEXTRINS_H = 0x73880000,
+    OPC_VEXTRINS_B = 0x738c0000,
+    OPC_VSHUF4I_B = 0x73900000,
+    OPC_VSHUF4I_H = 0x73940000,
+    OPC_VSHUF4I_W = 0x73980000,
+    OPC_VSHUF4I_D = 0x739c0000,
+    OPC_VBITSELI_B = 0x73c40000,
+    OPC_VANDI_B = 0x73d00000,
+    OPC_VORI_B = 0x73d40000,
+    OPC_VXORI_B = 0x73d80000,
+    OPC_VNORI_B = 0x73dc0000,
+    OPC_VLDI = 0x73e00000,
+    OPC_VPERMI_W = 0x73e40000,
 } LoongArchInsn;
 
 static int32_t __attribute__((unused))
@@ -133,6 +845,13 @@ encode_djk_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k)
     return opc | d | j << 5 | k << 10;
 }
 
+static int32_t __attribute__((unused))
+encode_djka_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
+                  uint32_t a)
+{
+    return opc | d | j << 5 | k << 10 | a << 15;
+}
+
 static int32_t __attribute__((unused))
 encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
                   uint32_t m)
@@ -140,12 +859,27 @@ encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
     return opc | d | j << 5 | k << 10 | m << 16;
 }
 
+static int32_t __attribute__((unused))
+encode_djkn_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
+                  uint32_t n)
+{
+    return opc | d | j << 5 | k << 10 | n << 18;
+}
+
 static int32_t __attribute__((unused))
 encode_dk_slots(LoongArchInsn opc, uint32_t d, uint32_t k)
 {
     return opc | d | k << 10;
 }
 
+static int32_t __attribute__((unused))
+encode_cdvj_insn(LoongArchInsn opc, TCGReg cd, TCGReg vj)
+{
+    tcg_debug_assert(cd >= 0 && cd <= 0x7);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    return encode_dj_slots(opc, cd, vj & 0x1f);
+}
+
 static int32_t __attribute__((unused))
 encode_dj_insn(LoongArchInsn opc, TCGReg d, TCGReg j)
 {
@@ -238,6 +972,42 @@ encode_dsj20_insn(LoongArchInsn opc, TCGReg d, int32_t sj20)
     return encode_dj_slots(opc, d, sj20 & 0xfffff);
 }
 
+static int32_t __attribute__((unused))
+encode_dvjuk1_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk2_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk3_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_dvjuk4_insn(LoongArchInsn opc, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_debug_assert(d >= 0 && d <= 0x1f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, d, vj & 0x1f, uk4);
+}
+
 static int32_t __attribute__((unused))
 encode_sd10k16_insn(LoongArchInsn opc, int32_t sd10k16)
 {
@@ -252,6 +1022,265 @@ encode_ud15_insn(LoongArchInsn opc, uint32_t ud15)
     return encode_d_slot(opc, ud15);
 }
 
+static int32_t __attribute__((unused))
+encode_vdj_insn(LoongArchInsn opc, TCGReg vd, TCGReg j)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    return encode_dj_slots(opc, vd & 0x1f, j);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(k >= 0 && k <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, j, k);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk10_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk10)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk10 >= -0x200 && sk10 <= 0x1ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk10 & 0x3ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk11_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk11)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk11 >= -0x400 && sk11 <= 0x3ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk11 & 0x7ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk12_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk12 & 0xfff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un1 <= 0x1);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un2 <= 0x3);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un3 <= 0x7);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk8un4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk8,
+                      uint32_t un4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk8 >= -0x80 && sk8 <= 0x7f);
+    tcg_debug_assert(un4 <= 0xf);
+    return encode_djkn_slots(opc, vd & 0x1f, j, sk8 & 0xff, un4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjsk9_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, int32_t sk9)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(sk9 >= -0x100 && sk9 <= 0xff);
+    return encode_djk_slots(opc, vd & 0x1f, j, sk9 & 0x1ff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg j, uint32_t uk4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(j >= 0 && j <= 0x1f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, j, uk4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdsj13_insn(LoongArchInsn opc, TCGReg vd, int32_t sj13)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(sj13 >= -0x1000 && sj13 <= 0xfff);
+    return encode_dj_slots(opc, vd & 0x1f, sj13 & 0x1fff);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvj_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    return encode_dj_slots(opc, vd & 0x1f, vj & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(k >= 0 && k <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, k);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjsk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(sk5 >= -0x10 && sk5 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, sk5 & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk1_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk1)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk1 <= 0x1);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk1);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk2_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk2)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk2 <= 0x3);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk2);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk3_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk3 <= 0x7);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk3);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk4_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk4 <= 0xf);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk4);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk5_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk5 <= 0x1f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk5);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk6_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk6 <= 0x3f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk6);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk7_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk7 <= 0x7f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk7);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjuk8_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(uk8 <= 0xff);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, uk8);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjvk_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(vk >= 0x20 && vk <= 0x3f);
+    return encode_djk_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f);
+}
+
+static int32_t __attribute__((unused))
+encode_vdvjvkva_insn(LoongArchInsn opc, TCGReg vd, TCGReg vj, TCGReg vk,
+                     TCGReg va)
+{
+    tcg_debug_assert(vd >= 0x20 && vd <= 0x3f);
+    tcg_debug_assert(vj >= 0x20 && vj <= 0x3f);
+    tcg_debug_assert(vk >= 0x20 && vk <= 0x3f);
+    tcg_debug_assert(va >= 0x20 && va <= 0x3f);
+    return encode_djka_slots(opc, vd & 0x1f, vj & 0x1f, vk & 0x1f, va & 0x1f);
+}
+
 /* Emits the `clz.w d, j` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_clz_w(TCGContext *s, TCGReg d, TCGReg j)
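
The generated encode_*_slots helpers above simply OR register and immediate fields into fixed bit positions on top of the opcode constant (d at bit 0, j at bit 5, k at bit 10, a at bit 15, the wider n field at bit 18), and the encode_vd* wrappers mask LSX vector registers, which arrive as TCGReg values 0x20-0x3f, down to their 5-bit field with & 0x1f. A small worked example of that packing follows; pack_djka mirrors encode_djka_slots, the opcode value is taken from the enum above, and the v1 -> 0x21 mapping assumes v0 is TCGReg 0x20, as the 0x20-0x3f asserts in the encoders suggest.

#include <stdint.h>
#include <stdio.h>

/* Same field layout as encode_djka_slots() in the generated file. */
static uint32_t pack_djka(uint32_t opc, uint32_t d, uint32_t j,
                          uint32_t k, uint32_t a)
{
    return opc | d | j << 5 | k << 10 | a << 15;
}

int main(void)
{
    /* vfmadd.s v1, v2, v3, v4 -- opcode constant from the enum above. */
    uint32_t OPC_VFMADD_S = 0x09100000;
    uint32_t vd = 0x21, vj = 0x22, vk = 0x23, va = 0x24; /* v1..v4 as TCGReg */

    uint32_t insn = pack_djka(OPC_VFMADD_S,
                              vd & 0x1f, vj & 0x1f, vk & 0x1f, va & 0x1f);
    printf("0x%08x\n", insn);   /* prints 0x09120c41 */
    return 0;
}
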
@@ -711,277 +1740,5265 @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
     tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
 }
 
-/* Emits the `addu16i.d d, j, sk16` instruction.  */
+/* Emits the `vfmadd.s vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vfmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_S, vd, vj, vk, va));
 }
 
-/* Emits the `lu12i.w d, sj20` instruction.  */
+/* Emits the `vfmadd.d vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_LU12I_W, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMADD_D, vd, vj, vk, va));
 }
 
-/* Emits the `cu32i.d d, sj20` instruction.  */
+/* Emits the `vfmsub.s vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_cu32i_d(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_CU32I_D, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_S, vd, vj, vk, va));
 }
 
-/* Emits the `pcaddu2i d, sj20` instruction.  */
+/* Emits the `vfmsub.d vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_pcaddu2i(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU2I, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFMSUB_D, vd, vj, vk, va));
 }
 
-/* Emits the `pcalau12i d, sj20` instruction.  */
+/* Emits the `vfnmadd.s vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_pcalau12i(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfnmadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_PCALAU12I, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_S, vd, vj, vk, va));
 }
 
-/* Emits the `pcaddu12i d, sj20` instruction.  */
+/* Emits the `vfnmadd.d vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_pcaddu12i(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfnmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU12I, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMADD_D, vd, vj, vk, va));
 }
 
-/* Emits the `pcaddu18i d, sj20` instruction.  */
+/* Emits the `vfnmsub.s vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_pcaddu18i(TCGContext *s, TCGReg d, int32_t sj20)
+tcg_out_opc_vfnmsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU18I, d, sj20));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_S, vd, vj, vk, va));
 }
 
-/* Emits the `ld.b d, j, sk12` instruction.  */
+/* Emits the `vfnmsub.d vd, vj, vk, va` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfnmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_B, d, j, sk12));
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VFNMSUB_D, vd, vj, vk, va));
 }
 
-/* Emits the `ld.h d, j, sk12` instruction.  */
+/* Emits the `vfcmp.caf.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_caf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_H, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_S, vd, vj, vk));
 }
 
-/* Emits the `ld.w d, j, sk12` instruction.  */
+/* Emits the `vfcmp.saf.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_saf_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_W, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_S, vd, vj, vk));
 }
 
-/* Emits the `ld.d d, j, sk12` instruction.  */
+/* Emits the `vfcmp.clt.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_clt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_D, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_S, vd, vj, vk));
 }
 
-/* Emits the `st.b d, j, sk12` instruction.  */
+/* Emits the `vfcmp.slt.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_st_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_slt_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_ST_B, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_S, vd, vj, vk));
 }
 
-/* Emits the `st.h d, j, sk12` instruction.  */
+/* Emits the `vfcmp.ceq.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_st_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_ceq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_ST_H, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_S, vd, vj, vk));
 }
 
-/* Emits the `st.w d, j, sk12` instruction.  */
+/* Emits the `vfcmp.seq.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_st_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_seq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_ST_W, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_S, vd, vj, vk));
 }
 
-/* Emits the `st.d d, j, sk12` instruction.  */
+/* Emits the `vfcmp.cle.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_st_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_cle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_ST_D, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_S, vd, vj, vk));
 }
 
-/* Emits the `ld.bu d, j, sk12` instruction.  */
+/* Emits the `vfcmp.sle.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_bu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_sle_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_BU, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_S, vd, vj, vk));
 }
 
-/* Emits the `ld.hu d, j, sk12` instruction.  */
+/* Emits the `vfcmp.cun.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_hu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_cun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_HU, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_S, vd, vj, vk));
 }
 
-/* Emits the `ld.wu d, j, sk12` instruction.  */
+/* Emits the `vfcmp.sun.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ld_wu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+tcg_out_opc_vfcmp_sun_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk12_insn(OPC_LD_WU, d, j, sk12));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.b d, j, k` instruction.  */
+/* Emits the `vfcmp.cult.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_B, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.h d, j, k` instruction.  */
+/* Emits the `vfcmp.sult.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_sult_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_H, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.w d, j, k` instruction.  */
+/* Emits the `vfcmp.cueq.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_W, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.d d, j, k` instruction.  */
+/* Emits the `vfcmp.sueq.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_sueq_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_D, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_S, vd, vj, vk));
 }
 
-/* Emits the `stx.b d, j, k` instruction.  */
+/* Emits the `vfcmp.cule.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_stx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_STX_B, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_S, vd, vj, vk));
 }
 
-/* Emits the `stx.h d, j, k` instruction.  */
+/* Emits the `vfcmp.sule.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_stx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_sule_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_STX_H, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_S, vd, vj, vk));
 }
 
-/* Emits the `stx.w d, j, k` instruction.  */
+/* Emits the `vfcmp.cne.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_stx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_STX_W, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_S, vd, vj, vk));
 }
 
-/* Emits the `stx.d d, j, k` instruction.  */
+/* Emits the `vfcmp.sne.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_stx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_sne_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_STX_D, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.bu d, j, k` instruction.  */
+/* Emits the `vfcmp.cor.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_bu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_BU, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.hu d, j, k` instruction.  */
+/* Emits the `vfcmp.sor.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_hu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_sor_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_HU, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_S, vd, vj, vk));
 }
 
-/* Emits the `ldx.wu d, j, k` instruction.  */
+/* Emits the `vfcmp.cune.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ldx_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+tcg_out_opc_vfcmp_cune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djk_insn(OPC_LDX_WU, d, j, k));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_S, vd, vj, vk));
 }
 
-/* Emits the `dbar ud15` instruction.  */
+/* Emits the `vfcmp.sune.s vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_dbar(TCGContext *s, uint32_t ud15)
+tcg_out_opc_vfcmp_sune_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_ud15_insn(OPC_DBAR, ud15));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_S, vd, vj, vk));
 }
 
-/* Emits the `jirl d, j, sk16` instruction.  */
+/* Emits the `vfcmp.caf.d vd, vj, vk` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_jirl(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vfcmp_caf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_JIRL, d, j, sk16));
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CAF_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.saf.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_saf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SAF_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.clt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_clt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.slt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_slt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.ceq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_ceq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.seq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_seq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CLE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SLE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cun.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUN_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sun.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sun_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUN_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cult.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sult.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sult_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULT_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cueq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sueq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sueq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cule.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CULE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sule.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sule_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SULE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cne.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sne.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sne_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cor.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_COR_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sor.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sor_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SOR_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.cune.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_cune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_CUNE_D, vd, vj, vk));
+}
+
+/* Emits the `vfcmp.sune.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcmp_sune_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCMP_SUNE_D, vd, vj, vk));
+}
+
+/* Emits the `vbitsel.v vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitsel_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VBITSEL_V, vd, vj, vk, va));
+}
+
+/* Emits the `vshuf.b vd, vj, vk, va` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk, TCGReg va)
+{
+    tcg_out32(s, encode_vdvjvkva_insn(OPC_VSHUF_B, vd, vj, vk, va));
+}
+
+/* Emits the `addu16i.d d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
+}
+
+/* Emits the `lu12i.w d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_LU12I_W, d, sj20));
+}
+
+/* Emits the `cu32i.d d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_cu32i_d(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_CU32I_D, d, sj20));
+}
+
+/* Emits the `pcaddu2i d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_pcaddu2i(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU2I, d, sj20));
+}
+
+/* Emits the `pcalau12i d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_pcalau12i(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_PCALAU12I, d, sj20));
+}
+
+/* Emits the `pcaddu12i d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_pcaddu12i(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU12I, d, sj20));
+}
+
+/* Emits the `pcaddu18i d, sj20` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_pcaddu18i(TCGContext *s, TCGReg d, int32_t sj20)
+{
+    tcg_out32(s, encode_dsj20_insn(OPC_PCADDU18I, d, sj20));
+}
+
+/* Emits the `ld.b d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_B, d, j, sk12));
+}
+
+/* Emits the `ld.h d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_H, d, j, sk12));
+}
+
+/* Emits the `ld.w d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_W, d, j, sk12));
+}
+
+/* Emits the `ld.d d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_D, d, j, sk12));
+}
+
+/* Emits the `st.b d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_st_b(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_ST_B, d, j, sk12));
+}
+
+/* Emits the `st.h d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_st_h(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_ST_H, d, j, sk12));
+}
+
+/* Emits the `st.w d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_st_w(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_ST_W, d, j, sk12));
+}
+
+/* Emits the `st.d d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_st_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_ST_D, d, j, sk12));
+}
+
+/* Emits the `ld.bu d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_bu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_BU, d, j, sk12));
+}
+
+/* Emits the `ld.hu d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_hu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_HU, d, j, sk12));
+}
+
+/* Emits the `ld.wu d, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ld_wu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_djsk12_insn(OPC_LD_WU, d, j, sk12));
+}
+
+/* Emits the `vld vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vld(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VLD, vd, j, sk12));
+}
+
+/* Emits the `vst vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vst(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VST, vd, j, sk12));
+}
+
+/* Emits the `vldrepl.d vd, j, sk9` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk9)
+{
+    tcg_out32(s, encode_vdjsk9_insn(OPC_VLDREPL_D, vd, j, sk9));
+}
+
+/* Emits the `vldrepl.w vd, j, sk10` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk10)
+{
+    tcg_out32(s, encode_vdjsk10_insn(OPC_VLDREPL_W, vd, j, sk10));
+}
+
+/* Emits the `vldrepl.h vd, j, sk11` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk11)
+{
+    tcg_out32(s, encode_vdjsk11_insn(OPC_VLDREPL_H, vd, j, sk11));
+}
+
+/* Emits the `vldrepl.b vd, j, sk12` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldrepl_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk12)
+{
+    tcg_out32(s, encode_vdjsk12_insn(OPC_VLDREPL_B, vd, j, sk12));
+}
+
+/* Emits the `vstelm.d vd, j, sk8, un1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_d(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un1)
+{
+    tcg_out32(s, encode_vdjsk8un1_insn(OPC_VSTELM_D, vd, j, sk8, un1));
+}
+
+/* Emits the `vstelm.w vd, j, sk8, un2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_w(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un2)
+{
+    tcg_out32(s, encode_vdjsk8un2_insn(OPC_VSTELM_W, vd, j, sk8, un2));
+}
+
+/* Emits the `vstelm.h vd, j, sk8, un3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_h(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un3)
+{
+    tcg_out32(s, encode_vdjsk8un3_insn(OPC_VSTELM_H, vd, j, sk8, un3));
+}
+
+/* Emits the `vstelm.b vd, j, sk8, un4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstelm_b(TCGContext *s, TCGReg vd, TCGReg j, int32_t sk8,
+                     uint32_t un4)
+{
+    tcg_out32(s, encode_vdjsk8un4_insn(OPC_VSTELM_B, vd, j, sk8, un4));
+}
+
+/* Emits the `ldx.b d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_B, d, j, k));
+}
+
+/* Emits the `ldx.h d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_H, d, j, k));
+}
+
+/* Emits the `ldx.w d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_W, d, j, k));
+}
+
+/* Emits the `ldx.d d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_D, d, j, k));
+}
+
+/* Emits the `stx.b d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_stx_b(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_STX_B, d, j, k));
+}
+
+/* Emits the `stx.h d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_stx_h(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_STX_H, d, j, k));
+}
+
+/* Emits the `stx.w d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_stx_w(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_STX_W, d, j, k));
+}
+
+/* Emits the `stx.d d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_stx_d(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_STX_D, d, j, k));
+}
+
+/* Emits the `ldx.bu d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_bu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_BU, d, j, k));
+}
+
+/* Emits the `ldx.hu d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_hu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_HU, d, j, k));
+}
+
+/* Emits the `ldx.wu d, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ldx_wu(TCGContext *s, TCGReg d, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_djk_insn(OPC_LDX_WU, d, j, k));
+}
+
+/* Emits the `vldx vd, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_vdjk_insn(OPC_VLDX, vd, j, k));
+}
+
+/* Emits the `vstx vd, j, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vstx(TCGContext *s, TCGReg vd, TCGReg j, TCGReg k)
+{
+    tcg_out32(s, encode_vdjk_insn(OPC_VSTX, vd, j, k));
+}
+
+/* Emits the `dbar ud15` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_dbar(TCGContext *s, uint32_t ud15)
+{
+    tcg_out32(s, encode_ud15_insn(OPC_DBAR, ud15));
+}
+
+/* Emits the `jirl d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_jirl(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_JIRL, d, j, sk16));
 }
 
 /* Emits the `b sd10k16` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_b(TCGContext *s, int32_t sd10k16)
+tcg_out_opc_b(TCGContext *s, int32_t sd10k16)
+{
+    tcg_out32(s, encode_sd10k16_insn(OPC_B, sd10k16));
+}
+
+/* Emits the `bl sd10k16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_bl(TCGContext *s, int32_t sd10k16)
+{
+    tcg_out32(s, encode_sd10k16_insn(OPC_BL, sd10k16));
+}
+
+/* Emits the `beq d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_beq(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BEQ, d, j, sk16));
+}
+
+/* Emits the `bne d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_bne(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BNE, d, j, sk16));
+}
+
+/* Emits the `bgt d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_bgt(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BGT, d, j, sk16));
+}
+
+/* Emits the `ble d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_ble(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BLE, d, j, sk16));
+}
+
+/* Emits the `bgtu d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_bgtu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BGTU, d, j, sk16));
+}
+
+/* Emits the `bleu d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_bleu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_BLEU, d, j, sk16));
+}
+
+/* Emits the `vseq.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_B, vd, vj, vk));
+}
+
+/* Emits the `vseq.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_H, vd, vj, vk));
+}
+
+/* Emits the `vseq.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_W, vd, vj, vk));
+}
+
+/* Emits the `vseq.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseq_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSEQ_D, vd, vj, vk));
+}
+
+/* Emits the `vsle.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_B, vd, vj, vk));
+}
+
+/* Emits the `vsle.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_H, vd, vj, vk));
+}
+
+/* Emits the `vsle.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_W, vd, vj, vk));
+}
+
+/* Emits the `vsle.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_D, vd, vj, vk));
+}
+
+/* Emits the `vsle.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_BU, vd, vj, vk));
+}
+
+/* Emits the `vsle.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_HU, vd, vj, vk));
+}
+
+/* Emits the `vsle.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_WU, vd, vj, vk));
+}
+
+/* Emits the `vsle.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsle_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLE_DU, vd, vj, vk));
+}
+
+/* Emits the `vslt.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_B, vd, vj, vk));
+}
+
+/* Emits the `vslt.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_H, vd, vj, vk));
+}
+
+/* Emits the `vslt.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_W, vd, vj, vk));
+}
+
+/* Emits the `vslt.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_D, vd, vj, vk));
+}
+
+/* Emits the `vslt.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_BU, vd, vj, vk));
+}
+
+/* Emits the `vslt.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_HU, vd, vj, vk));
+}
+
+/* Emits the `vslt.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_WU, vd, vj, vk));
+}
+
+/* Emits the `vslt.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslt_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLT_DU, vd, vj, vk));
+}
+
+/* Emits the `vadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_B, vd, vj, vk));
+}
+
+/* Emits the `vadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_H, vd, vj, vk));
+}
+
+/* Emits the `vadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_W, vd, vj, vk));
+}
+
+/* Emits the `vadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_D, vd, vj, vk));
+}
+
+/* Emits the `vsub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vsub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vsub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vsubwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vsubwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUBWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vaddwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vsadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_B, vd, vj, vk));
+}
+
+/* Emits the `vsadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_H, vd, vj, vk));
+}
+
+/* Emits the `vsadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_W, vd, vj, vk));
+}
+
+/* Emits the `vsadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_D, vd, vj, vk));
+}
+
+/* Emits the `vssub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vssub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vssub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vssub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vsadd.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_BU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_HU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_WU, vd, vj, vk));
+}
+
+/* Emits the `vsadd.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsadd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSADD_DU, vd, vj, vk));
+}
+
+/* Emits the `vssub.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_BU, vd, vj, vk));
+}
+
+/* Emits the `vssub.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_HU, vd, vj, vk));
+}
+
+/* Emits the `vssub.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_WU, vd, vj, vk));
+}
+
+/* Emits the `vssub.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssub_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSUB_DU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_H_B, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_W_H, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_D_W, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_H_B, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_W_H, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_D_W, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.hu.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_HU_BU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.wu.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_WU_HU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.du.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_DU_WU, vd, vj, vk));
+}
+
+/* Emits the `vhaddw.qu.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhaddw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHADDW_QU_DU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.hu.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_HU_BU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.wu.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_WU_HU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.du.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_DU_WU, vd, vj, vk));
+}
+
+/* Emits the `vhsubw.qu.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vhsubw_qu_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VHSUBW_QU_DU, vd, vj, vk));
+}
+
+/* Emits the `vadda.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_B, vd, vj, vk));
+}
+
+/* Emits the `vadda.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_H, vd, vj, vk));
+}
+
+/* Emits the `vadda.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_W, vd, vj, vk));
+}
+
+/* Emits the `vadda.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadda_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADDA_D, vd, vj, vk));
+}
+
+/* Emits the `vabsd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_B, vd, vj, vk));
+}
+
+/* Emits the `vabsd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_H, vd, vj, vk));
+}
+
+/* Emits the `vabsd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_W, vd, vj, vk));
+}
+
+/* Emits the `vabsd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_D, vd, vj, vk));
+}
+
+/* Emits the `vabsd.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_BU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_HU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_WU, vd, vj, vk));
+}
+
+/* Emits the `vabsd.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vabsd_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VABSD_DU, vd, vj, vk));
+}
+
+/* Emits the `vavg.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_B, vd, vj, vk));
+}
+
+/* Emits the `vavg.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_H, vd, vj, vk));
+}
+
+/* Emits the `vavg.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_W, vd, vj, vk));
+}
+
+/* Emits the `vavg.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_D, vd, vj, vk));
+}
+
+/* Emits the `vavg.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_BU, vd, vj, vk));
+}
+
+/* Emits the `vavg.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_HU, vd, vj, vk));
+}
+
+/* Emits the `vavg.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_WU, vd, vj, vk));
+}
+
+/* Emits the `vavg.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavg_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVG_DU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_B, vd, vj, vk));
+}
+
+/* Emits the `vavgr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_H, vd, vj, vk));
+}
+
+/* Emits the `vavgr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_W, vd, vj, vk));
+}
+
+/* Emits the `vavgr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_D, vd, vj, vk));
+}
+
+/* Emits the `vavgr.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_BU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_HU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_WU, vd, vj, vk));
+}
+
+/* Emits the `vavgr.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vavgr_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAVGR_DU, vd, vj, vk));
+}
+
+/* Emits the `vmax.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_B, vd, vj, vk));
+}
+
+/* Emits the `vmax.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_H, vd, vj, vk));
+}
+
+/* Emits the `vmax.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_W, vd, vj, vk));
+}
+
+/* Emits the `vmax.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_D, vd, vj, vk));
+}
+
+/* Emits the `vmin.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_B, vd, vj, vk));
+}
+
+/* Emits the `vmin.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_H, vd, vj, vk));
+}
+
+/* Emits the `vmin.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_W, vd, vj, vk));
+}
+
+/* Emits the `vmin.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_D, vd, vj, vk));
+}
+
+/* Emits the `vmax.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_BU, vd, vj, vk));
+}
+
+/* Emits the `vmax.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_HU, vd, vj, vk));
+}
+
+/* Emits the `vmax.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_WU, vd, vj, vk));
+}
+
+/* Emits the `vmax.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmax_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMAX_DU, vd, vj, vk));
+}
+
+/* Emits the `vmin.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_BU, vd, vj, vk));
+}
+
+/* Emits the `vmin.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_HU, vd, vj, vk));
+}
+
+/* Emits the `vmin.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_WU, vd, vj, vk));
+}
+
+/* Emits the `vmin.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmin_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMIN_DU, vd, vj, vk));
+}
+
+/* Emits the `vmul.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_B, vd, vj, vk));
+}
+
+/* Emits the `vmul.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_H, vd, vj, vk));
+}
+
+/* Emits the `vmul.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_W, vd, vj, vk));
+}
+
+/* Emits the `vmul.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUL_D, vd, vj, vk));
+}
+
+/* Emits the `vmuh.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_B, vd, vj, vk));
+}
+
+/* Emits the `vmuh.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_H, vd, vj, vk));
+}
+
+/* Emits the `vmuh.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_W, vd, vj, vk));
+}
+
+/* Emits the `vmuh.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_D, vd, vj, vk));
+}
+
+/* Emits the `vmuh.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_BU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_HU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_WU, vd, vj, vk));
+}
+
+/* Emits the `vmuh.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmuh_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMUH_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmulwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmulwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMULWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmadd.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_B, vd, vj, vk));
+}
+
+/* Emits the `vmadd.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_H, vd, vj, vk));
+}
+
+/* Emits the `vmadd.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_W, vd, vj, vk));
+}
+
+/* Emits the `vmadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADD_D, vd, vj, vk));
+}
+
+/* Emits the `vmsub.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_B, vd, vj, vk));
+}
+
+/* Emits the `vmsub.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_H, vd, vj, vk));
+}
+
+/* Emits the `vmsub.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_W, vd, vj, vk));
+}
+
+/* Emits the `vmsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwev.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwev_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWEV_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.h.bu.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_h_bu_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_H_BU_B, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.w.hu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_w_hu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_W_HU_H, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.d.wu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_d_wu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_D_WU_W, vd, vj, vk));
+}
+
+/* Emits the `vmaddwod.q.du.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaddwod_q_du_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMADDWOD_Q_DU_D, vd, vj, vk));
+}
+
+/* Emits the `vdiv.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_B, vd, vj, vk));
+}
+
+/* Emits the `vdiv.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_H, vd, vj, vk));
+}
+
+/* Emits the `vdiv.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_W, vd, vj, vk));
+}
+
+/* Emits the `vdiv.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_D, vd, vj, vk));
+}
+
+/* Emits the `vmod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_B, vd, vj, vk));
+}
+
+/* Emits the `vmod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_H, vd, vj, vk));
+}
+
+/* Emits the `vmod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_W, vd, vj, vk));
+}
+
+/* Emits the `vmod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_D, vd, vj, vk));
+}
+
+/* Emits the `vdiv.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_BU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_HU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_WU, vd, vj, vk));
+}
+
+/* Emits the `vdiv.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vdiv_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VDIV_DU, vd, vj, vk));
+}
+
+/* Emits the `vmod.bu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_bu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_BU, vd, vj, vk));
+}
+
+/* Emits the `vmod.hu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_hu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_HU, vd, vj, vk));
+}
+
+/* Emits the `vmod.wu vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_wu(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_WU, vd, vj, vk));
+}
+
+/* Emits the `vmod.du vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmod_du(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VMOD_DU, vd, vj, vk));
+}
+
+/* Emits the `vsll.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_B, vd, vj, vk));
+}
+
+/* Emits the `vsll.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_H, vd, vj, vk));
+}
+
+/* Emits the `vsll.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_W, vd, vj, vk));
+}
+
+/* Emits the `vsll.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsll_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSLL_D, vd, vj, vk));
+}
+
+/* Emits the `vsrl.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_B, vd, vj, vk));
+}
+
+/* Emits the `vsrl.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_H, vd, vj, vk));
+}
+
+/* Emits the `vsrl.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_W, vd, vj, vk));
+}
+
+/* Emits the `vsrl.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRL_D, vd, vj, vk));
+}
+
+/* Emits the `vsra.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_B, vd, vj, vk));
+}
+
+/* Emits the `vsra.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_H, vd, vj, vk));
+}
+
+/* Emits the `vsra.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_W, vd, vj, vk));
+}
+
+/* Emits the `vsra.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsra_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRA_D, vd, vj, vk));
+}
+
+/* Emits the `vrotr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_B, vd, vj, vk));
+}
+
+/* Emits the `vrotr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_H, vd, vj, vk));
+}
+
+/* Emits the `vrotr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_W, vd, vj, vk));
+}
+
+/* Emits the `vrotr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VROTR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_B, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_H, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_W, vd, vj, vk));
+}
+
+/* Emits the `vsrlr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrar.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_B, vd, vj, vk));
+}
+
+/* Emits the `vsrar.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_H, vd, vj, vk));
+}
+
+/* Emits the `vsrar.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_W, vd, vj, vk));
+}
+
+/* Emits the `vsrar.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrar_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAR_D, vd, vj, vk));
+}
+
+/* Emits the `vsrln.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrln.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrln.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsran.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsran.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsran.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRAN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrlrn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRLRN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vsrarn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSRARN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrln.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrln.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrln.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssran.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssran.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssran.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.b.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_b_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_B_H, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.h.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_h_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_H_W, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_W_D, vd, vj, vk));
+}
+
+/* Emits the `vssrln.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrln.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrln.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrln_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssran.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssran.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssran.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssran_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRAN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrlrn.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRLRN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.bu.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_BU_H, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.hu.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_HU_W, vd, vj, vk));
+}
+
+/* Emits the `vssrarn.wu.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarn_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSSRARN_WU_D, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_B, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_H, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_W, vd, vj, vk));
+}
+
+/* Emits the `vbitclr.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclr_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITCLR_D, vd, vj, vk));
+}
+
+/* Emits the `vbitset.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_B, vd, vj, vk));
+}
+
+/* Emits the `vbitset.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_H, vd, vj, vk));
+}
+
+/* Emits the `vbitset.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_W, vd, vj, vk));
+}
+
+/* Emits the `vbitset.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitset_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITSET_D, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_B, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_H, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_W, vd, vj, vk));
+}
+
+/* Emits the `vbitrev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VBITREV_D, vd, vj, vk));
+}
+
+/* Emits the `vpackev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_B, vd, vj, vk));
+}
+
+/* Emits the `vpackev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_H, vd, vj, vk));
+}
+
+/* Emits the `vpackev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_W, vd, vj, vk));
+}
+
+/* Emits the `vpackev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKEV_D, vd, vj, vk));
+}
+
+/* Emits the `vpackod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_B, vd, vj, vk));
+}
+
+/* Emits the `vpackod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_H, vd, vj, vk));
+}
+
+/* Emits the `vpackod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_W, vd, vj, vk));
+}
+
+/* Emits the `vpackod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpackod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPACKOD_D, vd, vj, vk));
+}
+
+/* Emits the `vilvl.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_B, vd, vj, vk));
+}
+
+/* Emits the `vilvl.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_H, vd, vj, vk));
+}
+
+/* Emits the `vilvl.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_W, vd, vj, vk));
+}
+
+/* Emits the `vilvl.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvl_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVL_D, vd, vj, vk));
+}
+
+/* Emits the `vilvh.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_B, vd, vj, vk));
+}
+
+/* Emits the `vilvh.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_H, vd, vj, vk));
+}
+
+/* Emits the `vilvh.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_W, vd, vj, vk));
+}
+
+/* Emits the `vilvh.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vilvh_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VILVH_D, vd, vj, vk));
+}
+
+/* Emits the `vpickev.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_B, vd, vj, vk));
+}
+
+/* Emits the `vpickev.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_H, vd, vj, vk));
+}
+
+/* Emits the `vpickev.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_W, vd, vj, vk));
+}
+
+/* Emits the `vpickev.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickev_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKEV_D, vd, vj, vk));
+}
+
+/* Emits the `vpickod.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_B, vd, vj, vk));
+}
+
+/* Emits the `vpickod.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_H, vd, vj, vk));
+}
+
+/* Emits the `vpickod.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_W, vd, vj, vk));
+}
+
+/* Emits the `vpickod.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickod_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VPICKOD_D, vd, vj, vk));
+}
+
+/* Emits the `vreplve.b vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_B, vd, vj, k));
+}
+
+/* Emits the `vreplve.h vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_H, vd, vj, k));
+}
+
+/* Emits the `vreplve.w vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_W, vd, vj, k));
+}
+
+/* Emits the `vreplve.d vd, vj, k` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplve_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg k)
+{
+    tcg_out32(s, encode_vdvjk_insn(OPC_VREPLVE_D, vd, vj, k));
+}
+
+/* Emits the `vand.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vand_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VAND_V, vd, vj, vk));
+}
+
+/* Emits the `vor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VOR_V, vd, vj, vk));
+}
+
+/* Emits the `vxor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vxor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VXOR_V, vd, vj, vk));
+}
+
+/* Emits the `vnor.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vnor_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VNOR_V, vd, vj, vk));
+}
+
+/* Emits the `vandn.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vandn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VANDN_V, vd, vj, vk));
+}
+
+/* Emits the `vorn.v vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vorn_v(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VORN_V, vd, vj, vk));
+}
+
+/* Emits the `vfrstp.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstp_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_B, vd, vj, vk));
+}
+
+/* Emits the `vfrstp.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstp_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFRSTP_H, vd, vj, vk));
+}
+
+/* Emits the `vadd.q vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vadd_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VADD_Q, vd, vj, vk));
+}
+
+/* Emits the `vsub.q vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsub_q(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSUB_Q, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.b vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_b(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_B, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_H, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_W, vd, vj, vk));
+}
+
+/* Emits the `vsigncov.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsigncov_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSIGNCOV_D, vd, vj, vk));
+}
+
+/* Emits the `vfadd.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfadd_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_S, vd, vj, vk));
+}
+
+/* Emits the `vfadd.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfadd_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFADD_D, vd, vj, vk));
+}
+
+/* Emits the `vfsub.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsub_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_S, vd, vj, vk));
+}
+
+/* Emits the `vfsub.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsub_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFSUB_D, vd, vj, vk));
+}
+
+/* Emits the `vfmul.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmul_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_S, vd, vj, vk));
+}
+
+/* Emits the `vfmul.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmul_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMUL_D, vd, vj, vk));
+}
+
+/* Emits the `vfdiv.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfdiv_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_S, vd, vj, vk));
+}
+
+/* Emits the `vfdiv.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfdiv_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFDIV_D, vd, vj, vk));
+}
+
+/* Emits the `vfmax.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmax_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_S, vd, vj, vk));
+}
+
+/* Emits the `vfmax.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmax_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAX_D, vd, vj, vk));
+}
+
+/* Emits the `vfmin.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmin_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_S, vd, vj, vk));
+}
+
+/* Emits the `vfmin.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmin_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMIN_D, vd, vj, vk));
+}
+
+/* Emits the `vfmaxa.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmaxa_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_S, vd, vj, vk));
+}
+
+/* Emits the `vfmaxa.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmaxa_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMAXA_D, vd, vj, vk));
+}
+
+/* Emits the `vfmina.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmina_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_S, vd, vj, vk));
+}
+
+/* Emits the `vfmina.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfmina_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFMINA_D, vd, vj, vk));
+}
+
+/* Emits the `vfcvt.h.s vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvt_h_s(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_H_S, vd, vj, vk));
+}
+
+/* Emits the `vfcvt.s.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvt_s_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFCVT_S_D, vd, vj, vk));
+}
+
+/* Emits the `vffint.s.l vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_l(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFFINT_S_L, vd, vj, vk));
+}
+
+/* Emits the `vftint.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINT_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrm.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRM_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrp.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRP_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrz.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRZ_W_D, vd, vj, vk));
+}
+
+/* Emits the `vftintrne.w.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_w_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VFTINTRNE_W_D, vd, vj, vk));
+}
+
+/* Emits the `vshuf.h vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_h(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_H, vd, vj, vk));
+}
+
+/* Emits the `vshuf.w vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_w(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_W, vd, vj, vk));
+}
+
+/* Emits the `vshuf.d vd, vj, vk` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf_d(TCGContext *s, TCGReg vd, TCGReg vj, TCGReg vk)
+{
+    tcg_out32(s, encode_vdvjvk_insn(OPC_VSHUF_D, vd, vj, vk));
+}
+
+/* Emits the `vseqi.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_B, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_H, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_W, vd, vj, sk5));
+}
+
+/* Emits the `vseqi.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseqi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSEQI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslei.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_B, vd, vj, sk5));
+}
+
+/* Emits the `vslei.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_H, vd, vj, sk5));
+}
+
+/* Emits the `vslei.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_W, vd, vj, sk5));
+}
+
+/* Emits the `vslei.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLEI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslei.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vslei.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslei_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLEI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_B, vd, vj, sk5));
+}
+
+/* Emits the `vslti.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_H, vd, vj, sk5));
+}
+
+/* Emits the `vslti.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_W, vd, vj, sk5));
+}
+
+/* Emits the `vslti.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VSLTI_D, vd, vj, sk5));
+}
+
+/* Emits the `vslti.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vslti.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslti_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLTI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vaddi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vaddi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VADDI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vsubi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsubi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSUBI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vbsll.v vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbsll_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSLL_V, vd, vj, uk5));
+}
+
+/* Emits the `vbsrl.v vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbsrl_v(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBSRL_V, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_B, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_H, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_W, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMAXI_D, vd, vj, sk5));
+}
+
+/* Emits the `vmini.b vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_b(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_B, vd, vj, sk5));
+}
+
+/* Emits the `vmini.h vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_h(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_H, vd, vj, sk5));
+}
+
+/* Emits the `vmini.w vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_w(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_W, vd, vj, sk5));
+}
+
+/* Emits the `vmini.d vd, vj, sk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_d(TCGContext *s, TCGReg vd, TCGReg vj, int32_t sk5)
+{
+    tcg_out32(s, encode_vdvjsk5_insn(OPC_VMINI_D, vd, vj, sk5));
+}
+
+/* Emits the `vmaxi.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vmaxi.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmaxi_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMAXI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.bu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_BU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.hu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_HU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_WU, vd, vj, uk5));
+}
+
+/* Emits the `vmini.du vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmini_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VMINI_DU, vd, vj, uk5));
+}
+
+/* Emits the `vfrstpi.b vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstpi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_B, vd, vj, uk5));
+}
+
+/* Emits the `vfrstpi.h vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrstpi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VFRSTPI_H, vd, vj, uk5));
+}
+
+/* Emits the `vclo.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_B, vd, vj));
+}
+
+/* Emits the `vclo.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_H, vd, vj));
+}
+
+/* Emits the `vclo.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_W, vd, vj));
+}
+
+/* Emits the `vclo.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclo_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLO_D, vd, vj));
+}
+
+/* Emits the `vclz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_B, vd, vj));
+}
+
+/* Emits the `vclz.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_H, vd, vj));
+}
+
+/* Emits the `vclz.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_W, vd, vj));
+}
+
+/* Emits the `vclz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vclz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VCLZ_D, vd, vj));
+}
+
+/* Emits the `vpcnt.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_B, vd, vj));
+}
+
+/* Emits the `vpcnt.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_H, vd, vj));
+}
+
+/* Emits the `vpcnt.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_W, vd, vj));
+}
+
+/* Emits the `vpcnt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpcnt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VPCNT_D, vd, vj));
+}
+
+/* Emits the `vneg.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_B, vd, vj));
+}
+
+/* Emits the `vneg.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_H, vd, vj));
+}
+
+/* Emits the `vneg.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_W, vd, vj));
+}
+
+/* Emits the `vneg.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vneg_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VNEG_D, vd, vj));
+}
+
+/* Emits the `vmskltz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_B, vd, vj));
+}
+
+/* Emits the `vmskltz.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_H, vd, vj));
+}
+
+/* Emits the `vmskltz.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_W, vd, vj));
+}
+
+/* Emits the `vmskltz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskltz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKLTZ_D, vd, vj));
+}
+
+/* Emits the `vmskgez.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmskgez_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKGEZ_B, vd, vj));
+}
+
+/* Emits the `vmsknz.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vmsknz_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VMSKNZ_B, vd, vj));
+}
+
+/* Emits the `vseteqz.v cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vseteqz_v(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETEQZ_V, cd, vj));
+}
+
+/* Emits the `vsetnez.v cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetnez_v(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETNEZ_V, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.b cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_b(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_B, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.h cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_h(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_H, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.w cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_w(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_W, cd, vj));
+}
+
+/* Emits the `vsetanyeqz.d cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetanyeqz_d(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETANYEQZ_D, cd, vj));
+}
+
+/* Emits the `vsetallnez.b cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_b(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_B, cd, vj));
+}
+
+/* Emits the `vsetallnez.h cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_h(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_H, cd, vj));
+}
+
+/* Emits the `vsetallnez.w cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_w(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_W, cd, vj));
+}
+
+/* Emits the `vsetallnez.d cd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsetallnez_d(TCGContext *s, TCGReg cd, TCGReg vj)
+{
+    tcg_out32(s, encode_cdvj_insn(OPC_VSETALLNEZ_D, cd, vj));
+}
+
+/* Emits the `vflogb.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vflogb_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_S, vd, vj));
+}
+
+/* Emits the `vflogb.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vflogb_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFLOGB_D, vd, vj));
+}
+
+/* Emits the `vfclass.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfclass_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_S, vd, vj));
+}
+
+/* Emits the `vfclass.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfclass_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCLASS_D, vd, vj));
+}
+
+/* Emits the `vfsqrt.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_S, vd, vj));
+}
+
+/* Emits the `vfsqrt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFSQRT_D, vd, vj));
+}
+
+/* Emits the `vfrecip.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrecip_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_S, vd, vj));
+}
+
+/* Emits the `vfrecip.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrecip_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRECIP_D, vd, vj));
+}
+
+/* Emits the `vfrsqrt.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrsqrt_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_S, vd, vj));
+}
+
+/* Emits the `vfrsqrt.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrsqrt_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRSQRT_D, vd, vj));
+}
+
+/* Emits the `vfrint.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrint_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_S, vd, vj));
+}
+
+/* Emits the `vfrint.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrint_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINT_D, vd, vj));
+}
+
+/* Emits the `vfrintrm.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrm_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_S, vd, vj));
+}
+
+/* Emits the `vfrintrm.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrm_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRM_D, vd, vj));
+}
+
+/* Emits the `vfrintrp.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrp_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_S, vd, vj));
+}
+
+/* Emits the `vfrintrp.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrp_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRP_D, vd, vj));
+}
+
+/* Emits the `vfrintrz.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrz_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_S, vd, vj));
+}
+
+/* Emits the `vfrintrz.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrz_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRZ_D, vd, vj));
+}
+
+/* Emits the `vfrintrne.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrne_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_S, vd, vj));
+}
+
+/* Emits the `vfrintrne.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfrintrne_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFRINTRNE_D, vd, vj));
+}
+
+/* Emits the `vfcvtl.s.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvtl_s_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_S_H, vd, vj));
+}
+
+/* Emits the `vfcvth.s.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvth_s_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_S_H, vd, vj));
+}
+
+/* Emits the `vfcvtl.d.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvtl_d_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTL_D_S, vd, vj));
+}
+
+/* Emits the `vfcvth.d.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vfcvth_d_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFCVTH_D_S, vd, vj));
+}
+
+/* Emits the `vffint.s.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_W, vd, vj));
+}
+
+/* Emits the `vffint.s.wu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_s_wu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_S_WU, vd, vj));
+}
+
+/* Emits the `vffint.d.l vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_d_l(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_L, vd, vj));
+}
+
+/* Emits the `vffint.d.lu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffint_d_lu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINT_D_LU, vd, vj));
+}
+
+/* Emits the `vffintl.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffintl_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINTL_D_W, vd, vj));
+}
+
+/* Emits the `vffinth.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vffinth_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFFINTH_D_W, vd, vj));
+}
+
+/* Emits the `vftint.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_W_S, vd, vj));
+}
+
+/* Emits the `vftint.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_L_D, vd, vj));
+}
+
+/* Emits the `vftintrm.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_W_S, vd, vj));
+}
+
+/* Emits the `vftintrm.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrm_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRM_L_D, vd, vj));
+}
+
+/* Emits the `vftintrp.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_W_S, vd, vj));
+}
+
+/* Emits the `vftintrp.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrp_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRP_L_D, vd, vj));
+}
+
+/* Emits the `vftintrz.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_W_S, vd, vj));
+}
+
+/* Emits the `vftintrz.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_L_D, vd, vj));
+}
+
+/* Emits the `vftintrne.w.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_w_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_W_S, vd, vj));
+}
+
+/* Emits the `vftintrne.l.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrne_l_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNE_L_D, vd, vj));
+}
+
+/* Emits the `vftint.wu.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_wu_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_WU_S, vd, vj));
+}
+
+/* Emits the `vftint.lu.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftint_lu_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINT_LU_D, vd, vj));
+}
+
+/* Emits the `vftintrz.wu.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_wu_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_WU_S, vd, vj));
+}
+
+/* Emits the `vftintrz.lu.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrz_lu_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZ_LU_D, vd, vj));
+}
+
+/* Emits the `vftintl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTL_L_S, vd, vj));
+}
+
+/* Emits the `vftinth.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftinth_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrml.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrml_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRML_L_S, vd, vj));
+}
+
+/* Emits the `vftintrmh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrmh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRMH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrpl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrpl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrph.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrph_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRPH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrzl.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrzl_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrzh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrzh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRZH_L_S, vd, vj));
+}
+
+/* Emits the `vftintrnel.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrnel_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEL_L_S, vd, vj));
+}
+
+/* Emits the `vftintrneh.l.s vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vftintrneh_l_s(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VFTINTRNEH_L_S, vd, vj));
+}
+
+/* Emits the `vexth.h.b vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_h_b(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_H_B, vd, vj));
+}
+
+/* Emits the `vexth.w.h vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_w_h(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_W_H, vd, vj));
+}
+
+/* Emits the `vexth.d.w vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_d_w(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_D_W, vd, vj));
+}
+
+/* Emits the `vexth.q.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_q_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_Q_D, vd, vj));
+}
+
+/* Emits the `vexth.hu.bu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_HU_BU, vd, vj));
+}
+
+/* Emits the `vexth.wu.hu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_WU_HU, vd, vj));
+}
+
+/* Emits the `vexth.du.wu vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_du_wu(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_DU_WU, vd, vj));
+}
+
+/* Emits the `vexth.qu.du vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vexth_qu_du(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTH_QU_DU, vd, vj));
+}
+
+/* Emits the `vreplgr2vr.b vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_B, vd, j));
+}
+
+/* Emits the `vreplgr2vr.h vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_H, vd, j));
+}
+
+/* Emits the `vreplgr2vr.w vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_W, vd, j));
+}
+
+/* Emits the `vreplgr2vr.d vd, j` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j)
+{
+    tcg_out32(s, encode_vdj_insn(OPC_VREPLGR2VR_D, vd, j));
+}
+
+/* Emits the `vrotri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VROTRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vrotri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VROTRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vrotri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VROTRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vrotri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vrotri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VROTRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrlri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrlri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrlri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrari.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRARI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrari.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrari.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrari.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrari_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARI_D, vd, vj, uk6));
+}
+
+/* Emits the `vinsgr2vr.b vd, j, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_b(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdjuk4_insn(OPC_VINSGR2VR_B, vd, j, uk4));
+}
+
+/* Emits the `vinsgr2vr.h vd, j, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_h(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdjuk3_insn(OPC_VINSGR2VR_H, vd, j, uk3));
+}
+
+/* Emits the `vinsgr2vr.w vd, j, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_w(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk2)
+{
+    tcg_out32(s, encode_vdjuk2_insn(OPC_VINSGR2VR_W, vd, j, uk2));
+}
+
+/* Emits the `vinsgr2vr.d vd, j, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vinsgr2vr_d(TCGContext *s, TCGReg vd, TCGReg j, uint32_t uk1)
+{
+    tcg_out32(s, encode_vdjuk1_insn(OPC_VINSGR2VR_D, vd, j, uk1));
+}
+
+/* Emits the `vpickve2gr.b d, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_b(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_B, d, vj, uk4));
+}
+
+/* Emits the `vpickve2gr.h d, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_h(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_H, d, vj, uk3));
+}
+
+/* Emits the `vpickve2gr.w d, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_w(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_W, d, vj, uk2));
+}
+
+/* Emits the `vpickve2gr.d d, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_d(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_D, d, vj, uk1));
+}
+
+/* Emits the `vpickve2gr.bu d, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_bu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_dvjuk4_insn(OPC_VPICKVE2GR_BU, d, vj, uk4));
+}
+
+/* Emits the `vpickve2gr.hu d, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_hu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_dvjuk3_insn(OPC_VPICKVE2GR_HU, d, vj, uk3));
+}
+
+/* Emits the `vpickve2gr.wu d, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_wu(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_dvjuk2_insn(OPC_VPICKVE2GR_WU, d, vj, uk2));
+}
+
+/* Emits the `vpickve2gr.du d, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpickve2gr_du(TCGContext *s, TCGReg d, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_dvjuk1_insn(OPC_VPICKVE2GR_DU, d, vj, uk1));
+}
+
+/* Emits the `vreplvei.b vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VREPLVEI_B, vd, vj, uk4));
+}
+
+/* Emits the `vreplvei.h vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VREPLVEI_H, vd, vj, uk3));
+}
+
+/* Emits the `vreplvei.w vd, vj, uk2` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk2)
+{
+    tcg_out32(s, encode_vdvjuk2_insn(OPC_VREPLVEI_W, vd, vj, uk2));
+}
+
+/* Emits the `vreplvei.d vd, vj, uk1` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vreplvei_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk1)
+{
+    tcg_out32(s, encode_vdvjuk1_insn(OPC_VREPLVEI_D, vd, vj, uk1));
+}
+
+/* Emits the `vsllwil.h.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_h_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_H_B, vd, vj, uk3));
+}
+
+/* Emits the `vsllwil.w.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_w_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_W_H, vd, vj, uk4));
+}
+
+/* Emits the `vsllwil.d.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_d_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_D_W, vd, vj, uk5));
+}
+
+/* Emits the `vextl.q.d vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextl_q_d(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_Q_D, vd, vj));
+}
+
+/* Emits the `vsllwil.hu.bu vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_hu_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLWIL_HU_BU, vd, vj, uk3));
+}
+
+/* Emits the `vsllwil.wu.hu vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_wu_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLWIL_WU_HU, vd, vj, uk4));
+}
+
+/* Emits the `vsllwil.du.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsllwil_du_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLWIL_DU_WU, vd, vj, uk5));
+}
+
+/* Emits the `vextl.qu.du vd, vj` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextl_qu_du(TCGContext *s, TCGReg vd, TCGReg vj)
+{
+    tcg_out32(s, encode_vdvj_insn(OPC_VEXTL_QU_DU, vd, vj));
+}
+
+/* Emits the `vbitclri.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITCLRI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitclri.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITCLRI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitclri.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITCLRI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitclri.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitclri_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITCLRI_D, vd, vj, uk6));
+}
+
+/* Emits the `vbitseti.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITSETI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitseti.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITSETI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitseti.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITSETI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitseti.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseti_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITSETI_D, vd, vj, uk6));
+}
+
+/* Emits the `vbitrevi.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VBITREVI_B, vd, vj, uk3));
+}
+
+/* Emits the `vbitrevi.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VBITREVI_H, vd, vj, uk4));
+}
+
+/* Emits the `vbitrevi.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VBITREVI_W, vd, vj, uk5));
+}
+
+/* Emits the `vbitrevi.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitrevi_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VBITREVI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsat.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_B, vd, vj, uk3));
+}
+
+/* Emits the `vsat.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_H, vd, vj, uk4));
+}
+
+/* Emits the `vsat.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_W, vd, vj, uk5));
+}
+
+/* Emits the `vsat.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_D, vd, vj, uk6));
+}
+
+/* Emits the `vsat.bu vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_bu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSAT_BU, vd, vj, uk3));
+}
+
+/* Emits the `vsat.hu vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_hu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSAT_HU, vd, vj, uk4));
+}
+
+/* Emits the `vsat.wu vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_wu(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSAT_WU, vd, vj, uk5));
+}
+
+/* Emits the `vsat.du vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsat_du(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSAT_DU, vd, vj, uk6));
+}
+
+/* Emits the `vslli.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSLLI_B, vd, vj, uk3));
+}
+
+/* Emits the `vslli.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSLLI_H, vd, vj, uk4));
+}
+
+/* Emits the `vslli.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSLLI_W, vd, vj, uk5));
+}
+
+/* Emits the `vslli.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vslli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSLLI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrli.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRLI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrli.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLI_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrli.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLI_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrli.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrli_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLI_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrai.b vd, vj, uk3` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk3)
+{
+    tcg_out32(s, encode_vdvjuk3_insn(OPC_VSRAI_B, vd, vj, uk3));
+}
+
+/* Emits the `vsrai.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrai_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
 {
-    tcg_out32(s, encode_sd10k16_insn(OPC_B, sd10k16));
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRAI_H, vd, vj, uk4));
 }
 
-/* Emits the `bl sd10k16` instruction.  */
+/* Emits the `vsrai.w vd, vj, uk5` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_bl(TCGContext *s, int32_t sd10k16)
+tcg_out_opc_vsrai_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
 {
-    tcg_out32(s, encode_sd10k16_insn(OPC_BL, sd10k16));
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRAI_W, vd, vj, uk5));
 }
 
-/* Emits the `beq d, j, sk16` instruction.  */
+/* Emits the `vsrai.d vd, vj, uk6` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_beq(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrai_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BEQ, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRAI_D, vd, vj, uk6));
 }
 
-/* Emits the `bne d, j, sk16` instruction.  */
+/* Emits the `vsrlni.b.h vd, vj, uk4` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_bne(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BNE, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLNI_B_H, vd, vj, uk4));
 }
 
-/* Emits the `bgt d, j, sk16` instruction.  */
+/* Emits the `vsrlni.h.w vd, vj, uk5` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_bgt(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BGT, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLNI_H_W, vd, vj, uk5));
 }
 
-/* Emits the `ble d, j, sk16` instruction.  */
+/* Emits the `vsrlni.w.d vd, vj, uk6` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_ble(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BLE, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLNI_W_D, vd, vj, uk6));
 }
 
-/* Emits the `bgtu d, j, sk16` instruction.  */
+/* Emits the `vsrlni.d.q vd, vj, uk7` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_bgtu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BGTU, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLNI_D_Q, vd, vj, uk7));
 }
 
-/* Emits the `bleu d, j, sk16` instruction.  */
+/* Emits the `vsrlrni.b.h vd, vj, uk4` instruction.  */
 static void __attribute__((unused))
-tcg_out_opc_bleu(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+tcg_out_opc_vsrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
 {
-    tcg_out32(s, encode_djsk16_insn(OPC_BLEU, d, j, sk16));
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRLRNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrlrni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRLRNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrlrni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRLRNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrlrni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRLRNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlrni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlrni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlrni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlrni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrlrni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRLRNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrlrni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRLRNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrlrni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRLRNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrlrni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrlrni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRLRNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vsrani.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRANI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrani.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRANI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrani.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRANI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrani.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRANI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vsrarni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSRARNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vsrarni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSRARNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vsrarni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSRARNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vsrarni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vsrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSRARNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrani.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrani.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrani.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrani.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrani.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRANI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrani.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRANI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrani.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRANI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrani.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrani_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRANI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrarni.b.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_b_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_B_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrarni.h.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_h_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_H_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrarni.w.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_w_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_W_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrarni.d.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_d_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_D_Q, vd, vj, uk7));
+}
+
+/* Emits the `vssrarni.bu.h vd, vj, uk4` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_bu_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk4)
+{
+    tcg_out32(s, encode_vdvjuk4_insn(OPC_VSSRARNI_BU_H, vd, vj, uk4));
+}
+
+/* Emits the `vssrarni.hu.w vd, vj, uk5` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_hu_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk5)
+{
+    tcg_out32(s, encode_vdvjuk5_insn(OPC_VSSRARNI_HU_W, vd, vj, uk5));
+}
+
+/* Emits the `vssrarni.wu.d vd, vj, uk6` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_wu_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk6)
+{
+    tcg_out32(s, encode_vdvjuk6_insn(OPC_VSSRARNI_WU_D, vd, vj, uk6));
+}
+
+/* Emits the `vssrarni.du.q vd, vj, uk7` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vssrarni_du_q(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk7)
+{
+    tcg_out32(s, encode_vdvjuk7_insn(OPC_VSSRARNI_DU_Q, vd, vj, uk7));
+}
+
+/* Emits the `vextrins.d vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_D, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_W, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.h vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_H, vd, vj, uk8));
+}
+
+/* Emits the `vextrins.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vextrins_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VEXTRINS_B, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_B, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.h vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_h(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_H, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_W, vd, vj, uk8));
+}
+
+/* Emits the `vshuf4i.d vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vshuf4i_d(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VSHUF4I_D, vd, vj, uk8));
+}
+
+/* Emits the `vbitseli.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vbitseli_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VBITSELI_B, vd, vj, uk8));
+}
+
+/* Emits the `vandi.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vandi_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VANDI_B, vd, vj, uk8));
+}
+
+/* Emits the `vori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vxori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vxori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VXORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vnori.b vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vnori_b(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VNORI_B, vd, vj, uk8));
+}
+
+/* Emits the `vldi vd, sj13` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vldi(TCGContext *s, TCGReg vd, int32_t sj13)
+{
+    tcg_out32(s, encode_vdsj13_insn(OPC_VLDI, vd, sj13));
+}
+
+/* Emits the `vpermi.w vd, vj, uk8` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_vpermi_w(TCGContext *s, TCGReg vd, TCGReg vj, uint32_t uk8)
+{
+    tcg_out32(s, encode_vdvjuk8_insn(OPC_VPERMI_W, vd, vj, uk8));
 }
 
 /* End of generated code.  */
index c2bde44613a65c1e91a775cfd5389993bbc964fc..77d62e38e747113c46856800ece3aac60686c3d4 100644 (file)
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
+C_O0_I2(w, r)
+C_O0_I3(r, r, r)
 C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
@@ -29,4 +33,9 @@ C_O1_I2(r, 0, rZ)
 C_O1_I2(r, rZ, ri)
 C_O1_I2(r, rZ, rJ)
 C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wM)
+C_O1_I2(w, w, wA)
+C_O1_I3(w, w, w, w)
 C_O1_I4(r, rZ, rJ, rZ, rZ)
+C_O2_I1(r, r, r)
index 6e9ccca3ad70a85c8084ac25fe3598afc2938eaa..2ba9c135ac19ed47a3dc80b08d4d1d4c70196d9f 100644 (file)
@@ -14,6 +14,7 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
+REGS('w', ALL_VECTOR_REGS)
 
 /*
  * Define constraint letters for constants:
@@ -25,3 +26,5 @@ CONST('U', TCG_CT_CONST_U12)
 CONST('Z', TCG_CT_CONST_ZERO)
 CONST('C', TCG_CT_CONST_C12)
 CONST('W', TCG_CT_CONST_WSZ)
+CONST('M', TCG_CT_CONST_VCMP)
+CONST('A', TCG_CT_CONST_VADD)
index baf5fc38195d923a1b75f9562bbb0d25966b36ce..b701df50db82ae428b38fb8c603813f01a999f9d 100644 (file)
@@ -32,6 +32,8 @@
 #include "../tcg-ldst.c.inc"
 #include <asm/hwcap.h>
 
+bool use_lsx_instructions;
+
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "zero",
@@ -65,7 +67,39 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "s5",
     "s6",
     "s7",
-    "s8"
+    "s8",
+    "vr0",
+    "vr1",
+    "vr2",
+    "vr3",
+    "vr4",
+    "vr5",
+    "vr6",
+    "vr7",
+    "vr8",
+    "vr9",
+    "vr10",
+    "vr11",
+    "vr12",
+    "vr13",
+    "vr14",
+    "vr15",
+    "vr16",
+    "vr17",
+    "vr18",
+    "vr19",
+    "vr20",
+    "vr21",
+    "vr22",
+    "vr23",
+    "vr24",
+    "vr25",
+    "vr26",
+    "vr27",
+    "vr28",
+    "vr29",
+    "vr30",
+    "vr31",
 };
 #endif
 
@@ -102,6 +136,15 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_A2,
     TCG_REG_A1,
     TCG_REG_A0,
+
+    /* Vector registers */
+    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    /* V24 - V31 are caller-saved, and skipped.  */
 };
 
 static const int tcg_target_call_iarg_regs[] = {
@@ -133,8 +176,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_U12   0x800
 #define TCG_CT_CONST_C12   0x1000
 #define TCG_CT_CONST_WSZ   0x2000
+#define TCG_CT_CONST_VCMP  0x4000
+#define TCG_CT_CONST_VADD  0x8000
 
 #define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
+#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
 
 static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 {
@@ -142,7 +188,7 @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return true;
@@ -165,6 +211,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
     if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
         return true;
     }
+    int64_t vec_val = sextract64(val, 0, 8 << vece);
+    if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) {
+        return true;
+    }
+    if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) {
+        return true;
+    }
     return false;
 }
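As a rough standalone illustration of the new vece parameter (not QEMU code; sextract64 is re-implemented locally so the snippet builds on its own), the constant is first narrowed to the element width and only then checked against the VCMP (-0x10..0x1f) and VADD (-0x1f..0x1f) windows:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for QEMU's sextract64(). */
static int64_t sextract64(uint64_t value, int start, int length)
{
    return (int64_t)(value << (64 - length - start)) >> (64 - length);
}

int main(void)
{
    /* -16 as a byte/halfword element, 65520 as a word/doubleword element. */
    uint64_t val = 0xfff0ull;

    for (int vece = 0; vece <= 3; vece++) {
        int64_t v = sextract64(val, 0, 8 << vece);
        printf("vece=%d value=%" PRId64 " vcmp_ok=%d vadd_ok=%d\n",
               vece, v, -0x10 <= v && v <= 0x1f, -0x1f <= v && v <= 0x1f);
    }
    return 0;
}

The same 64-bit value can therefore satisfy the constraint for byte and halfword elements while failing it for word and doubleword elements, which is why the match function now needs the element size.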
 
@@ -1028,6 +1081,48 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     }
 }
 
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
+                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
+{
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
+
+    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
+
+    if (h.aa.atom == MO_128) {
+        /*
+         * Use VLDX/VSTX when 128-bit atomicity is required.
+         * If the address is 16-byte aligned, the 128-bit load/store is atomic.
+         */
+        if (is_ld) {
+            tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
+            tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
+            tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
+        } else {
+            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
+            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
+            tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
+        }
+    } else {
+        /* Otherwise use a pair of LD/ST. */
+        tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index);
+        if (is_ld) {
+            tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0);
+            tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8);
+        } else {
+            tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0);
+            tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8);
+        }
+    }
+
+    if (ldst) {
+        ldst->type = TCG_TYPE_I128;
+        ldst->datalo_reg = data_lo;
+        ldst->datahi_reg = data_hi;
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    }
+}
+
 /*
  * Entry-points
  */
@@ -1092,6 +1187,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     TCGArg a0 = args[0];
     TCGArg a1 = args[1];
     TCGArg a2 = args[2];
+    TCGArg a3 = args[3];
     int c2 = const_args[2];
 
     switch (opc) {
@@ -1454,6 +1550,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld_a64_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
+        break;
     case INDEX_op_qemu_st_a32_i32:
     case INDEX_op_qemu_st_a64_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
@@ -1462,6 +1562,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_st_a64_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
+        break;
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
@@ -1486,6 +1590,444 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     }
 }
 
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg rd, TCGReg rs)
+{
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vreplgr2vr_b(s, rd, rs);
+        break;
+    case MO_16:
+        tcg_out_opc_vreplgr2vr_h(s, rd, rs);
+        break;
+    case MO_32:
+        tcg_out_opc_vreplgr2vr_w(s, rd, rs);
+        break;
+    case MO_64:
+        tcg_out_opc_vreplgr2vr_d(s, rd, rs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg r, TCGReg base, intptr_t offset)
+{
+    /*
+     * Handle offset overflow and element scaling: the vldrepl.{b,h,w,d}
+     * immediate is encoded in element-size units (e.g. divided by 8 for .d).
+     */
+    if (offset < -0x800 || offset > 0x7ff ||
+        (offset & ((1 << vece) - 1)) != 0) {
+        tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
+        base = TCG_REG_TMP0;
+        offset = 0;
+    }
+    offset >>= vece;
+
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vldrepl_b(s, r, base, offset);
+        break;
+    case MO_16:
+        tcg_out_opc_vldrepl_h(s, r, base, offset);
+        break;
+    case MO_32:
+        tcg_out_opc_vldrepl_w(s, r, base, offset);
+        break;
+    case MO_64:
+        tcg_out_opc_vldrepl_d(s, r, base, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg rd, int64_t v64)
+{
+    /* Try vldi if imm can fit */
+    int64_t value = sextract64(v64, 0, 8 << vece);
+    if (-0x200 <= value && value <= 0x1FF) {
+        uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
+        tcg_out_opc_vldi(s, rd, imm);
+        return;
+    }
+
+    /* TODO: vldi patterns when bit 12 of the immediate is set */
+
+    /* Fall back to vreplgr2vr. */
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
+    switch (vece) {
+    case MO_8:
+        tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_16:
+        tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_32:
+        tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
+        break;
+    case MO_64:
+        tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
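For illustration, a hedged sketch of the single vldi pattern emitted above (vldi_imm is a made-up helper, not a QEMU function): bit 12 of the 13-bit immediate stays clear, bits 11:10 carry vece, and the low 10 bits carry the sign-truncated value. The bit-12-set pattern groups from the TODO are not modeled.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the imm computed by tcg_out_dupi_vec. */
static uint32_t vldi_imm(unsigned vece, int64_t value)
{
    /* The caller has already verified -0x200 <= value <= 0x1ff. */
    return (vece << 10) | ((uint32_t)value & 0x3ff);
}

int main(void)
{
    printf("dup8  of  -1 -> vldi 0x%04x\n", vldi_imm(0, -1));   /* 0x03ff */
    printf("dup32 of  42 -> vldi 0x%04x\n", vldi_imm(2, 42));   /* 0x082a */
    printf("dup64 of -64 -> vldi 0x%04x\n", vldi_imm(3, -64));  /* 0x0fc0 */
    return 0;
}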
+static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
+                               const TCGArg a1, const TCGArg a2,
+                               bool a2_is_const, bool is_add)
+{
+    static const LoongArchInsn add_vec_insn[4] = {
+        OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+    };
+    static const LoongArchInsn add_vec_imm_insn[4] = {
+        OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+    };
+    static const LoongArchInsn sub_vec_insn[4] = {
+        OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+    };
+    static const LoongArchInsn sub_vec_imm_insn[4] = {
+        OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+    };
+
+    if (a2_is_const) {
+        int64_t value = sextract64(a2, 0, 8 << vece);
+        if (!is_add) {
+            value = -value;
+        }
+
+        /* Try vaddi/vsubi */
+        if (0 <= value && value <= 0x1f) {
+            tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0,
+                                             a1, value));
+            return;
+        } else if (-0x1f <= value && value < 0) {
+            tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0,
+                                             a1, -value));
+            return;
+        }
+
+        /* The TCG_CT_CONST_VADD constraint ensures this is unreachable. */
+        g_assert_not_reached();
+    }
+
+    if (is_add) {
+        tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+    } else {
+        tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+    }
+}
+
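A hedged, standalone model of the immediate folding above (pick_imm_insn is illustrative only): subtraction of a constant is rewritten as addition of its negation, and the sign of the result selects vaddi or vsubi; the TCG_CT_CONST_VADD constraint keeps the magnitude within 0x1f so one of the two always matches.

#include <stdint.h>
#include <stdio.h>

static void pick_imm_insn(int64_t a2, int is_add)
{
    int64_t value = is_add ? a2 : -a2;

    if (0 <= value && value <= 0x1f) {
        printf("%s #%lld -> vaddi #%lld\n",
               is_add ? "add" : "sub", (long long)a2, (long long)value);
    } else if (-0x1f <= value && value < 0) {
        printf("%s #%lld -> vsubi #%lld\n",
               is_add ? "add" : "sub", (long long)a2, (long long)-value);
    }
}

int main(void)
{
    pick_imm_insn(3, 1);    /* add  #3 -> vaddi #3 */
    pick_imm_insn(3, 0);    /* sub  #3 -> vsubi #3 */
    pick_imm_insn(-3, 0);   /* sub #-3 -> vaddi #3 */
    return 0;
}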
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+                           unsigned vecl, unsigned vece,
+                           const TCGArg args[TCG_MAX_OP_ARGS],
+                           const int const_args[TCG_MAX_OP_ARGS])
+{
+    TCGType type = vecl + TCG_TYPE_V64;
+    TCGArg a0, a1, a2, a3;
+    TCGReg temp = TCG_REG_TMP0;
+    TCGReg temp_vec = TCG_VEC_TMP0;
+
+    static const LoongArchInsn cmp_vec_insn[16][4] = {
+        [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
+        [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
+        [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
+        [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
+        [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
+    };
+    static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
+        [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
+        [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
+        [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
+        [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
+        [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
+    };
+    LoongArchInsn insn;
+    static const LoongArchInsn neg_vec_insn[4] = {
+        OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
+    };
+    static const LoongArchInsn mul_vec_insn[4] = {
+        OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
+    };
+    static const LoongArchInsn smin_vec_insn[4] = {
+        OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
+    };
+    static const LoongArchInsn umin_vec_insn[4] = {
+        OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
+    };
+    static const LoongArchInsn smax_vec_insn[4] = {
+        OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
+    };
+    static const LoongArchInsn umax_vec_insn[4] = {
+        OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
+    };
+    static const LoongArchInsn ssadd_vec_insn[4] = {
+        OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D
+    };
+    static const LoongArchInsn usadd_vec_insn[4] = {
+        OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU
+    };
+    static const LoongArchInsn sssub_vec_insn[4] = {
+        OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D
+    };
+    static const LoongArchInsn ussub_vec_insn[4] = {
+        OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU
+    };
+    static const LoongArchInsn shlv_vec_insn[4] = {
+        OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D
+    };
+    static const LoongArchInsn shrv_vec_insn[4] = {
+        OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D
+    };
+    static const LoongArchInsn sarv_vec_insn[4] = {
+        OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D
+    };
+    static const LoongArchInsn shli_vec_insn[4] = {
+        OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D
+    };
+    static const LoongArchInsn shri_vec_insn[4] = {
+        OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D
+    };
+    static const LoongArchInsn sari_vec_insn[4] = {
+        OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D
+    };
+    static const LoongArchInsn rotrv_vec_insn[4] = {
+        OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
+    };
+
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+    a3 = args[3];
+
+    /* Currently only supports V128 */
+    tcg_debug_assert(type == TCG_TYPE_V128);
+
+    switch (opc) {
+    case INDEX_op_st_vec:
+        /* Try to fit vst imm */
+        if (-0x800 <= a2 && a2 <= 0x7ff) {
+            tcg_out_opc_vst(s, a0, a1, a2);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
+            tcg_out_opc_vstx(s, a0, a1, temp);
+        }
+        break;
+    case INDEX_op_ld_vec:
+        /* Try to fit vld imm */
+        if (-0x800 <= a2 && a2 <= 0x7ff) {
+            tcg_out_opc_vld(s, a0, a1, a2);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
+            tcg_out_opc_vldx(s, a0, a1, temp);
+        }
+        break;
+    case INDEX_op_and_vec:
+        tcg_out_opc_vand_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_andc_vec:
+        /*
+         * vandn vd, vj, vk: vd = vk & ~vj
+         * andc_vec vd, vj, vk: vd = vj & ~vk
+         * vj and vk are swapped
+         */
+        tcg_out_opc_vandn_v(s, a0, a2, a1);
+        break;
+    case INDEX_op_or_vec:
+        tcg_out_opc_vor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_orc_vec:
+        tcg_out_opc_vorn_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_xor_vec:
+        tcg_out_opc_vxor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_nor_vec:
+        tcg_out_opc_vnor_v(s, a0, a1, a2);
+        break;
+    case INDEX_op_not_vec:
+        tcg_out_opc_vnor_v(s, a0, a1, a1);
+        break;
+    case INDEX_op_cmp_vec:
+        TCGCond cond = args[3];
+        if (const_args[2]) {
+            /*
+             * cmp_vec dest, src, value
+             * Try vseqi/vslei/vslti
+             */
+            int64_t value = sextract64(a2, 0, 8 << vece);
+            if ((cond == TCG_COND_EQ || cond == TCG_COND_LE ||
+                 cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
+                tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece],
+                                                 a0, a1, value));
+                break;
+            } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
+                (0x00 <= value && value <= 0x1f)) {
+                tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece],
+                                                 a0, a1, value));
+                break;
+            }
+
+            /*
+             * Fall back to:
+             * dupi_vec temp, a2
+             * cmp_vec a0, a1, temp, cond
+             */
+            tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
+            a2 = temp_vec;
+        }
+
+        insn = cmp_vec_insn[cond][vece];
+        if (insn == 0) {
+            TCGArg t;
+            t = a1, a1 = a2, a2 = t;
+            cond = tcg_swap_cond(cond);
+            insn = cmp_vec_insn[cond][vece];
+            tcg_debug_assert(insn != 0);
+        }
+        tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
+        break;
+    case INDEX_op_add_vec:
+        tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+        break;
+    case INDEX_op_sub_vec:
+        tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+        break;
+    case INDEX_op_neg_vec:
+        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
+        break;
+    case INDEX_op_mul_vec:
+        tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_smin_vec:
+        tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_smax_vec:
+        tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_umin_vec:
+        tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_umax_vec:
+        tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_ssadd_vec:
+        tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_usadd_vec:
+        tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sssub_vec:
+        tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_ussub_vec:
+        tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shlv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shrv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sarv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shli_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_shri_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_sari_vec:
+        tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_rotrv_vec:
+        tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1, a2));
+        break;
+    case INDEX_op_rotlv_vec:
+        /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
+        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
+        tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1,
+                                        temp_vec));
+        break;
+    case INDEX_op_rotli_vec:
+        /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
+        a2 = extract32(-a2, 0, 3 + vece);
+        switch (vece) {
+        case MO_8:
+            tcg_out_opc_vrotri_b(s, a0, a1, a2);
+            break;
+        case MO_16:
+            tcg_out_opc_vrotri_h(s, a0, a1, a2);
+            break;
+        case MO_32:
+            tcg_out_opc_vrotri_w(s, a0, a1, a2);
+            break;
+        case MO_64:
+            tcg_out_opc_vrotri_d(s, a0, a1, a2);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        break;
+    case INDEX_op_bitsel_vec:
+        /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
+        tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
+        break;
+    case INDEX_op_dupm_vec:
+        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
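The rotlv/rotli cases above lean on the identity rotl(x, n) == rotr(x, (-n) mod width). A scalar, byte-sized sketch of that rewrite (rotr8 is illustrative, not QEMU code):

#include <stdint.h>
#include <stdio.h>

static uint8_t rotr8(uint8_t x, unsigned n)
{
    n &= 7;
    return (uint8_t)((x >> n) | (x << ((8 - n) & 7)));
}

int main(void)
{
    uint8_t x = 0x81;
    unsigned n = 3;

    /* rotl(0x81, 3) == 0x0c, reached here as rotr(0x81, (-3) & 7). */
    printf("rotr(0x%02x, %u) = 0x%02x\n", x, (-n) & 7, rotr8(x, (-n) & 7));
    return 0;
}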
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+    switch (opc) {
+    case INDEX_op_ld_vec:
+    case INDEX_op_st_vec:
+    case INDEX_op_dup_vec:
+    case INDEX_op_dupm_vec:
+    case INDEX_op_cmp_vec:
+    case INDEX_op_add_vec:
+    case INDEX_op_sub_vec:
+    case INDEX_op_and_vec:
+    case INDEX_op_andc_vec:
+    case INDEX_op_or_vec:
+    case INDEX_op_orc_vec:
+    case INDEX_op_xor_vec:
+    case INDEX_op_nor_vec:
+    case INDEX_op_not_vec:
+    case INDEX_op_neg_vec:
+    case INDEX_op_mul_vec:
+    case INDEX_op_smin_vec:
+    case INDEX_op_smax_vec:
+    case INDEX_op_umin_vec:
+    case INDEX_op_umax_vec:
+    case INDEX_op_ssadd_vec:
+    case INDEX_op_usadd_vec:
+    case INDEX_op_sssub_vec:
+    case INDEX_op_ussub_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+    case INDEX_op_bitsel_vec:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+                       TCGArg a0, ...)
+{
+    g_assert_not_reached();
+}
+
 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
     switch (op) {
@@ -1505,6 +2047,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_st_a64_i64:
         return C_O0_I2(rZ, r);
 
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        return C_O2_I1(r, r, r);
+
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        return C_O0_I3(r, r, r);
+
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
         return C_O0_I2(rZ, rZ);
@@ -1627,6 +2177,54 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_movcond_i64:
         return C_O1_I4(r, rZ, rJ, rZ, rZ);
 
+    case INDEX_op_ld_vec:
+    case INDEX_op_dupm_vec:
+    case INDEX_op_dup_vec:
+        return C_O1_I1(w, r);
+
+    case INDEX_op_st_vec:
+        return C_O0_I2(w, r);
+
+    case INDEX_op_cmp_vec:
+        return C_O1_I2(w, w, wM);
+
+    case INDEX_op_add_vec:
+    case INDEX_op_sub_vec:
+        return C_O1_I2(w, w, wA);
+
+    case INDEX_op_and_vec:
+    case INDEX_op_andc_vec:
+    case INDEX_op_or_vec:
+    case INDEX_op_orc_vec:
+    case INDEX_op_xor_vec:
+    case INDEX_op_nor_vec:
+    case INDEX_op_mul_vec:
+    case INDEX_op_smin_vec:
+    case INDEX_op_smax_vec:
+    case INDEX_op_umin_vec:
+    case INDEX_op_umax_vec:
+    case INDEX_op_ssadd_vec:
+    case INDEX_op_usadd_vec:
+    case INDEX_op_sssub_vec:
+    case INDEX_op_ussub_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+    case INDEX_op_rotrv_vec:
+    case INDEX_op_rotlv_vec:
+        return C_O1_I2(w, w, w);
+
+    case INDEX_op_not_vec:
+    case INDEX_op_neg_vec:
+    case INDEX_op_shli_vec:
+    case INDEX_op_shri_vec:
+    case INDEX_op_sari_vec:
+    case INDEX_op_rotli_vec:
+        return C_O1_I1(w, w);
+
+    case INDEX_op_bitsel_vec:
+        return C_O1_I3(w, w, w, w);
+
     default:
         g_assert_not_reached();
     }
@@ -1698,6 +2296,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_target_init(TCGContext *s)
 {
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@@ -1708,6 +2311,10 @@ static void tcg_target_init(TCGContext *s)
         exit(EXIT_FAILURE);
     }
 
+    if (hwcap & HWCAP_LOONGARCH_LSX) {
+        use_lsx_instructions = 1;
+    }
+
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
     tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
@@ -1723,6 +2330,18 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
+    if (use_lsx_instructions) {
+        tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
+        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
+    }
+
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
@@ -1731,6 +2350,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
 }
 
 typedef struct {
index 559be67186633dc30de1ed975f79e1a8fc6d18d9..03017672f64896f44b79d8c09654aea1a53bb6a5 100644 (file)
@@ -30,7 +30,7 @@
 #define LOONGARCH_TCG_TARGET_H
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 
 #define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
@@ -68,13 +68,25 @@ typedef enum {
     TCG_REG_S7,
     TCG_REG_S8,
 
+    TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
     /* aliases */
     TCG_AREG0    = TCG_REG_S0,
     TCG_REG_TMP0 = TCG_REG_T8,
     TCG_REG_TMP1 = TCG_REG_T7,
     TCG_REG_TMP2 = TCG_REG_T6,
+    TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;
 
+extern bool use_lsx_instructions;
+
 /* used for function call generation */
 #define TCG_REG_CALL_STACK              TCG_REG_SP
 #define TCG_TARGET_STACK_ALIGN          16
@@ -159,7 +171,31 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_qemu_ldst_i128   0
+#define TCG_TARGET_HAS_qemu_ldst_i128   use_lsx_instructions
+
+#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v128             use_lsx_instructions
+#define TCG_TARGET_HAS_v256             0
+
+#define TCG_TARGET_HAS_not_vec          1
+#define TCG_TARGET_HAS_neg_vec          1
+#define TCG_TARGET_HAS_abs_vec          0
+#define TCG_TARGET_HAS_andc_vec         1
+#define TCG_TARGET_HAS_orc_vec          1
+#define TCG_TARGET_HAS_nand_vec         0
+#define TCG_TARGET_HAS_nor_vec          1
+#define TCG_TARGET_HAS_eqv_vec          0
+#define TCG_TARGET_HAS_mul_vec          1
+#define TCG_TARGET_HAS_shi_vec          1
+#define TCG_TARGET_HAS_shs_vec          0
+#define TCG_TARGET_HAS_shv_vec          1
+#define TCG_TARGET_HAS_roti_vec         1
+#define TCG_TARGET_HAS_rots_vec         0
+#define TCG_TARGET_HAS_rotv_vec         1
+#define TCG_TARGET_HAS_sat_vec          1
+#define TCG_TARGET_HAS_minmax_vec       1
+#define TCG_TARGET_HAS_bitsel_vec       1
+#define TCG_TARGET_HAS_cmpsel_vec       0
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
diff --git a/tcg/loongarch64/tcg-target.opc.h b/tcg/loongarch64/tcg-target.opc.h
new file mode 100644 (file)
index 0000000..fd1a40b
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2023 Jiajie Chen
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion.  These will be
+ * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
index 9faa8bdf0bf6a9a572837028a3d531fd04e2c1f1..f52bda482850c2ec8fc608ca99a966fb0d4828c1 100644 (file)
@@ -190,7 +190,7 @@ static bool is_p2m1(tcg_target_long val)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2628,6 +2628,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_target_init(TCGContext *s)
 {
     tcg_target_detect_isa();
index 090f11e71cdc0cdb0af2a8c379a95d9170253f3e..90d76c2c2c75298928509a5f03b1a649df210d9b 100644 (file)
@@ -261,7 +261,7 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2527,6 +2527,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
 {
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
index 2b28ed3556039d86f16c29ea17934c2ebcfa2bc4..a07889909642e0cad6f9199832e1e0e0902a3443 100644 (file)
 #include "tcg/tcg.h"
 #include "exec/translation-block.h"
 #include "tcg-internal.h"
+#include "host/cpuinfo.h"
 
 
+/*
+ * Local source-level compatibility with Unix.
+ * Used by tcg_region_init below.
+ */
+#if defined(_WIN32)
+#define PROT_READ   1
+#define PROT_WRITE  2
+#define PROT_EXEC   4
+#endif
+
 struct tcg_region_tree {
     QemuMutex lock;
     QTree *tree;
@@ -83,6 +94,18 @@ bool in_code_gen_buffer(const void *p)
     return (size_t)(p - region.start_aligned) <= region.total_size;
 }
 
+#ifndef CONFIG_TCG_INTERPRETER
+static int host_prot_read_exec(void)
+{
+#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
+    if (cpuinfo & CPUINFO_BTI) {
+        return PROT_READ | PROT_EXEC | PROT_BTI;
+    }
+#endif
+    return PROT_READ | PROT_EXEC;
+}
+#endif
+
 #ifdef CONFIG_DEBUG_TCG
 const void *tcg_splitwx_to_rx(void *rw)
 {
@@ -505,14 +528,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
     return PROT_READ | PROT_WRITE;
 }
 #elif defined(_WIN32)
-/*
- * Local source-level compatibility with Unix.
- * Used by tcg_region_init below.
- */
-#define PROT_READ   1
-#define PROT_WRITE  2
-#define PROT_EXEC   4
-
 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
 {
     void *buf;
@@ -567,7 +582,7 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
         goto fail;
     }
 
-    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+    buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
     if (buf_rx == MAP_FAILED) {
         goto fail_rx;
     }
@@ -642,7 +657,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
         return -1;
     }
 
-    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+    if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
         error_setg_errno(errp, errno, "mprotect for jit splitwx");
         munmap((void *)buf_rx, size);
         munmap((void *)buf_rw, size);
@@ -805,7 +820,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
     need_prot = PROT_READ | PROT_WRITE;
 #ifndef CONFIG_TCG_INTERPRETER
     if (tcg_splitwx_diff == 0) {
-        need_prot |= PROT_EXEC;
+        need_prot |= host_prot_read_exec();
     }
 #endif
     for (size_t i = 0, n = region.n; i < n; i++) {
@@ -820,7 +835,11 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
             } else if (need_prot == (PROT_READ | PROT_WRITE)) {
                 rc = qemu_mprotect_rw(start, end - start);
             } else {
+#ifdef CONFIG_POSIX
+                rc = mprotect(start, end - start, need_prot);
+#else
                 g_assert_not_reached();
+#endif
             }
             if (rc) {
                 error_setg_errno(&error_fatal, errno,
index 9be81c1b7b82d8d0717d63654f5152ea2fa70291..c2bcdea33f97af53b9176e8bcb42c718d775b150 100644 (file)
@@ -145,7 +145,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define sextreg  sextract64
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -2099,6 +2099,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static volatile sig_atomic_t got_sigill;
 
 static void sigill_handler(int signo, siginfo_t *si, void *data)
index ecd8aaf2a1384471adcc49eefbe3cd59cba67453..7552f63a0575ca353e7b351f7f1f94f3d32cfeb3 100644 (file)
@@ -540,7 +540,7 @@ static bool risbg_mask(uint64_t c)
 }
 
 /* Test if a constant matches the constraint. */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -3483,6 +3483,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     memset(p, 0x07, count * sizeof(tcg_insn_unit));
index 81a08bb6c52e14a26a190176358f62629dbd7a53..01ac26c1920c85ff3d13bd7853183cc2895324b6 100644 (file)
@@ -322,7 +322,7 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
 }
 
 /* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -962,6 +962,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_movi_s13(s, TCG_REG_O0, 0);
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     int i;
index e260a07c613bbf12216de9e3b8375bc528261305..41b1ae18e4422497c8ba7c792b2efb8327cdd948 100644 (file)
@@ -3846,6 +3846,155 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
     }
 }
 
+static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+                            uint32_t oprsz, uint32_t tysz, TCGType type,
+                            TCGCond cond, TCGv_vec c)
+{
+    TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += tysz) {
+        tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+        tcg_gen_cmp_vec(cond, vece, t0, t1, c);
+        tcg_gen_st_vec(t0, cpu_env, dofs + i);
+    }
+}
+
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, TCGv_i64 c,
+                       uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
+    static gen_helper_gvec_2i * const eq_fn[4] = {
+        gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
+        gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
+    };
+    static gen_helper_gvec_2i * const lt_fn[4] = {
+        gen_helper_gvec_lts8, gen_helper_gvec_lts16,
+        gen_helper_gvec_lts32, gen_helper_gvec_lts64
+    };
+    static gen_helper_gvec_2i * const le_fn[4] = {
+        gen_helper_gvec_les8, gen_helper_gvec_les16,
+        gen_helper_gvec_les32, gen_helper_gvec_les64
+    };
+    static gen_helper_gvec_2i * const ltu_fn[4] = {
+        gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
+        gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
+    };
+    static gen_helper_gvec_2i * const leu_fn[4] = {
+        gen_helper_gvec_leus8, gen_helper_gvec_leus16,
+        gen_helper_gvec_leus32, gen_helper_gvec_leus64
+    };
+    static gen_helper_gvec_2i * const * const fns[16] = {
+        [TCG_COND_EQ] = eq_fn,
+        [TCG_COND_LT] = lt_fn,
+        [TCG_COND_LE] = le_fn,
+        [TCG_COND_LTU] = ltu_fn,
+        [TCG_COND_LEU] = leu_fn,
+    };
+
+    TCGType type;
+
+    check_size_align(oprsz, maxsz, dofs | aofs);
+    check_overlap_2(dofs, aofs, maxsz);
+
+    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+        do_dup(MO_8, dofs, oprsz, maxsz,
+               NULL, NULL, -(cond == TCG_COND_ALWAYS));
+        return;
+    }
+
+    /*
+     * Implement inline with a vector type, if possible.
+     * Prefer integer when 64-bit host and 64-bit comparison.
+     */
+    type = choose_vector_type(cmp_list, vece, oprsz,
+                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
+    if (type != 0) {
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
+        TCGv_vec t_vec = tcg_temp_new_vec(type);
+        uint32_t some;
+
+        tcg_gen_dup_i64_vec(vece, t_vec, c);
+        switch (type) {
+        case TCG_TYPE_V256:
+            some = QEMU_ALIGN_DOWN(oprsz, 32);
+            expand_cmps_vec(vece, dofs, aofs, some, 32,
+                            TCG_TYPE_V256, cond, t_vec);
+            aofs += some;
+            dofs += some;
+            oprsz -= some;
+            maxsz -= some;
+            /* fallthru */
+
+        case TCG_TYPE_V128:
+            some = QEMU_ALIGN_DOWN(oprsz, 16);
+            expand_cmps_vec(vece, dofs, aofs, some, 16,
+                            TCG_TYPE_V128, cond, t_vec);
+            break;
+
+        case TCG_TYPE_V64:
+            some = QEMU_ALIGN_DOWN(oprsz, 8);
+            expand_cmps_vec(vece, dofs, aofs, some, 8,
+                            TCG_TYPE_V64, cond, t_vec);
+            break;
+
+        default:
+            g_assert_not_reached();
+        }
+        tcg_temp_free_vec(t_vec);
+        tcg_swap_vecop_list(hold_list);
+    } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
+        uint32_t i;
+
+        for (i = 0; i < oprsz; i += 8) {
+            tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+            tcg_gen_negsetcond_i64(cond, t0, t0, c);
+            tcg_gen_st_i64(t0, cpu_env, dofs + i);
+        }
+        tcg_temp_free_i64(t0);
+    } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
+        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
+        uint32_t i;
+
+        tcg_gen_extrl_i64_i32(t1, c);
+        for (i = 0; i < oprsz; i += 4) {
+            tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+            tcg_gen_negsetcond_i32(cond, t0, t0, t1);
+            tcg_gen_st_i32(t0, cpu_env, dofs + i);
+        }
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    } else {
+        gen_helper_gvec_2i * const *fn = fns[cond];
+        bool inv = false;
+
+        if (fn == NULL) {
+            cond = tcg_invert_cond(cond);
+            fn = fns[cond];
+            assert(fn != NULL);
+            inv = true;
+        }
+        tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
+        return;
+    }
+
+    if (oprsz < maxsz) {
+        expand_clr(dofs + oprsz, maxsz - oprsz);
+    }
+}
+
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, int64_t c,
+                       uint32_t oprsz, uint32_t maxsz)
+{
+    TCGv_i64 tmp = tcg_constant_i64(c);
+    tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
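A hedged, standalone model of the scalar fallback in tcg_gen_gvec_cmps: when no suitable vector type exists, each lane is replaced by the negated comparison result, i.e. all-ones when the condition holds and zero otherwise (the negsetcond idiom):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t lanes[4] = { -5, 0, 7, 100 };
    int64_t c = 7;

    for (int i = 0; i < 4; i++) {
        /* TCG_COND_LT: lane becomes -1 (all bits set) if lane < c, else 0. */
        int64_t mask = -(int64_t)(lanes[i] < c);
        printf("lane[%d] = %4lld -> mask = %lld\n",
               i, (long long)lanes[i], (long long)mask);
    }
    return 0;
}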
 static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
 {
     TCGv_i64 t = tcg_temp_ebb_new_i64();
index 620dbe08da16765c29a4963b478129f185814a99..604fa9bf3e8351b5fd5e5b116bfcc3196eb7e05d 100644 (file)
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -108,6 +108,7 @@ static void tcg_register_jit_int(const void *buf, size_t size,
     __attribute__((unused));
 
 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
+static void tcg_out_tb_start(TCGContext *s);
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                        intptr_t arg2);
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -171,7 +172,7 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                          const TCGHelperInfo *info);
 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
 #ifdef TCG_TARGET_NEED_LDST_LABELS
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
@@ -4689,7 +4690,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         ts = arg_temp(arg);
 
         if (ts->val_type == TEMP_VAL_CONST
-            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
+            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
             /* constant is OK for instruction */
             const_args[i] = 1;
             new_args[i] = ts->val;
@@ -6014,6 +6015,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     s->gen_insn_data =
         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
 
+    tcg_out_tb_start(s);
+
     num_insns = -1;
     QTAILQ_FOREACH(op, &s->ops, link) {
         TCGOpcode opc = op->opc;
index 253f27f1740fe1a4322bee2a0970b9035a3a233c..461f4b47ffcbce7c7082521b53ebcf673c155a77 100644 (file)
@@ -913,7 +913,7 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 }
 
 /* Test if a constant matches the constraint. */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
     return ct & TCG_CT_CONST;
 }
@@ -955,6 +955,11 @@ static inline void tcg_target_qemu_prologue(TCGContext *s)
 {
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 bool tcg_target_has_memory_bswap(MemOp memop)
 {
     return true;
index fa455f1474d2d8e32d441110ceea482bffa9e129..d25649cb4f0372d6bd6701f1a9977139fb8690b5 100644 (file)
@@ -59,6 +59,7 @@ RUN apk update && \
         libtasn1-dev \
         liburing-dev \
         libusb-dev \
+        libxdp-dev \
         linux-pam-dev \
         llvm \
         lttng-ust-dev \
index fc1830966f4233535cb203aa31233b636f725423..68bfe606f5d0d2cb1dce8e77726b471adede8811 100644 (file)
@@ -75,6 +75,7 @@ RUN dnf distro-sync -y && \
         libubsan \
         liburing-devel \
         libusbx-devel \
+        libxdp-devel \
         libzstd-devel \
         llvm \
         lttng-ust-devel \
index b66b9cc191b2ae04c6e0dd377dfcfe5588b17cc0..0cf3ba6d60a9dd9d76e8515b21a023945dd3d56a 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-x86-64-linux-gnu \
                       gcc-x86-64-linux-gnu \
                       libaio-dev:amd64 \
-                      libasan5:amd64 \
+                      libasan6:amd64 \
                       libasound2-dev:amd64 \
                       libattr1-dev:amd64 \
                       libbpf-dev:amd64 \
index 02262bc70ec2193ac5d740d8d2a0948e8e2faefe..e3e1de25ddb7e679370803223aace0709d44731e 100644 (file)
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbpf-dev \
index a0a968b8c67ffa098ba3966ceabd90557c5d05a1..d8cd4f87b615518e5be2a53bab7ec19c9088e406 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-aarch64-linux-gnu \
                       gcc-aarch64-linux-gnu \
                       libaio-dev:arm64 \
-                      libasan5:arm64 \
+                      libasan6:arm64 \
                       libasound2-dev:arm64 \
                       libattr1-dev:arm64 \
                       libbpf-dev:arm64 \
index f1fc34a28ac9151e076388d445cf2f812b3769c4..75342c09b0449d63dc02160d5a3af097ed6ac17b 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-arm-linux-gnueabi \
                       gcc-arm-linux-gnueabi \
                       libaio-dev:armel \
-                      libasan5:armel \
+                      libasan6:armel \
                       libasound2-dev:armel \
                       libattr1-dev:armel \
                       libbpf-dev:armel \
index a2785782119524ff255e858ad910f7fcccb111c6..f45cfedd3f2018149c6839b173a4383c594303be 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-arm-linux-gnueabihf \
                       gcc-arm-linux-gnueabihf \
                       libaio-dev:armhf \
-                      libasan5:armhf \
+                      libasan6:armhf \
                       libasound2-dev:armhf \
                       libattr1-dev:armhf \
                       libbpf-dev:armhf \
index 30e5efa986e9616a4bdbdc86d74cc99ecd99e09b..52f8c34814a162659156e9bb0d333caad1b60829 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-powerpc64le-linux-gnu \
                       gcc-powerpc64le-linux-gnu \
                       libaio-dev:ppc64el \
-                      libasan5:ppc64el \
+                      libasan6:ppc64el \
                       libasound2-dev:ppc64el \
                       libattr1-dev:ppc64el \
                       libbpf-dev:ppc64el \
index ee6db7b526bf0588eb6b89f26a167e167ed7d214..208e57bcf2744d5dcb12a051aa99afe279f0363f 100644 (file)
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       g++-s390x-linux-gnu \
                       gcc-s390x-linux-gnu \
                       libaio-dev:s390x \
-                      libasan5:s390x \
+                      libasan6:s390x \
                       libasound2-dev:s390x \
                       libattr1-dev:s390x \
                       libbpf-dev:s390x \
index c5b6c96943037062dffae29836b12c2a06744bb5..f00e9e267c9780831e8c152c941ece2612e9958b 100644 (file)
@@ -82,6 +82,7 @@ exec "$@"\n' > /usr/bin/nosync && \
                libubsan \
                liburing-devel \
                libusbx-devel \
+               libxdp-devel \
                libzstd-devel \
                llvm \
                lttng-ust-devel \
index fef8d5a2e45e30ebdd62e18e3b99e4c857e37c13..ed04b4d6dada0ec7794c4ebf5222433f8b61686e 100644 (file)
@@ -40,7 +40,7 @@ RUN zypper update -y && \
            libSDL2-devel \
            libSDL2_image-devel \
            libaio-devel \
-           libasan6 \
+           libasan8 \
            libattr-devel \
            libbpf-devel \
            libbz2-devel \
index 4180cd86740ba35437df6d5d9887c14d569f337f..d3e212060c6ebece90d53f7b9c4dc43af0135160 100644 (file)
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbrlapi-dev \
index 88493f00f6f9b853958078bebc17e433667272f4..94c2c16118a1d8730153ddd7b5faaf8416457f72 100644 (file)
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
                       git \
                       hostname \
                       libaio-dev \
-                      libasan5 \
+                      libasan6 \
                       libasound2-dev \
                       libattr1-dev \
                       libbpf-dev \
index bbd55b4d18cce8f89b5167675e434a6941315634..5f84a21881577a5fb56cc956f6fe4e2abd6fcff0 160000 (submodule)
@@ -1 +1 @@
-Subproject commit bbd55b4d18cce8f89b5167675e434a6941315634
+Subproject commit 5f84a21881577a5fb56cc956f6fe4e2abd6fcff0
index 584f78cb7f929d7d5bb13441f5d4b68740d35aca..6f0885170ddb177323b99fc1b672bffa6112618b 100644 (file)
@@ -69,6 +69,7 @@ packages:
  - liburing
  - libusbx
  - libvdeplug
+ - libxdp
  - libzstd
  - llvm
  - lttng-ust
index a603468beb8c79b5c239d1f0b0a3399fc7829a5b..f40c4ec4cd2a3cfb5baaded4321a49ceae43bca6 100644 (file)
@@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
                         E1000_RAH_AV | E1000_RAH_POOL_1 |
                         le16_to_cpu(*(uint16_t *)(address + 4)));
 
+    /* Set supported receive descriptor mode */
+    e1000e_macreg_write(&d->e1000e,
+                        E1000_SRRCTL(0),
+                        E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+
     /* Enable receive */
     e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
     e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
index 1163c7ef034d7d8d30df4497db2b02eebd30ae02..6ff214e60a5b3f8ccfdfb7dff562d8ab3cd9cc15 100644 (file)
@@ -4,7 +4,7 @@
 #
 
 VPATH += $(SRC_PATH)/tests/tcg/m68k
-TESTS += trap
+TESTS += trap denormal
 
 # On m68k Linux supports 4k and 8k pages (but 8k is currently broken)
 EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192
diff --git a/tests/tcg/m68k/denormal.c b/tests/tcg/m68k/denormal.c
new file mode 100644 (file)
index 0000000..20bd8c7
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Test m68k extended double denormals.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#define TEST(X, Y)  { X, Y, X * Y }
+
+static volatile long double test[][3] = {
+    TEST(0x1p+16383l, 0x1p-16446l),
+    TEST(0x1.1p-8223l, 0x1.1p-8224l),
+    TEST(1.0l, 0x1p-16383l),
+};
+
+#undef TEST
+
+static void dump_ld(const char *label, long double ld)
+{
+    union {
+        long double  d;
+        struct {
+            uint32_t exp:16;
+            uint32_t space:16;
+            uint32_t h;
+            uint32_t l;
+        };
+    } u;
+
+    u.d = ld;
+    printf("%12s: % -27La 0x%04x 0x%08x 0x%08x\n", label, u.d, u.exp, u.h, u.l);
+}
+
+int main(void)
+{
+    int i, n = sizeof(test) / sizeof(test[0]), err = 0;
+
+    for (i = 0; i < n; ++i) {
+        long double x = test[i][0];
+        long double y = test[i][1];
+        long double build_mul = test[i][2];
+        long double runtime_mul = x * y;
+
+        if (runtime_mul != build_mul) {
+            dump_ld("x", x);
+            dump_ld("y", y);
+            dump_ld("build_mul", build_mul);
+            dump_ld("runtime_mul", runtime_mul);
+            err = 1;
+        }
+    }
+    return err;
+}
index 7d39f47e3b880f8d601bd17f771fab27c5675cb9..4c8a0057150876b3af28b1a5b2c3e51c18363c0e 100644 (file)
@@ -13,6 +13,9 @@
 #  include <asm/hwcap.h>
 #  include "elf.h"
 # endif
+# ifndef HWCAP2_BTI
+#  define HWCAP2_BTI 0  /* added in glibc 2.32 */
+# endif
 #endif
 #ifdef CONFIG_DARWIN
 # include <sys/sysctl.h>
@@ -56,12 +59,18 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
     info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
     info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
-    info |= (hwcap & HWCAP_AES ? CPUINFO_AES: 0);
+    info |= (hwcap & HWCAP_AES ? CPUINFO_AES : 0);
+    info |= (hwcap & HWCAP_PMULL ? CPUINFO_PMULL : 0);
+
+    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+    info |= (hwcap2 & HWCAP2_BTI ? CPUINFO_BTI : 0);
 #endif
 #ifdef CONFIG_DARWIN
     info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
     info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
     info |= sysctl_for_bool("hw.optional.arm.FEAT_AES") * CPUINFO_AES;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_PMULL") * CPUINFO_PMULL;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_BTI") * CPUINFO_BTI;
 #endif
 
     cpuinfo = info;
index b2ed65bb1016f1cf9ac6f0c388361fd0d25948c1..9fddb18303dfcfc94f0885c6ea51f98bab97ca2a 100644 (file)
@@ -39,6 +39,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
+        info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
 
         /* Our AES support requires PSHUFB as well. */
         info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0);
index 4d583da7ce39522162ac3ba9b312a404b60e5de5..e86fd64e099877cec2818d948da659d261fde040 100644 (file)
@@ -585,7 +585,7 @@ char *qemu_get_pid_name(pid_t pid)
 
 void *qemu_alloc_stack(size_t *sz)
 {
-    void *ptr, *guardpage;
+    void *ptr;
     int flags;
 #ifdef CONFIG_DEBUG_STACK_USAGE
     void *ptr2;
@@ -618,17 +618,8 @@ void *qemu_alloc_stack(size_t *sz)
         abort();
     }
 
-#if defined(HOST_IA64)
-    /* separate register stack */
-    guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
-#elif defined(HOST_HPPA)
-    /* stack grows up */
-    guardpage = ptr + *sz - pagesz;
-#else
-    /* stack grows down */
-    guardpage = ptr;
-#endif
-    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
+    /* Stack grows down -- guard page at the bottom. */
+    if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
         perror("failed to set up stack guard page");
         abort();
     }