]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/arrow/go/arrow/memory/memory_avx2_amd64.s
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / memory / memory_avx2_amd64.s
diff --git a/ceph/src/arrow/go/arrow/memory/memory_avx2_amd64.s b/ceph/src/arrow/go/arrow/memory/memory_avx2_amd64.s
new file mode 100644 (file)
index 0000000..2a77807
--- /dev/null
@@ -0,0 +1,85 @@
+//+build !noasm !appengine
+// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
+
+// _memset_avx2 stores the low byte of c into each of the len bytes starting
+// at buf. The bulk is written with unaligned 32-byte AVX2 stores; whatever is
+// left over is written one byte at a time.
+//
+// Frame $0-24: no locals, three 8-byte arguments, no result.
+//   buf+0(FP)  -> DI   destination pointer
+//   len+8(FP)  -> SI   byte count
+//   c+16(FP)   -> DX   fill value (only DL is ever stored)
+// NOTE(review): the Go prototype is declared outside this file -- confirm it
+// matches these three word-sized arguments.
+//
+// Register roles once set up:
+//   R11      = buf + len (one past the last byte to write)
+//   R8, R10  = len & -128 (bytes covered by the vector code)
+//   R9       = R10 - 128
+//   Y0       = fill byte replicated into all 32 byte lanes
+//   AX       = loop counter, CX = running offset / store pointer
+//
+// Instructions are emitted as raw bytes (c2goasm output); the trailing comment
+// on each line is the Intel-syntax disassembly of those bytes.
+TEXT ·_memset_avx2(SB), $0-24
+
+       MOVQ buf+0(FP), DI
+       MOVQ len+8(FP), SI
+       MOVQ c+16(FP), DX
+
+       // R11 = end pointer. If end <= start (unsigned) -- len is 0 or buf+len
+       // wrapped around -- there is nothing to write.
+       LONG $0x371c8d4c                           // lea    r11, [rdi + rsi]
+       WORD $0x3949; BYTE $0xfb                   // cmp    r11, rdi
+       JBE  LBB0_13
+       // Fewer than 128 bytes: skip the vector code, use the byte loop only.
+       LONG $0x80fe8148; WORD $0x0000; BYTE $0x00 // cmp    rsi, 128
+       JB   LBB0_12
+       // R8 = R10 = len rounded down to a multiple of 128. The JE below cannot
+       // be taken here because len >= 128 was just established.
+       WORD $0x8949; BYTE $0xf0                   // mov    r8, rsi
+       LONG $0x80e08349                           // and    r8, -128
+       WORD $0x8949; BYTE $0xf2                   // mov    r10, rsi
+       LONG $0x80e28349                           // and    r10, -128
+       JE   LBB0_12
+       // Y0 = DL broadcast to 32 bytes.
+       LONG $0xc26ef9c5                           // vmovd    xmm0, edx
+       LONG $0x787de2c4; BYTE $0xc0               // vpbroadcastb    ymm0, xmm0
+       // AX = (number of 128-byte blocks) mod 4 = ((R9 >> 7) + 1) & 3.
+       // The 32-bit move truncates R9, but only the two low bits of the block
+       // count survive the final AND, so the result is unaffected.
+       LONG $0x804a8d4d                           // lea    r9, [r10 - 128]
+       WORD $0x8944; BYTE $0xc8                   // mov    eax, r9d
+       WORD $0xe8c1; BYTE $0x07                   // shr    eax, 7
+       WORD $0xc0ff                               // inc    eax
+       LONG $0x03e08348                           // and    rax, 3
+       JE   LBB0_4
+       // AX = -(blocks mod 4), counted up to zero; CX = byte offset from buf.
+       WORD $0xf748; BYTE $0xd8                   // neg    rax
+       WORD $0xc931                               // xor    ecx, ecx
+
+// Prologue loop: one 128-byte block (4 x 32-byte stores) per iteration, run
+// (blocks mod 4) times so the remaining block count is a multiple of 4.
+LBB0_6:
+       LONG $0x047ffec5; BYTE $0x0f   // vmovdqu    yword [rdi + rcx], ymm0
+       LONG $0x447ffec5; WORD $0x200f // vmovdqu    yword [rdi + rcx + 32], ymm0
+       LONG $0x447ffec5; WORD $0x400f // vmovdqu    yword [rdi + rcx + 64], ymm0
+       LONG $0x447ffec5; WORD $0x600f // vmovdqu    yword [rdi + rcx + 96], ymm0
+       LONG $0x80e98348               // sub    rcx, -128
+       WORD $0xff48; BYTE $0xc0       // inc    rax
+       JNE  LBB0_6
+       JMP  LBB0_7
+
+// Block count already a multiple of 4: no prologue, start at offset 0.
+LBB0_4:
+       WORD $0xc931 // xor    ecx, ecx
+
+// R9 < 384 means fewer than four 128-byte blocks in total, all of which the
+// prologue has already written. Otherwise AX = vector bytes still to write
+// (a multiple of 512) and CX = buf + offset + 480, so the 16 stores below
+// address [CX-480 .. CX+31].
+LBB0_7:
+       LONG $0x80f98149; WORD $0x0001; BYTE $0x00 // cmp    r9, 384
+       JB   LBB0_10
+       WORD $0x894c; BYTE $0xd0                   // mov    rax, r10
+       WORD $0x2948; BYTE $0xc8                   // sub    rax, rcx
+       QUAD $0x000001e00f8c8d48                   // lea    rcx, [rdi + rcx + 480]
+
+// Main loop: 16 x 32-byte stores = 512 bytes per iteration, until AX reaches 0.
+LBB0_9:
+       QUAD $0xfffffe20817ffec5                   // vmovdqu    yword [rcx - 480], ymm0
+       QUAD $0xfffffe40817ffec5                   // vmovdqu    yword [rcx - 448], ymm0
+       QUAD $0xfffffe60817ffec5                   // vmovdqu    yword [rcx - 416], ymm0
+       QUAD $0xfffffe80817ffec5                   // vmovdqu    yword [rcx - 384], ymm0
+       QUAD $0xfffffea0817ffec5                   // vmovdqu    yword [rcx - 352], ymm0
+       QUAD $0xfffffec0817ffec5                   // vmovdqu    yword [rcx - 320], ymm0
+       QUAD $0xfffffee0817ffec5                   // vmovdqu    yword [rcx - 288], ymm0
+       QUAD $0xffffff00817ffec5                   // vmovdqu    yword [rcx - 256], ymm0
+       QUAD $0xffffff20817ffec5                   // vmovdqu    yword [rcx - 224], ymm0
+       QUAD $0xffffff40817ffec5                   // vmovdqu    yword [rcx - 192], ymm0
+       QUAD $0xffffff60817ffec5                   // vmovdqu    yword [rcx - 160], ymm0
+       LONG $0x417ffec5; BYTE $0x80               // vmovdqu    yword [rcx - 128], ymm0
+       LONG $0x417ffec5; BYTE $0xa0               // vmovdqu    yword [rcx - 96], ymm0
+       LONG $0x417ffec5; BYTE $0xc0               // vmovdqu    yword [rcx - 64], ymm0
+       LONG $0x417ffec5; BYTE $0xe0               // vmovdqu    yword [rcx - 32], ymm0
+       LONG $0x017ffec5                           // vmovdqu    yword [rcx], ymm0
+       LONG $0x00c18148; WORD $0x0002; BYTE $0x00 // add    rcx, 512
+       LONG $0xfe000548; WORD $0xffff             // add    rax, -512
+       JNE  LBB0_9
+
+// Vector part done. If len was an exact multiple of 128 there is no tail;
+// otherwise advance DI past the vector-written bytes and finish bytewise.
+LBB0_10:
+       WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
+       JE   LBB0_13
+       WORD $0x014c; BYTE $0xc7 // add    rdi, r8
+
+// Byte loop: store DL at DI and advance until DI reaches the end pointer R11.
+// Also the whole job when len < 128.
+LBB0_12:
+       WORD $0x1788             // mov    byte [rdi], dl
+       WORD $0xff48; BYTE $0xc7 // inc    rdi
+       WORD $0x3949; BYTE $0xfb // cmp    r11, rdi
+       JNE  LBB0_12
+
+// Common exit. VZEROUPPER runs on every path, including the early exits that
+// never touched Y0.
+LBB0_13:
+       VZEROUPPER
+       RET