# Constant pool for int32_max_min_sse4: two 16-byte vectors in the read-only
# section .rodata.cst16 (flags "aM", entry size 16), aligned to 16 bytes by
# .p2align 4.
# NOTE(review): the labels of these two vectors are not visible in this excerpt
# (the leading listing numbers skip 6 and 11). The function loads .LCPI0_0 and
# .LCPI0_1 with matching values, so presumably those are the labels -- confirm
# against the full listing.
4 .section .rodata.cst16,"aM",@progbits,16
5 .p2align 4 # -- Begin function int32_max_min_sse4
# First vector: four 32-bit lanes of 0x80000000 (INT32_MIN when read as signed).
7 .long 2147483648 # 0x80000000
8 .long 2147483648 # 0x80000000
9 .long 2147483648 # 0x80000000
10 .long 2147483648 # 0x80000000
# Second vector: four 32-bit lanes of 0x7fffffff (INT32_MAX).
12 .long 2147483647 # 0x7fffffff
13 .long 2147483647 # 0x7fffffff
14 .long 2147483647 # 0x7fffffff
15 .long 2147483647 # 0x7fffffff
# int32_max_min_sse4 -- exported function symbol (.globl, @function).
#
# NOTE(review): this is an excerpt of a compiler listing. The number leading each
# line looks like the original listing line number, and the gaps between them
# (e.g. 20 -> 53) mean the compare/combine/branch instructions are not visible.
# Only the following is established by the lines shown:
#   rdi        base of the 32-bit input elements (addressed as [rdi + 4*index])
#   rcx, rdx   addresses that each receive one 32-bit result
#   xmm1       seeded with 0x80000000 in every lane (INT32_MIN)
#   xmm0       seeded with 0x7fffffff in every lane (INT32_MAX)
# Presumably xmm1 is the running maximum and xmm0 the running minimum, and the
# register use matches SysV AMD64 arguments 1, 4 and 3 -- TODO confirm against
# the source this was compiled from.
17 .globl int32_max_min_sse4
19 .type int32_max_min_sse4,@function
20 int32_max_min_sse4: # @int32_max_min_sse4
# Seed both vector registers from the 16-byte constants (aligned loads).
53 movdqa xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
54 movdqa xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
# Vector loop: four unaligned 16-byte loads at byte offsets 0/16/32/48 from
# rdi + 4*rax, i.e. 16 consecutive 32-bit elements per pass.
59 .LBB0_9: # =>This Inner Loop Header: Depth=1
60 movdqu xmm4, xmmword ptr [rdi + 4*rax]
61 movdqu xmm5, xmmword ptr [rdi + 4*rax + 16]
62 movdqu xmm6, xmmword ptr [rdi + 4*rax + 32]
63 movdqu xmm7, xmmword ptr [rdi + 4*rax + 48]
# Second load group covering only two vectors (8 elements); presumably a
# leftover step after the loop above -- the branch selecting it is not shown.
79 movdqu xmm4, xmmword ptr [rdi + 4*rax]
80 movdqu xmm5, xmmword ptr [rdi + 4*rax + 16]
# Shuffles on xmm1: immediate 78 swaps the two 64-bit halves, 229 copies
# element 1 into lane 0. Presumably the lane-folding steps of a horizontal
# reduction; the instructions combining the shuffled copies are not shown.
88 pshufd xmm2, xmm1, 78 # xmm2 = xmm1[2,3,0,1]
90 pshufd xmm1, xmm2, 229 # xmm1 = xmm2[1,1,2,3]
# Same shuffle pattern applied to xmm0.
93 pshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
95 pshufd xmm0, xmm1, 229 # xmm0 = xmm1[1,1,2,3]
# Scalar loop: reads one 32-bit element at index r11 per pass.
103 .LBB0_5: # =>This Inner Loop Header: Depth=1
104 mov eax, dword ptr [rdi + 4*r11]
# Write the two 32-bit results through the pointers held in rcx and rdx.
114 mov dword ptr [rcx], eax
115 mov dword ptr [rdx], r8d
# A second site that seeds xmm1/xmm0 with the same constants; presumably the
# entry to a different path -- the label and branch around it are not shown.
120 movdqa xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
121 movdqa xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
# Record the symbol size as the distance from the entry label to .Lfunc_end0
# (that label is not visible in this excerpt).
129 .size int32_max_min_sse4, .Lfunc_end0-int32_max_min_sse4
# uint32_max_min_sse4 -- exported function symbol (.globl, @function).
#
# NOTE(review): this is an excerpt of a compiler listing. The number leading each
# line looks like the original listing line number, and the gaps between them
# (e.g. 134 -> 173) mean the accumulator setup and the compare/combine/branch
# instructions are not visible. Only the following is established here:
#   rdi        base of the 32-bit input elements (addressed as [rdi + 4*index])
#   rcx, rdx   addresses that each receive one 32-bit result
# The name suggests a max/min reduction over unsigned 32-bit values, and the
# register use matches SysV AMD64 arguments 1, 4 and 3 -- TODO confirm against
# the source this was compiled from.
131 .globl uint32_max_min_sse4 # -- Begin function uint32_max_min_sse4
133 .type uint32_max_min_sse4,@function
134 uint32_max_min_sse4: # @uint32_max_min_sse4
# Vector loop: four unaligned 16-byte loads at byte offsets 0/16/32/48 from
# rdi + 4*rax, i.e. 16 consecutive 32-bit elements per pass.
173 .LBB1_9: # =>This Inner Loop Header: Depth=1
174 movdqu xmm4, xmmword ptr [rdi + 4*rax]
175 movdqu xmm5, xmmword ptr [rdi + 4*rax + 16]
176 movdqu xmm6, xmmword ptr [rdi + 4*rax + 32]
177 movdqu xmm7, xmmword ptr [rdi + 4*rax + 48]
# Second load group covering only two vectors (8 elements); presumably a
# leftover step after the loop above -- the branch selecting it is not shown.
193 movdqu xmm4, xmmword ptr [rdi + 4*rax]
194 movdqu xmm5, xmmword ptr [rdi + 4*rax + 16]
# Shuffles on xmm1: immediate 78 swaps the two 64-bit halves, 229 copies
# element 1 into lane 0. Presumably the lane-folding steps of a horizontal
# reduction; the instructions combining the shuffled copies are not shown.
202 pshufd xmm2, xmm1, 78 # xmm2 = xmm1[2,3,0,1]
204 pshufd xmm1, xmm2, 229 # xmm1 = xmm2[1,1,2,3]
# Same shuffle pattern applied to xmm0.
207 pshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
209 pshufd xmm0, xmm1, 229 # xmm0 = xmm1[1,1,2,3]
# Scalar loop: reads one 32-bit element at index r11 per pass.
217 .LBB1_5: # =>This Inner Loop Header: Depth=1
218 mov esi, dword ptr [rdi + 4*r11]
# Write the two 32-bit results through the pointers held in rcx and rdx.
228 mov dword ptr [rcx], esi
229 mov dword ptr [rdx], r8d
# Record the symbol size as the distance from the entry label to .Lfunc_end1
# (that label is not visible in this excerpt).
243 .size uint32_max_min_sse4, .Lfunc_end1-uint32_max_min_sse4
# Constant pool for int64_max_min_sse4: two 16-byte vectors in the read-only
# section .rodata.cst16 (flags "aM", entry size 16), aligned to 16 bytes by
# .p2align 4.
# NOTE(review): the labels of these two vectors are not visible in this excerpt
# (the leading listing numbers skip 247 and 250). The function loads .LCPI2_0
# and .LCPI2_1 with matching values, so presumably those are the labels --
# confirm against the full listing.
245 .section .rodata.cst16,"aM",@progbits,16
246 .p2align 4 # -- Begin function int64_max_min_sse4
# First vector: two 64-bit lanes of 0x8000000000000000 (INT64_MIN).
248 .quad -9223372036854775808 # 0x8000000000000000
249 .quad -9223372036854775808 # 0x8000000000000000
# Second vector: two 64-bit lanes of 0x7fffffffffffffff (INT64_MAX).
251 .quad 9223372036854775807 # 0x7fffffffffffffff
252 .quad 9223372036854775807 # 0x7fffffffffffffff
# int64_max_min_sse4 -- exported function symbol (.globl, @function).
#
# NOTE(review): this is an excerpt of a compiler listing. The number leading each
# line looks like the original listing line number, and the gaps between them
# mean the instructions that compute each blend mask, plus all branches, are not
# visible. Only the following is established by the lines shown:
#   rdi        base of the 64-bit input elements (addressed as [rdi + 8*index])
#   rcx, rdx   addresses that each receive one 64-bit result
#   r8         seeded with INT64_MAX and later stored through rdx
#   xmm9       seeded with 0x8000000000000000 in both lanes (INT64_MIN)
#   xmm8       seeded with 0x7fffffffffffffff in both lanes (INT64_MAX)
# Presumably r8/xmm8 track the minimum (written via rdx) and xmm9 the maximum
# (written via rcx), with register use matching SysV AMD64 arguments 1, 4 and 3
# -- TODO confirm against the source this was compiled from.
#
# Every blendvpd below names xmm0 as its third operand: the instruction takes
# its per-lane selection mask from xmm0, so the instruction order matters.
254 .globl int64_max_min_sse4
256 .type int64_max_min_sse4,@function
257 int64_max_min_sse4: # @int64_max_min_sse4
# Scalar seed: r8 = INT64_MAX.
262 movabs r8, 9223372036854775807
# Vector seeds loaded from the 16-byte constants (aligned loads).
289 movdqa xmm9, xmmword ptr [rip + .LCPI2_0] # xmm9 = [9223372036854775808,9223372036854775808]
290 movdqa xmm8, xmmword ptr [rip + .LCPI2_1] # xmm8 = [9223372036854775807,9223372036854775807]
# Vector loop: unaligned 16-byte loads at byte offsets 0/16/32/48 from
# rdi + 8*rax, i.e. 8 consecutive 64-bit elements per pass, each followed by
# mask-driven blends. Presumably a compare-then-select form of min/max; the
# compares that fill xmm0 are not shown.
295 .LBB2_9: # =>This Inner Loop Header: Depth=1
296 movdqu xmm7, xmmword ptr [rdi + 8*rax]
300 blendvpd xmm4, xmm8, xmm0
301 movdqu xmm1, xmmword ptr [rdi + 8*rax + 16]
305 blendvpd xmm5, xmm2, xmm0
308 blendvpd xmm7, xmm9, xmm0
311 blendvpd xmm1, xmm6, xmm0
312 movdqu xmm3, xmmword ptr [rdi + 8*rax + 32]
316 blendvpd xmm8, xmm4, xmm0
317 movdqu xmm4, xmmword ptr [rdi + 8*rax + 48]
321 blendvpd xmm2, xmm5, xmm0
324 blendvpd xmm3, xmm7, xmm0
327 blendvpd xmm4, xmm1, xmm0
# Second load group covering only two vectors (4 elements); presumably a
# leftover step after the loop above -- the branch selecting it is not shown.
337 movdqu xmm1, xmmword ptr [rdi + 8*rax + 16]
341 blendvpd xmm5, xmm4, xmm0
342 movdqu xmm4, xmmword ptr [rdi + 8*rax]
346 blendvpd xmm6, xmm3, xmm0
349 blendvpd xmm1, xmm2, xmm0
352 blendvpd xmm4, xmm8, xmm0
# Blend two registers, then pshufd 78 swaps the 64-bit halves so the two lanes
# can be blended against each other. Presumably the horizontal reduction of one
# accumulator down to a single lane.
360 blendvpd xmm4, xmm3, xmm0
361 pshufd xmm1, xmm4, 78 # xmm1 = xmm4[2,3,0,1]
364 blendvpd xmm1, xmm4, xmm0
# Same blend / half-swap / blend pattern for the other accumulator.
368 blendvpd xmm2, xmm8, xmm0
369 pshufd xmm1, xmm2, 78 # xmm1 = xmm2[2,3,0,1]
372 blendvpd xmm1, xmm2, xmm0
# Scalar loop: reads one 64-bit element at index r11 per pass.
379 .LBB2_5: # =>This Inner Loop Header: Depth=1
380 mov rsi, qword ptr [rdi + 8*r11]
# Write the two 64-bit results through the pointers held in rcx and rdx.
390 mov qword ptr [rcx], rsi
391 mov qword ptr [rdx], r8
# A second site that loads the same constants (into xmm3 and xmm8); presumably
# the entry to a different path -- the label and branch around it are not shown.
396 movapd xmm3, xmmword ptr [rip + .LCPI2_0] # xmm3 = [9223372036854775808,9223372036854775808]
397 movdqa xmm8, xmmword ptr [rip + .LCPI2_1] # xmm8 = [9223372036854775807,9223372036854775807]
# Record the symbol size as the distance from the entry label to .Lfunc_end2
# (that label is not visible in this excerpt).
405 .size int64_max_min_sse4, .Lfunc_end2-int64_max_min_sse4
# Constant pool for uint64_max_min_sse4: one 16-byte vector in the read-only
# section .rodata.cst16 (flags "aM", entry size 16), aligned to 16 bytes by
# .p2align 4.
# NOTE(review): the label of this vector is not visible in this excerpt (the
# leading listing numbers skip 409). The function loads .LCPI3_0 with a matching
# value, so presumably that is the label -- confirm against the full listing.
407 .section .rodata.cst16,"aM",@progbits,16
408 .p2align 4 # -- Begin function uint64_max_min_sse4
# Two 64-bit lanes with only the top bit set (0x8000000000000000). Presumably a
# sign-bit mask used to order unsigned values with signed compares; the
# instructions that consume it are not shown.
410 .quad -9223372036854775808 # 0x8000000000000000
411 .quad -9223372036854775808 # 0x8000000000000000
# uint64_max_min_sse4 -- exported function symbol (.globl, @function).
#
# NOTE(review): this is an excerpt of a compiler listing. The number leading each
# line looks like the original listing line number, and the gaps between them
# mean the instructions that compute each blend mask, plus all branches and the
# accumulator setup, are not visible. Only the following is established here:
#   rdi        base of the 64-bit input elements (addressed as [rdi + 8*index])
#   rcx, rdx   addresses that each receive one 64-bit result
#   .LCPI3_0   0x8000000000000000 per lane, loaded into xmm8, xmm5 and xmm1 at
#              three different points
# The name suggests a max/min reduction over unsigned 64-bit values, and the
# register use matches SysV AMD64 arguments 1, 4 and 3 -- TODO confirm against
# the source this was compiled from.
#
# Every blendvpd below names xmm0 as its third operand: the instruction takes
# its per-lane selection mask from xmm0, so the instruction order matters.
413 .globl uint64_max_min_sse4
415 .type uint64_max_min_sse4,@function
416 uint64_max_min_sse4: # @uint64_max_min_sse4
# Load the top-bit constant (aligned load) before the vector loop.
452 movdqa xmm8, xmmword ptr [rip + .LCPI3_0] # xmm8 = [9223372036854775808,9223372036854775808]
# Vector loop: unaligned 16-byte loads at byte offsets 0/16/32/48 from
# rdi + 8*rax, i.e. 8 consecutive 64-bit elements per pass, followed by
# mask-driven blends. Presumably a compare-then-select form of min/max; the
# compares that fill xmm0 are not shown.
456 .LBB3_9: # =>This Inner Loop Header: Depth=1
459 movdqu xmm4, xmmword ptr [rdi + 8*rax]
460 movdqu xmm5, xmmword ptr [rdi + 8*rax + 16]
461 movdqu xmm13, xmmword ptr [rdi + 8*rax + 32]
469 blendvpd xmm3, xmm10, xmm0
470 movdqu xmm6, xmmword ptr [rdi + 8*rax + 48]
480 blendvpd xmm7, xmm11, xmm0
482 blendvpd xmm4, xmm9, xmm0
484 blendvpd xmm5, xmm12, xmm0
494 blendvpd xmm10, xmm3, xmm0
504 blendvpd xmm11, xmm7, xmm0
506 blendvpd xmm13, xmm4, xmm0
508 blendvpd xmm6, xmm5, xmm0
# Second load group covering only two vectors (4 elements) plus a fresh copy of
# the top-bit constant; presumably a leftover step after the loop above -- the
# branch selecting it is not shown.
518 movupd xmm4, xmmword ptr [rdi + 8*rax]
519 movupd xmm3, xmmword ptr [rdi + 8*rax + 16]
520 movapd xmm5, xmmword ptr [rip + .LCPI3_0] # xmm5 = [9223372036854775808,9223372036854775808]
527 blendvpd xmm7, xmm6, xmm0
534 blendvpd xmm6, xmm13, xmm0
539 blendvpd xmm3, xmm11, xmm0
543 blendvpd xmm4, xmm10, xmm0
# Top-bit constant loaded again, this time into xmm1, ahead of the blend /
# half-swap / blend sequences below.
549 movapd xmm1, xmmword ptr [rip + .LCPI3_0] # xmm1 = [9223372036854775808,9223372036854775808]
# Blend two registers, then pshufd 78 swaps the 64-bit halves so the two lanes
# can be blended against each other. Presumably the horizontal reduction of one
# accumulator down to a single lane.
555 blendvpd xmm6, xmm13, xmm0
556 pshufd xmm2, xmm6, 78 # xmm2 = xmm6[2,3,0,1]
562 blendvpd xmm2, xmm6, xmm0
# Same blend / half-swap / blend pattern for the other accumulator.
569 blendvpd xmm11, xmm10, xmm0
570 pshufd xmm2, xmm11, 78 # xmm2 = xmm11[2,3,0,1]
576 blendvpd xmm2, xmm11, xmm0
# Scalar loop: reads one 64-bit element at index r11 per pass.
583 .LBB3_5: # =>This Inner Loop Header: Depth=1
584 mov rax, qword ptr [rdi + 8*r11]
# Write the two 64-bit results through the pointers held in rcx and rdx.
594 mov qword ptr [rcx], rax
595 mov qword ptr [rdx], r8
# Record the symbol size as the distance from the entry label to .Lfunc_end3
# (that label is not visible in this excerpt).
609 .size uint64_max_min_sse4, .Lfunc_end3-uint64_max_min_sse4
# Toolchain identification string recorded by the compiler that produced this
# listing.
611 .ident "Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
# Empty .note.GNU-stack section: by GNU toolchain convention this marks the
# object as not requiring an executable stack.
612 .section ".note.GNU-stack","",@progbits