+ ; 16-bit pattern fill, head-alignment and bulk phases.
+ ; Register roles as used by the visible code:
+ ;   rdi = current destination pointer (advanced by the stores)
+ ;   rdx = number of 16-bit words still to write
+ ;   r8  = fill value (low word is what gets stored)
+ ;   r9  = copy of the original destination pointer
+ ; NOTE(review): assumes rdi is 2-byte aligned on entry, so that the head
+ ; loop ends exactly on a 64-byte boundary (movntdq requires a 16-byte
+ ; aligned address) - confirm against the caller.
mov r9, rdi
xor rcx, rcx
sub rcx, rdi
- and rcx, 15
+ and rcx, 63
+ ; rcx = bytes up to the next 64-byte boundary; ZF from this AND is still
+ ; live at the jz below because mov does not modify flags.
mov rax, r8
jz .0
shr rcx, 1
+ ; Never write more head words than were requested, and deduct the head
+ ; from the remaining count. Without this the bulk/tail phases below would
+ ; be sized from the full original count and overrun the buffer by the
+ ; number of head words.
+ cmp rcx, rdx
+ cmova rcx, rdx
+ sub rdx, rcx
rep stosw
.0:
mov rcx, rdx
- and edx, 7
- shr rcx, 3
+ ; edx = leftover words (< 32) after the bulk phase; rcx = number of
+ ; 64-byte (32-word) chunks. ZF from the shift skips the loop when zero.
+ and edx, 31
+ shr rcx, 5
jz @SetWords
+ ; Broadcast the low word of eax to all eight word lanes of xmm0.
movd xmm0, eax
pshuflw xmm0, xmm0, 0
movlhps xmm0, xmm0
.1:
movntdq [rdi], xmm0
- add rdi, 16
+ movntdq [rdi + 16], xmm0
+ movntdq [rdi + 32], xmm0
+ movntdq [rdi + 48], xmm0
+ add rdi, 64
loop .1
+ ; Fence after the non-temporal stores before continuing with the tail.
mfence
@SetWords:\r