ASM_GLOBAL ASM_PFX(InternalMemSetMem16)\r
ASM_PFX(InternalMemSetMem16):\r
dup v0.8H, valw\r
+ lsl count, count, #1 // count *= 2 (one 16-bit lane of v0.8H is 2 bytes); presumably the code at 0: expects a byte length - TODO confirm\r
b 0f\r
\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem32)\r
ASM_PFX(InternalMemSetMem32):\r
dup v0.4S, valw\r
+ lsl count, count, #2 // count *= 4 (one 32-bit lane of v0.4S is 4 bytes); presumably the code at 0: expects a byte length - TODO confirm\r
b 0f\r
\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem64)\r
ASM_PFX(InternalMemSetMem64):\r
dup v0.2D, val\r
+ lsl count, count, #3 // count *= 8 (one 64-bit lane of v0.2D is 8 bytes); presumably the code at 0: expects a byte length - TODO confirm\r
b 0f\r
\r
ASM_GLOBAL ASM_PFX(InternalMemZeroMem)\r
.thumb\r
.syntax unified\r
.align 5\r
-ASM_GLOBAL ASM_PFX(InternalMemZeroMem)\r
-ASM_PFX(InternalMemZeroMem):\r
- movs r2, #0\r
-\r
-ASM_GLOBAL ASM_PFX(InternalMemSetMem)\r
-ASM_PFX(InternalMemSetMem):\r
- uxtb r2, r2\r
- orr r2, r2, r2, lsl #8\r
-\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem16)\r
ASM_PFX(InternalMemSetMem16):\r
uxth r2, r2\r
+ lsl r1, r1, #1 // r1 *= 2: scale the 16-bit element count to the byte length used from 1: onwards\r
orr r2, r2, r2, lsl #16\r
+ b 0f // join the shared path at 0:, which copies r2 into r3\r
\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem32)\r
ASM_PFX(InternalMemSetMem32):\r
- mov r3, r2\r
+ lsl r1, r1, #2 // r1 *= 4: scale the 32-bit element count to the byte length used from 1: onwards\r
+ b 0f // r2 is used unmodified as the pattern; 0: copies it into r3\r
\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem64)\r
ASM_PFX(InternalMemSetMem64):\r
- push {r4, lr}\r
+ lsl r1, r1, #3 // r1 *= 8: scale the 64-bit element count to the byte length used from 1: onwards\r
+ b 1f // enter at 1:, skipping "mov r3, r2" at 0: so r3 is kept; presumably r2:r3 carry the 64-bit value - TODO confirm\r
+\r
+ .align 5 // align the next entry point to 2^5 = 32 bytes\r
+ASM_GLOBAL ASM_PFX(InternalMemSetMem)\r
+ASM_PFX(InternalMemSetMem):\r
+ uxtb r2, r2 // keep only the low byte of the fill value\r
+ orr r2, r2, r2, lsl #8 // copy that byte into bits 15:8\r
+ orr r2, r2, r2, lsl #16 // copy the halfword into bits 31:16, so r2 holds the byte four times\r
+ b 0f // jump past the zeroing in InternalMemZeroMem and join at 0:\r
+\r
+ASM_GLOBAL ASM_PFX(InternalMemZeroMem)\r
+ASM_PFX(InternalMemZeroMem):\r
+ movs r2, #0 // fill pattern is zero\r
+0: mov r3, r2 // shared entry for the 8/16/32-bit variants: r3 := r2\r
+\r
+1: push {r4, lr} // shared entry that leaves r2 and r3 as passed in (used by InternalMemSetMem64); save r4 and lr\r
cmp r1, #16 // fewer than 16 bytes of input?\r
add r1, r1, r0 // r1 := dst + length\r
add lr, r0, #16\r
AREA SetMem, CODE, READONLY, CODEALIGN, ALIGN=5\r
THUMB\r
\r
-InternalMemZeroMem\r
- movs r2, #0\r
+InternalMemSetMem16\r
+ uxth r2, r2 ; keep only the low 16 bits of the fill value\r
+ lsl r1, r1, #1 ; r1 *= 2: scale the 16-bit element count to the byte length used from B1 onwards\r
+ orr r2, r2, r2, lsl #16 ; copy the halfword into bits 31:16\r
+ b B0 ; join the shared path at B0, which copies r2 into r3\r
+\r
+InternalMemSetMem32\r
+ lsl r1, r1, #2 ; r1 *= 4: scale the 32-bit element count to the byte length used from B1 onwards\r
+ b B0 ; r2 is used unmodified as the pattern; B0 copies it into r3\r
+\r
+InternalMemSetMem64\r
+ lsl r1, r1, #3 ; r1 *= 8: scale the 64-bit element count to the byte length used from B1 onwards\r
+ b B1 ; enter at B1, skipping "mov r3, r2" at B0 so r3 is kept; presumably r2:r3 carry the 64-bit value - TODO confirm\r
\r
+ ALIGN 32 ; align the next entry point to a 32-byte boundary\r
InternalMemSetMem\r
uxtb r2, r2\r
orr r2, r2, r2, lsl #8\r
+ orr r2, r2, r2, lsl #16 ; copy the halfword into bits 31:16, so r2 holds the byte four times\r
+ b B0 ; jump past the zeroing in InternalMemZeroMem and join at B0\r
\r
-InternalMemSetMem16\r
- uxth r2, r2\r
- orr r2, r2, r2, lsr #16\r
-\r
-InternalMemSetMem32\r
+InternalMemZeroMem\r
+ movs r2, #0 ; fill pattern is zero; falls through into B0\r
+B0\r
mov r3, r2\r
\r
-InternalMemSetMem64\r
+B1\r
push {r4, lr}\r
cmp r1, #16 ; fewer than 16 bytes of input?\r
add r1, r1, r0 ; r1 := dst + length\r