.thumb\r
.syntax unified\r
.align 5\r
// Removed side of the diff ('-' lines): the previous InternalMemZeroMem entry\r
// point. It loads 0 into r2 and has no branch, so the old code simply ran on\r
// into whatever entry point followed it.\r
-ASM_GLOBAL ASM_PFX(InternalMemZeroMem)\r
-ASM_PFX(InternalMemZeroMem):\r
- movs r2, #0\r
-\r
// Removed side of the diff ('-' lines): the previous InternalMemSetMem entry\r
// point. uxtb keeps only the low 8 bits of r2 and the orr copies that byte\r
// into bits 15:8. No branch follows, so the old code continued into the\r
// InternalMemSetMem16 entry point located after it.\r
-ASM_GLOBAL ASM_PFX(InternalMemSetMem)\r
-ASM_PFX(InternalMemSetMem):\r
- uxtb r2, r2\r
- orr r2, r2, r2, lsl #8\r
-\r
// InternalMemSetMem16: builds a 32-bit fill word in r2 from a 16-bit value\r
// and hands off to the shared code at local label 0.\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem16)\r
ASM_PFX(InternalMemSetMem16):\r
// Keep only the low 16 bits of r2.\r
uxth r2, r2\r
// Added line: r1 = r1 * 2. The shared code's own comments treat r1 as a byte\r
// length, so r1 presumably arrives here as a count of 16-bit elements --\r
// TODO confirm against the C prototype.\r
+ lsl r1, r1, #1\r
// Copy the halfword into the upper half of r2 as well.\r
orr r2, r2, r2, lsl #16\r
// Added line: branch forward to local label 0, where r2 is copied into r3.\r
+ b 0f\r
\r
// InternalMemSetMem32: r2 is used unmodified as the fill word.\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem32)\r
ASM_PFX(InternalMemSetMem32):\r
// Removed line: the r2 -> r3 copy is now done at local label 0 instead.\r
- mov r3, r2\r
// Added lines: r1 = r1 * 4 (presumably element count -> byte count; TODO\r
// confirm), then branch forward to local label 0.\r
+ lsl r1, r1, #2\r
+ b 0f\r
\r
// InternalMemSetMem64: uses r2 and r3 as passed in.\r
ASM_GLOBAL ASM_PFX(InternalMemSetMem64)\r
ASM_PFX(InternalMemSetMem64):\r
// Removed line: the register save now lives at local label 1, which all\r
// entry points reach.\r
- push {r4, lr}\r
// Added lines: r1 = r1 * 8 (presumably element count -> byte count; TODO\r
// confirm), then branch forward to local label 1. Label 1 sits after the\r
// 'mov r3, r2' at label 0, so this path leaves r3 untouched.\r
+ lsl r1, r1, #3\r
+ b 1f\r
+\r
// Added side of the diff ('+' lines): the byte-fill entry point, re-created\r
// here after the wider variants. With GNU as for ARM, '.align 5' requests\r
// 2^5 = 32-byte alignment.\r
+ .align 5\r
+ASM_GLOBAL ASM_PFX(InternalMemSetMem)\r
+ASM_PFX(InternalMemSetMem):\r
// Keep only the low byte of r2, then replicate it: first into bits 15:8,\r
// then the resulting halfword into bits 31:16, so all four bytes match.\r
+ uxtb r2, r2\r
+ orr r2, r2, r2, lsl #8\r
+ orr r2, r2, r2, lsl #16\r
// Branch forward to local label 0, skipping the 'movs r2, #0' of the\r
// InternalMemZeroMem entry point that follows.\r
+ b 0f\r
+\r
// Added side of the diff ('+' lines): the zero-fill entry point, re-created\r
// here. It sets r2 to 0 and falls through to local label 0.\r
+ASM_GLOBAL ASM_PFX(InternalMemZeroMem)\r
+ASM_PFX(InternalMemZeroMem):\r
+ movs r2, #0\r
// Label 0: reached by fall-through from above and by 'b 0f' from the other\r
// entry points; duplicates the fill word from r2 into r3.\r
+0: mov r3, r2\r
+\r
// Label 1: common start of the shared code ('b 1f' lands here directly).\r
// Saves r4 and lr; both visible exits below are 'pop {r4, pc}'.\r
+1: push {r4, lr}\r
cmp r1, #16 // fewer than 16 bytes of input?\r
add r1, r1, r0 // r1 := dst + length\r
add lr, r0, #16\r
// NOTE(review): no visible line writes r4 between the push above and the\r
// compare below, and nothing visible consumes the flags of 'cmp r1, #16'.\r
// This text is a diff, so unchanged lines between hunks are presumably\r
// omitted at this point -- confirm against the full file before reasoning\r
// about r4 or about the r1-relative offsets used further down.\r
cmp r4, #4 // between 4 and 15 bytes?\r
blt 4f\r
cmp r4, #8 // between 8 and 15 bytes?\r
// Removed lines: stores addressed from lr with negative offsets, with both\r
// conditional stores under a single two-instruction IT block.\r
- str r2, [lr, #-16] // overlapping store of 4 + (4 + 4) + 4 bytes\r
- itt gt\r
- strgt r3, [lr, #-12]\r
- strgt r2, [r1]\r
// Added lines: r4 = lr - 16 is computed first so the same addresses are\r
// reached with non-negative offsets. 'sub' and 'str' (no 's' suffix, unified\r
// syntax) leave the flags from 'cmp r4, #8' intact for the IT blocks. Each\r
// conditional store now has its own one-instruction IT block and a '.n'\r
// suffix requesting the 16-bit encoding -- presumably to avoid IT forms that\r
// ARMv8 deprecates; TODO confirm the motivation.\r
+ sub r4, lr, #16\r
+ str r2, [r4] // overlapping store of 4 + (4 + 4) + 4 bytes\r
+ it gt\r
+ strgt.n r3, [r4, #4]\r
+ it gt\r
+ strgt.n r2, [r1]\r
str r3, [r1, #4]\r
pop {r4, pc}\r
\r
// Label 4: taken when the 'cmp r4, #4' above yields "less than".\r
4: cmp r4, #2 // 2 or 3 bytes?\r
strb r2, [lr, #-16] // store 1 byte\r
it ge\r
// Changed line: same conditional halfword store, with '.n' added to request\r
// the 16-bit encoding.\r
- strhge r2, [r1, #6] // store 2 bytes\r
+ strhge.n r2, [r1, #6] // store 2 bytes\r
pop {r4, pc}\r