]> git.proxmox.com Git - wasi-libc.git/blame - libc-top-half/musl/src/string/memcpy.c
Use bulk memory opcodes when possible
[wasi-libc.git] / libc-top-half / musl / src / string / memcpy.c
CommitLineData
320054e8
DG
1#include <string.h>
2#include <stdint.h>
3#include <endian.h>
4
/*
 * Copy n bytes from src to dest and return dest.
 *
 * The regions are declared restrict, so the caller promises that they do
 * not overlap.
 *
 * Build variants:
 *  - __wasm_bulk_memory__ defined: the whole copy is delegated to
 *    __builtin_memcpy.
 *  - __GNUC__ defined: word-at-a-time copy using an aliasing-safe 32-bit
 *    type, with a shift-and-merge path for destinations that are not
 *    4-byte aligned relative to the source.
 *  - otherwise: plain byte-by-byte loop.
 */
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
#if defined(__wasm_bulk_memory__)
	return __builtin_memcpy(dest, src, n);
#else
	unsigned char *d = dest;
	const unsigned char *s = src;

#ifdef __GNUC__

/*
 * LS moves bytes of a word toward lower addresses, RS toward higher
 * addresses; the actual shift direction depends on the byte order.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

	/* may_alias lets us read/write arbitrary objects through u32. */
	typedef uint32_t __attribute__((__may_alias__)) u32;
	uint32_t w, x;

	/* Copy single bytes until the source pointer is 4-byte aligned. */
	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

	/* Source and destination are both aligned: copy whole words. */
	if ((uintptr_t)d % 4 == 0) {
		for (; n>=16; s+=16, d+=16, n-=16) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			*(u32 *)(d+8) = *(u32 *)(s+8);
			*(u32 *)(d+12) = *(u32 *)(s+12);
		}
		/* Fewer than 16 bytes remain; each bit of n selects one chunk. */
		if (n&8) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			d += 8; s += 8;
		}
		if (n&4) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			d += 4; s += 4;
		}
		if (n&2) {
			*d++ = *s++; *d++ = *s++;
		}
		if (n&1) {
			*d = *s;
		}
		return dest;
	}

	/*
	 * Destination is misaligned relative to the (aligned) source.
	 * For large enough copies, load one aligned source word, copy the
	 * 1-3 bytes needed to align the destination, then build each aligned
	 * destination word from the unconsumed part of the previous source
	 * word (w or x) merged with the leading part of the next one.
	 * Each loop bound keeps the furthest word load (s+13, s+14 or s+15,
	 * four bytes wide) inside the remaining n bytes.
	 */
	if (n >= 32) switch ((uintptr_t)d % 4) {
	case 1:
		/* 3 bytes align d; 1 byte of w is still pending. */
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		n -= 3;
		for (; n>=17; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+1);
			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+5);
			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
			x = *(u32 *)(s+9);
			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+13);
			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
		}
		break;
	case 2:
		/* 2 bytes align d; 2 bytes of w are still pending. */
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		n -= 2;
		for (; n>=18; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+2);
			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+6);
			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
			x = *(u32 *)(s+10);
			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+14);
			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
		}
		break;
	case 3:
		/* 1 byte aligns d; 3 bytes of w are still pending. */
		w = *(u32 *)s;
		*d++ = *s++;
		n -= 1;
		for (; n>=19; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+3);
			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+7);
			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
			x = *(u32 *)(s+11);
			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+15);
			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
		}
		break;
	}
	/*
	 * Byte-wise tail: at most 31 bytes remain here, so the low five
	 * bits of n describe exactly what is left to copy.
	 */
	if (n&16) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&8) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&4) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&2) {
		*d++ = *s++; *d++ = *s++;
	}
	if (n&1) {
		*d = *s;
	}
	return dest;

/* Keep the operator-like helper macros from leaking past this function. */
#undef LS
#undef RS
#endif

	/* Portable fallback for compilers without the GNU extensions. */
	for (; n; n--) *d++ = *s++;
	return dest;
#endif // __wasm_bulk_memory__
}