]>
Commit | Line | Data |
---|---|---|
320054e8 DG |
1 | #include <string.h> |
2 | #include <stdint.h> | |
3 | #include <endian.h> | |
4 | ||
/*
 * memcpy - copy n bytes from src to dest.
 *
 * dest: start of the destination region.
 * src:  start of the source region.
 * n:    number of bytes to copy.
 *
 * Returns dest.
 *
 * Both pointers are restrict-qualified, so the caller must pass regions that
 * do not overlap.
 *
 * Three implementations are selected at compile time:
 *   - __wasm_bulk_memory__ defined: forward to __builtin_memcpy.
 *   - __GNUC__ defined: copy 32-bit words, merging words with shifts when
 *     src and dest have different alignment modulo 4.
 *   - otherwise: plain byte-by-byte loop.
 */
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
#if defined(__wasm_bulk_memory__)
	/* NOTE(review): presumably the compiler lowers this builtin to the
	 * wasm bulk-memory copy instruction instead of calling back into
	 * this function -- confirm against the toolchain in use. */
	return __builtin_memcpy(dest, src, n);
#else
	unsigned char *d = dest;
	const unsigned char *s = src;

#ifdef __GNUC__

/*
 * LS moves the bytes of a loaded word toward lower memory addresses and RS
 * moves them toward higher addresses; which C shift operator does that
 * depends on the byte order.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

	/* may_alias lets the word accesses below read and write memory that
	 * was declared with any other type. */
	typedef uint32_t __attribute__((__may_alias__)) u32;
	uint32_t w, x;

	/* Copy single bytes until the source pointer is 4-byte aligned
	 * (or until there is nothing left to copy). */
	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

	if ((uintptr_t)d % 4 == 0) {
		/* Source and destination are both aligned: whole-word copies,
		 * 16 bytes per iteration. */
		for (; n>=16; s+=16, d+=16, n-=16) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			*(u32 *)(d+8) = *(u32 *)(s+8);
			*(u32 *)(d+12) = *(u32 *)(s+12);
		}
		/* Fewer than 16 bytes remain; the low bits of n say exactly
		 * which power-of-two sized pieces are still needed. */
		if (n&8) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			d += 8; s += 8;
		}
		if (n&4) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			d += 4; s += 4;
		}
		if (n&2) {
			*d++ = *s++; *d++ = *s++;
		}
		if (n&1) {
			*d = *s;
		}
		return dest;
	}

	/*
	 * The source is aligned but the destination is not. For large enough
	 * copies, first load the aligned source word into w, then copy just
	 * enough single bytes to align the destination. From then on every
	 * aligned destination word is assembled from the tail of the previous
	 * source word and the head of the next one, so all word loads and
	 * stores are aligned. Each loop bound (17/18/19) is 16 plus the number
	 * of extra source bytes the last word load of an iteration reaches.
	 */
	if (n >= 32) switch ((uintptr_t)d % 4) {
	case 1:
		/* Destination needs 3 bytes to reach alignment. */
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		n -= 3;
		for (; n>=17; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+1);
			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+5);
			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
			x = *(u32 *)(s+9);
			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+13);
			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
		}
		break;
	case 2:
		/* Destination needs 2 bytes to reach alignment. */
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		n -= 2;
		for (; n>=18; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+2);
			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+6);
			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
			x = *(u32 *)(s+10);
			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+14);
			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
		}
		break;
	case 3:
		/* Destination needs 1 byte to reach alignment. */
		w = *(u32 *)s;
		*d++ = *s++;
		n -= 1;
		for (; n>=19; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+3);
			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+7);
			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
			x = *(u32 *)(s+11);
			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+15);
			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
		}
		break;
	}
	/* Fewer than 32 bytes remain here (either n started below 32 or the
	 * loops above stopped below 20), so bits 16..1 of n describe the
	 * whole tail. It is copied bytewise. */
	if (n&16) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&8) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&4) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&2) {
		*d++ = *s++; *d++ = *s++;
	}
	if (n&1) {
		*d = *s;
	}
	return dest;

/* The shift helpers are only meaningful inside this function; do not let
 * them leak into the rest of the translation unit. */
#undef LS
#undef RS
#endif

	/* Portable fallback, used when the word-copy path above is not
	 * compiled in: one byte at a time. */
	for (; n; n--) *d++ = *s++;
	return dest;
#endif // __wasm_bulk_memory__
}