! SPDX-License-Identifier: GPL-2.0
! Copyright (C) 2008-2012 Imagination Technologies Ltd.

	.text
	.global	_memcpy
	.type	_memcpy,function
! D1Ar1 dst
! D0Ar2 src
! D1Ar3 cnt
! D0Re0 dst
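!
! Like the C memcpy, the original destination pointer is returned in D0Re0.
! Strategy: copies of fewer than 16 bytes use a simple byte loop; longer
! copies first byte-copy until the destination is 8 byte aligned, then move
! 8 byte blocks with GETL/SETL (using shift-and-merge loops when the source
! is still unaligned), and finish any remainder with the byte loop.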
_memcpy:
	CMP	D1Ar3, #16
	MOV	A1.2, D0Ar2		! source pointer
	MOV	A0.2, D1Ar1		! destination pointer
	MOV	A0.3, D1Ar1		! for return value
! If there are fewer than 16 bytes to copy, use the byte copy loop
	BGE	$Llong_copy

$Lbyte_copy:
! Simply copy a byte at a time
	SUBS	TXRPT, D1Ar3, #1
	BLT	$Lend
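! (The SUBS above loads the hardware loop counter TXRPT with cnt - 1; the BR
! at the bottom of the loop then repeats the body TXRPT + 1 times, i.e. once
! per byte, while the BLT skips the loop entirely when the count is zero.)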
$Lloop_byte:
	GETB	D1Re0, [A1.2++]
	SETB	[A0.2++], D1Re0
	BR	$Lloop_byte

$Lend:
! Finally set return value and return
	MOV	D0Re0, A0.3
	MOV	PC, D1RtP

$Llong_copy:
	ANDS	D1Ar5, D1Ar1, #7	! test destination alignment
	BZ	$Laligned_dst

! The destination address is not 8 byte aligned. We will copy bytes from
! the source to the destination until the remaining data has an 8 byte
! destination address alignment (i.e. we should never copy more than 7
! bytes here).
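! For example, if the destination address ends in 5 (dst & 7 == 5), D1Ar5
! starts at 5 and the loop below copies 3 bytes before the destination is
! 8 byte aligned.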
$Lalign_dst:
	GETB	D0Re0, [A1.2++]
	ADD	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
	SUB	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
	SETB	[A0.2++], D0Re0
	CMP	D1Ar5, #8
	BNE	$Lalign_dst

! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
! blocks, then jump to the unaligned copy loop or fall through to the aligned
! copy loop as appropriate.
$Laligned_dst:
	MOV	D0Ar4, A1.2
	LSR	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
	ANDS	D0Ar4, D0Ar4, #7	! test source alignment
	BNZ	$Lunaligned_copy	! if unaligned, use unaligned copy loop

! Both source and destination are 8 byte aligned - the easy case.
$Laligned_copy:
	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
	BZ	$Lbyte_copy
	SUB	TXRPT, D1Ar5, #1

$Laligned_32:
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	BR	$Laligned_32

! If there are any remaining bytes, use the byte copy loop; otherwise we are done.
	ANDS	D1Ar3, D1Ar3, #0x1f
	BNZ	$Lbyte_copy
	B	$Lend

! The destination is 8 byte aligned but the source is not, and there are 8
! or more bytes to be copied.
$Lunaligned_copy:
! Adjust the source pointer (A1.2) to the 8 byte boundary before its
! current value
	MOV	D0Ar4, A1.2
	MOV	D0Ar6, A1.2
	ANDMB	D0Ar4, D0Ar4, #0xfff8
	MOV	A1.2, D0Ar4
! Save the number of bytes of mis-alignment in D0Ar4 for use later
	SUBS	D0Ar6, D0Ar6, D0Ar4
	MOV	D0Ar4, D0Ar6
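! For example, with a source address of 0x1003, A1.2 now points at 0x1000
! and D0Ar4 holds the 3 bytes of mis-alignment.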
! if there is no mis-alignment after all, use the aligned copy loop
	BZ	$Laligned_copy

! prefetch 8 bytes
	GETL	D0Re0, D1Re0, [A1.2]

	SUB	TXRPT, D1Ar5, #1

! There are three mis-alignment cases to consider: less than 4 bytes,
! exactly 4 bytes, and more than 4 bytes.
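! (Each GETL/SETL moves 8 bytes as two 32-bit words, so a 4 byte
! mis-alignment only needs words shuffled between registers, while the other
! cases also need the data shifted across word boundaries.)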
	CMP	D0Ar6, #4
	BLT	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
	BZ	$Lunaligned_4		! use 4 byte mis-alignment loop

! The mis-alignment is more than 4 bytes
$Lunaligned_5_6_7:
	SUB	D0Ar6, D0Ar6, #4
! Calculate the bit offsets required for the shift operations necessary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
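! e.g. a 6 byte mis-alignment gives D0Ar6 = (6 - 4) * 8 = 16 and D1Ar5 = 16,
! so each output word below is formed as (current word >> 16) | (next word << 16).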
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6
! Move data 4 bytes before we enter the main loop
	MOV	D0Re0, D1Re0

$Lloop_5_6_7:
	GETL	D0Ar2, D1Ar1, [++A1.2]
! form 64-bit data in D0Re0, D1Re0
	LSR	D0Re0, D0Re0, D0Ar6
	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Re0

	LSR	D0Ar2, D0Ar2, D0Ar6
	LSL	D1Re0, D1Ar1, D1Ar5
	ADD	D1Re0, D1Re0, D0Ar2

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1
	BR	$Lloop_5_6_7

	B	$Lunaligned_end

$Lunaligned_1_2_3:
! Calculate the bit offsets required for the shift operations necessary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
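! e.g. a 1 byte mis-alignment gives D0Ar6 = 8 and D1Ar5 = 24, so each output
! word below is formed as (current word >> 8) | (next word << 24).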
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6

$Lloop_1_2_3:
! form 64-bit data in D0Re0, D1Re0
	LSR	D0Re0, D0Re0, D0Ar6
	LSL	D1Ar1, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Ar1
	MOV	D0Ar2, D1Re0
	LSR	D0FrT, D0Ar2, D0Ar6
	GETL	D0Ar2, D1Ar1, [++A1.2]

	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D1Re0, D1Re0, D0FrT

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D0Ar2
	MOV	D1Re0, D1Ar1
	BR	$Lloop_1_2_3

	B	$Lunaligned_end

! The 4 byte mis-alignment case - this does not require any shifting, just a
! shuffling of registers.
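! Each SETL below pairs the second word of the previous GETL (carried in
! D0Re0) with the first word of the newly fetched pair.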
$Lunaligned_4:
	MOV	D0Re0, D1Re0
$Lloop_4:
	GETL	D0Ar2, D1Ar1, [++A1.2]
	MOV	D1Re0, D0Ar2
	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1
	BR	$Lloop_4

$Lunaligned_end:
! If there are no remaining bytes to copy, we are done.
	ANDS	D1Ar3, D1Ar3, #7
	BZ	$Lend
! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
! address of the remaining bytes, then branch to the byte copy loop.
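! (D0Ar4 still holds the mis-alignment saved at the start of
! $Lunaligned_copy, so adding it back restores the true source address.)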
	MOV	D0Ar6, A1.2
	ADD	D1Ar5, D0Ar4, D0Ar6
	MOV	A1.2, D1Ar5
	B	$Lbyte_copy

	.size	_memcpy,.-_memcpy