/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/export.h>
	.text

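/* Bring the buffer pointer up to 32-bit alignment before the main loop:
 * eat one leading byte if the start address is odd (the zero-length case
 * was already rejected), then one leading 16-bit halfword if needed,
 * accumulating whatever we consume into %o4.  If fewer than two bytes
 * remain we skip straight to the tail handling.
 */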
csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft
	 nop
	lduh		[%o0 + 0x00], %o5
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4

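/* csum_partial(buff, len, sum): add the one's-complement checksum of
 * LEN bytes at BUFF into the 32-bit partial sum SUM and return it.
 * Ignoring the alignment fix-up and the exact tail handling, the
 * algorithm is roughly (illustrative C-style sketch only):
 *
 *	u64 acc = 0;
 *	for (; len >= 4; buff += 4, len -= 4)
 *		acc += *(u32 *)buff;
 *	... add a trailing halfword and/or byte ...
 *	acc = (acc >> 32) + (u32)acc;		fold 64 --> 32, twice
 *	acc = (acc >> 32) + (u32)acc;
 *	acc = (acc >> 16) + (acc & 0xffff);	fold 32 --> 16, twice
 *	acc = (acc >> 16) + (acc & 0xffff);
 *	return sum + acc;			with end-around carry
 */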
	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
	EXPORT_SYMBOL(csum_partial)
csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish
	 andcc		%o0, 0x3, %g0

	/* We "remember" whether the lowest bit in the address
	 * was set in %g7.  Because if it is, we have to swap
	 * upper and lower 8 bit fields of the sum we calculate:
	 * with an odd start address every byte lands in the
	 * opposite half of its 16-bit word, so the sum we compute
	 * is the byte-swap of the one we want.
	 */
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7

csum_partial_post_align:
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold it down to 32-bits and so on.
	 */
	prefetch	[%o0 + 0x140], #n_reads
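	/* Main loop: %o3 holds the length rounded down to a multiple
	 * of 64 bytes.  Each pass sums sixteen 32-bit words into the
	 * 64-bit accumulator %o4, with loads and adds interleaved and
	 * a prefetch issued several cache lines ahead of the data.
	 */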
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4

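	/* Up to fifteen 32-bit words remain (len & 0x3c bytes);
	 * sum them one word per iteration.
	 */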
2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
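	/* The 32-bit words were summed into a 64-bit register, so the
	 * carries have piled up in the upper half of %o4.  Add the two
	 * halves together; that add can itself carry into bit 32, so
	 * fold twice.
	 */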
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4

	/* fold 32-->16 */
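	/* Add the upper 16 bits of %o4 into the lower 16 (the andn
	 * against 0xffff0000 keeps just the low half).  As above, the
	 * first add may carry into bit 16, so do it twice.
	 */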
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

csum_partial_end_cruft:
	/* %o4 has the 16-bit sum we have calculated so far.  */
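	/* At most three bytes are left.  Pick up a trailing 16-bit
	 * halfword if there is one, then a final lone byte.  The lone
	 * byte is shifted into bits 15:8 because, big-endian, it is
	 * the most significant byte of its 16-bit word.
	 */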
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f
	 nop

	/* We started with an odd byte, byte-swap the result.  */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

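	/* Fold what we computed into the caller-supplied partial sum:
	 * addcc sets the carry flag and addc adds it back in, i.e.
	 * one's-complement (end-around carry) addition.
	 */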
1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2

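	/* Return the 32-bit result; the srl by zero in the delay slot
	 * zero-extends %o2 into %o0.
	 */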
csum_partial_finish:
	retl
	 srl		%o2, 0, %o0