]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - arch/ia64/lib/ip_fast_csum.S
License cleanup: add SPDX GPL-2.0 license identifier to files with no license
[mirror_ubuntu-bionic-kernel.git] / arch / ia64 / lib / ip_fast_csum.S
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * Optimized version of the ip_fast_csum() function
4 * Used for calculating IP header checksum
5 *
6 * Return: 16bit checksum, complemented
7 *
8 * Inputs:
9 * in0: address of buffer to checksum (char *)
10 * in1: length of the buffer in 32-bit words (int)
11 *
12 * Copyright (C) 2002, 2006 Intel Corp.
13 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
14 */
15
16 #include <asm/asmmacro.h>
17 #include <asm/export.h>
18
19 /*
20 * Since we know that most likely this function is called with buf aligned
21 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
22 * versus calling the generic version of do_csum, which has lots of overhead in
23 * handling various alignments and sizes. However, due to the lack of constraints
24 * put on the function input arguments, cases with alignment not on 4-byte or
25 * size not equal to 20 bytes will be handled by the generic do_csum function.
26 */
27
/*
 * Register aliases used by the routines in this file.
 * in0-in4 name r32-r36; ret0 names r8, the register both routines leave
 * their result in before returning.
 * Note that the .generic path of ip_fast_csum also uses r34 and r35 (the
 * registers behind in2 and in3) by their raw names as local storage.
 * NOTE(review): presumably r32.. are where the caller's arguments arrive
 * and r8 is the ABI return register -- confirm against the ia64 calling
 * convention.
 */
28 #define in0 r32
29 #define in1 r33
30 #define in2 r34
31 #define in3 r35
32 #define in4 r36
33 #define ret0 r8
34
/*
 * ip_fast_csum: checksum the buffer at in0 whose length in1 is counted in
 * 4-byte words (in1 is compared against 5 for the 20-byte case and is
 * shifted left by 2 before being handed to do_csum).
 *
 * Fast path (in1 == 5 and in0 4-byte aligned): the five 32-bit words are
 * loaded and summed inline, the sum is folded down to 16 bits and its
 * 16-bit complement is returned in ret0.
 * Slow path (.generic): calls do_csum(in0, in1 << 2) and returns the
 * complement of whatever do_csum leaves in ret0.
 *
 * The fast path writes ret0, r9, r14, r15, r20-r24, p6 and p7, and advances
 * in0 through the post-increment loads.
 */
35 GLOBAL_ENTRY(ip_fast_csum)
36 .prologue
37 .body
38 cmp.ne p6,p7=5,in1 // size other than 20 byte? p6 = (in1 != 5), p7 = (in1 == 5)
39 and r14=3,in0 // is it aligned on 4-byte? r14 = low two address bits
40 add r15=4,in0 // second source pointer
41 ;;
42 cmp.ne.or.andcm p6,p7=r14,r0 // misaligned: force p6 = 1 and p7 = 0
43 ;;
44 (p7) ld4 r20=[in0],8 // word 0; predicated so in0 is untouched on the slow path
45 (p7) ld4 r21=[r15],8 // word 1
46 (p6) br.spnt .generic // wrong size or alignment: let do_csum handle it
47 ;;
48 ld4 r22=[in0],8 // word 2
49 ld4 r23=[r15],8 // word 3
50 ;;
51 ld4 r24=[in0] // word 4 (in0 is now the original address + 16)
52 add r20=r20,r21
53 add r22=r22,r23
54 ;;
55 add r20=r20,r22
56 ;;
57 add r20=r20,r24 // r20 = sum of all five words
58 ;;
59 shr.u ret0=r20,16 // now need to add the carry
60 zxt2 r20=r20 // keep the low 16 bits
61 ;;
62 add r20=ret0,r20
63 ;;
64 shr.u ret0=r20,16 // add carry again
65 zxt2 r20=r20
66 ;;
67 add r20=ret0,r20
68 ;;
69 shr.u ret0=r20,16 // third fold: enough to reach 16 bits for a sum of five 32-bit words
70 zxt2 r20=r20
71 ;;
72 add r20=ret0,r20
73 mov r9=0xffff // 16-bit mask
74 ;;
75 andcm ret0=r9,r20 // ret0 = 0xffff & ~r20, the complemented 16-bit sum
76 .restore sp // reset frame state
77 br.ret.sptk.many b0
78 ;;
79
80 .generic:
81 .prologue
82 .save ar.pfs, r35
83 alloc r35=ar.pfs,2,2,2,0 // 2 inputs, 2 locals (r34, r35), 2 outputs; previous ar.pfs kept in r35
84 .save rp, r34
85 mov r34=b0 // keep the return address across the call
86 .body
87 dep.z out1=in1,2,30 // out1 = low 30 bits of in1 shifted left by 2 (words -> bytes)
88 mov out0=in0 // out0 = buffer address
89 ;;
90 br.call.sptk.many b0=do_csum
91 ;;
92 andcm ret0=-1,ret0 // ret0 = ~ret0 over all 64 bits
// NOTE(review): unlike the fast path, this result is not masked to 16 bits;
// presumably callers only look at the low 16 bits -- confirm.
93 mov ar.pfs=r35 // restore the caller's ar.pfs
94 mov b0=r34 // restore the return address
95 br.ret.sptk.many b0
96 END(ip_fast_csum)
97 EXPORT_SYMBOL(ip_fast_csum)
98
/*
 * csum_ipv6_magic: sums four 32-bit words from each of the two buffers
 * addressed by in0 and in1, adds a term built from in2 and in3 (see the
 * dep/mux1/shr.u sequence below) plus the low 32 bits of in4, folds the
 * 64-bit total down to 16 bits and returns its 16-bit complement in r8.
 *
 * Writes r8-r11, r15-r27, and modifies in0, in1, in2 and in4 in place.
 *
 * NOTE(review): presumably in0/in1 are the IPv6 source/destination
 * addresses, in2 the length, in3 the protocol and in4 an initial checksum;
 * the C prototype is not visible here -- confirm against the header.
 */
99 GLOBAL_ENTRY(csum_ipv6_magic)
100 ld4 r20=[in0],4 // word 0 of the first buffer
101 ld4 r21=[in1],4 // word 0 of the second buffer
102 zxt4 in2=in2 // keep only the low 32 bits of in2
103 ;;
104 ld4 r22=[in0],4 // word 1 of each buffer
105 ld4 r23=[in1],4
106 dep r15=in3,in2,32,16 // r15 = in2 with the low 16 bits of in3 placed at bits 32..47
107 ;;
108 ld4 r24=[in0],4 // word 2 of each buffer
109 ld4 r25=[in1],4
110 mux1 r15=r15,@rev // reverse the byte order of r15
111 add r16=r20,r21
112 add r17=r22,r23
113 zxt4 in4=in4 // keep only the low 32 bits of in4
114 ;;
115 ld4 r26=[in0],4 // word 3 of each buffer
116 ld4 r27=[in1],4
117 shr.u r15=r15,16 // drop the two low bytes (the zero bytes moved there by the reversal)
118 add r18=r24,r25
119 add r8=r16,r17
120 ;;
121 add r19=r26,r27
122 add r8=r8,r18
123 ;;
124 add r8=r8,r19 // r8 = sum of all eight buffer words
125 add r15=r15,in4 // r15 = byte-swapped in2/in3 term + in4
126 ;;
127 add r8=r8,r15 // r8 = 64-bit total
128 ;;
129 shr.u r10=r8,32 // now fold sum into short
130 zxt4 r11=r8
131 ;;
132 add r8=r10,r11 // 64 -> 32 bits (plus carry)
133 ;;
134 shr.u r10=r8,16 // yeah, keep it rolling
135 zxt2 r11=r8
136 ;;
137 add r8=r10,r11
138 ;;
139 shr.u r10=r8,16 // three times lucky
140 zxt2 r11=r8
141 ;;
142 add r8=r10,r11
// NOTE(review): if the 64-bit total reaches 2^48 (needs in2 close to 2^32),
// r8 can still be 0x10000 here and the mask below drops that bit --
// confirm such inputs cannot occur.
143 mov r9=0xffff // 16-bit mask
144 ;;
145 andcm r8=r9,r8 // r8 = 0xffff & ~r8, the complemented 16-bit sum
146 br.ret.sptk.many b0
147 END(csum_ipv6_magic)
148 EXPORT_SYMBOL(csum_ipv6_magic)