/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen,  add zeroing on error
 *              converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */

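/*
 * Note on argument passing (a descriptive note, inferred from the code
 * below and the SH C calling convention): buf arrives in r4, len in r5
 * and sum in r6; the partial sum is returned in r0.  Carries from addc
 * are folded back into the sum, as the ones'-complement Internet
 * checksum requires.
 */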
        .text
ENTRY(csum_partial)
        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
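        ! Register usage in this routine: r4 = current source pointer,
        ! r5 = remaining length, r6 = running sum, r7 = copy of the
        ! original pointer so the final sum can be byte-rotated back
        ! at label 9 if buf started on an odd address.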
        mov     r4, r0
        tst     #3, r0          ! Check alignment.
        bt/s    2f              ! Jump if alignment is ok.
         mov    r4, r7          ! Keep a copy to check for alignment
        !
        tst     #1, r0          ! Check alignment.
        bt      21f             ! Jump if alignment is boundary of 2bytes.

        ! buf is odd
        tst     r5, r5
        add     #-1, r5
        bt      9f
        mov.b   @r4+, r0
        extu.b  r0, r0
        addc    r0, r6          ! t=0 from previous tst
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
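        ! The shll8/shlr16/shlr8/or sequence above rotates the 32-bit sum
        ! left by 8 bits, so the now even-aligned data that follows is
        ! accumulated in the byte lanes that match the odd starting byte.
        ! A matching rotation is applied again at label 9 before returning.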
        mov     r4, r0
        tst     #2, r0
        bt      2f
21:
        ! buf is 2 byte aligned (len could be 0)
        add     #-2, r5         ! Alignment uses up two bytes.
        cmp/pz  r5              !
        bt/s    1f              ! Jump if we had at least two bytes.
         clrt
        bra     6f
         add    #2, r5          ! r5 was < 2.  Deal with it.
1:
        mov.w   @r4+, r0
        extu.w  r0, r0
        addc    r0, r6
        bf      2f
        add     #1, r6
2:
        ! buf is 4 byte aligned (len could be 0)
        mov     r5, r1
        mov     #-5, r0
        shld    r0, r1
        tst     r1, r1
        bt/s    4f              ! if it's =0, go to 4f
         clrt
        .align  2
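        ! Main loop: r1 = len / 32 (the shld by -5 above).  Each pass reads
        ! eight longwords (32 bytes) and accumulates them with addc; the
        ! carry (T bit) is preserved across dt/bf/s by saving it with movt
        ! and restoring it with cmp/eq #1.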
3:
        mov.l   @r4+, r0
        mov.l   @r4+, r2
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        addc    r0, r6
        addc    r2, r6
        movt    r0
        dt      r1
        bf/s    3b
         cmp/eq #1, r0
        ! here, we know r1==0
        addc    r1, r6          ! add carry to r6
4:
        mov     r5, r0
        and     #0x1c, r0
        tst     r0, r0
        bt      6f
        ! 4 bytes or more remaining
        mov     r0, r1
        shlr2   r1
        mov     #0, r2
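        ! r1 = number of remaining whole longwords ((len & 0x1c) >> 2);
        ! sum them one longword per pass, again carrying the T bit across
        ! the loop with movt/cmp.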
5:
        addc    r2, r6
        mov.l   @r4+, r2
        movt    r0
        dt      r1
        bf/s    5b
         cmp/eq #1, r0
        addc    r2, r6
        addc    r1, r6          ! r1==0 here, so it means add carry-bit
6:
        ! 3 bytes or less remaining
        mov     #3, r0
        and     r0, r5
        tst     r5, r5
        bt      9f              ! if it's =0 go to 9f
        mov     #2, r1
        cmp/hs  r1, r5
        bf      7f
        mov.w   @r4+, r0
        extu.w  r0, r0
        cmp/eq  r1, r5
        bt/s    8f
         clrt
        shll16  r0
        addc    r0, r6
7:
        mov.b   @r4+, r0
        extu.b  r0, r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
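        ! On big-endian the trailing byte is the high-order byte of its
        ! 16-bit word, hence the shll8 above; on little-endian it already
        ! sits in the low byte lane.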
8:
        addc    r0, r6
        mov     #0, r0
        addc    r0, r6
9:
        ! Check if the buffer was misaligned, if so realign sum
        mov     r7, r0
        tst     #1, r0
        bt      10f
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
10:
        rts
         mov    r6, r0

/*
 * asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial with
 * initial sum being ~0U
 */

#define EXC(...)                        \
        9999: __VA_ARGS__ ;             \
        .section __ex_table, "a";       \
        .long 9999b, 6001f ;            \
        .previous
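
/*
 * Every load/store below that may touch a user buffer is wrapped in EXC():
 * the macro records the instruction address together with the fixup label
 * 6001 (in the .fixup section at the end of this file) in __ex_table, so a
 * fault during the copy makes the whole routine return 0.
 */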

!
! r4:   const char *SRC
! r5:   char *DST
! r6:   int LEN
!
ENTRY(csum_partial_copy_generic)
        mov     #-1,r7
        mov     #3,r0           ! Check src and dest are equally aligned
        mov     r4,r1
        and     r0,r1
        and     r5,r0
        cmp/eq  r1,r0
        bf      3f              ! Different alignments, use slow version
        tst     #1,r0           ! Check dest word aligned
        bf      3f              ! If not, do it the slow way

        mov     #2,r0
        tst     r0,r5           ! Check dest alignment.
        bt      2f              ! Jump if alignment is ok.
        add     #-2,r6          ! Alignment uses up two bytes.
        cmp/pz  r6              ! Jump if we had at least two bytes.
        bt/s    1f
         clrt
        add     #2,r6           ! r6 was < 2.  Deal with it.
        bra     4f
         mov    r6,r2

3:      ! Handle different src and dest alignments.
        ! This is not common, so simple byte by byte copy will do.
        mov     r6,r2
        shlr    r6
        tst     r6,r6
        bt      4f
        clrt
        .align  2
5:
EXC(    mov.b   @r4+,r1         )
EXC(    mov.b   @r4+,r0         )
        extu.b  r1,r1
EXC(    mov.b   r1,@r5          )
EXC(    mov.b   r0,@(1,r5)      )
        extu.b  r0,r0
        add     #2,r5

#ifdef __LITTLE_ENDIAN__
        shll8   r0
#else
        shll8   r1
#endif
        or      r1,r0
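        ! The two bytes just copied are merged into a single 16-bit value
        ! in memory order (the #ifdef above selects which byte is shifted
        ! up), so the sum matches what a word-wide load would have added.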

        addc    r0,r7
        movt    r0
        dt      r6
        bf/s    5b
         cmp/eq #1,r0
        mov     #0,r0
        addc    r0, r7

        mov     r2, r0
        tst     #1, r0
        bt      7f
        bra     5f
         clrt

        ! src and dest equally aligned, but to a two byte boundary.
        ! Handle first two bytes as a special case
        .align  2
1:
EXC(    mov.w   @r4+,r0         )
EXC(    mov.w   r0,@r5          )
        add     #2,r5
        extu.w  r0,r0
        addc    r0,r7
        mov     #0,r0
        addc    r0,r7
2:
        mov     r6,r2
        mov     #-5,r0
        shld    r0,r6
        tst     r6,r6
        bt/s    2f
         clrt
        .align  2
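        ! Unrolled copy-and-checksum loop: r6 = len / 32 (from the shld by
        ! -5 above) and r2 keeps the original length for the tail handling.
        ! Each pass moves eight longwords (32 bytes) from @r4 to @r5 while
        ! accumulating them into r7 with addc.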
1:
EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@r5          )
EXC(    mov.l   r1,@(4,r5)      )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(8,r5)      )
EXC(    mov.l   r1,@(12,r5)     )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(16,r5)     )
EXC(    mov.l   r1,@(20,r5)     )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(24,r5)     )
EXC(    mov.l   r1,@(28,r5)     )
        addc    r1,r7
        add     #32,r5
        movt    r0
        dt      r6
        bf/s    1b
         cmp/eq #1,r0
        mov     #0,r0
        addc    r0,r7

2:      mov     r2,r6
        mov     #0x1c,r0
        and     r0,r6
        cmp/pl  r6
        bf/s    4f
         clrt
        shlr2   r6
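        ! r6 = number of remaining whole longwords ((len & 0x1c) >> 2);
        ! copy and sum them one longword at a time.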
3:
EXC(    mov.l   @r4+,r0         )
        addc    r0,r7
EXC(    mov.l   r0,@r5          )
        add     #4,r5
        movt    r0
        dt      r6
        bf/s    3b
         cmp/eq #1,r0
        mov     #0,r0
        addc    r0,r7
4:      mov     r2,r6
        mov     #3,r0
        and     r0,r6
        cmp/pl  r6
        bf      7f
        mov     #2,r1
        cmp/hs  r1,r6
        bf      5f
EXC(    mov.w   @r4+,r0         )
EXC(    mov.w   r0,@r5          )
        extu.w  r0,r0
        add     #2,r5
        cmp/eq  r1,r6
        bt/s    6f
         clrt
        shll16  r0
        addc    r0,r7
5:
EXC(    mov.b   @r4+,r0         )
EXC(    mov.b   r0,@r5          )
        extu.b  r0,r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
6:      addc    r0,r7
        mov     #0,r0
        addc    r0,r7
7:

# Exception handler:
.section .fixup, "ax"

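        ! Fault fixup: any EXC()-wrapped access that faults jumps here via
        ! __ex_table and the routine returns 0.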
6001:
        rts
         mov    #0,r0
        .previous
        rts
         mov    r7,r0