arch/sh/lib/checksum.S

   1 /* SPDX-License-Identifier: GPL-2.0+
   2  *
   3  * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
   4  *
   5  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   6  *              operating system.  INET is implemented using the  BSD Socket
   7  *              interface as the means of communication with the user level.
   8  *
   9  *              IP/TCP/UDP checksumming routines
  10  *
  11  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  12  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  13  *              Tom May, <ftom@netcom.com>
  14  *              Pentium Pro/II routines:
  15  *              Alexander Kjeldaas <astor@guardian.no>
  16  *              Finn Arne Gangstad <finnag@guardian.no>
  17  *              Lots of code moved from tcp.c and ip.c; see those files
  18  *              for more names.
  19  *
  20  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  21  *                           handling.
  22  *              Andi Kleen,  add zeroing on error
  23  *                   converted to pure assembler
  24  *
  25  * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  26  */
  27
  28 #include <asm/errno.h>
  29 #include <linux/linkage.h>
  30
  31 /*
  32  * computes a partial checksum, e.g. for TCP/UDP fragments
  33  */
  34
  35 /*
  36  * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
  37  */
  38
  39 .text
  40 ENTRY(csum_partial)
     /*
      * Register use, as read from the code below (arguments follow the
      * prototype in the comment above):
      *   r4     buf, advanced by the post-increment loads
      *   r5     len, reduced as leading bytes are consumed
      *   r6     sum, the 32-bit accumulator
      *   r7     copy of the original buf, used at 9: for the odd-address check
      *   r0-r3  scratch; r1 is the loop counter of both summing loops
      * The result is returned in r0.
      *
      * Carry handling: dt overwrites T, so each loop saves the addc carry
      * with movt before dt and restores it with cmp/eq #1 in the branch
      * delay slot.
      */
  41           /*
  42            * Experiments with Ethernet and SLIP connections show that buff
  43            * is aligned on either a 2-byte or 4-byte boundary.  We get at
  44            * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  45            * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  46            * alignment for the unrolled loop.
  47            */
  48         mov     r4, r0
  49         tst     #3, r0          ! Check alignment.
  50         bt/s    2f              ! Jump if alignment is ok.
  51          mov    r4, r7          ! Keep a copy to check for alignment
  52         !
  53         tst     #1, r0          ! Check alignment.
  54         bt      21f             ! Jump if alignment is boundary of 2bytes.
  55
  56         ! buf is odd
             ! The leading byte is added in its true byte lane, then the sum is
             ! rotated left by 8 so the aligned loads below line up with it.
             ! The matching rotate at 9: brings the sum back.
  57         tst     r5, r5
  58         add     #-1, r5
  59         bt      10f             ! len == 0: nothing was summed or rotated,
                                     ! so bypass the rotate at 9: as well
  60         mov.b   @r4+, r0
  61         extu.b  r0, r0
     #ifndef __LITTLE_ENDIAN__
             shll8   r0              ! big endian: first byte is the high lane
     #endif
  62         addc    r0, r6          ! t=0 from previous tst
             mov     #0, r0
             addc    r0, r6          ! fold the carry in before tst clobbers T
  63         mov     r6, r0
  64         shll8   r6
  65         shlr16  r0
  66         shlr8   r0
  67         or      r0, r6          ! r6 = old r6 rotated left by 8 bits
  68         mov     r4, r0
  69         tst     #2, r0
  70         bt      2f
  71 21:
  72         ! buf is 2 byte aligned (len could be 0)
  73         add     #-2, r5         ! Alignment uses up two bytes.
  74         cmp/pz  r5              !
  75         bt/s    1f              ! Jump if we had at least two bytes.
  76          clrt
  77         bra     6f
  78          add    #2, r5          ! r5 was < 2.  Deal with it.
  79 1:
  80         mov.w   @r4+, r0
  81         extu.w  r0, r0
  82         addc    r0, r6
  83         bf      2f
  84         add     #1, r6          ! fold the carry from the addc above
  85 2:
  86         ! buf is 4 byte aligned (len could be 0)
  87         mov     r5, r1
  88         mov     #-5, r0
  89         shld    r0, r1          ! r1 = r5 >> 5: number of 32-byte chunks
  90         tst     r1, r1
  91         bt/s    4f              ! if it's =0, go to 4f
  92          clrt
  93         .align  2
  94 3:
             ! One iteration sums eight longwords (32 bytes).
  95         mov.l   @r4+, r0
  96         mov.l   @r4+, r2
  97         mov.l   @r4+, r3
  98         addc    r0, r6
  99         mov.l   @r4+, r0
 100         addc    r2, r6
 101         mov.l   @r4+, r2
 102         addc    r3, r6
 103         mov.l   @r4+, r3
 104         addc    r0, r6
 105         mov.l   @r4+, r0
 106         addc    r2, r6
 107         mov.l   @r4+, r2
 108         addc    r3, r6
 109         addc    r0, r6
 110         addc    r2, r6
 111         movt    r0              ! save the carry; dt overwrites T
 112         dt      r1
 113         bf/s    3b
 114          cmp/eq #1, r0          ! restore T from the saved carry
 115         ! here, we know r1==0
 116         addc    r1, r6                  ! add carry to r6
 117 4:
 118         mov     r5, r0
 119         and     #0x1c, r0       ! bytes left in whole longwords (0..28)
 120         tst     r0, r0
 121         bt      6f
 122         ! 4 bytes or more remaining
 123         mov     r0, r1
 124         shlr2   r1              ! r1 = number of longwords
 125         mov     #0, r2
 126 5:
             ! Adds the longword loaded by the previous pass (0 on the first).
 127         addc    r2, r6
 128         mov.l   @r4+, r2
 129         movt    r0
 130         dt      r1
 131         bf/s    5b
 132          cmp/eq #1, r0
 133         addc    r2, r6
 134         addc    r1, r6          ! r1==0 here, so it means add carry-bit
 135 6:
 136         ! 3 bytes or less remaining
 137         mov     #3, r0
 138         and     r0, r5
 139         tst     r5, r5
 140         bt      9f              ! if it's =0 go to 9f
 141         mov     #2, r1
 142         cmp/hs  r1, r5
 143         bf      7f
 144         mov.w   @r4+, r0
 145         extu.w  r0, r0
 146         cmp/eq  r1, r5
 147         bt/s    8f
 148          clrt
 149         shll16  r0
 150         addc    r0, r6
 151 7:
 152         mov.b   @r4+, r0
 153         extu.b  r0, r0
 154 #ifndef __LITTLE_ENDIAN__
 155         shll8   r0
 156 #endif
 157 8:
 158         addc    r0, r6
 159         mov     #0, r0
 160         addc    r0, r6
 161 9:
 162         ! Check if the buffer was misaligned, if so realign sum
             ! (the len == 0 exit of the odd-address path jumps to 10: instead)
 163         mov     r7, r0
 164         tst     #1, r0
 165         bt      10f
 166         mov     r6, r0
 167         shll8   r6
 168         shlr16  r0
 169         shlr8   r0
 170         or      r0, r6          ! rotate left by 8 again
 171 10:
 172         rts
 173          mov    r6, r0          ! delay slot: return value
 174
 175 /*
 176 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 177                                         int sum, int *src_err_ptr, int *dst_err_ptr)
 178  */
 179
 180 /*
 181  * Copy from src to dst while checksumming, otherwise like csum_partial
 182  *
 183  * The macros SRC and DST specify the type of access for the instruction.
 184  * thus we can call a custom exception handler for all access types.
      *
      * Each macro places the wrapped instruction at local label 9999 and adds
      * an __ex_table entry made of two longwords: the address of that
      * instruction and the address of a fixup label. SRC pairs it with the
      * next 6001 label, DST with the next 6002 label; both are forward
      * references, so the labels must be defined after every use.
      * .previous then switches back to the section that was active before.
 185  *
 186  * FIXME: could someone double-check whether I haven't mixed up some SRC and
 187  *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 188  *        them all but there's no guarantee.
 189  */
 190
 191 #define SRC(...)                        \
 192         9999: __VA_ARGS__ ;             \
 193         .section __ex_table, "a";       \
 194         .long 9999b, 6001f      ;       \
 195         .previous
 196
 197 #define DST(...)                        \
 198         9999: __VA_ARGS__ ;             \
 199         .section __ex_table, "a";       \
 200         .long 9999b, 6002f      ;       \
 201         .previous
 202
 203 !
 204 ! r4:   const char *SRC
 205 ! r5:   char *DST
 206 ! r6:   int LEN
 207 ! r7:   int SUM
 208 !
 209 ! on stack:
 210 ! int *SRC_ERR_PTR
 211 ! int *DST_ERR_PTR
 212 !
 213 ENTRY(csum_partial_copy_generic)
             /*
              * Copies r6 bytes from r4 to r5 while adding them into the
              * checksum in r7; the updated sum is returned in r0.
              *
              * dst and len are pushed so the fixup code can reload them.
              * After the two pushes the stack holds:
              *   @(0,r15)   len
              *   @(4,r15)   dst
              *   @(8,r15)   src_err_ptr
              *   @(12,r15)  dst_err_ptr
              *
              * r2 keeps the remaining length while r6 is reused as a loop
              * counter. In each loop the addc carry is saved with movt before
              * dt (which overwrites T) and restored by cmp/eq #1 in the
              * branch delay slot.
              */
 214         mov.l   r5,@-r15
 215         mov.l   r6,@-r15
 216
 217         mov     #3,r0           ! Check src and dest are equally aligned
 218         mov     r4,r1
 219         and     r0,r1
 220         and     r5,r0
 221         cmp/eq  r1,r0
 222         bf      3f              ! Different alignments, use slow version
 223         tst     #1,r0           ! Check dest word aligned
 224         bf      3f              ! If not, do it the slow way
 225
 226         mov     #2,r0
 227         tst     r0,r5           ! Check dest alignment.
 228         bt      2f              ! Jump if alignment is ok.
 229         add     #-2,r6          ! Alignment uses up two bytes.
 230         cmp/pz  r6              ! Jump if we had at least two bytes.
 231         bt/s    1f
 232          clrt
 233         add     #2,r6           ! r6 was < 2.   Deal with it.
 234         bra     4f
 235          mov    r6,r2
 236
 237 3:      ! Handle different src and dest alignments.
 238         ! This is not common, so simple byte by byte copy will do.
             /*
              * r2 = len, r6 = len / 2. The loop at 5: below copies two bytes
              * per pass and adds them as one 16-bit word, built according to
              * the configured endianness.
              */
 239         mov     r6,r2
 240         shlr    r6
 241         tst     r6,r6
 242         bt      4f
 243         clrt
 244         .align  2
 245 5:
 246 SRC(    mov.b   @r4+,r1         )
 247 SRC(    mov.b   @r4+,r0         )
 248         extu.b  r1,r1
 249 DST(    mov.b   r1,@r5          )
 250 DST(    mov.b   r0,@(1,r5)      )
 251         extu.b  r0,r0
 252         add     #2,r5
 253
 254 #ifdef  __LITTLE_ENDIAN__
 255         shll8   r0
 256 #else
 257         shll8   r1
 258 #endif
 259         or      r1,r0
 260
 261         addc    r0,r7
 262         movt    r0
 263         dt      r6
 264         bf/s    5b
 265          cmp/eq #1,r0
 266         mov     #0,r0
 267         addc    r0, r7
 268
             /*
              * If len (saved in r2) is odd, one byte is left: branch to the
              * single-byte tail at the later 5: label. Otherwise finish at 7:.
              */
 269         mov     r2, r0
 270         tst     #1, r0
 271         bt      7f
 272         bra     5f
 273          clrt
 274
 275         ! src and dest equally aligned, but to a two byte boundary.
 276         ! Handle first two bytes as a special case
 277         .align  2
 278 1:
 279 SRC(    mov.w   @r4+,r0         )
 280 DST(    mov.w   r0,@r5          )
 281         add     #2,r5
 282         extu.w  r0,r0
 283         addc    r0,r7
 284         mov     #0,r0
 285         addc    r0,r7
 286 2:
             /*
              * src and dst are both 4-byte aligned here. r2 = remaining
              * length, r6 = r2 >> 5 = number of 32-byte chunks.
              */
 287         mov     r6,r2
 288         mov     #-5,r0
 289         shld    r0,r6
 290         tst     r6,r6
 291         bt/s    2f
 292          clrt
 293         .align  2
 294 1:
             /* One pass copies and sums eight longwords (32 bytes). */
 295 SRC(    mov.l   @r4+,r0         )
 296 SRC(    mov.l   @r4+,r1         )
 297         addc    r0,r7
 298 DST(    mov.l   r0,@r5          )
 299 DST(    mov.l   r1,@(4,r5)      )
 300         addc    r1,r7
 301
 302 SRC(    mov.l   @r4+,r0         )
 303 SRC(    mov.l   @r4+,r1         )
 304         addc    r0,r7
 305 DST(    mov.l   r0,@(8,r5)      )
 306 DST(    mov.l   r1,@(12,r5)     )
 307         addc    r1,r7
 308
 309 SRC(    mov.l   @r4+,r0         )
 310 SRC(    mov.l   @r4+,r1         )
 311         addc    r0,r7
 312 DST(    mov.l   r0,@(16,r5)     )
 313 DST(    mov.l   r1,@(20,r5)     )
 314         addc    r1,r7
 315
 316 SRC(    mov.l   @r4+,r0         )
 317 SRC(    mov.l   @r4+,r1         )
 318         addc    r0,r7
 319 DST(    mov.l   r0,@(24,r5)     )
 320 DST(    mov.l   r1,@(28,r5)     )
 321         addc    r1,r7
 322         add     #32,r5
 323         movt    r0
 324         dt      r6
 325         bf/s    1b
 326          cmp/eq #1,r0
 327         mov     #0,r0
 328         addc    r0,r7
 329
             /* r6 = r2 & 0x1c: bytes left in whole longwords; shlr2 turns it into a count. */
 330 2:      mov     r2,r6
 331         mov     #0x1c,r0
 332         and     r0,r6
 333         cmp/pl  r6
 334         bf/s    4f
 335          clrt
 336         shlr2   r6
 337 3:
 338 SRC(    mov.l   @r4+,r0 )
 339         addc    r0,r7
 340 DST(    mov.l   r0,@r5  )
 341         add     #4,r5
 342         movt    r0
 343         dt      r6
 344         bf/s    3b
 345          cmp/eq #1,r0
 346         mov     #0,r0
 347         addc    r0,r7
             /*
              * Tail: r6 = r2 & 3, i.e. 0 to 3 bytes left. Two or three bytes
              * start with a 16-bit copy; with three, that word is added
              * shifted left by 16 and the last byte follows at 5:.
              */
 348 4:      mov     r2,r6
 349         mov     #3,r0
 350         and     r0,r6
 351         cmp/pl  r6
 352         bf      7f
 353         mov     #2,r1
 354         cmp/hs  r1,r6
 355         bf      5f
 356 SRC(    mov.w   @r4+,r0 )
 357 DST(    mov.w   r0,@r5  )
 358         extu.w  r0,r0
 359         add     #2,r5
 360         cmp/eq  r1,r6
 361         bt/s    6f
 362          clrt
 363         shll16  r0
 364         addc    r0,r7
 365 5:
 366 SRC(    mov.b   @r4+,r0 )
 367 DST(    mov.b   r0,@r5  )
 368         extu.b  r0,r0
 369 #ifndef __LITTLE_ENDIAN__
 370         shll8   r0
 371 #endif
 372 6:      addc    r0,r7
 373         mov     #0,r0
 374         addc    r0,r7
 375 7:
 376 5000:
             /*
              * 5000 is the common exit and the address both fixup handlers
              * jump back to. The fixup code below is assembled into the .fixup
              * section, so the epilogue after .previous follows this label
              * directly in the original section.
              */
 377
 378 # Exception handler:
 379 .section .fixup, "ax"
 380
             /*
              * 6001: fixup target of every SRC() access. Stores -EFAULT
              * through src_err_ptr, writes len zero bytes starting at the
              * saved dst, and leaves r7 = 0, which the epilogue returns.
              * NOTE(review): the zeroing store below has no exception-table
              * entry of its own.
              */
 381 6001:
 382         mov.l   @(8,r15),r0                     ! src_err_ptr
 383         mov     #-EFAULT,r1
 384         mov.l   r1,@r0
 385
 386         ! zero the complete destination - computing the rest
 387         ! is too much work
 388         mov.l   @(4,r15),r5             ! dst
 389         mov.l   @r15,r6                 ! len
 390         mov     #0,r7
 391 1:      mov.b   r7,@r5
 392         dt      r6
 393         bf/s    1b
 394          add    #1,r5
 395         mov.l   8000f,r0
 396         jmp     @r0
 397          nop
 398         .align  2
 399 8000:   .long   5000b
 400
             /*
              * 6002: fixup target of every DST() access. Stores -EFAULT
              * through dst_err_ptr and resumes at 5000 with r7 unchanged.
              */
 401 6002:
 402         mov.l   @(12,r15),r0                    ! dst_err_ptr
 403         mov     #-EFAULT,r1
 404         mov.l   r1,@r0
 405         mov.l   8001f,r0
 406         jmp     @r0
 407          nop
 408         .align  2
 409 8001:   .long   5000b
 410
 411 .previous
             /* Epilogue: drop the saved dst and len, return the sum in r0. */
 412         add     #8,r15
 413         rts
 414          mov    r7,r0