ceph/src/common/crc32c_ppc_fast_zero_asm.S

   1 /*
   2  * Use the fixed point version of Barrett reduction to compute a mod n
   3  * over GF(2) for given n using POWER8 instructions. We use k = 32.
   4  *
   5  * http://en.wikipedia.org/wiki/Barrett_reduction
   6  *
   7  * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
   8  *
   9  * This program is free software; you can redistribute it and/or
  10  * modify it under the terms of either:
  11  *
  12  *  a) the GNU General Public License as published by the Free Software
  13  *     Foundation; either version 2 of the License, or (at your option)
  14  *     any later version, or
  15  *  b) the Apache License, Version 2.0
  16  */
  17 #include <ppc-asm.h>
  18 #include "common/ppc-opcode.h"
  19
     /*
      * barrett_reduction below uses the '@toc' relocation operators.
      * NOTE(review): presumably <ppc-asm.h> defines 'toc' as a preprocessor
      * macro, which would rewrite those operators; the macro is dropped
      * here so they reach the assembler intact. Confirm against the header.
      */
  20 #undef toc
  21
     /*
      * Fallback numeric aliases for r1 and r2, used only when no earlier
      * header has defined them. r2 is the base register of the TOC-relative
      * address computation in barrett_reduction.
      */
  22 #ifndef r1
  23 #define r1 1
  24 #endif
  25
  26 #ifndef r2
  27 #define r2 2
  28 #endif
  29
     /*
      * Constants for barrett_reduction: two 128-bit values, m first and
      * n 16 bytes after it. barrett_reduction fetches them with lvx at
      * offsets 0 and 16 from the label, so the table is kept 16-byte
      * aligned.
      * NOTE(review): nothing visible in this file writes to these values,
      * yet they live in .data; a read-only section may be suitable - confirm.
      */
  30         .section        .data
  31 .balign 16
  32
  33 .barrett_fz_constants:
  34         /* Barrett constant m = (4^32)/n */
  35         .octa 0x0000000000000000000000011f91caf6        /* x^64 div p(x) */
  36         /* Barrett constant n */
  37         .octa 0x0000000000000000000000011edc6f41
  38
  39 .text
  40 /*
      * unsigned int barrett_reduction(unsigned long val)
      *
      * Reduce val modulo the polynomial n over GF(2) using Barrett
      * reduction with the precomputed constants m and n stored at
      * .barrett_fz_constants.
      *
      * In:     r3 = val (referred to as "a" in the comments below)
      *         r2 = TOC base, used to address the constants
      *              (NOTE(review): presumably established by FUNC_START
      *              or the caller - confirm)
      * Out:    r3 = low 64 bits of a xor q*n
      * Writes: r3, r4, r5, v0-v4
      *
      * Makes no calls and does not touch the stack.
      * NOTE(review): MTVRD, MFVRD and VPMSUMD are macros, presumably from
      * "common/ppc-opcode.h", that encode the corresponding POWER8
      * instructions - confirm.
      */
  41 FUNC_START(barrett_reduction)
  42         addis   r4,r2,.barrett_fz_constants@toc@ha     /* r4 = address of the constants, */
  43         addi    r4,r4,.barrett_fz_constants@toc@l      /* built TOC-relative: high part, then low */
  44
  45         li      r5,16           /* byte offset of n from m */
  46         vxor    v1,v1,v1        /* zero v1 */
  47
  48         /* Get a into v0 */
  49         MTVRD(v0, r3)
  50         vsldoi  v0,v1,v0,8      /* shift into bottom 64 bits, this is a */
  51
  52         /* Load constants */
  53         lvx     v2,0,r4         /* m */
  54         lvx     v3,r5,r4        /* n */
  55
  56         /*
  57          * Now for the actual algorithm. The idea is to calculate q,
  58          * the multiple of our polynomial that we need to subtract. By
  59          * doing the computation 2x bits higher (ie 64 bits) and shifting the
  60          * result back down 2x bits, we round down to the nearest multiple.
  61          */
  62         VPMSUMD(v4,v0,v2)       /* ma */
  63         vsldoi  v4,v1,v4,8      /* q = floor(ma/(2^64)) */
  64         VPMSUMD(v4,v4,v3)       /* qn */
  65         vxor    v0,v0,v4        /* a - qn, subtraction is xor in GF(2) */
  66
  67         /*
  68          * Get the result into r3. We need to shift it left 8 bytes:
  69          * V0 [ 0 1 2 X ]
  70          * V0 [ 0 X 2 3 ]
  71          */
  72         vsldoi  v0,v0,v1,8      /* shift result into top 64 bits of v0 */
  73         MFVRD(r3, v0)           /* r3 = top 64 bits of v0, i.e. the result */
  74
  75         blr
  76 FUNC_END(barrett_reduction)
  77