update source to Ceph Pacific 16.2.2
diff --git a/ceph/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S b/ceph/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S
new file mode 100644 (file)
index 0000000..98cf129
--- /dev/null
@@ -0,0 +1,176 @@
+########################################################################
+#  Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Arm Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+       .arch armv8-a+crc+crypto
+       .text
+       .align  3
+       .global crc32_gzip_refl_hw_fold
+       .type   crc32_gzip_refl_hw_fold, %function
+
+/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */
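+
+/*
+ * Computes the gzip/zlib reflected CRC-32 with the ARMv8 CRC32 instructions.
+ * Buffers of 1024 bytes or more are processed in 1024-byte blocks: the first
+ * 1008 bytes of each block are CRC'd as three independent 336-byte streams
+ * (so the crc32x dependency chains can run in parallel), the partial CRCs
+ * are then folded together with PMULL multiplications by pre-computed
+ * constants, and the last 16 bytes of the block are absorbed during that
+ * combine step.  Any remaining tail is consumed 8/4/2/1 bytes at a time.
+ */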
+
+w_seed         .req    w0
+w_crc          .req    w0
+x_buf          .req    x1
+x_len          .req    x2
+
+x_buf_loop_end .req    x10
+x_buf_iter     .req    x10
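+/* x_buf_loop_end and x_buf_iter deliberately share x10: when the 1KB fold
+   loop runs, its end pointer is exactly where the tail processing starts;
+   when the fold loop is skipped, x10 is simply left pointing at buf. */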
+
+x_tmp          .req    x15
+w_tmp          .req    w15
+
+d_c0           .req    d3
+d_c1           .req    d1
+v_c0           .req    v3
+v_c1           .req    v1
+crc32_gzip_refl_hw_fold:
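+	/* the gzip CRC-32 convention inverts the CRC on entry and again on exit */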
+       mvn     w_seed, w_seed
+       cmp     x_len, 1023
+       mov     x_buf_iter, x_buf
+       bls     .loop_fold_end
+
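+	/* x_buf_loop_end = buf + (len rounded down to a multiple of 1024) */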
+       sub     x_buf_loop_end, x_len, #1024
+       and     x_buf_loop_end, x_buf_loop_end, -1024
+       add     x_buf_loop_end, x_buf_loop_end, 1024
+       add     x_buf_loop_end, x_buf, x_buf_loop_end
+
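+	/* pre-computed folding constants: a PMULL by one of these, followed by
+	   a crc32x reduction, shifts a partial CRC forward by a fixed distance
+	   so the three per-stream CRCs line up and can be xor-combined below */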
+       mov     x_tmp, 0x819b
+       movk    x_tmp, 0xb486, lsl 16
+       fmov    d_c0, x_tmp
+
+       mov     x_tmp, 0x8617
+       movk    x_tmp, 0x7627, lsl 16
+       fmov    d_c1, x_tmp
+
+x_in64         .req    x3
+w_crc0         .req    w0
+w_crc1         .req    w4
+w_crc2         .req    w5
+
+d_crc0         .req    d4
+d_crc1         .req    d5
+v_crc0         .req    v4
+v_crc1         .req    v5
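+
+	/* each iteration of .loop_fold consumes one full 1024-byte block */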
+       .align 3
+.loop_fold:
+       add     x9, x_buf, 336
+       mov     x_in64, x_buf
+       mov     w_crc1, 0
+       mov     w_crc2, 0
+
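+	/* 42 passes: 8 bytes per stream per pass, streams start at buf + 0,
+	   buf + 336 and buf + 672 */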
+       .align 3
+.loop_for:
+       ldr     x8, [x_in64]
+       ldr     x7, [x_in64, 336]
+       ldr     x6, [x_in64, 672]
+
+       add     x_in64, x_in64, 8
+       cmp     x_in64, x9
+
+       crc32x  w_crc0, w_crc0, x8
+       crc32x  w_crc1, w_crc1, x7
+       crc32x  w_crc2, w_crc2, x6
+       bne     .loop_for
+
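+	/* combine: fold crc0 and crc1 forward (PMULL + crc32x reduction), let
+	   crc2 absorb bytes 1008-1015, xor the three partial CRCs, then absorb
+	   the final 8 bytes of the block */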
+       uxtw    x_tmp, w_crc0
+       fmov    d_crc0, x_tmp
+       pmull   v_crc0.1q, v_crc0.1d, v_c0.1d
+
+       uxtw    x_tmp, w_crc1
+       fmov    d_crc1, x_tmp
+       pmull   v_crc1.1q, v_crc1.1d, v_c1.1d
+
+       ldr     x_tmp, [x_buf, 1008]
+       crc32x  w_crc2, w_crc2, x_tmp
+
+       fmov    x_tmp, d_crc0
+       crc32x  w_crc0, wzr, x_tmp
+
+       fmov    x_tmp, d_crc1
+       crc32x  w_crc1, wzr, x_tmp
+
+       eor     w_crc0, w_crc0, w_crc1
+       eor     w_crc0, w_crc0, w_crc2
+
+       ldr     x_tmp, [x_buf, 1016]
+       crc32x  w_crc0, w_crc0, x_tmp
+
+       add     x_buf, x_buf, 1024
+       cmp     x_buf_loop_end, x_buf
+       bne     .loop_fold
+
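+	/* fewer than 1024 bytes remain; x_buf_iter (x10) already points at them */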
+       and     x_len, x_len, 1023
+
+x_buf_loop_size8_end   .req    x3
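+/* tail: consume what is left with plain crc32x/crc32w/crc32h/crc32b,
+   8, then 4, then 2, then 1 bytes at a time */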
+.loop_fold_end:
+       cmp     x_len, 7
+       bls     .size_4
+
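+	/* x_buf_loop_size8_end = buf_iter + (len rounded down to a multiple of 8) */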
+       sub     x_buf_loop_size8_end, x_len, #8
+       and     x_buf_loop_size8_end, x_buf_loop_size8_end, -8
+       add     x_buf_loop_size8_end, x_buf_loop_size8_end, 8
+       add     x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end
+
+       .align 3
+.loop_size_8:
+       ldr     x_tmp, [x_buf_iter], 8
+       crc32x  w_crc, w_crc, x_tmp
+
+       cmp     x_buf_iter, x_buf_loop_size8_end
+       bne     .loop_size_8
+
+       and     x_len, x_len, 7
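+
+	/* at most 7 bytes remain: at most one word, one halfword and one byte */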
+.size_4:
+       cmp     x_len, 3
+       bls     .size_2
+
+       ldr     w_tmp, [x_buf_iter], 4
+       crc32w  w_crc, w_crc, w_tmp
+
+       sub     x_len, x_len, #4
+.size_2:
+       cmp     x_len, 1
+       bls     .size_1
+
+       ldrh    w_tmp, [x_buf_iter], 2
+       crc32h  w_crc, w_crc, w_tmp
+
+       sub     x_len, x_len, #2
+.size_1:
+       cbz     x_len, .done
+
+       ldrb    w_tmp, [x_buf_iter]
+       crc32b  w_crc, w_crc, w_tmp
+
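+	/* undo the initial inversion to produce the final gzip CRC-32 */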
+.done:
+       mvn     w_crc, w_crc
+       ret
+
+       .size   crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold