; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
%include "sha256_mb_mgr_datastruct.asm"
%include "reg_sizes.asm"
[bits 64]
default rel
section .text
;; code to compute quad SHA256 using AVX
;; Logic designed/laid out by JDG
; output looks like: {t0 r1 r0 r3}
; t0 = {d0 c0 b0 a0}
; r1 = {d1 c1 b1 a1}
; r0 = {d2 c2 b2 a2}
; r3 = {d3 c3 b3 a3}
;
%macro TRANSPOSE 6
%define %%r0 %1
%define %%r1 %2
%define %%r2 %3
%define %%r3 %4
%define %%t0 %5
%define %%t1 %6
vshufps %%r0, %%r0, %%r2, 0x88 ; r0 = {d2 c2 b2 a2}
vshufps %%t0, %%t0, %%t1, 0x88 ; t0 = {d0 c0 b0 a0}
%endmacro
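;
; A usage sketch (register names here are illustrative, not the exact
; call sites in this file): with four message rows loaded as
;   TT0 = {a3 a2 a1 a0}    TT1 = {b3 b2 b1 b0}
;   TT2 = {c3 c2 c1 c0}    TT3 = {d3 d2 d1 d0}
; the invocation
;   TRANSPOSE TT0, TT1, TT2, TT3, TT4, TT5
; leaves each destination register holding the same word index from all
; four lanes, matching the output layout commented above the macro.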
%define TABLE K256_4_MB
; ALIGNMENT makes FRAMESZ + pushes an odd multiple of 8
%define FRAMESZ (DATA + DIGEST_SIZE + ALIGNMENT)
%define _DIGEST (DATA)
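;
; Worked example, assuming the sizes defined in the full source
; (SZ4 = 16, DATA = 16*SZ4 = 256, DIGEST_SIZE = 8*SZ4 = 128,
; ALIGNMENT = 1*8):
;   FRAMESZ = 256 + 128 + 8 = 392 = 49*8, an odd multiple of 8.
; Since the caller's CALL pushed 8 bytes, "sub rsp, FRAMESZ" below
; restores 16-byte stack alignment for the aligned vmovdqa spills.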
%define VMOVPS vmovups
%define inp0 r8
%define inp1 r9
%define inp2 r10
%define inp3 r11
%ifidn __OUTPUT_FORMAT__, elf64
; Linux definitions
%define arg1 rdi
%define arg2 rsi
%define ROUND rbx
%define TBL r12
;; void sha256_mb_x4_avx(SHA256_MB_ARGS_X8 *args, uint64_t len);
;; arg 1 : arg1 : pointer to args (only 4 of the 8 lanes used)
;; arg 2 : arg2 : size of data in blocks (assumed >= 1)
;;
;; Clobbers registers: arg2, rax, rbx, r8-r12, xmm0-xmm15
;;
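;; Call sketch (illustrative; "args" is a hypothetical buffer holding a
;; populated SHA256_MB_ARGS_X8 with lanes 0-3 valid):
;;	lea	rdi, [args]		; arg1: pointer to args
;;	mov	rsi, 1			; arg2: number of 64-byte blocks, >= 1
;;	call	sha256_mb_x4_avx
;;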
mk_global sha256_mb_x4_avx, function, internal
align 32
sha256_mb_x4_avx:
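	; endbranch (a macro from reg_sizes.asm) is expected to emit an
	; endbr64 CET landing pad when built with CET enabled, and nothing
	; otherwise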
	endbranch
sub rsp, FRAMESZ
;; Initialize digests
vmovdqa h,[arg1+7*SZ4]
lea TBL,[TABLE]
;; transpose input onto stack
mov inp0,[arg1 + _data_ptr + 0*8]
mov inp1,[arg1 + _data_ptr + 1*8]
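	; byte-swap each 32-bit word to big-endian; TMP is assumed to hold
	; the byte-flip shuffle mask (PSHUFFLE_BYTE_FLIP_MASK in the full
	; source), loaded in the elided code above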
vpshufb TT1, TT1, TMP
vpshufb TT2, TT2, TMP
vpshufb TT3, TT3, TMP
	ROUND_00_15	TT0,(i*4+0)
	ROUND_00_15	TT1,(i*4+1)
	ROUND_00_15	TT2,(i*4+2)
	ROUND_00_15	TT3,(i*4+3)
%assign i (i+1)
%endrep
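	; 4*4*4 = 64: sixteen 4-byte words were consumed from each lane,
	; i.e. one 64-byte SHA-256 block, so advance the input offset by 64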
add IDX, 4*4*4
%assign i (i*4)
jmp Lrounds_16_xx
vmovdqa [arg1+5*SZ4],f
vmovdqa [arg1+6*SZ4],g
vmovdqa [arg1+7*SZ4],h
; update input pointers
add inp0, IDX
mov [arg1 + _data_ptr + 0*8], inp0
;;;;;;;;;;;;;;;;
;; Postamble
add rsp, FRAMESZ
ret