]>
Commit | Line | Data |
---|---|---|
6ba6c74d AB |
1 | /* |
2 | * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions | |
3 | * | |
4 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | #include <linux/linkage.h> | |
12 | #include <asm/assembler.h> | |
13 | ||
14 | .text | |
15 | .arch armv8-a+crypto | |
16 | ||
17 | dga .req q20 | |
18 | dgav .req v20 | |
19 | dgb .req q21 | |
20 | dgbv .req v21 | |
21 | ||
22 | t0 .req v22 | |
23 | t1 .req v23 | |
24 | ||
25 | dg0q .req q24 | |
26 | dg0v .req v24 | |
27 | dg1q .req q25 | |
28 | dg1v .req v25 | |
29 | dg2q .req q26 | |
30 | dg2v .req v26 | |
31 | ||
32 | .macro add_only, ev, rc, s0 | |
33 | mov dg2v.16b, dg0v.16b | |
34 | .ifeq \ev | |
35 | add t1.4s, v\s0\().4s, \rc\().4s | |
36 | sha256h dg0q, dg1q, t0.4s | |
37 | sha256h2 dg1q, dg2q, t0.4s | |
38 | .else | |
39 | .ifnb \s0 | |
40 | add t0.4s, v\s0\().4s, \rc\().4s | |
41 | .endif | |
42 | sha256h dg0q, dg1q, t1.4s | |
43 | sha256h2 dg1q, dg2q, t1.4s | |
44 | .endif | |
45 | .endm | |
46 | ||
47 | .macro add_update, ev, rc, s0, s1, s2, s3 | |
48 | sha256su0 v\s0\().4s, v\s1\().4s | |
49 | add_only \ev, \rc, \s1 | |
50 | sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s | |
51 | .endm | |
52 | ||
53 | /* | |
54 | * The SHA-256 round constants | |
55 | */ | |
56 | .align 4 | |
57 | .Lsha2_rcon: | |
58 | .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 | |
59 | .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 | |
60 | .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 | |
61 | .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 | |
62 | .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc | |
63 | .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da | |
64 | .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 | |
65 | .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 | |
66 | .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 | |
67 | .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 | |
68 | .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 | |
69 | .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 | |
70 | .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 | |
71 | .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 | |
72 | .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 | |
73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | |
74 | ||
75 | /* | |
76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | |
77 | * u8 *head, long bytes) | |
78 | */ | |
79 | ENTRY(sha2_ce_transform) | |
80 | /* load round constants */ | |
81 | adr x8, .Lsha2_rcon | |
82 | ld1 { v0.4s- v3.4s}, [x8], #64 | |
83 | ld1 { v4.4s- v7.4s}, [x8], #64 | |
84 | ld1 { v8.4s-v11.4s}, [x8], #64 | |
85 | ld1 {v12.4s-v15.4s}, [x8] | |
86 | ||
87 | /* load state */ | |
88 | ldp dga, dgb, [x2] | |
89 | ||
90 | /* load partial input (if supplied) */ | |
91 | cbz x3, 0f | |
92 | ld1 {v16.4s-v19.4s}, [x3] | |
93 | b 1f | |
94 | ||
95 | /* load input */ | |
96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | |
97 | sub w0, w0, #1 | |
98 | ||
99 | 1: | |
100 | CPU_LE( rev32 v16.16b, v16.16b ) | |
101 | CPU_LE( rev32 v17.16b, v17.16b ) | |
102 | CPU_LE( rev32 v18.16b, v18.16b ) | |
103 | CPU_LE( rev32 v19.16b, v19.16b ) | |
104 | ||
105 | 2: add t0.4s, v16.4s, v0.4s | |
106 | mov dg0v.16b, dgav.16b | |
107 | mov dg1v.16b, dgbv.16b | |
108 | ||
109 | add_update 0, v1, 16, 17, 18, 19 | |
110 | add_update 1, v2, 17, 18, 19, 16 | |
111 | add_update 0, v3, 18, 19, 16, 17 | |
112 | add_update 1, v4, 19, 16, 17, 18 | |
113 | ||
114 | add_update 0, v5, 16, 17, 18, 19 | |
115 | add_update 1, v6, 17, 18, 19, 16 | |
116 | add_update 0, v7, 18, 19, 16, 17 | |
117 | add_update 1, v8, 19, 16, 17, 18 | |
118 | ||
119 | add_update 0, v9, 16, 17, 18, 19 | |
120 | add_update 1, v10, 17, 18, 19, 16 | |
121 | add_update 0, v11, 18, 19, 16, 17 | |
122 | add_update 1, v12, 19, 16, 17, 18 | |
123 | ||
124 | add_only 0, v13, 17 | |
125 | add_only 1, v14, 18 | |
126 | add_only 0, v15, 19 | |
127 | add_only 1 | |
128 | ||
129 | /* update state */ | |
130 | add dgav.4s, dgav.4s, dg0v.4s | |
131 | add dgbv.4s, dgbv.4s, dg1v.4s | |
132 | ||
133 | /* handled all input blocks? */ | |
134 | cbnz w0, 0b | |
135 | ||
136 | /* | |
137 | * Final block: add padding and total bit count. | |
138 | * Skip if we have no total byte count in x4. In that case, the input | |
139 | * size was not a round multiple of the block size, and the padding is | |
140 | * handled by the C code. | |
141 | */ | |
142 | cbz x4, 3f | |
143 | movi v17.2d, #0 | |
144 | mov x8, #0x80000000 | |
145 | movi v18.2d, #0 | |
146 | ror x7, x4, #29 // ror(lsl(x4, 3), 32) | |
147 | fmov d16, x8 | |
148 | mov x4, #0 | |
149 | mov v19.d[0], xzr | |
150 | mov v19.d[1], x7 | |
151 | b 2b | |
152 | ||
153 | /* store new state */ | |
154 | 3: stp dga, dgb, [x2] | |
155 | ret | |
156 | ENDPROC(sha2_ce_transform) |