]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "options.asm" | |
31 | %include "lz0a_const.asm" | |
f91f0fd5 | 32 | %include "stdmac.asm" |
7c673cae FG |
33 | |
34 | ; Macros for doing Huffman Encoding | |
35 | ||
36 | %ifdef LONGER_HUFFTABLE | |
37 | %if (D > 8192) | |
38 | %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE | |
39 | % error | |
40 | %else | |
41 | %define DIST_TABLE_SIZE 8192 | |
42 | %define DECODE_OFFSET 26 | |
43 | %endif | |
44 | %else | |
224ce89b WB |
45 | %define DIST_TABLE_SIZE 2 |
46 | %define DECODE_OFFSET 0 | |
7c673cae FG |
47 | %endif |
48 | ||
49 | %define LEN_TABLE_SIZE 256 | |
50 | %define LIT_TABLE_SIZE 257 | |
51 | ||
224ce89b | 52 | %define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) |
7c673cae FG |
53 | %define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) |
54 | %define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) | |
55 | %define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) | |
56 | %define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) | |
57 | %define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) | |
58 | %define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) | |
59 | ;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ | |
60 | ;; struct isal_hufftables { | |
61 | ;; // deflate huffman tree header | |
224ce89b | 62 | ;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; |
7c673cae FG |
63 | ;; |
64 | ;; //!< Number of whole bytes in deflate_huff_hdr | |
65 | ;; uint32_t deflate_huff_hdr_count; | |
66 | ;; | |
67 | ;; //!< Number of bits in the partial byte in header | |
68 | ;; uint32_t deflate_huff_hdr_extra_bits; | |
69 | ;; | |
70 | ;; //!< bits 4:0 are the code length, bits 31:5 are the code | |
71 | ;; uint32_t dist_table[DIST_TABLE_SIZE]; | |
72 | ;; | |
73 | ;; //!< bits 4:0 are the code length, bits 31:5 are the code | |
74 | ;; uint32_t len_table[LEN_TABLE_SIZE]; | |
75 | ;; | |
76 | ;; //!< literal codes (the whole 16-bit word is the code); the code lengths are read as separate bytes at LIT_TABLE_SIZES_OFFSET | |
77 | ;; uint16_t lit_table[LIT_TABLE_SIZE]; | |
78 | ;; | |
79 | ;; //!< distance codes (the whole 16-bit word is the code); the code lengths are read as separate bytes at DCODE_TABLE_SIZE_OFFSET | |
80 | ;; uint16_t dcodes[30 - DECODE_OFFSET]; | |
81 | ||
82 | ;; }; | |
83 | ||
84 | ||
85 | %ifdef LONGER_HUFFTABLE | |
86 | ; LONGER_HUFFTABLE variant: a single table load; unlike the computing variant, this body does not reference RCX and never writes dist | |
87 | ; get_dist_code dist, code, len, hufftables | |
88 | %macro get_dist_code 4 | |
89 | %define %%dist %1 ; 64-bit IN, table index; the dword read is at DIST_TABLE_START + 4*dist | |
90 | %define %%code %2d ; 32-bit OUT (`d` is appended to argument 2): packed entry >> 5 | |
91 | %define %%len %3d ; 32-bit OUT (`d` is appended to argument 3): packed entry & 0x1F | |
92 | %define %%hufftables %4 ; address of the hufftable | |
93 | ||
224ce89b | 94 | mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] |
7c673cae FG |
95 | mov %%code, %%len |
96 | and %%len, 0x1F; len = low 5 bits of the packed entry | |
97 | shr %%code, 5 | |
98 | %endm | |
99 | ||
100 | %macro get_packed_dist_code 3 | |
101 | %define %%dist %1 ; 64-bit IN, index; the dword read is at DIST_TABLE_OFFSET + 4*dist, i.e. DIST_TABLE_START + 4*(dist - 1) | |
102 | %define %%code_len %2d ; 32-bit OUT: the packed table dword, loaded unmodified (unpack_dist_code performs the 0x1F / >> 5 split) | |
103 | %define %%hufftables %3 ; address of the hufftable | |
104 | mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] | |
105 | %endm | |
106 | ||
107 | %macro unpack_dist_code 2 | |
108 | %define %%code %1d ; 32-bit IN/OUT: holds the packed entry on entry, packed >> 5 on exit | |
109 | %define %%len %2d ; 32-bit OUT: packed & 0x1F | |
110 | ||
111 | mov %%len, %%code | |
112 | and %%len, 0x1F; keep the low 5 bits as the length | |
113 | shr %%code, 5 | |
114 | %endm | |
115 | ||
116 | %else | |
117 | ; Assumes dist >= 2: rcx = bsr(dist) - 1 is used as a bit count (bsr is undefined for 0, and dist == 1 would give -1) | |
118 | ; Uses RCX, clobbers dist. NOTE(review): the table loads name the bare symbol `hufftables`, not the %%hufftables parameter, so argument 4 is unused here - confirm every caller passes hufftables | |
119 | ; compute_dist_code dist, code, len, hufftables | |
120 | %macro compute_dist_code 4 | |
224ce89b | 121 | %define %%dist %1 ; IN, clobbered (ends up holding the word loaded from DCODE_TABLE_OFFSET) |
7c673cae FG |
122 | %define %%distq %1 |
123 | %define %%code %2 ; OUT: table code word OR'ed with the shifted extra bits | |
124 | %define %%len %3 ; OUT: num_extra_bits + table size byte; also passed as the 4th BZHI operand (presumably a scratch register - confirm in the BZHI macro) | |
125 | %define %%hufftables %4 | |
126 | ||
224ce89b WB |
127 | bsr rcx, %%dist ; rcx = msb = bsr(dist) |
128 | dec rcx ; rcx = num_extra_bits = msb - 1 | |
129 | BZHI %%code, %%dist, rcx, %%len | |
130 | SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits | |
131 | lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 | |
132 | mov %%len, rcx ; len = num_extra_bits | |
133 | movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] | |
7c673cae | 134 | movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] |
224ce89b | 135 | SHLX %%code, %%code, rcx ; code = extra_bits << size, where size (rcx) is the byte just loaded for sym |
7c673cae | 136 | or %%code, %%dist ; code = (code word loaded for sym) | (extra_bits << size) |
224ce89b | 137 | add %%len, rcx ; len = num_extra_bits + size |
7c673cae FG |
138 | %endm |
139 | ||
140 | ; Table lookup when dist <= DIST_TABLE_SIZE - 1 (signed compare), otherwise compute_dist_code; RCX is used and dist clobbered only on the compute path. With IACA defined the table path is not assembled | |
141 | ; get_dist_code dist, code, len, hufftables (NOTE(review): the table load names the bare symbol `hufftables`, not argument 4 - confirm callers pass hufftables) | |
142 | %macro get_dist_code 4 | |
224ce89b | 143 | %define %%dist %1 ; IN, clobbered on the compute path |
7c673cae | 144 | %define %%distq %1 ; IN, same argument as %%dist |
224ce89b WB |
145 | %define %%code %2 ; OUT: packed entry >> 5 on the table path |
146 | %define %%len %3 ; OUT: packed entry & 0x1F on the table path; `d` is appended for the 32-bit load (presumably a 64-bit register name - confirm) | |
7c673cae FG |
147 | %define %%hufftables %4 |
148 | ||
224ce89b | 149 | cmp %%dist, DIST_TABLE_SIZE - 1 |
7c673cae | 150 | jg %%do_compute |
224ce89b WB |
151 | %ifndef IACA |
152 | mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] | |
7c673cae FG |
153 | mov %%code, %%len |
154 | and %%len, 0x1F; len = low 5 bits of the packed entry | |
155 | shr %%code, 5 | |
156 | jmp %%done | |
224ce89b | 157 | %endif |
7c673cae FG |
158 | %%do_compute: |
159 | compute_dist_code %%distq, %%code, %%len, %%hufftables | |
160 | %%done: | |
161 | %endm | |
162 | ||
163 | %macro get_packed_dist_code 3 | |
164 | %define %%dist %1 ; 64-bit IN (unused: this variant has an empty body) | |
165 | %define %%code_len %2d ; 32-bit OUT in the LONGER_HUFFTABLE variant; never written here because this variant emits no instructions | |
166 | %define %%hufftables %3 ; address of the hufftable (unused here) | |
167 | %endm | |
168 | ||
169 | %endif | |
170 | ||
171 | ||
224ce89b WB |
172 | ; Macros for doing Huffman Encoding |
173 | ||
174 | ; Assumes dist >= 2: rcx = bsr(dist) - 1 is used as a bit count (bsr is undefined for 0, and dist == 1 would give -1) | |
175 | ; Uses RCX, clobbers dist | |
176 | ; compute_dist_icf_code dist, code, tmp1 | |
177 | %macro compute_dist_icf_code 3 | |
178 | %define %%dist %1 ; IN, clobbered (ends up holding sym) | |
179 | %define %%distq %1 | |
180 | %define %%code %2 ; OUT: (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET)) + sym | |
181 | %define %%tmp1 %3 | |
182 | ||
183 | bsr rcx, %%dist ; rcx = msb = bsr(dist) | |
184 | dec rcx ; rcx = num_extra_bits = msb - 1 | |
185 | BZHI %%code, %%dist, rcx, %%tmp1 | |
186 | SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits | |
187 | lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 | |
188 | shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET | |
189 | add %%code, %%dist ; code = shifted extra_bits + sym | |
190 | ||
191 | %endm | |
192 | ||
193 | ; dist <= 1 (signed compare) is used directly as the code, larger values go through compute_dist_icf_code (which uses RCX and clobbers dist); either way code is finally shifted left by DIST_OFFSET | |
194 | ; get_dist_icf_code dist, code, tmp1 | |
195 | %macro get_dist_icf_code 3 | |
196 | %define %%dist %1 ; IN, clobbered on the compute path | |
197 | %define %%distq %1 ; IN, same argument as %%dist | |
198 | %define %%code %2 ; OUT: result << DIST_OFFSET | |
199 | %define %%tmp1 %3 | |
200 | ||
201 | cmp %%dist, 1 | |
202 | jg %%do_compute | |
203 | ||
204 | %ifnidn %%code, %%dist | |
205 | mov %%code, %%dist | |
206 | %endif | |
207 | jmp %%done | |
208 | %%do_compute: | |
209 | compute_dist_icf_code %%distq, %%code, %%tmp1 | |
210 | %%done: | |
211 | shl %%code, DIST_OFFSET | |
212 | %endm | |
213 | ||
214 | ||
7c673cae FG |
215 | ; "len" can be same register as "length" (length is only read by the first load) |
216 | ; get_len_code length, code, len, hufftables | |
217 | %macro get_len_code 4 | |
218 | %define %%length %1 ; 64-bit IN; LEN_TABLE_OFFSET is biased by -4*3, so the dword read is len_table entry (length - 3) | |
219 | %define %%code %2d ; 32-bit OUT: packed entry >> 5 | |
220 | %define %%len %3d ; 32-bit OUT: packed entry & 0x1F | |
221 | %define %%hufftables %4 | |
222 | ||
223 | mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] | |
224 | mov %%code, %%len | |
225 | and %%len, 0x1F | |
226 | shr %%code, 5 | |
227 | %endm | |
228 | ||
229 | ||
230 | %macro get_lit_code 4 | |
231 | %define %%lit %1 ; 64-bit IN or CONST, index into both literal tables | |
232 | %define %%code %2d ; 32-bit OUT: zero-extended 16-bit word at LIT_TABLE_OFFSET + 2*lit | |
233 | %define %%len %3d ; 32-bit OUT: zero-extended byte at LIT_TABLE_SIZES_OFFSET + lit | |
234 | %define %%hufftables %4 | |
235 | ||
236 | movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] | |
237 | movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] | |
238 | ||
239 | %endm | |
240 | ||
241 | ||
242 | ;; Compute hash of first 3 bytes of data: result = crc32 of data with a zero seed. NOTE(review): crc32 on a 32-bit operand consumes all 4 bytes, so "3 bytes" presumably relies on the caller clearing the top byte - confirm | |
243 | %macro compute_hash 2 | |
244 | %define %%result %1d ; 32-bit reg, OUT (zeroed, then accumulates the crc) | |
245 | %define %%data %2d ; 32-bit reg, source operand only (not written by this macro) | |
246 | ||
7c673cae FG |
247 | xor %%result, %%result |
248 | crc32 %%result, %%data | |
249 | %endm |