]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "options.asm" | |
31 | %include "lz0a_const.asm" | |
32 | ||
33 | ; Macros for doing Huffman Encoding. The %defines below give table sizes and byte offsets into struct isal_hufftables (sketched in the comment that follows them). DIST_TABLE_START skips the header array plus 8 more bytes. DIST_TABLE_OFFSET is biased by -4*1 bytes (get_dist_code indexes with dist + 1) and LEN_TABLE_OFFSET by -4*3 bytes (presumably so get_len_code can index with a length that starts at 3 - confirm). The dcode offsets are biased by DECODE_OFFSET entries. | |
34 | ||
35 | %ifdef LONGER_HUFFTABLE | |
36 | %if (D > 8192) | |
37 | %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE | |
38 | % error | |
39 | %else | |
40 | %define DIST_TABLE_SIZE 8192 | |
41 | %define DECODE_OFFSET 26 | |
42 | %endif | |
43 | %else | |
224ce89b WB |
44 | %define DIST_TABLE_SIZE 2 |
45 | %define DECODE_OFFSET 0 | |
7c673cae FG |
46 | %endif |
47 | ||
48 | %define LEN_TABLE_SIZE 256 | |
49 | %define LIT_TABLE_SIZE 257 | |
50 | ||
224ce89b | 51 | %define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) |
7c673cae FG |
52 | %define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) |
53 | %define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) | |
54 | %define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) | |
55 | %define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) | |
56 | %define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) | |
57 | %define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) | |
58 | ;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ | |
59 | ;; struct isal_hufftables { | |
60 | ;; // deflate huffman tree header | |
224ce89b | 61 | ;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; |
7c673cae FG |
62 | ;; |
63 | ;; //!< Number of whole bytes in deflate_huff_hdr | |
64 | ;; uint32_t deflate_huff_hdr_count; | |
65 | ;; | |
66 | ;; //!< Number of bits in the partial byte in header | |
67 | ;; uint32_t deflate_huff_hdr_extra_bits; | |
68 | ;; | |
69 | ;; //!< bits 4:0 are the code length, bits 31:5 are the code | |
70 | ;; uint32_t dist_table[DIST_TABLE_SIZE]; | |
71 | ;; | |
72 | ;; //!< bits 4:0 are the code length, bits 31:5 are the code | |
73 | ;; uint32_t len_table[LEN_TABLE_SIZE]; | |
74 | ;; | |
75 | ;; //!< literal code word; the code length is read separately from the byte table at LIT_TABLE_SIZES_OFFSET | |
76 | ;; uint16_t lit_table[LIT_TABLE_SIZE]; | |
77 | ;; | |
78 | ;; //!< distance code word; the code length is read separately from the byte table at DCODE_TABLE_SIZE_OFFSET | |
79 | ;; uint16_t dcodes[30 - DECODE_OFFSET]; | |
80 | ||
81 | ;; }; | |
82 | ||
83 | ||
84 | %ifdef LONGER_HUFFTABLE | |
85 | ; Table-only variant (LONGER_HUFFTABLE): one dword load, split into len (bits 4:0) and code (bits 31:5). RCX and dist are not written by this variant; flags are modified. | |
86 | ; get_dist_code dist, code, len, hufftables | |
87 | %macro get_dist_code 4 | |
88 | %define %%dist %1 ; 64-bit IN: table index (the load addresses entry dist + 1 relative to DIST_TABLE_OFFSET) | |
89 | %define %%code %2d ; 32-bit OUT: loaded dword >> 5 (a 'd' suffix is appended to the argument name) | |
90 | %define %%len %3d ; 32-bit OUT: loaded dword & 0x1F (a 'd' suffix is appended to the argument name) | |
91 | %define %%hufftables %4 ; address of the hufftable | |
92 | ||
224ce89b | 93 | mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] |
7c673cae FG |
94 | mov %%code, %%len |
95 | and %%len, 0x1F; | |
96 | shr %%code, 5 | |
97 | %endm | |
98 | ||
99 | %macro get_packed_dist_code 3 | |
100 | %define %%dist %1 ; 64-bit IN: table index, scaled by 4 with no +1 (get_dist_code in this %ifdef branch uses dist + 1 - NOTE(review): confirm the differing bias is intended) | |
101 | %define %%code_len %2d ; 32-bit OUT: raw packed dword, code and length still combined (unpack_dist_code splits it) | |
102 | %define %%hufftables %3 ; address of the hufftable | |
103 | mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] | |
104 | %endm | |
105 | ||
106 | %macro unpack_dist_code 2 | |
107 | %define %%code %1d ; 32-bit IN/OUT: packed dword on entry, packed >> 5 on exit | |
108 | %define %%len %2d ; 32-bit OUT: packed & 0x1F | |
109 | ||
110 | mov %%len, %%code | |
111 | and %%len, 0x1F; | |
112 | shr %%code, 5 | |
113 | %endm | |
114 | ||
115 | %else | |
116 | ; Assumes dist >= 2: bsr is undefined for 0, and dist == 1 would leave rcx = -1 after the dec (get_dist_code only expands this path for dist > DIST_TABLE_SIZE - 1) | |
117 | ; Uses RCX, clobbers dist, modifies flags. BZHI/SHRX/SHLX are macros defined elsewhere; %%len is handed to BZHI as its 4th operand before it receives the result. | |
118 | ; void compute_dist_code dist, code, len, hufftables | |
119 | %macro compute_dist_code 4 | |
224ce89b | 120 | %define %%dist %1 ; IN, clobbered |
7c673cae FG |
121 | %define %%distq %1 |
122 | %define %%code %2 ; OUT | |
123 | %define %%len %3 ; OUT | |
124 | %define %%hufftables %4 ; base address of the hufftables structure (now actually used by the loads below) | |
125 | ||
224ce89b WB |
126 | bsr rcx, %%dist ; rcx = msb = bsr(dist) |
127 | dec rcx ; rcx = num_extra_bits = msb - 1 | |
128 | BZHI %%code, %%dist, rcx, %%len | |
129 | SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits | |
130 | lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 | |
131 | mov %%len, rcx ; len = num_extra_bits | |
132 | movzx rcx, byte [%%hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] ; rcx = dcode_len for sym (byte table) | |
7c673cae | 133 | movzx %%dist, word [%%hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] ; dist = dcode for sym (word table) |
224ce89b | 134 | SHLX %%code, %%code, rcx ; code = extra_bits << dcode_len |
7c673cae | 135 | or %%code, %%dist ; code = dcode | (extra_bits << dcode_len) |
224ce89b | 136 | add %%len, rcx ; len = num_extra_bits + dcode_len |
7c673cae FG |
137 | %endm |
138 | ||
139 | ; RCX and dist are clobbered only on the compute path (dist > DIST_TABLE_SIZE - 1, signed compare); the table path leaves them intact. When IACA is defined the table path is not assembled and every call takes the compute path. | |
140 | ; get_dist_code dist, code, len, hufftables | |
141 | %macro get_dist_code 4 | |
224ce89b | 142 | %define %%dist %1 ; IN, clobbered on the compute path |
7c673cae | 143 | %define %%distq %1 ; same argument as %%dist |
224ce89b WB |
144 | %define %%code %2 ; OUT |
145 | %define %%len %3 ; OUT (the table load writes its 'd'-suffixed 32-bit name) | |
7c673cae FG |
146 | %define %%hufftables %4 ; base address of the hufftables structure |
147 | ||
224ce89b | 148 | cmp %%dist, DIST_TABLE_SIZE - 1 |
7c673cae | 149 | jg %%do_compute |
224ce89b WB |
150 | %ifndef IACA |
151 | mov %%len %+ d, dword [%%hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] | |
7c673cae FG |
152 | mov %%code, %%len |
153 | and %%len, 0x1F; | |
154 | shr %%code, 5 | |
155 | jmp %%done | |
224ce89b | 156 | %endif |
7c673cae FG |
157 | %%do_compute: |
158 | compute_dist_code %%distq, %%code, %%len, %%hufftables | |
159 | %%done: | |
160 | %endm | |
161 | ||
162 | %macro get_packed_dist_code 3 | |
163 | %define %%dist %1 ; 64-bit IN (unused: this variant has no body) | |
164 | %define %%code_len %2d ; 32-bit OUT - NOTE(review): never written here, this variant emits no instructions; confirm nothing relies on it without LONGER_HUFFTABLE | |
165 | %define %%hufftables %3 ; address of the hufftable (unused here) | |
166 | %endm | |
167 | ||
168 | %endif | |
169 | ||
170 | ||
224ce89b WB |
171 | ; Macros for doing Huffman Encoding |
172 | ||
173 | ; Assumes dist >= 2: bsr is undefined for 0, and dist == 1 would leave rcx = -1 after the dec (get_dist_icf_code only expands this path for dist > 1) | |
174 | ; Uses RCX, clobbers dist, modifies flags. tmp1 is passed to the BZHI macro as its 4th operand (presumably scratch - confirm). | |
175 | ; void compute_dist_icf_code dist, code, tmp1 | |
176 | %macro compute_dist_icf_code 3 | |
177 | %define %%dist %1 ; IN, clobbered | |
178 | %define %%distq %1 | |
179 | %define %%code %2 ; OUT | |
180 | %define %%tmp1 %3 | |
181 | ||
182 | bsr rcx, %%dist ; rcx = msb = bsr(dist) | |
183 | dec rcx ; rcx = num_extra_bits = msb - 1 | |
184 | BZHI %%code, %%dist, rcx, %%tmp1 | |
185 | SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits | |
186 | lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 | |
187 | shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET | |
188 | add %%code, %%dist ; code = (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET)) + sym | |
189 | ||
190 | %endm | |
191 | ||
192 | ; RCX and dist are clobbered only on the compute path (dist > 1, signed compare); otherwise code is just a copy of dist. Both paths finish by shifting code left by DIST_OFFSET. | |
193 | ; get_dist_icf_code dist, code, tmp1 | |
194 | %macro get_dist_icf_code 3 | |
195 | %define %%dist %1 ; IN, clobbered on the compute path | |
196 | %define %%distq %1 ; same argument as %%dist | |
197 | %define %%code %2 ; OUT | |
198 | %define %%tmp1 %3 | |
199 | ||
200 | cmp %%dist, 1 | |
201 | jg %%do_compute | |
202 | ||
203 | %ifnidn %%code, %%dist | |
204 | mov %%code, %%dist | |
205 | %endif | |
206 | jmp %%done | |
207 | %%do_compute: | |
208 | compute_dist_icf_code %%distq, %%code, %%tmp1 | |
209 | %%done: | |
210 | shl %%code, DIST_OFFSET | |
211 | %endm | |
212 | ||
213 | ||
7c673cae FG |
214 | ; "len" can be same register as "length" (length is only read by the load that writes len) |
215 | ; get_len_code length, code, len, hufftables: one dword load, then len = bits 4:0 and code = bits 31:5; modifies flags | |
216 | %macro get_len_code 4 | |
217 | %define %%length %1 ; 64-bit IN: index, scaled by 4 from LEN_TABLE_OFFSET | |
218 | %define %%code %2d ; 32-bit OUT: loaded dword >> 5 | |
219 | %define %%len %3d ; 32-bit OUT: loaded dword & 0x1F | |
220 | %define %%hufftables %4 | |
221 | ||
222 | mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] | |
223 | mov %%code, %%len | |
224 | and %%len, 0x1F | |
225 | shr %%code, 5 | |
226 | %endm | |
227 | ||
228 | ||
229 | %macro get_lit_code 4 | |
230 | %define %%lit %1 ; 64-bit IN or CONST: literal index into both tables | |
231 | %define %%code %2d ; 32-bit OUT: zero-extended word from the table at LIT_TABLE_OFFSET | |
232 | %define %%len %3d ; 32-bit OUT: zero-extended byte from the table at LIT_TABLE_SIZES_OFFSET | |
233 | %define %%hufftables %4 | |
234 | ||
235 | movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] | |
236 | movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] | |
237 | ||
238 | %endm | |
239 | ||
240 | ||
241 | ;; Compute hash: result = crc32 of the 32-bit data register, starting from a zeroed result. NOTE(review): the previous comment here said "first 3 bytes", but no masking of data is visible in this macro - confirm what callers expect. | |
242 | %macro compute_hash 2 | |
243 | %define %%result %1d ; 32-bit reg, OUT | |
244 | %define %%data %2d ; 32-bit reg, IN (only read by this macro) | |
245 | ||
7c673cae FG |
246 | xor %%result, %%result |
247 | crc32 %%result, %%data |
248 | %endm |