]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/igzip/huffman.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / igzip / huffman.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "lz0a_const.asm"
32
33 ; Macros for doing Huffman Encoding
34
; Choose the geometry of the distance tables.
;   DIST_TABLE_SIZE : number of 32-bit entries in dist_table
;   DECODE_OFFSET   : number of leading distance symbols left out of dcodes
;                     (dcodes holds 30 - DECODE_OFFSET entries)
; Without LONGER_HUFFTABLE only a two-entry dist_table is used.  With it, the
; history size D must not exceed 8192, otherwise the build is made to fail.
%ifndef LONGER_HUFFTABLE
	%define DIST_TABLE_SIZE 2
	%define DECODE_OFFSET 0
%elif (D > 8192)
	%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
	; The following line is retained verbatim from the original source.
	% error
%else
	%define DIST_TABLE_SIZE 8192
	%define DECODE_OFFSET 26
%endif
47
; Entry counts of the length and literal tables.
%define LEN_TABLE_SIZE	256
%define LIT_TABLE_SIZE	257

; Byte offsets into the hufftables structure.
; DIST_TABLE_START is where dist_table begins: the header bytes plus 8 more
; bytes.  Several of the *_OFFSET values are deliberately biased so that the
; macros below can add a scaled index without adjusting it first:
;   DIST_TABLE_OFFSET       = start of dist_table minus one 4-byte entry
;   LEN_TABLE_OFFSET        = start of len_table minus three 4-byte entries
;   DCODE_TABLE_OFFSET      = start of dcodes minus DECODE_OFFSET 2-byte entries
;   DCODE_TABLE_SIZE_OFFSET = start of the dcode length bytes minus
;                             DECODE_OFFSET bytes
; The "+ 1" in DCODE_TABLE_OFFSET skips one byte after the LIT_TABLE_SIZE
; literal length bytes (presumably alignment padding - confirm against the C
; structure definition).
%define DIST_TABLE_START	(ISAL_DEF_MAX_HDR_SIZE + 8)
%define DIST_TABLE_OFFSET	(DIST_TABLE_START - 4)
%define LEN_TABLE_OFFSET	(DIST_TABLE_START + 4 * DIST_TABLE_SIZE - 4 * 3)
%define LIT_TABLE_OFFSET	(DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
%define LIT_TABLE_SIZES_OFFSET	(LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
%define DCODE_TABLE_OFFSET	(LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - 2 * DECODE_OFFSET)
%define DCODE_TABLE_SIZE_OFFSET	(DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
;; struct isal_hufftables {
;;	// deflate huffman tree header
;;	uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
;;
;;	//!< Number of whole bytes in deflate_huff_hdr
;;	uint32_t deflate_huff_hdr_count;
;;
;;	//!< Number of bits in the partial byte in header
;;	uint32_t deflate_huff_hdr_extra_bits;
;;
;;	//!< bits 4:0 are the code length, bits 31:5 are the code
;;	uint32_t dist_table[DIST_TABLE_SIZE];
;;
;;	//!< bits 4:0 are the code length, bits 31:5 are the code
;;	uint32_t len_table[LEN_TABLE_SIZE];
;;
;;	//!< literal codes; the code lengths are stored separately below
;;	uint16_t lit_table[LIT_TABLE_SIZE];
;;
;;	//!< literal code lengths, one byte per entry
;;	//!< (field name inferred from LIT_TABLE_SIZES_OFFSET - confirm against the C header)
;;	uint8_t lit_table_sizes[LIT_TABLE_SIZE];
;;
;;	//!< distance codes; the code lengths are stored separately below
;;	uint16_t dcodes[30 - DECODE_OFFSET];
;;
;;	//!< distance code lengths, one byte per entry
;;	//!< (field name inferred from DCODE_TABLE_SIZE_OFFSET - confirm against the C header)
;;	uint8_t dcodes_sizes[30 - DECODE_OFFSET];
;;
;; };
82
83
84 %ifdef LONGER_HUFFTABLE
; get_dist_code dist, code, len, hufftables      (LONGER_HUFFTABLE variant)
;   Loads the packed dist_table entry selected by dist and splits it into the
;   Huffman code and its bit length.
;     dist       : 64-bit register, IN.  The entry read is the one at
;                  DIST_TABLE_OFFSET + 4*(dist + 1); dist itself is not written.
;     code       : OUT, written through its 32-bit name (%2d): entry >> 5
;     len        : OUT, written through its 32-bit name (%3d): entry & 0x1F
;     hufftables : register holding the address of the hufftables structure
;   Modifies flags.  This variant does not touch RCX.
%macro get_dist_code 4
%define %%dist_in	%1
%define %%code_out	%2d
%define %%len_out	%3d
%define %%tbl_base	%4

	mov	%%code_out, [%%tbl_base + DIST_TABLE_OFFSET + 4*(%%dist_in + 1)]
	mov	%%len_out, %%code_out		; keep a copy to extract the length from
	and	%%len_out, 0x1F			; len  = bits 4:0 of the entry
	shr	%%code_out, 5			; code = bits 31:5 of the entry
%endm
98
; get_packed_dist_code dist, code_len, hufftables   (LONGER_HUFFTABLE variant)
;   Loads the still-packed 32-bit dist_table entry for dist; use
;   unpack_dist_code to split it.  Note the index is dist itself here
;   (address DIST_TABLE_OFFSET + 4*dist), not dist + 1 as in get_dist_code.
;     dist       : 64-bit register, IN, not written
;     code_len   : OUT, written through its 32-bit name (%2d)
;     hufftables : register holding the address of the hufftables structure
%macro get_packed_dist_code 3
%define %%dist_in	%1
%define %%packed_out	%2d
%define %%tbl_base	%3

	mov	%%packed_out, dword [%%tbl_base + DIST_TABLE_OFFSET + 4*%%dist_in]
%endm
105
; unpack_dist_code code, len
;   Splits a packed table entry held in code into its two fields.
;     code : IN  packed entry; OUT entry >> 5 (accessed as %1d)
;     len  : OUT entry & 0x1F                 (accessed as %2d)
;   Modifies flags.
%macro unpack_dist_code 2
%define %%packed_io	%1d
%define %%len_out	%2d

	mov	%%len_out, %%packed_io
	and	%%len_out, 0x1F			; low five bits: code length
	shr	%%packed_io, 5			; remaining bits: the code
%endm
114
115 %else
; compute_dist_code dist, code, len, hufftables
;   Derives the distance symbol arithmetically, looks up its Huffman code and
;   code length in the dcodes tables, and appends the extra bits.
;   Assumes (dist != 0): BSR leaves its destination undefined for a zero source.
;     dist       : 64-bit register, IN, clobbered
;     code       : 64-bit register, OUT = dcode | (extra_bits << dcode_len)
;     len        : 64-bit register, OUT = num_extra_bits + dcode_len
;                  (also handed to BZHI as its fourth operand)
;     hufftables : register holding the address of the hufftables structure
;   Uses RCX; modifies flags.
;   BZHI, SHRX and SHLX are upper-case macros defined outside this file.
%macro compute_dist_code 4
%define %%dist		%1
%define %%code		%2
%define %%len		%3
%define %%hufftables	%4

	bsr	rcx, %%dist			; rcx = msb = index of highest set bit
	dec	rcx				; rcx = num_extra_bits = msb - 1
	BZHI	%%code, %%dist, rcx, %%len	; code = extra bits (low num_extra_bits bits of dist)
	SHRX	%%dist, %%dist, rcx		; dist >>= num_extra_bits
	lea	%%dist, [%%dist + 2*rcx]	; dist = sym = dist + num_extra_bits*2
	mov	%%len, rcx			; len = num_extra_bits
	; Address the tables through the macro's own hufftables argument (the
	; original body referenced an outer "hufftables" name and ignored %4).
	movzx	rcx, byte [%%hufftables + DCODE_TABLE_SIZE_OFFSET + %%dist WRT_OPT]	; rcx = dcode_len
	movzx	%%dist, word [%%hufftables + DCODE_TABLE_OFFSET + 2 * %%dist WRT_OPT]	; dist = dcode
	SHLX	%%code, %%code, rcx		; code = extra_bits << dcode_len
	or	%%code, %%dist			; code = dcode | (extra_bits << dcode_len)
	add	%%len, rcx			; len = num_extra_bits + dcode_len
%endm
138
; get_dist_code dist, code, len, hufftables      (short dist_table variant)
;   Produces the Huffman code and bit length for dist.  Values up to
;   DIST_TABLE_SIZE - 1 are read from dist_table (entry address
;   DIST_TABLE_OFFSET + 4*(dist + 1)); larger values go through
;   compute_dist_code.
;     dist       : 64-bit register, IN; clobbered on the compute path
;     code       : 64-bit register, OUT
;     len        : 64-bit register, OUT (its 32-bit name, len %+ d, is used
;                  for the table load)
;     hufftables : register holding the address of the hufftables structure
;   Uses RCX on the compute path; modifies flags.
;   When IACA is defined the table-lookup path is not assembled, so every
;   value falls through to compute_dist_code.
;   NOTE(review): the range check uses a signed jump (jg); it is left as in
;   the original.
%macro get_dist_code 4
%define %%dist		%1
%define %%code		%2
%define %%len		%3
%define %%hufftables	%4

	cmp	%%dist, DIST_TABLE_SIZE - 1
	jg	%%do_compute
%ifndef IACA
	; Table path.  The load goes through the macro's own hufftables argument
	; (the original body referenced an outer "hufftables" name and ignored %4).
	mov	%%len %+ d, dword [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) WRT_OPT]
	mov	%%code, %%len
	and	%%len, 0x1F			; len  = bits 4:0 of the entry
	shr	%%code, 5			; code = bits 31:5 of the entry
	jmp	%%done
%endif
%%do_compute:
	compute_dist_code %%dist, %%code, %%len, %%hufftables
%%done:
%endm
161
; get_packed_dist_code dist, code_len, hufftables   (short dist_table variant)
;   Placeholder with the same arity as the LONGER_HUFFTABLE version.  The body
;   contains no instructions: expanding it emits nothing and code_len is left
;   unwritten.
%macro get_packed_dist_code 3
%define %%dist_in	%1		; 64-bit IN (not referenced)
%define %%packed_out	%2d		; 32-bit OUT (never written)
%define %%tbl_base	%3		; address of the hufftable (not referenced)
%endm
167
168 %endif
169
170
171 ; Macros for doing Huffman Encoding
172
; compute_dist_icf_code dist, code, tmp1
;   Builds the combined symbol/extra-bits value for a distance without any
;   table lookup.
;   Assumes (dist != 0).
;     dist : 64-bit register, IN, clobbered (holds the distance symbol on exit)
;     code : 64-bit register, OUT
;            = sym + (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET))
;     tmp1 : register passed to BZHI as its fourth operand
;   Uses RCX; modifies flags.
;   BZHI and SHRX are upper-case macros defined outside this file.
%macro compute_dist_icf_code 3
%define %%dist_io	%1
%define %%icf_out	%2
%define %%scratch	%3

	bsr	rcx, %%dist_io				; rcx = msb = bsr(dist)
	dec	rcx					; rcx = num_extra_bits = msb - 1
	BZHI	%%icf_out, %%dist_io, rcx, %%scratch	; icf_out = extra bits of dist
	SHRX	%%dist_io, %%dist_io, rcx		; dist >>= num_extra_bits
	lea	%%dist_io, [%%dist_io + 2*rcx]		; dist = sym = dist + num_extra_bits*2
	shl	%%icf_out, EXTRA_BITS_OFFSET - DIST_OFFSET	; move extra bits above the symbol field
	add	%%icf_out, %%dist_io			; icf_out = shifted extra bits + sym
%endm
191
; get_dist_icf_code dist, code, tmp1
;   Produces the distance field for dist, already shifted left by DIST_OFFSET.
;   Values of dist up to 1 are used as the symbol directly; larger values are
;   converted by compute_dist_icf_code.
;     dist : 64-bit register, IN; clobbered on the compute path
;     code : 64-bit register, OUT (may be the same register as dist, in which
;            case the copy on the small-value path is not assembled)
;     tmp1 : scratch register forwarded to compute_dist_icf_code
;   Uses RCX on the compute path; modifies flags.
%macro get_dist_icf_code 3
%define %%dist_io	%1
%define %%icf_out	%2
%define %%scratch	%3

	cmp	%%dist_io, 1
	jle	%%small_dist
	compute_dist_icf_code %%dist_io, %%icf_out, %%scratch
	jmp	%%apply_offset
%%small_dist:
%ifnidn %%icf_out, %%dist_io
	mov	%%icf_out, %%dist_io		; symbol equals the input value
%endif
%%apply_offset:
	shl	%%icf_out, DIST_OFFSET
%endm
212
213
; get_len_code length, code, len, hufftables
;   Loads the packed len_table entry at LEN_TABLE_OFFSET + 4*length and splits
;   it into the Huffman code and its bit length.
;     length     : 64-bit register, IN
;     code       : OUT, written through its 32-bit name (%2d): entry >> 5
;     len        : OUT, written through its 32-bit name (%3d): entry & 0x1F
;                  "len" can be the same register as "length"
;     hufftables : register holding the address of the hufftables structure
;   Modifies flags.
%macro get_len_code 4
%define %%length_in	%1
%define %%code_out	%2d
%define %%len_out	%3d
%define %%tbl_base	%4

	mov	%%code_out, [%%tbl_base + LEN_TABLE_OFFSET + 4 * %%length_in]
	mov	%%len_out, %%code_out		; length is no longer needed past this point
	and	%%len_out, 0x1F			; len  = bits 4:0 of the entry
	shr	%%code_out, 5			; code = bits 31:5 of the entry
%endm
227
228
; get_lit_code lit, code, len, hufftables
;   Fetches the Huffman code and code length of a literal from two separate
;   tables: a byte table of lengths and a word table of codes.
;     lit        : 64-bit register or constant, IN
;     code       : OUT, written through its 32-bit name (%2d), zero-extended word
;     len        : OUT, written through its 32-bit name (%3d), zero-extended byte
;     hufftables : register holding the address of the hufftables structure
;   len is written before lit is used for the second load, so len must not be
;   the same register as lit.  Flags are not modified.
%macro get_lit_code 4
%define %%lit_in	%1
%define %%code_out	%2d
%define %%len_out	%3d
%define %%tbl_base	%4

	movzx	%%len_out, byte [%%tbl_base + LIT_TABLE_SIZES_OFFSET + %%lit_in]
	movzx	%%code_out, word [%%tbl_base + LIT_TABLE_OFFSET + 2 * %%lit_in]
%endm
239
240
;; compute_hash result, data
;;   Hashes the 32-bit value in data with the CRC32 instruction, starting from
;;   a zero accumulator.
;;     result : OUT, accessed as %1d; cleared first, so it must not be the
;;              same register as data
;;     data   : IN, accessed as %2d; only read
;;   Modifies flags (from the xor).
;;   NOTE(review): the original header describes this as a hash of the first
;;   3 bytes of data, while the instruction here consumes all 32 bits of the
;;   operand - presumably callers arrange data accordingly; confirm.
%macro compute_hash 2
%define %%hash_out	%1d
%define %%data_in	%2d

	xor	%%hash_out, %%hash_out		; zero the CRC accumulator
	crc32	%%hash_out, %%data_in
%endm