]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/igzip/huffman.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / igzip / huffman.asm
CommitLineData
7c673cae
FG
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
5; modification, are permitted provided that the following conditions
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "options.asm"
31%include "lz0a_const.asm"
32
33; Macros for doing Huffman Encoding
34
; Table-size configuration.
;   LONGER_HUFFTABLE undefined: dist_table has 2 entries; get_dist_code
;                               computes the code for every larger distance.
;   LONGER_HUFFTABLE defined:   dist_table has 8192 entries, so the history
;                               size D must not exceed 8192.
; DECODE_OFFSET reduces the dcodes table to (30 - DECODE_OFFSET) entries and
; biases DCODE_TABLE_OFFSET / DCODE_TABLE_SIZE_OFFSET accordingly.
; NOTE(review): the bare "% error" line is kept verbatim; presumably it forces
; the build to stop on assemblers where %error alone does not - confirm.
%ifndef LONGER_HUFFTABLE
        %define DIST_TABLE_SIZE 2
        %define DECODE_OFFSET 0
%else
        %if (D <= 8192)
                %define DIST_TABLE_SIZE 8192
                %define DECODE_OFFSET 26
        %else
                %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
                 % error
        %endif
%endif
47
; Entry counts of the length and literal tables.
%define LEN_TABLE_SIZE          256
%define LIT_TABLE_SIZE          257

; Byte offsets into struct isal_hufftables (layout sketched in the comment
; that follows these definitions).

; dist_table begins after the header bytes plus the two uint32_t header
; fields (2 * 4 = 8 bytes).
%define DIST_TABLE_START        (ISAL_DEF_MAX_HDR_SIZE + 8)

; dist_table base biased down by one 4-byte entry.
%define DIST_TABLE_OFFSET       (DIST_TABLE_START - 4 * 1)

; len_table base (4 * DIST_TABLE_SIZE past dist_table) biased down by three
; 4-byte entries, so indexing with 4 * length maps length 3 to entry 0.
%define LEN_TABLE_OFFSET        (DIST_TABLE_START + 4 * DIST_TABLE_SIZE - 4 * 3)

; lit_table: 16-bit entries located after dist_table and len_table.
%define LIT_TABLE_OFFSET        (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)

; One-byte-per-literal table located after the 2-byte lit_table entries.
%define LIT_TABLE_SIZES_OFFSET  (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)

; 16-bit distance-code table: LIT_TABLE_SIZE size bytes plus 1 further byte
; past LIT_TABLE_SIZES_OFFSET, biased down by DECODE_OFFSET 2-byte entries.
%define DCODE_TABLE_OFFSET      (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - 2 * DECODE_OFFSET)

; One-byte-per-symbol table of distance-code sizes; relative to the biased
; DCODE_TABLE_OFFSET it lies 2 * 30 bytes on, less DECODE_OFFSET 1-byte entries.
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
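;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
;; struct isal_hufftables {
;;	// deflate huffman tree header
;;	uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
;;
;;	//!< Number of whole bytes in deflate_huff_hdr
;;	uint32_t deflate_huff_hdr_count;
;;
;;	//!< Number of bits in the partial byte in header
;;	uint32_t deflate_huff_hdr_extra_bits;
;;
;;	//!< bits 4:0 are the code length, bits 31:5 are the code
;;	//!< (the macros in this file unpack entries with "and 0x1F" / "shr 5")
;;	uint32_t dist_table[DIST_TABLE_SIZE];
;;
;;	//!< bits 4:0 are the code length, bits 31:5 are the code
;;	uint32_t len_table[LEN_TABLE_SIZE];
;;
;;	//!< literal codes, read as 16-bit words by get_lit_code
;;	uint16_t lit_table[LIT_TABLE_SIZE];
;;
;;	//!< literal code lengths, read as bytes at LIT_TABLE_SIZES_OFFSET
;;	//!< (field name inferred from the offset macro; confirm against the C header)
;;	uint8_t lit_table_sizes[LIT_TABLE_SIZE];
;;
;;	//!< distance codes, read as 16-bit words at DCODE_TABLE_OFFSET
;;	//!< (DCODE_TABLE_OFFSET skips one extra byte after lit_table_sizes)
;;	uint16_t dcodes[30 - DECODE_OFFSET];
;;
;;	//!< distance code lengths, read as bytes at DCODE_TABLE_SIZE_OFFSET
;;	//!< (field name inferred from the offset macro; confirm against the C header)
;;	uint8_t dcodes_sizes[30 - DECODE_OFFSET];
;;
;; };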
82
83
84%ifdef LONGER_HUFFTABLE
; get_dist_code dist, code, len, hufftables      (LONGER_HUFFTABLE build)
; Loads the packed 32-bit dist_table entry addressed by
; hufftables + DIST_TABLE_OFFSET + 4*(dist + 1) and splits it.
;   dist        64-bit register used as the index; not written by this macro
;   code        register name; its "d" (32-bit) form receives entry >> 5
;   len         register name; its "d" (32-bit) form receives entry & 0x1F
;   hufftables  register holding the address of the hufftables structure
; No scratch register is used; flags are modified by and/shr.
%macro get_dist_code 4
%define %%index         %1      ; 64-bit IN
%define %%huffcode      %2d     ; 32-bit OUT
%define %%nbits         %3d     ; 32-bit OUT
%define %%tables        %4      ; address of the hufftable

        mov     %%nbits, [%%tables + DIST_TABLE_OFFSET + 4*(%%index + 1)]
        mov     %%huffcode, %%nbits             ; keep a copy of the packed entry
        and     %%nbits, 0x1F                   ; low 5 bits: code length
        shr     %%huffcode, 5                   ; remaining bits: code
%endm
98
; get_packed_dist_code dist, code_len, hufftables      (LONGER_HUFFTABLE build)
; Loads the still-packed 32-bit dist_table entry addressed by
; hufftables + DIST_TABLE_OFFSET + 4*dist (no "+ 1" on the index here).
;   dist        64-bit register used as the index; not written
;   code_len    register name; its "d" (32-bit) form receives the packed entry
;   hufftables  register holding the address of the hufftables structure
; Flags are not modified.
%macro get_packed_dist_code 3
%define %%index         %1      ; 64-bit IN
%define %%packed        %2d     ; 32-bit OUT
%define %%tables        %3      ; address of the hufftable
        mov     %%packed, [%%tables + DIST_TABLE_OFFSET + 4*%%index]
%endm
105
; unpack_dist_code code, len
; Splits a packed table entry held in code:
;   code        register name; "d" form is IN (packed entry) and OUT (entry >> 5)
;   len         register name; "d" form receives entry & 0x1F
; Flags are modified by and/shr.
%macro unpack_dist_code 2
%define %%packed        %1d     ; 32-bit IN/OUT
%define %%nbits         %2d     ; 32-bit OUT

        mov     %%nbits, %%packed
        and     %%nbits, 0x1F                   ; low 5 bits: code length
        shr     %%packed, 5                     ; remaining bits: code
%endm
114
115%else
; compute_dist_code dist, code, len, hufftables
; Computes the distance symbol arithmetically, then looks up its code and
; code size in the dcode tables and appends the extra bits.
;   dist        64-bit register, IN; clobbered (ends up holding the dcode word)
;   code        64-bit register, OUT: dcode | (extra_bits << dcode_size)
;   len         64-bit register, OUT: num_extra_bits + dcode_size
;   hufftables  accepted as %4 but not referenced; see NOTE below
; Clobbers rcx and flags.
; Assumes dist != 0 (bsr). The dispatcher in this file only reaches this macro
; with dist >= 2; for dist == 1 the "dec rcx" would leave rcx = -1.
; BZHI / SHRX / SHLX are macros defined outside this file.
; NOTE(review): the table loads use the bare name "hufftables" (with WRT_OPT)
; rather than the %4 argument - confirm this is intended.
%macro compute_dist_code 4
%define %%sym           %1      ; IN, clobbered
%define %%symq          %1
%define %%bits          %2      ; OUT
%define %%nbits         %3      ; OUT
%define %%tables        %4

        bsr     rcx, %%sym                      ; rcx = msb = index of highest set bit
        dec     rcx                             ; rcx = num_extra_bits = msb - 1
        BZHI    %%bits, %%sym, rcx, %%nbits     ; presumably bits = low num_extra_bits bits of dist
        SHRX    %%sym, %%sym, rcx               ; dist >>= num_extra_bits
        lea     %%sym, [%%sym + 2*rcx]          ; sym = dist + 2*num_extra_bits
        mov     %%nbits, rcx                    ; len = num_extra_bits
        movzx   rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%symq WRT_OPT]
        movzx   %%sym, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%symq WRT_OPT]
        SHLX    %%bits, %%bits, rcx             ; move extra bits above the dcode
        or      %%bits, %%sym                   ; code = dcode | shifted extra bits
        add     %%nbits, rcx                    ; len = num_extra_bits + dcode size
%endm
138
; get_dist_code dist, code, len, hufftables      (short dist_table build)
; Produces the code and code length for dist:
;   - dist <= DIST_TABLE_SIZE - 1 (signed compare): unpack the dist_table
;     entry at DIST_TABLE_OFFSET + 4*(dist + 1);
;   - otherwise: fall back to compute_dist_code.
; When IACA is defined the table path is not assembled, so execution falls
; through into compute_dist_code for every dist.
;   dist        64-bit register, IN; clobbered on the compute path
;   code        64-bit register, OUT
;   len         64-bit register, OUT (a "d" suffix is pasted on for the load)
;   hufftables  forwarded to compute_dist_code
; Clobbers flags, plus rcx on the compute path.
; NOTE(review): the table load uses the bare name "hufftables" (with WRT_OPT)
; rather than the %4 argument - confirm this is intended.
%macro get_dist_code 4
%define %%index         %1      ; IN, clobbered
%define %%indexq        %1      ; 64-bit IN, clobbered
%define %%bits          %2      ; OUT
%define %%nbits         %3      ; OUT
%define %%tables        %4

        cmp     %%index, DIST_TABLE_SIZE - 1
        jg      %%slow_path
%ifndef IACA
        mov     %%nbits %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%indexq + 1) WRT_OPT]
        mov     %%bits, %%nbits                 ; keep a copy of the packed entry
        and     %%nbits, 0x1F                   ; low 5 bits: code length
        shr     %%bits, 5                       ; remaining bits: code
        jmp     %%finished
%endif
%%slow_path:
        compute_dist_code %%indexq, %%bits, %%nbits, %%tables
%%finished:
%endm
161
; get_packed_dist_code dist, code_len, hufftables      (short dist_table build)
; Takes the same three arguments as the LONGER_HUFFTABLE variant but expands
; to no instructions: code_len is left untouched.
%macro get_packed_dist_code 3
%define %%index         %1      ; 64-bit IN
%define %%packed        %2d     ; 32-bit OUT
%define %%tables        %3      ; address of the hufftable
%endm
167
168%endif
169
170
224ce89b
WB
171; Macros for doing Huffman Encoding
172
; compute_dist_icf_code dist, code, tmp1
; Computes the distance symbol and combines it with the extra bits:
;   code = sym + (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET))
;   dist        64-bit register, IN; clobbered (ends up holding sym)
;   code        64-bit register, OUT
;   tmp1        passed to BZHI as its fourth argument (presumably scratch)
; Clobbers rcx and flags.
; Assumes dist != 0 (bsr). The dispatcher in this file only reaches this macro
; with dist >= 2; for dist == 1 the "dec rcx" would leave rcx = -1.
; BZHI / SHRX are macros defined outside this file.
%macro compute_dist_icf_code 3
%define %%sym           %1      ; IN, clobbered
%define %%symq          %1
%define %%bits          %2      ; OUT
%define %%scratch       %3

        bsr     rcx, %%sym                      ; rcx = msb = index of highest set bit
        dec     rcx                             ; rcx = num_extra_bits = msb - 1
        BZHI    %%bits, %%sym, rcx, %%scratch   ; presumably bits = low num_extra_bits bits of dist
        SHRX    %%sym, %%sym, rcx               ; dist >>= num_extra_bits
        lea     %%sym, [%%sym + 2*rcx]          ; sym = dist + 2*num_extra_bits
        shl     %%bits, EXTRA_BITS_OFFSET - DIST_OFFSET
        add     %%bits, %%sym                   ; code = shifted extra bits + sym

%endm
191
; get_dist_icf_code dist, code, tmp1
; Produces the distance field, already shifted left by DIST_OFFSET:
;   - dist <= 1 (signed compare): the field value is dist itself;
;   - otherwise: the field value comes from compute_dist_icf_code.
;   dist        64-bit register, IN; clobbered on the compute path
;   code        64-bit register, OUT; may be the same register as dist
;               (the copy is then omitted at assembly time)
;   tmp1        forwarded to compute_dist_icf_code
; Clobbers flags, plus rcx on the compute path.
%macro get_dist_icf_code 3
%define %%index         %1      ; IN, clobbered
%define %%indexq        %1      ; 64-bit IN, clobbered
%define %%bits          %2      ; OUT
%define %%scratch       %3

        cmp     %%index, 1
        jg      %%slow_path

%ifnidn %%bits, %%index
        mov     %%bits, %%index
%endif
        jmp     %%finished
%%slow_path:
        compute_dist_icf_code %%indexq, %%bits, %%scratch
%%finished:
        shl     %%bits, DIST_OFFSET             ; place the value at DIST_OFFSET
%endm
212
213
7c673cae
FG
; get_len_code length, code, len, hufftables
; Loads the packed 32-bit len_table entry addressed by
; hufftables + LEN_TABLE_OFFSET + 4*length and splits it.
;   length      64-bit register used as the index
;   code        register name; its "d" (32-bit) form receives entry >> 5
;   len         register name; its "d" (32-bit) form receives entry & 0x1F;
;               may be the same register as length
;   hufftables  register holding the address of the hufftables structure
; Flags are modified by and/shr.
%macro get_len_code 4
%define %%match_len     %1      ; 64-bit IN
%define %%huffcode      %2d     ; 32-bit OUT
%define %%nbits         %3d     ; 32-bit OUT
%define %%tables        %4

        mov     %%nbits, [%%tables + LEN_TABLE_OFFSET + 4 * %%match_len]
        mov     %%huffcode, %%nbits             ; keep a copy of the packed entry
        and     %%nbits, 0x1F                   ; low 5 bits: code length
        shr     %%huffcode, 5                   ; remaining bits: code
%endm
227
228
; get_lit_code lit, code, len, hufftables
; Looks up a literal in two parallel tables:
;   len  = zero-extended byte at hufftables + LIT_TABLE_SIZES_OFFSET + lit
;   code = zero-extended word at hufftables + LIT_TABLE_OFFSET + 2*lit
;   lit         64-bit register or assemble-time constant
;   code        register name; its "d" (32-bit) form receives the code
;   len         register name; its "d" (32-bit) form receives the length
;   hufftables  register holding the address of the hufftables structure
; len is written before lit is used for the second load, so len must not be
; the same register as lit. Flags are not modified.
%macro get_lit_code 4
%define %%symbol        %1      ; 64-bit IN or CONST
%define %%huffcode      %2d     ; 32-bit OUT
%define %%nbits         %3d     ; 32-bit OUT
%define %%tables        %4

        movzx   %%nbits, byte [%%tables + LIT_TABLE_SIZES_OFFSET + %%symbol]
        movzx   %%huffcode, word [%%tables + LIT_TABLE_OFFSET + 2 * %%symbol]

%endm
239
240
;; compute_hash result, data
;; Hashes the 32-bit value in data with the crc32 instruction, starting from a
;; zero accumulator.
;;   result     register name; its "d" (32-bit) form receives the hash
;;   data       register name; its "d" (32-bit) form is read, not written
;; result is zeroed before data is read, so it must not be the same register.
;; NOTE(review): the original comment describes this as a hash of the first
;; 3 bytes of data, but crc32 with a 32-bit source consumes all 4 bytes of
;; the register - confirm how callers prepare the top byte.
;; Flags are modified by xor.
%macro compute_hash 2
%define %%hash          %1d     ; 32-bit reg, OUT
%define %%input         %2d     ; 32-bit reg, IN

        xor     %%hash, %%hash                  ; accumulator = 0
        crc32   %%hash, %%input
%endm