]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/qat_compress.c
8c1410c9ec01966d5f806ddd49a43f61254909ee
[mirror_zfs.git] / module / zfs / qat_compress.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 #if defined(_KERNEL) && defined(HAVE_QAT)
23 #include <linux/slab.h>
24 #include <linux/vmalloc.h>
25 #include <linux/pagemap.h>
26 #include <linux/completion.h>
27 #include <sys/zfs_context.h>
28 #include <sys/zio.h>
29 #include "qat.h"
30
31 /*
32 * Max instances in a QAT device, each instance is a channel to submit
33 * jobs to QAT hardware, this is only for pre-allocating instance and
34 * session arrays; the actual number of instances are defined in the
35 * QAT driver's configuration file.
36 */
37 #define QAT_DC_MAX_INSTANCES 48
38
39 /*
40 * ZLIB head and foot size
41 */
42 #define ZLIB_HEAD_SZ 2
43 #define ZLIB_FOOT_SZ 4
44
/* Compression instance handles, filled in by qat_dc_init(). */
45 static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
/* Per-instance session memory, allocated and initialized in qat_dc_init(). */
46 static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
/* Per-instance array of intermediate buffer lists given to cpaDcStartInstance(). */
47 static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
/* Number of instances in use; qat_dc_init() caps it at QAT_DC_MAX_INSTANCES. */
48 static Cpa16U num_inst = 0;
/* Request counter; incremented atomically and taken modulo num_inst to pick an instance. */
49 static Cpa32U inst_num = 0;
/* Set once qat_dc_init() has set up every instance; cleared by qat_dc_clean(). */
50 static boolean_t qat_dc_init_done = B_FALSE;
/* Module parameter: a non-zero value makes qat_dc_use_accel() return false. */
51 int zfs_qat_compress_disable = 0;
52
53 boolean_t
54 qat_dc_use_accel(size_t s_len)
55 {
56 return (!zfs_qat_compress_disable &&
57 qat_dc_init_done &&
58 s_len >= QAT_MIN_BUF_SIZE &&
59 s_len <= QAT_MAX_BUF_SIZE);
60 }
61
62 static void
63 qat_dc_callback(void *p_callback, CpaStatus status)
64 {
65 if (p_callback != NULL)
66 complete((struct completion *)p_callback);
67 }
68
69 static void
70 qat_dc_clean(void)
71 {
72 Cpa16U buff_num = 0;
73 Cpa16U num_inter_buff_lists = 0;
74
75 for (Cpa16U i = 0; i < num_inst; i++) {
76 cpaDcStopInstance(dc_inst_handles[i]);
77 QAT_PHYS_CONTIG_FREE(session_handles[i]);
78 /* free intermediate buffers */
79 if (buffer_array[i] != NULL) {
80 cpaDcGetNumIntermediateBuffers(
81 dc_inst_handles[i], &num_inter_buff_lists);
82 for (buff_num = 0; buff_num < num_inter_buff_lists;
83 buff_num++) {
84 CpaBufferList *buffer_inter =
85 buffer_array[i][buff_num];
86 if (buffer_inter->pBuffers) {
87 QAT_PHYS_CONTIG_FREE(
88 buffer_inter->pBuffers->pData);
89 QAT_PHYS_CONTIG_FREE(
90 buffer_inter->pBuffers);
91 }
92 QAT_PHYS_CONTIG_FREE(
93 buffer_inter->pPrivateMetaData);
94 QAT_PHYS_CONTIG_FREE(buffer_inter);
95 }
96 }
97 }
98
99 num_inst = 0;
100 qat_dc_init_done = B_FALSE;
101 }
102
103 int
104 qat_dc_init(void)
105 {
106 CpaStatus status = CPA_STATUS_SUCCESS;
107 Cpa32U sess_size = 0;
108 Cpa32U ctx_size = 0;
109 Cpa16U num_inter_buff_lists = 0;
110 Cpa16U buff_num = 0;
111 Cpa32U buff_meta_size = 0;
112 CpaDcSessionSetupData sd = {0};
113
114 status = cpaDcGetNumInstances(&num_inst);
115 if (status != CPA_STATUS_SUCCESS)
116 return (-1);
117
118 /* if the user has configured no QAT compression units just return */
119 if (num_inst == 0)
120 return (0);
121
122 if (num_inst > QAT_DC_MAX_INSTANCES)
123 num_inst = QAT_DC_MAX_INSTANCES;
124
125 status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
126 if (status != CPA_STATUS_SUCCESS)
127 return (-1);
128
129 for (Cpa16U i = 0; i < num_inst; i++) {
130 cpaDcSetAddressTranslation(dc_inst_handles[i],
131 (void*)virt_to_phys);
132
133 status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
134 1, &buff_meta_size);
135
136 if (status == CPA_STATUS_SUCCESS)
137 status = cpaDcGetNumIntermediateBuffers(
138 dc_inst_handles[i], &num_inter_buff_lists);
139
140 if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
141 status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
142 num_inter_buff_lists *
143 sizeof (CpaBufferList *));
144
145 for (buff_num = 0; buff_num < num_inter_buff_lists;
146 buff_num++) {
147 if (status == CPA_STATUS_SUCCESS)
148 status = QAT_PHYS_CONTIG_ALLOC(
149 &buffer_array[i][buff_num],
150 sizeof (CpaBufferList));
151
152 if (status == CPA_STATUS_SUCCESS)
153 status = QAT_PHYS_CONTIG_ALLOC(
154 &buffer_array[i][buff_num]->
155 pPrivateMetaData,
156 buff_meta_size);
157
158 if (status == CPA_STATUS_SUCCESS)
159 status = QAT_PHYS_CONTIG_ALLOC(
160 &buffer_array[i][buff_num]->pBuffers,
161 sizeof (CpaFlatBuffer));
162
163 if (status == CPA_STATUS_SUCCESS) {
164 /*
165 * implementation requires an intermediate
166 * buffer approximately twice the size of
167 * output buffer, which is 2x max buffer
168 * size here.
169 */
170 status = QAT_PHYS_CONTIG_ALLOC(
171 &buffer_array[i][buff_num]->pBuffers->
172 pData, 2 * QAT_MAX_BUF_SIZE);
173 if (status != CPA_STATUS_SUCCESS)
174 goto fail;
175
176 buffer_array[i][buff_num]->numBuffers = 1;
177 buffer_array[i][buff_num]->pBuffers->
178 dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
179 }
180 }
181
182 status = cpaDcStartInstance(dc_inst_handles[i],
183 num_inter_buff_lists, buffer_array[i]);
184 if (status != CPA_STATUS_SUCCESS)
185 goto fail;
186
187 sd.compLevel = CPA_DC_L1;
188 sd.compType = CPA_DC_DEFLATE;
189 sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
190 sd.sessDirection = CPA_DC_DIR_COMBINED;
191 sd.sessState = CPA_DC_STATELESS;
192 sd.deflateWindowSize = 7;
193 sd.checksum = CPA_DC_ADLER32;
194 status = cpaDcGetSessionSize(dc_inst_handles[i],
195 &sd, &sess_size, &ctx_size);
196 if (status != CPA_STATUS_SUCCESS)
197 goto fail;
198
199 QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
200 if (session_handles[i] == NULL)
201 goto fail;
202
203 status = cpaDcInitSession(dc_inst_handles[i],
204 session_handles[i],
205 &sd, NULL, qat_dc_callback);
206 if (status != CPA_STATUS_SUCCESS)
207 goto fail;
208 }
209
210 qat_dc_init_done = B_TRUE;
211 return (0);
212 fail:
213 qat_dc_clean();
214 return (-1);
215 }
216
217 void
218 qat_dc_fini(void)
219 {
220 if (!qat_dc_init_done)
221 return;
222
223 qat_dc_clean();
224 }
225
226 /*
227 * The "add" parameter is an additional buffer which is passed
228 * to QAT as a scratch buffer alongside the destination buffer
229 * in case the "compressed" data ends up being larger than the
230 * original source data. This is necessary to prevent QAT from
231 * generating buffer overflow warnings for incompressible data.
232 */
233 static int
234 qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
235 char *dst, int dst_len, char *add, int add_len, size_t *c_len)
236 {
237 CpaInstanceHandle dc_inst_handle;
238 CpaDcSessionHandle session_handle;
239 CpaBufferList *buf_list_src = NULL;
240 CpaBufferList *buf_list_dst = NULL;
241 CpaFlatBuffer *flat_buf_src = NULL;
242 CpaFlatBuffer *flat_buf_dst = NULL;
243 Cpa8U *buffer_meta_src = NULL;
244 Cpa8U *buffer_meta_dst = NULL;
245 Cpa32U buffer_meta_size = 0;
246 CpaDcRqResults dc_results;
247 CpaStatus status = CPA_STATUS_SUCCESS;
248 Cpa32U hdr_sz = 0;
249 Cpa32U compressed_sz;
250 Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2;
251 Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2;
252 Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2;
253 Cpa32U bytes_left;
254 Cpa32U dst_pages = 0;
255 char *data;
256 struct page *page;
257 struct page **in_pages = NULL;
258 struct page **out_pages = NULL;
259 struct page **add_pages = NULL;
260 Cpa32U page_off = 0;
261 struct completion complete;
262 Cpa32U page_num = 0;
263 Cpa16U i;
264
265 /*
266 * We increment num_src_buf and num_dst_buf by 2 to allow
267 * us to handle non page-aligned buffer addresses and buffers
268 * whose sizes are not divisible by PAGE_SIZE.
269 */
270 Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
271 (num_src_buf * sizeof (CpaFlatBuffer));
272 Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
273 ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer));
274
275 if (QAT_PHYS_CONTIG_ALLOC(&in_pages,
276 num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
277 goto fail;
278
279 if (QAT_PHYS_CONTIG_ALLOC(&out_pages,
280 num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
281 goto fail;
282
283 if (QAT_PHYS_CONTIG_ALLOC(&add_pages,
284 num_add_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
285 goto fail;
286
287 i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
288 dc_inst_handle = dc_inst_handles[i];
289 session_handle = session_handles[i];
290
291 cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
292 &buffer_meta_size);
293 if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
294 CPA_STATUS_SUCCESS)
295 goto fail;
296
297 cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf,
298 &buffer_meta_size);
299 if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
300 CPA_STATUS_SUCCESS)
301 goto fail;
302
303 /* build source buffer list */
304 if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
305 CPA_STATUS_SUCCESS)
306 goto fail;
307
308 flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
309
310 buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
311
312 /* build destination buffer list */
313 if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
314 CPA_STATUS_SUCCESS)
315 goto fail;
316
317 flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
318
319 buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
320
321 buf_list_src->numBuffers = 0;
322 buf_list_src->pPrivateMetaData = buffer_meta_src;
323 bytes_left = src_len;
324 data = src;
325 page_num = 0;
326 while (bytes_left > 0) {
327 page_off = ((long)data & ~PAGE_MASK);
328 page = qat_mem_to_page(data);
329 in_pages[page_num] = page;
330 flat_buf_src->pData = kmap(page) + page_off;
331 flat_buf_src->dataLenInBytes =
332 min((long)PAGE_SIZE - page_off, (long)bytes_left);
333
334 bytes_left -= flat_buf_src->dataLenInBytes;
335 data += flat_buf_src->dataLenInBytes;
336 flat_buf_src++;
337 buf_list_src->numBuffers++;
338 page_num++;
339 }
340
341 buf_list_dst->numBuffers = 0;
342 buf_list_dst->pPrivateMetaData = buffer_meta_dst;
343 bytes_left = dst_len;
344 data = dst;
345 page_num = 0;
346 while (bytes_left > 0) {
347 page_off = ((long)data & ~PAGE_MASK);
348 page = qat_mem_to_page(data);
349 flat_buf_dst->pData = kmap(page) + page_off;
350 out_pages[page_num] = page;
351 flat_buf_dst->dataLenInBytes =
352 min((long)PAGE_SIZE - page_off, (long)bytes_left);
353
354 bytes_left -= flat_buf_dst->dataLenInBytes;
355 data += flat_buf_dst->dataLenInBytes;
356 flat_buf_dst++;
357 buf_list_dst->numBuffers++;
358 page_num++;
359 dst_pages++;
360 }
361
362 /* map additional scratch pages into the destination buffer list */
363 bytes_left = add_len;
364 data = add;
365 page_num = 0;
366 while (bytes_left > 0) {
367 page_off = ((long)data & ~PAGE_MASK);
368 page = qat_mem_to_page(data);
369 flat_buf_dst->pData = kmap(page) + page_off;
370 add_pages[page_num] = page;
371 flat_buf_dst->dataLenInBytes =
372 min((long)PAGE_SIZE - page_off, (long)bytes_left);
373
374 bytes_left -= flat_buf_dst->dataLenInBytes;
375 data += flat_buf_dst->dataLenInBytes;
376 flat_buf_dst++;
377 buf_list_dst->numBuffers++;
378 page_num++;
379 }
380
381 init_completion(&complete);
382
383 if (dir == QAT_COMPRESS) {
384 QAT_STAT_BUMP(comp_requests);
385 QAT_STAT_INCR(comp_total_in_bytes, src_len);
386
387 cpaDcGenerateHeader(session_handle,
388 buf_list_dst->pBuffers, &hdr_sz);
389 buf_list_dst->pBuffers->pData += hdr_sz;
390 buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
391 status = cpaDcCompressData(
392 dc_inst_handle, session_handle,
393 buf_list_src, buf_list_dst,
394 &dc_results, CPA_DC_FLUSH_FINAL,
395 &complete);
396 if (status != CPA_STATUS_SUCCESS) {
397 goto fail;
398 }
399
400 /* we now wait until the completion of the operation. */
401 if (!wait_for_completion_interruptible_timeout(&complete,
402 QAT_TIMEOUT_MS)) {
403 status = CPA_STATUS_FAIL;
404 goto fail;
405 }
406
407 if (dc_results.status != CPA_STATUS_SUCCESS) {
408 status = CPA_STATUS_FAIL;
409 goto fail;
410 }
411
412 compressed_sz = dc_results.produced;
413 if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
414 status = CPA_STATUS_INCOMPRESSIBLE;
415 goto fail;
416 }
417
418 flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
419 /* move to the last page */
420 flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT;
421
422 /* no space for gzip footer in the last page */
423 if (((compressed_sz + hdr_sz) % PAGE_SIZE)
424 + ZLIB_FOOT_SZ > PAGE_SIZE) {
425 status = CPA_STATUS_INCOMPRESSIBLE;
426 goto fail;
427 }
428
429 /* jump to the end of the buffer and append footer */
430 flat_buf_dst->pData =
431 (char *)((unsigned long)flat_buf_dst->pData & PAGE_MASK)
432 + ((compressed_sz + hdr_sz) % PAGE_SIZE);
433 flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ;
434
435 dc_results.produced = 0;
436 status = cpaDcGenerateFooter(session_handle,
437 flat_buf_dst, &dc_results);
438 if (status != CPA_STATUS_SUCCESS)
439 goto fail;
440
441 *c_len = compressed_sz + dc_results.produced + hdr_sz;
442 QAT_STAT_INCR(comp_total_out_bytes, *c_len);
443 } else {
444 ASSERT3U(dir, ==, QAT_DECOMPRESS);
445 QAT_STAT_BUMP(decomp_requests);
446 QAT_STAT_INCR(decomp_total_in_bytes, src_len);
447
448 buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
449 buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
450 status = cpaDcDecompressData(dc_inst_handle, session_handle,
451 buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL,
452 &complete);
453
454 if (CPA_STATUS_SUCCESS != status) {
455 status = CPA_STATUS_FAIL;
456 goto fail;
457 }
458
459 /* we now wait until the completion of the operation. */
460 if (!wait_for_completion_interruptible_timeout(&complete,
461 QAT_TIMEOUT_MS)) {
462 status = CPA_STATUS_FAIL;
463 goto fail;
464 }
465
466 if (dc_results.status != CPA_STATUS_SUCCESS) {
467 status = CPA_STATUS_FAIL;
468 goto fail;
469 }
470
471 *c_len = dc_results.produced;
472 QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
473 }
474
475 fail:
476 if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE)
477 QAT_STAT_BUMP(dc_fails);
478
479 if (in_pages) {
480 for (page_num = 0;
481 page_num < buf_list_src->numBuffers;
482 page_num++) {
483 kunmap(in_pages[page_num]);
484 }
485 QAT_PHYS_CONTIG_FREE(in_pages);
486 }
487
488 if (out_pages) {
489 for (page_num = 0; page_num < dst_pages; page_num++) {
490 kunmap(out_pages[page_num]);
491 }
492 QAT_PHYS_CONTIG_FREE(out_pages);
493 }
494
495 if (add_pages) {
496 for (page_num = 0;
497 page_num < buf_list_dst->numBuffers - dst_pages;
498 page_num++) {
499 kunmap(add_pages[page_num]);
500 }
501 QAT_PHYS_CONTIG_FREE(add_pages);
502 }
503
504 QAT_PHYS_CONTIG_FREE(buffer_meta_src);
505 QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
506 QAT_PHYS_CONTIG_FREE(buf_list_src);
507 QAT_PHYS_CONTIG_FREE(buf_list_dst);
508
509 return (status);
510 }
511
512 /*
513 * Entry point for QAT accelerated compression / decompression.
514 */
515 int
516 qat_compress(qat_compress_dir_t dir, char *src, int src_len,
517 char *dst, int dst_len, size_t *c_len)
518 {
519 int ret;
520 size_t add_len = 0;
521 void *add = NULL;
522
523 if (dir == QAT_COMPRESS) {
524 add_len = dst_len;
525 add = zio_data_buf_alloc(add_len);
526 }
527
528 ret = qat_compress_impl(dir, src, src_len, dst,
529 dst_len, add, add_len, c_len);
530
531 if (dir == QAT_COMPRESS)
532 zio_data_buf_free(add, add_len);
533
534 return (ret);
535 }
536
/* Register zfs_qat_compress_disable as an int module parameter (mode 0644). */
537 module_param(zfs_qat_compress_disable, int, 0644);
538 MODULE_PARM_DESC(zfs_qat_compress_disable, "Disable QAT compression");
539
540 #endif