4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 #if defined(_KERNEL) && defined(HAVE_QAT)
23 #include <linux/slab.h>
24 #include <linux/vmalloc.h>
25 #include <linux/pagemap.h>
26 #include <linux/completion.h>
27 #include <sys/zfs_context.h>
28 #include "qat_compress.h"
31 * Timeout - no response from hardware after 0.5 seconds
33 #define TIMEOUT_MS 500
/*
 * Maximum number of instances in a QAT device. Each instance is a channel
 * for submitting jobs to the QAT hardware. This value is only used to
 * pre-allocate the instance and session arrays; the actual number of
 * instances is defined in the QAT driver's configuration file.
 */
41 #define MAX_INSTANCES 48
44 * ZLIB head and foot size
46 #define ZLIB_HEAD_SZ 2
47 #define ZLIB_FOOT_SZ 4
/*
 * Minimum and maximum buffer sizes. The QAT hardware itself does not
 * impose these limits, but it delivers optimal performance when the
 * input buffer size is between 4KB and 128KB.
 */
54 #define QAT_MIN_BUF_SIZE (4*1024)
55 #define QAT_MAX_BUF_SIZE (128*1024)
60 typedef struct qat_stats
{
62 * Number of jobs submitted to qat compression engine.
64 kstat_named_t comp_requests
;
66 * Total bytes sent to qat compression engine.
68 kstat_named_t comp_total_in_bytes
;
70 * Total bytes output from qat compression engine.
72 kstat_named_t comp_total_out_bytes
;
74 * Number of jobs submitted to qat de-compression engine.
76 kstat_named_t decomp_requests
;
78 * Total bytes sent to qat de-compression engine.
80 kstat_named_t decomp_total_in_bytes
;
82 * Total bytes output from qat de-compression engine.
84 kstat_named_t decomp_total_out_bytes
;
86 * Number of fails in qat engine.
87 * Note: when qat fail happens, it doesn't mean a critical hardware
88 * issue, sometimes it is because the output buffer is not big enough,
89 * and the compression job will be transfered to gzip software again,
90 * so the functionality of ZFS is not impacted.
92 kstat_named_t dc_fails
;
95 qat_stats_t qat_stats
= {
96 { "comp_reqests", KSTAT_DATA_UINT64
},
97 { "comp_total_in_bytes", KSTAT_DATA_UINT64
},
98 { "comp_total_out_bytes", KSTAT_DATA_UINT64
},
99 { "decomp_reqests", KSTAT_DATA_UINT64
},
100 { "decomp_total_in_bytes", KSTAT_DATA_UINT64
},
101 { "decomp_total_out_bytes", KSTAT_DATA_UINT64
},
102 { "dc_fails", KSTAT_DATA_UINT64
},
105 static kstat_t
*qat_ksp
;
106 static CpaInstanceHandle dc_inst_handles
[MAX_INSTANCES
];
107 static CpaDcSessionHandle session_handles
[MAX_INSTANCES
];
108 static CpaBufferList
**buffer_array
[MAX_INSTANCES
];
109 static Cpa16U num_inst
= 0;
110 static Cpa32U inst_num
= 0;
111 static boolean_t qat_init_done
= B_FALSE
;
112 int zfs_qat_disable
= 0;
/*
 * Statistics helpers.
 *
 * QAT_STAT_INCR(stat, val) atomically adds val to the 64-bit value of the
 * qat_stats member named stat; QAT_STAT_BUMP(stat) adds one.
 *
 * The expansions intentionally carry no trailing semicolon, so each macro
 * behaves like a single expression and the caller supplies the ';'. With
 * a semicolon baked in, "if (c) QAT_STAT_BUMP(x); else ..." expands to two
 * statements and fails to compile.
 */
#define	QAT_STAT_INCR(stat, val) \
	atomic_add_64(&qat_stats.stat.value.ui64, (val))
#define	QAT_STAT_BUMP(stat) \
	QAT_STAT_INCR(stat, 1)
/*
 * Convenience wrappers around mem_alloc_contig() and mem_free_contig().
 *
 * PHYS_CONTIG_ALLOC expects the address of the caller's pointer variable
 * and a size in bytes, and yields the status of mem_alloc_contig().
 * PHYS_CONTIG_FREE expects the pointer variable itself (an lvalue); the
 * macro takes its address before calling mem_free_contig().
 */
#define	PHYS_CONTIG_ALLOC(ptr_addr, nbytes) \
	mem_alloc_contig((void *)(ptr_addr), (nbytes))

#define	PHYS_CONTIG_FREE(ptr) \
	mem_free_contig((void *)&(ptr))
/*
 * Look up the struct page that backs a kernel virtual address.
 *
 * Addresses for which is_vmalloc_addr() is true are resolved with
 * vmalloc_to_page(); every other address is resolved with virt_to_page().
 */
static inline struct page *
mem_to_page(void *addr)
{
	return (is_vmalloc_addr(addr) ?
	    vmalloc_to_page(addr) : virt_to_page(addr));
}
135 qat_dc_callback(void *p_callback
, CpaStatus status
)
137 if (p_callback
!= NULL
)
138 complete((struct completion
*)p_callback
);
141 static inline CpaStatus
142 mem_alloc_contig(void **pp_mem_addr
, Cpa32U size_bytes
)
144 *pp_mem_addr
= kmalloc(size_bytes
, GFP_KERNEL
);
145 if (*pp_mem_addr
== NULL
)
146 return (CPA_STATUS_RESOURCE
);
147 return (CPA_STATUS_SUCCESS
);
151 mem_free_contig(void **pp_mem_addr
)
153 if (*pp_mem_addr
!= NULL
) {
163 Cpa16U num_inter_buff_lists
= 0;
166 for (i
= 0; i
< num_inst
; i
++) {
167 cpaDcStopInstance(dc_inst_handles
[i
]);
168 PHYS_CONTIG_FREE(session_handles
[i
]);
169 /* free intermediate buffers */
170 if (buffer_array
[i
] != NULL
) {
171 cpaDcGetNumIntermediateBuffers(
172 dc_inst_handles
[i
], &num_inter_buff_lists
);
173 for (buff_num
= 0; buff_num
< num_inter_buff_lists
;
175 CpaBufferList
*buffer_inter
=
176 buffer_array
[i
][buff_num
];
177 if (buffer_inter
->pBuffers
) {
179 buffer_inter
->pBuffers
->pData
);
181 buffer_inter
->pBuffers
);
184 buffer_inter
->pPrivateMetaData
);
185 PHYS_CONTIG_FREE(buffer_inter
);
191 qat_init_done
= B_FALSE
;
197 CpaStatus status
= CPA_STATUS_SUCCESS
;
198 Cpa32U sess_size
= 0;
200 Cpa16U num_inter_buff_lists
= 0;
202 Cpa32U buff_meta_size
= 0;
203 CpaDcSessionSetupData sd
= {0};
206 status
= cpaDcGetNumInstances(&num_inst
);
207 if (status
!= CPA_STATUS_SUCCESS
|| num_inst
== 0)
210 if (num_inst
> MAX_INSTANCES
)
211 num_inst
= MAX_INSTANCES
;
213 status
= cpaDcGetInstances(num_inst
, &dc_inst_handles
[0]);
214 if (status
!= CPA_STATUS_SUCCESS
)
217 for (i
= 0; i
< num_inst
; i
++) {
218 cpaDcSetAddressTranslation(dc_inst_handles
[i
],
219 (void*)virt_to_phys
);
221 status
= cpaDcBufferListGetMetaSize(dc_inst_handles
[i
],
224 if (status
== CPA_STATUS_SUCCESS
)
225 status
= cpaDcGetNumIntermediateBuffers(
226 dc_inst_handles
[i
], &num_inter_buff_lists
);
228 if (status
== CPA_STATUS_SUCCESS
&& num_inter_buff_lists
!= 0)
229 status
= PHYS_CONTIG_ALLOC(&buffer_array
[i
],
230 num_inter_buff_lists
*
231 sizeof (CpaBufferList
*));
233 for (buff_num
= 0; buff_num
< num_inter_buff_lists
;
235 if (status
== CPA_STATUS_SUCCESS
)
236 status
= PHYS_CONTIG_ALLOC(
237 &buffer_array
[i
][buff_num
],
238 sizeof (CpaBufferList
));
240 if (status
== CPA_STATUS_SUCCESS
)
241 status
= PHYS_CONTIG_ALLOC(
242 &buffer_array
[i
][buff_num
]->
246 if (status
== CPA_STATUS_SUCCESS
)
247 status
= PHYS_CONTIG_ALLOC(
248 &buffer_array
[i
][buff_num
]->pBuffers
,
249 sizeof (CpaFlatBuffer
));
251 if (status
== CPA_STATUS_SUCCESS
) {
253 * implementation requires an intermediate
254 * buffer approximately twice the size of
255 * output buffer, which is 2x max buffer
258 status
= PHYS_CONTIG_ALLOC(
259 &buffer_array
[i
][buff_num
]->pBuffers
->
260 pData
, 2 * QAT_MAX_BUF_SIZE
);
261 if (status
!= CPA_STATUS_SUCCESS
)
264 buffer_array
[i
][buff_num
]->numBuffers
= 1;
265 buffer_array
[i
][buff_num
]->pBuffers
->
266 dataLenInBytes
= 2 * QAT_MAX_BUF_SIZE
;
270 status
= cpaDcStartInstance(dc_inst_handles
[i
],
271 num_inter_buff_lists
, buffer_array
[i
]);
272 if (status
!= CPA_STATUS_SUCCESS
)
275 sd
.compLevel
= CPA_DC_L1
;
276 sd
.compType
= CPA_DC_DEFLATE
;
277 sd
.huffType
= CPA_DC_HT_FULL_DYNAMIC
;
278 sd
.sessDirection
= CPA_DC_DIR_COMBINED
;
279 sd
.sessState
= CPA_DC_STATELESS
;
280 sd
.deflateWindowSize
= 7;
281 sd
.checksum
= CPA_DC_ADLER32
;
282 status
= cpaDcGetSessionSize(dc_inst_handles
[i
],
283 &sd
, &sess_size
, &ctx_size
);
284 if (status
!= CPA_STATUS_SUCCESS
)
287 PHYS_CONTIG_ALLOC(&session_handles
[i
], sess_size
);
288 if (session_handles
[i
] == NULL
)
291 status
= cpaDcInitSession(dc_inst_handles
[i
],
293 &sd
, NULL
, qat_dc_callback
);
294 if (status
!= CPA_STATUS_SUCCESS
)
298 qat_ksp
= kstat_create("zfs", 0, "qat", "misc",
299 KSTAT_TYPE_NAMED
, sizeof (qat_stats
) / sizeof (kstat_named_t
),
301 if (qat_ksp
!= NULL
) {
302 qat_ksp
->ks_data
= &qat_stats
;
303 kstat_install(qat_ksp
);
306 qat_init_done
= B_TRUE
;
318 if (qat_ksp
!= NULL
) {
319 kstat_delete(qat_ksp
);
325 qat_use_accel(size_t s_len
)
327 return (!zfs_qat_disable
&&
329 s_len
>= QAT_MIN_BUF_SIZE
&&
330 s_len
<= QAT_MAX_BUF_SIZE
);
334 qat_compress(qat_compress_dir_t dir
, char *src
, int src_len
,
335 char *dst
, int dst_len
, size_t *c_len
)
337 CpaInstanceHandle dc_inst_handle
;
338 CpaDcSessionHandle session_handle
;
339 CpaBufferList
*buf_list_src
= NULL
;
340 CpaBufferList
*buf_list_dst
= NULL
;
341 CpaFlatBuffer
*flat_buf_src
= NULL
;
342 CpaFlatBuffer
*flat_buf_dst
= NULL
;
343 Cpa8U
*buffer_meta_src
= NULL
;
344 Cpa8U
*buffer_meta_dst
= NULL
;
345 Cpa32U buffer_meta_size
= 0;
346 CpaDcRqResults dc_results
;
347 CpaStatus status
= CPA_STATUS_SUCCESS
;
349 Cpa32U compressed_sz
;
350 Cpa32U num_src_buf
= (src_len
>> PAGE_SHIFT
) + 1;
351 Cpa32U num_dst_buf
= (dst_len
>> PAGE_SHIFT
) + 1;
354 struct page
*in_page
, *out_page
;
355 struct page
**in_pages
= NULL
;
356 struct page
**out_pages
= NULL
;
357 struct completion complete
;
362 Cpa32U src_buffer_list_mem_size
= sizeof (CpaBufferList
) +
363 (num_src_buf
* sizeof (CpaFlatBuffer
));
364 Cpa32U dst_buffer_list_mem_size
= sizeof (CpaBufferList
) +
365 (num_dst_buf
* sizeof (CpaFlatBuffer
));
367 if (!is_vmalloc_addr(src
) || !is_vmalloc_addr(src
+ src_len
- 1) ||
368 !is_vmalloc_addr(dst
) || !is_vmalloc_addr(dst
+ dst_len
- 1))
371 if (PHYS_CONTIG_ALLOC(&in_pages
,
372 num_src_buf
* sizeof (struct page
*)) != CPA_STATUS_SUCCESS
)
375 if (PHYS_CONTIG_ALLOC(&out_pages
,
376 num_dst_buf
* sizeof (struct page
*)) != CPA_STATUS_SUCCESS
)
379 i
= atomic_inc_32_nv(&inst_num
) % num_inst
;
380 dc_inst_handle
= dc_inst_handles
[i
];
381 session_handle
= session_handles
[i
];
383 cpaDcBufferListGetMetaSize(dc_inst_handle
, num_src_buf
,
385 if (PHYS_CONTIG_ALLOC(&buffer_meta_src
, buffer_meta_size
) !=
389 cpaDcBufferListGetMetaSize(dc_inst_handle
, num_dst_buf
,
391 if (PHYS_CONTIG_ALLOC(&buffer_meta_dst
, buffer_meta_size
) !=
395 /* build source buffer list */
396 if (PHYS_CONTIG_ALLOC(&buf_list_src
, src_buffer_list_mem_size
) !=
400 flat_buf_src
= (CpaFlatBuffer
*)(buf_list_src
+ 1);
402 buf_list_src
->pBuffers
= flat_buf_src
; /* always point to first one */
404 /* build destination buffer list */
405 if (PHYS_CONTIG_ALLOC(&buf_list_dst
, dst_buffer_list_mem_size
) !=
409 flat_buf_dst
= (CpaFlatBuffer
*)(buf_list_dst
+ 1);
411 buf_list_dst
->pBuffers
= flat_buf_dst
; /* always point to first one */
413 buf_list_src
->numBuffers
= 0;
414 buf_list_src
->pPrivateMetaData
= buffer_meta_src
;
415 bytes_left
= src_len
;
418 while (bytes_left
> 0) {
419 in_page
= mem_to_page(data
);
420 in_pages
[page_num
] = in_page
;
421 flat_buf_src
->pData
= kmap(in_page
);
422 flat_buf_src
->dataLenInBytes
=
423 min((long)bytes_left
, (long)PAGE_SIZE
);
425 bytes_left
-= flat_buf_src
->dataLenInBytes
;
426 data
+= flat_buf_src
->dataLenInBytes
;
428 buf_list_src
->numBuffers
++;
432 buf_list_dst
->numBuffers
= 0;
433 buf_list_dst
->pPrivateMetaData
= buffer_meta_dst
;
434 bytes_left
= dst_len
;
437 while (bytes_left
> 0) {
438 out_page
= mem_to_page(data
);
439 flat_buf_dst
->pData
= kmap(out_page
);
440 out_pages
[page_num
] = out_page
;
441 flat_buf_dst
->dataLenInBytes
=
442 min((long)bytes_left
, (long)PAGE_SIZE
);
444 bytes_left
-= flat_buf_dst
->dataLenInBytes
;
445 data
+= flat_buf_dst
->dataLenInBytes
;
447 buf_list_dst
->numBuffers
++;
451 init_completion(&complete
);
453 if (dir
== QAT_COMPRESS
) {
454 QAT_STAT_BUMP(comp_requests
);
455 QAT_STAT_INCR(comp_total_in_bytes
, src_len
);
457 cpaDcGenerateHeader(session_handle
,
458 buf_list_dst
->pBuffers
, &hdr_sz
);
459 buf_list_dst
->pBuffers
->pData
+= hdr_sz
;
460 buf_list_dst
->pBuffers
->dataLenInBytes
-= hdr_sz
;
461 status
= cpaDcCompressData(
462 dc_inst_handle
, session_handle
,
463 buf_list_src
, buf_list_dst
,
464 &dc_results
, CPA_DC_FLUSH_FINAL
,
466 if (status
!= CPA_STATUS_SUCCESS
) {
470 /* we now wait until the completion of the operation. */
471 if (!wait_for_completion_interruptible_timeout(&complete
,
473 status
= CPA_STATUS_FAIL
;
477 if (dc_results
.status
!= CPA_STATUS_SUCCESS
) {
478 status
= CPA_STATUS_FAIL
;
482 compressed_sz
= dc_results
.produced
;
483 if (compressed_sz
+ hdr_sz
+ ZLIB_FOOT_SZ
> dst_len
) {
487 flat_buf_dst
= (CpaFlatBuffer
*)(buf_list_dst
+ 1);
488 /* move to the last page */
489 flat_buf_dst
+= (compressed_sz
+ hdr_sz
) >> PAGE_SHIFT
;
491 /* no space for gzip foot in the last page */
492 if (((compressed_sz
+ hdr_sz
) % PAGE_SIZE
)
493 + ZLIB_FOOT_SZ
> PAGE_SIZE
)
496 /* jump to the end of the buffer and append footer */
497 flat_buf_dst
->pData
=
498 (char *)((unsigned long)flat_buf_dst
->pData
& PAGE_MASK
)
499 + ((compressed_sz
+ hdr_sz
) % PAGE_SIZE
);
500 flat_buf_dst
->dataLenInBytes
= ZLIB_FOOT_SZ
;
502 dc_results
.produced
= 0;
503 status
= cpaDcGenerateFooter(session_handle
,
504 flat_buf_dst
, &dc_results
);
505 if (status
!= CPA_STATUS_SUCCESS
) {
509 *c_len
= compressed_sz
+ dc_results
.produced
+ hdr_sz
;
511 QAT_STAT_INCR(comp_total_out_bytes
, *c_len
);
515 } else if (dir
== QAT_DECOMPRESS
) {
516 QAT_STAT_BUMP(decomp_requests
);
517 QAT_STAT_INCR(decomp_total_in_bytes
, src_len
);
519 buf_list_src
->pBuffers
->pData
+= ZLIB_HEAD_SZ
;
520 buf_list_src
->pBuffers
->dataLenInBytes
-= ZLIB_HEAD_SZ
;
521 status
= cpaDcDecompressData(dc_inst_handle
,
529 if (CPA_STATUS_SUCCESS
!= status
) {
530 status
= CPA_STATUS_FAIL
;
534 /* we now wait until the completion of the operation. */
535 if (!wait_for_completion_interruptible_timeout(&complete
,
537 status
= CPA_STATUS_FAIL
;
541 if (dc_results
.status
!= CPA_STATUS_SUCCESS
) {
542 status
= CPA_STATUS_FAIL
;
546 *c_len
= dc_results
.produced
;
548 QAT_STAT_INCR(decomp_total_out_bytes
, *c_len
);
554 if (status
!= CPA_STATUS_SUCCESS
) {
555 QAT_STAT_BUMP(dc_fails
);
560 page_num
< buf_list_src
->numBuffers
;
562 kunmap(in_pages
[page_num
]);
564 PHYS_CONTIG_FREE(in_pages
);
569 page_num
< buf_list_dst
->numBuffers
;
571 kunmap(out_pages
[page_num
]);
573 PHYS_CONTIG_FREE(out_pages
);
576 PHYS_CONTIG_FREE(buffer_meta_src
);
577 PHYS_CONTIG_FREE(buffer_meta_dst
);
578 PHYS_CONTIG_FREE(buf_list_src
);
579 PHYS_CONTIG_FREE(buf_list_dst
);
584 module_param(zfs_qat_disable
, int, 0644);
585 MODULE_PARM_DESC(zfs_qat_disable
, "Disable QAT compression");