]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/btrfs/zlib.c
btrfs: Fix error handling in zlib
[mirror_ubuntu-jammy-kernel.git] / fs / btrfs / zlib.c
CommitLineData
c8b97818
CM
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 *
18 * Based on jffs2 zlib code:
19 * Copyright © 2001-2007 Red Hat, Inc.
20 * Created by David Woodhouse <dwmw2@infradead.org>
21 */
22
23#include <linux/kernel.h>
24#include <linux/slab.h>
25#include <linux/zlib.h>
26#include <linux/zutil.h>
27#include <linux/vmalloc.h>
28#include <linux/init.h>
29#include <linux/err.h>
30#include <linux/sched.h>
31#include <linux/pagemap.h>
32#include <linux/bio.h>
b2950863 33#include "compression.h"
c8b97818
CM
34
/*
 * Plan: call deflate() with avail_in == *sourcelen,
 * avail_out = *dstlen - 12 and flush == Z_FINISH.
 * If it does not manage to finish, call it again with
 * avail_in == 0 and avail_out set to the remaining 12
 * bytes so that it can clean up.
 *
 * Q: Is 12 bytes sufficient?
 */
#define STREAM_END_SPACE 12
43
44struct workspace {
45 z_stream inf_strm;
46 z_stream def_strm;
47 char *buf;
48 struct list_head list;
49};
50
51static LIST_HEAD(idle_workspace);
52static DEFINE_SPINLOCK(workspace_lock);
53static unsigned long num_workspace;
54static atomic_t alloc_workspace = ATOMIC_INIT(0);
55static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56
57/*
58 * this finds an available zlib workspace or allocates a new one
59 * NULL or an ERR_PTR is returned if things go bad.
60 */
61static struct workspace *find_zlib_workspace(void)
62{
63 struct workspace *workspace;
64 int ret;
65 int cpus = num_online_cpus();
66
67again:
68 spin_lock(&workspace_lock);
69 if (!list_empty(&idle_workspace)) {
70 workspace = list_entry(idle_workspace.next, struct workspace,
71 list);
72 list_del(&workspace->list);
73 num_workspace--;
74 spin_unlock(&workspace_lock);
75 return workspace;
76
77 }
c8b97818
CM
78 if (atomic_read(&alloc_workspace) > cpus) {
79 DEFINE_WAIT(wait);
8844355d
LZ
80
81 spin_unlock(&workspace_lock);
c8b97818 82 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
8844355d 83 if (atomic_read(&alloc_workspace) > cpus && !num_workspace)
c8b97818
CM
84 schedule();
85 finish_wait(&workspace_wait, &wait);
86 goto again;
87 }
88 atomic_inc(&alloc_workspace);
8844355d
LZ
89 spin_unlock(&workspace_lock);
90
c8b97818
CM
91 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
92 if (!workspace) {
93 ret = -ENOMEM;
94 goto fail;
95 }
96
97 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
98 if (!workspace->def_strm.workspace) {
99 ret = -ENOMEM;
100 goto fail;
101 }
102 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
103 if (!workspace->inf_strm.workspace) {
104 ret = -ENOMEM;
105 goto fail_inflate;
106 }
107 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
108 if (!workspace->buf) {
109 ret = -ENOMEM;
110 goto fail_kmalloc;
111 }
112 return workspace;
113
114fail_kmalloc:
115 vfree(workspace->inf_strm.workspace);
116fail_inflate:
117 vfree(workspace->def_strm.workspace);
118fail:
119 kfree(workspace);
120 atomic_dec(&alloc_workspace);
121 wake_up(&workspace_wait);
122 return ERR_PTR(ret);
123}
124
125/*
126 * put a workspace struct back on the list or free it if we have enough
127 * idle ones sitting around
128 */
129static int free_workspace(struct workspace *workspace)
130{
131 spin_lock(&workspace_lock);
132 if (num_workspace < num_online_cpus()) {
133 list_add_tail(&workspace->list, &idle_workspace);
134 num_workspace++;
135 spin_unlock(&workspace_lock);
136 if (waitqueue_active(&workspace_wait))
137 wake_up(&workspace_wait);
138 return 0;
139 }
140 spin_unlock(&workspace_lock);
141 vfree(workspace->def_strm.workspace);
142 vfree(workspace->inf_strm.workspace);
143 kfree(workspace->buf);
144 kfree(workspace);
145
146 atomic_dec(&alloc_workspace);
147 if (waitqueue_active(&workspace_wait))
148 wake_up(&workspace_wait);
149 return 0;
150}
151
152/*
153 * cleanup function for module exit
154 */
155static void free_workspaces(void)
156{
157 struct workspace *workspace;
d397712b 158 while (!list_empty(&idle_workspace)) {
c8b97818
CM
159 workspace = list_entry(idle_workspace.next, struct workspace,
160 list);
161 list_del(&workspace->list);
162 vfree(workspace->def_strm.workspace);
163 vfree(workspace->inf_strm.workspace);
164 kfree(workspace->buf);
165 kfree(workspace);
166 atomic_dec(&alloc_workspace);
167 }
168}
169
170/*
171 * given an address space and start/len, compress the bytes.
172 *
173 * pages are allocated to hold the compressed result and stored
174 * in 'pages'
175 *
176 * out_pages is used to return the number of pages allocated. There
177 * may be pages allocated even if we return an error
178 *
179 * total_in is used to return the number of bytes actually read. It
180 * may be smaller then len if we had to exit early because we
181 * ran out of room in the pages array or because we cross the
182 * max_out threshold.
183 *
184 * total_out is used to return the total number of compressed bytes
185 *
186 * max_out tells us the max number of bytes that we're allowed to
187 * stuff into pages
188 */
189int btrfs_zlib_compress_pages(struct address_space *mapping,
190 u64 start, unsigned long len,
191 struct page **pages,
192 unsigned long nr_dest_pages,
193 unsigned long *out_pages,
194 unsigned long *total_in,
195 unsigned long *total_out,
196 unsigned long max_out)
197{
198 int ret;
199 struct workspace *workspace;
200 char *data_in;
201 char *cpage_out;
202 int nr_pages = 0;
203 struct page *in_page = NULL;
204 struct page *out_page = NULL;
c8b97818
CM
205 unsigned long bytes_left;
206
207 *out_pages = 0;
208 *total_out = 0;
209 *total_in = 0;
210
211 workspace = find_zlib_workspace();
60f2e8f8 212 if (IS_ERR(workspace))
c8b97818
CM
213 return -1;
214
215 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
216 printk(KERN_WARNING "deflateInit failed\n");
217 ret = -1;
218 goto out;
219 }
220
221 workspace->def_strm.total_in = 0;
222 workspace->def_strm.total_out = 0;
223
224 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
225 data_in = kmap(in_page);
226
227 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
4b72029d
LZ
228 if (out_page == NULL) {
229 ret = -1;
230 goto out;
231 }
c8b97818
CM
232 cpage_out = kmap(out_page);
233 pages[0] = out_page;
234 nr_pages = 1;
235
236 workspace->def_strm.next_in = data_in;
237 workspace->def_strm.next_out = cpage_out;
238 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
239 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
240
c8b97818
CM
241 while (workspace->def_strm.total_in < len) {
242 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
243 if (ret != Z_OK) {
244 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
245 ret);
246 zlib_deflateEnd(&workspace->def_strm);
247 ret = -1;
248 goto out;
249 }
250
251 /* we're making it bigger, give up */
252 if (workspace->def_strm.total_in > 8192 &&
253 workspace->def_strm.total_in <
254 workspace->def_strm.total_out) {
255 ret = -1;
256 goto out;
257 }
258 /* we need another page for writing out. Test this
259 * before the total_in so we will pull in a new page for
260 * the stream end if required
261 */
262 if (workspace->def_strm.avail_out == 0) {
263 kunmap(out_page);
264 if (nr_pages == nr_dest_pages) {
265 out_page = NULL;
266 ret = -1;
267 goto out;
268 }
269 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
4b72029d
LZ
270 if (out_page == NULL) {
271 ret = -1;
272 goto out;
273 }
c8b97818
CM
274 cpage_out = kmap(out_page);
275 pages[nr_pages] = out_page;
276 nr_pages++;
277 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
278 workspace->def_strm.next_out = cpage_out;
279 }
280 /* we're all done */
281 if (workspace->def_strm.total_in >= len)
282 break;
283
284 /* we've read in a full page, get a new one */
285 if (workspace->def_strm.avail_in == 0) {
286 if (workspace->def_strm.total_out > max_out)
287 break;
288
289 bytes_left = len - workspace->def_strm.total_in;
290 kunmap(in_page);
291 page_cache_release(in_page);
292
293 start += PAGE_CACHE_SIZE;
294 in_page = find_get_page(mapping,
295 start >> PAGE_CACHE_SHIFT);
296 data_in = kmap(in_page);
297 workspace->def_strm.avail_in = min(bytes_left,
298 PAGE_CACHE_SIZE);
299 workspace->def_strm.next_in = data_in;
300 }
301 }
302 workspace->def_strm.avail_in = 0;
303 ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
304 zlib_deflateEnd(&workspace->def_strm);
305
306 if (ret != Z_STREAM_END) {
307 ret = -1;
308 goto out;
309 }
310
311 if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
312 ret = -1;
313 goto out;
314 }
315
316 ret = 0;
317 *total_out = workspace->def_strm.total_out;
318 *total_in = workspace->def_strm.total_in;
319out:
320 *out_pages = nr_pages;
321 if (out_page)
322 kunmap(out_page);
323
324 if (in_page) {
325 kunmap(in_page);
326 page_cache_release(in_page);
327 }
328 free_workspace(workspace);
329 return ret;
330}
331
332/*
333 * pages_in is an array of pages with compressed data.
334 *
335 * disk_start is the starting logical offset of this array in the file
336 *
337 * bvec is a bio_vec of pages from the file that we want to decompress into
338 *
339 * vcnt is the count of pages in the biovec
340 *
341 * srclen is the number of bytes in pages_in
342 *
343 * The basic idea is that we have a bio that was created by readpages.
344 * The pages in the bio are for the uncompressed data, and they may not
345 * be contiguous. They all correspond to the range of bytes covered by
346 * the compressed extent.
347 */
348int btrfs_zlib_decompress_biovec(struct page **pages_in,
349 u64 disk_start,
350 struct bio_vec *bvec,
351 int vcnt,
352 size_t srclen)
353{
354 int ret = 0;
355 int wbits = MAX_WBITS;
356 struct workspace *workspace;
357 char *data_in;
358 size_t total_out = 0;
359 unsigned long page_bytes_left;
360 unsigned long page_in_index = 0;
361 unsigned long page_out_index = 0;
362 struct page *page_out;
363 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
364 PAGE_CACHE_SIZE;
365 unsigned long buf_start;
366 unsigned long buf_offset;
367 unsigned long bytes;
368 unsigned long working_bytes;
369 unsigned long pg_offset;
370 unsigned long start_byte;
371 unsigned long current_buf_start;
372 char *kaddr;
373
374 workspace = find_zlib_workspace();
60f2e8f8 375 if (IS_ERR(workspace))
c8b97818
CM
376 return -ENOMEM;
377
378 data_in = kmap(pages_in[page_in_index]);
379 workspace->inf_strm.next_in = data_in;
5b050f04 380 workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
c8b97818
CM
381 workspace->inf_strm.total_in = 0;
382
383 workspace->inf_strm.total_out = 0;
384 workspace->inf_strm.next_out = workspace->buf;
385 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
386 page_out = bvec[page_out_index].bv_page;
387 page_bytes_left = PAGE_CACHE_SIZE;
388 pg_offset = 0;
389
390 /* If it's deflate, and it's got no preset dictionary, then
391 we can tell zlib to skip the adler32 check. */
392 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
393 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
394 !(((data_in[0]<<8) + data_in[1]) % 31)) {
395
396 wbits = -((data_in[0] >> 4) + 8);
397 workspace->inf_strm.next_in += 2;
398 workspace->inf_strm.avail_in -= 2;
399 }
400
401 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
402 printk(KERN_WARNING "inflateInit failed\n");
403 ret = -1;
404 goto out;
405 }
d397712b 406 while (workspace->inf_strm.total_in < srclen) {
c8b97818 407 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
d397712b 408 if (ret != Z_OK && ret != Z_STREAM_END)
c8b97818 409 break;
c8b97818
CM
410 /*
411 * buf start is the byte offset we're of the start of
412 * our workspace buffer
413 */
414 buf_start = total_out;
415
416 /* total_out is the last byte of the workspace buffer */
417 total_out = workspace->inf_strm.total_out;
418
419 working_bytes = total_out - buf_start;
420
421 /*
422 * start byte is the first byte of the page we're currently
423 * copying into relative to the start of the compressed data.
424 */
425 start_byte = page_offset(page_out) - disk_start;
426
427 if (working_bytes == 0) {
428 /* we didn't make progress in this inflate
429 * call, we're done
430 */
d397712b 431 if (ret != Z_STREAM_END)
c8b97818
CM
432 ret = -1;
433 break;
434 }
435
436 /* we haven't yet hit data corresponding to this page */
d397712b 437 if (total_out <= start_byte)
c8b97818 438 goto next;
c8b97818
CM
439
440 /*
441 * the start of the data we care about is offset into
442 * the middle of our working buffer
443 */
444 if (total_out > start_byte && buf_start < start_byte) {
445 buf_offset = start_byte - buf_start;
446 working_bytes -= buf_offset;
447 } else {
448 buf_offset = 0;
449 }
450 current_buf_start = buf_start;
451
452 /* copy bytes from the working buffer into the pages */
d397712b 453 while (working_bytes > 0) {
c8b97818
CM
454 bytes = min(PAGE_CACHE_SIZE - pg_offset,
455 PAGE_CACHE_SIZE - buf_offset);
456 bytes = min(bytes, working_bytes);
457 kaddr = kmap_atomic(page_out, KM_USER0);
458 memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
459 bytes);
460 kunmap_atomic(kaddr, KM_USER0);
461 flush_dcache_page(page_out);
462
463 pg_offset += bytes;
464 page_bytes_left -= bytes;
465 buf_offset += bytes;
466 working_bytes -= bytes;
467 current_buf_start += bytes;
468
469 /* check if we need to pick another page */
470 if (page_bytes_left == 0) {
471 page_out_index++;
472 if (page_out_index >= vcnt) {
473 ret = 0;
474 goto done;
475 }
d397712b 476
c8b97818
CM
477 page_out = bvec[page_out_index].bv_page;
478 pg_offset = 0;
479 page_bytes_left = PAGE_CACHE_SIZE;
480 start_byte = page_offset(page_out) - disk_start;
481
482 /*
483 * make sure our new page is covered by this
484 * working buffer
485 */
d397712b 486 if (total_out <= start_byte)
c8b97818 487 goto next;
c8b97818
CM
488
489 /* the next page in the biovec might not
490 * be adjacent to the last page, but it
491 * might still be found inside this working
492 * buffer. bump our offset pointer
493 */
494 if (total_out > start_byte &&
495 current_buf_start < start_byte) {
496 buf_offset = start_byte - buf_start;
497 working_bytes = total_out - start_byte;
498 current_buf_start = buf_start +
499 buf_offset;
500 }
501 }
502 }
503next:
504 workspace->inf_strm.next_out = workspace->buf;
505 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
506
507 if (workspace->inf_strm.avail_in == 0) {
508 unsigned long tmp;
509 kunmap(pages_in[page_in_index]);
510 page_in_index++;
511 if (page_in_index >= total_pages_in) {
512 data_in = NULL;
513 break;
514 }
515 data_in = kmap(pages_in[page_in_index]);
516 workspace->inf_strm.next_in = data_in;
517 tmp = srclen - workspace->inf_strm.total_in;
518 workspace->inf_strm.avail_in = min(tmp,
519 PAGE_CACHE_SIZE);
520 }
521 }
d397712b 522 if (ret != Z_STREAM_END)
c8b97818 523 ret = -1;
d397712b 524 else
c8b97818 525 ret = 0;
c8b97818
CM
526done:
527 zlib_inflateEnd(&workspace->inf_strm);
528 if (data_in)
529 kunmap(pages_in[page_in_index]);
530out:
531 free_workspace(workspace);
532 return ret;
533}
534
535/*
536 * a less complex decompression routine. Our compressed data fits in a
537 * single page, and we want to read a single page out of it.
538 * start_byte tells us the offset into the compressed data we're interested in
539 */
540int btrfs_zlib_decompress(unsigned char *data_in,
541 struct page *dest_page,
542 unsigned long start_byte,
543 size_t srclen, size_t destlen)
544{
545 int ret = 0;
546 int wbits = MAX_WBITS;
547 struct workspace *workspace;
548 unsigned long bytes_left = destlen;
549 unsigned long total_out = 0;
550 char *kaddr;
551
552 if (destlen > PAGE_CACHE_SIZE)
553 return -ENOMEM;
554
555 workspace = find_zlib_workspace();
60f2e8f8 556 if (IS_ERR(workspace))
c8b97818
CM
557 return -ENOMEM;
558
559 workspace->inf_strm.next_in = data_in;
560 workspace->inf_strm.avail_in = srclen;
561 workspace->inf_strm.total_in = 0;
562
563 workspace->inf_strm.next_out = workspace->buf;
564 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
565 workspace->inf_strm.total_out = 0;
566 /* If it's deflate, and it's got no preset dictionary, then
567 we can tell zlib to skip the adler32 check. */
568 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
569 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
570 !(((data_in[0]<<8) + data_in[1]) % 31)) {
571
572 wbits = -((data_in[0] >> 4) + 8);
573 workspace->inf_strm.next_in += 2;
574 workspace->inf_strm.avail_in -= 2;
575 }
576
577 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
578 printk(KERN_WARNING "inflateInit failed\n");
579 ret = -1;
580 goto out;
581 }
582
d397712b 583 while (bytes_left > 0) {
c8b97818
CM
584 unsigned long buf_start;
585 unsigned long buf_offset;
586 unsigned long bytes;
587 unsigned long pg_offset = 0;
588
589 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
d397712b 590 if (ret != Z_OK && ret != Z_STREAM_END)
c8b97818 591 break;
c8b97818
CM
592
593 buf_start = total_out;
594 total_out = workspace->inf_strm.total_out;
595
596 if (total_out == buf_start) {
597 ret = -1;
598 break;
599 }
600
d397712b 601 if (total_out <= start_byte)
c8b97818 602 goto next;
c8b97818 603
d397712b 604 if (total_out > start_byte && buf_start < start_byte)
c8b97818 605 buf_offset = start_byte - buf_start;
d397712b 606 else
c8b97818 607 buf_offset = 0;
c8b97818
CM
608
609 bytes = min(PAGE_CACHE_SIZE - pg_offset,
610 PAGE_CACHE_SIZE - buf_offset);
611 bytes = min(bytes, bytes_left);
612
613 kaddr = kmap_atomic(dest_page, KM_USER0);
614 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
615 kunmap_atomic(kaddr, KM_USER0);
616
617 pg_offset += bytes;
618 bytes_left -= bytes;
619next:
620 workspace->inf_strm.next_out = workspace->buf;
621 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
622 }
d397712b
CM
623
624 if (ret != Z_STREAM_END && bytes_left != 0)
c8b97818 625 ret = -1;
d397712b 626 else
c8b97818 627 ret = 0;
d397712b 628
c8b97818
CM
629 zlib_inflateEnd(&workspace->inf_strm);
630out:
631 free_workspace(workspace);
632 return ret;
633}
634
/*
 * Release every idle zlib workspace.
 */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}