/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>

/* Plan: call deflate() with avail_in == *sourcelen,
	avail_out = *dstlen - 12 and flush == Z_FINISH.
	If it doesn't manage to finish, call it again with
	avail_in == 0 and avail_out set to the remaining 12
	bytes for it to clean up.
   Q: Is 12 bytes sufficient?
*/
#define STREAM_END_SPACE 12

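/*
 * a workspace holds everything one compress or decompress call needs:
 * both zlib streams (with their vmalloc'd internal state) and a single
 * page sized staging buffer used by the decompression paths
 */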
struct workspace {
	z_stream inf_strm;
	z_stream def_strm;
	char *buf;
	struct list_head list;
};

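/*
 * workspaces that aren't in use sit on idle_workspace; alloc_workspace
 * counts every workspace, idle or busy, so allocations can be throttled
 * against the number of online cpus
 */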
static LIST_HEAD(idle_workspace);
static DEFINE_SPINLOCK(workspace_lock);
static unsigned long num_workspace;
static atomic_t alloc_workspace = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);

/*
 * this finds an available zlib workspace or allocates a new one.
 * an ERR_PTR is returned if things go bad.
 */
static struct workspace *find_zlib_workspace(void)
{
	struct workspace *workspace;
	int ret;
	int cpus = num_online_cpus();

again:
	spin_lock(&workspace_lock);
	if (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		num_workspace--;
		spin_unlock(&workspace_lock);
		return workspace;
	}
	spin_unlock(&workspace_lock);
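	/*
	 * nothing idle was found; cap the number of workspaces at roughly
	 * one per online cpu and sleep until somebody frees one if we are
	 * already over that limit
	 */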
	if (atomic_read(&alloc_workspace) > cpus) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&alloc_workspace) > cpus)
			schedule();
		finish_wait(&workspace_wait, &wait);
		goto again;
	}
	atomic_inc(&alloc_workspace);
	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
	if (!workspace) {
		ret = -ENOMEM;
		goto fail;
	}

	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	if (!workspace->def_strm.workspace) {
		ret = -ENOMEM;
		goto fail;
	}
	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
	if (!workspace->inf_strm.workspace) {
		ret = -ENOMEM;
		goto fail_inflate;
	}
	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
	if (!workspace->buf) {
		ret = -ENOMEM;
		goto fail_kmalloc;
	}
	return workspace;

fail_kmalloc:
	vfree(workspace->inf_strm.workspace);
fail_inflate:
	vfree(workspace->def_strm.workspace);
fail:
	kfree(workspace);
	atomic_dec(&alloc_workspace);
	wake_up(&workspace_wait);
	return ERR_PTR(ret);
}

/*
 * put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around
 */
static int free_workspace(struct workspace *workspace)
{
	spin_lock(&workspace_lock);
	if (num_workspace < num_online_cpus()) {
		list_add_tail(&workspace->list, &idle_workspace);
		num_workspace++;
		spin_unlock(&workspace_lock);
		if (waitqueue_active(&workspace_wait))
			wake_up(&workspace_wait);
		return 0;
	}
	spin_unlock(&workspace_lock);
	vfree(workspace->def_strm.workspace);
	vfree(workspace->inf_strm.workspace);
	kfree(workspace->buf);
	kfree(workspace);

	atomic_dec(&alloc_workspace);
	if (waitqueue_active(&workspace_wait))
		wake_up(&workspace_wait);
	return 0;
}

/*
 * cleanup function for module exit
 */
static void free_workspaces(void)
{
	struct workspace *workspace;

	while (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);
		atomic_dec(&alloc_workspace);
	}
}

/*
 * given an address space and start/len, compress the bytes.
 *
 * pages are allocated to hold the compressed result and stored
 * in 'pages'
 *
 * out_pages is used to return the number of pages allocated.  There
 * may be pages allocated even if we return an error
 *
 * total_in is used to return the number of bytes actually read.  It
 * may be smaller than len if we had to exit early because we ran out
 * of room in the pages array or because we crossed the max_out
 * threshold.
 *
 * total_out is used to return the total number of compressed bytes
 *
 * max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 */
int btrfs_zlib_compress_pages(struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out)
{
	int ret;
	struct workspace *workspace;
	char *data_in;
	char *cpage_out;
	int nr_pages = 0;
	struct page *in_page = NULL;
	struct page *out_page = NULL;
	unsigned long bytes_left;

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -1;

	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
		printk(KERN_WARNING "deflateInit failed\n");
		ret = -1;
		goto out;
	}

	workspace->def_strm.total_in = 0;
	workspace->def_strm.total_out = 0;

	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
	data_in = kmap(in_page);

	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	cpage_out = kmap(out_page);
	pages[0] = out_page;
	nr_pages = 1;

	workspace->def_strm.next_in = data_in;
	workspace->def_strm.next_out = cpage_out;
	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);

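	/*
	 * run deflate one step at a time, swapping in a fresh input or
	 * output page whenever zlib drains one; Z_SYNC_FLUSH forces any
	 * pending output out on each pass
	 */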
	while (workspace->def_strm.total_in < len) {
		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
		if (ret != Z_OK) {
			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
			       ret);
			zlib_deflateEnd(&workspace->def_strm);
			ret = -1;
			goto out;
		}

		/* we're making it bigger, give up */
		if (workspace->def_strm.total_in > 8192 &&
		    workspace->def_strm.total_in <
		    workspace->def_strm.total_out) {
			ret = -1;
			goto out;
		}
		/* we need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required
		 */
		if (workspace->def_strm.avail_out == 0) {
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				out_page = NULL;
				ret = -1;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			cpage_out = kmap(out_page);
			pages[nr_pages] = out_page;
			nr_pages++;
			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
			workspace->def_strm.next_out = cpage_out;
		}
		/* we're all done */
		if (workspace->def_strm.total_in >= len)
			break;

		/* we've read in a full page, get a new one */
		if (workspace->def_strm.avail_in == 0) {
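			/* respect the cap the caller put on our output */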
			if (workspace->def_strm.total_out > max_out)
				break;

			bytes_left = len - workspace->def_strm.total_in;
			kunmap(in_page);
			page_cache_release(in_page);

			start += PAGE_CACHE_SIZE;
			in_page = find_get_page(mapping,
						start >> PAGE_CACHE_SHIFT);
			data_in = kmap(in_page);
			workspace->def_strm.avail_in = min(bytes_left,
							   PAGE_CACHE_SIZE);
			workspace->def_strm.next_in = data_in;
		}
	}
	workspace->def_strm.avail_in = 0;
	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
	zlib_deflateEnd(&workspace->def_strm);

	if (ret != Z_STREAM_END) {
		ret = -1;
		goto out;
	}

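	/* compressing didn't shrink the data, give up so the caller can
	 * fall back to storing it uncompressed
	 */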
	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
		ret = -1;
		goto out;
	}

	ret = 0;
	*total_out = workspace->def_strm.total_out;
	*total_in = workspace->def_strm.total_in;
out:
	*out_pages = nr_pages;
	if (out_page)
		kunmap(out_page);

	if (in_page) {
		kunmap(in_page);
		page_cache_release(in_page);
	}
	free_workspace(workspace);
	return ret;
}

/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * bvec is a bio_vec of pages from the file that we want to decompress into
 *
 * vcnt is the count of pages in the biovec
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 */
int btrfs_zlib_decompress_biovec(struct page **pages_in,
				 u64 disk_start,
				 struct bio_vec *bvec,
				 int vcnt,
				 size_t srclen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	char *data_in;
	size_t total_out = 0;
	unsigned long page_bytes_left;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	struct page *page_out;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
				       PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;
	unsigned long start_byte;
	unsigned long current_buf_start;
	char *kaddr;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	data_in = kmap(pages_in[page_in_index]);
	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE);
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.total_out = 0;
	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	page_out = bvec[page_out_index].bv_page;
	page_bytes_left = PAGE_CACHE_SIZE;
	pg_offset = 0;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

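		/*
		 * a negative windowBits asks zlib_inflateInit2() for a raw
		 * inflate with no header or adler32; the window size comes
		 * from the CINFO nibble of the header we just validated,
		 * and we step over the two header bytes ourselves
		 */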
		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}
	while (workspace->inf_strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END) {
			break;
		}

		/*
		 * buf_start is the byte offset of our workspace buffer
		 * within the uncompressed data
		 */
		buf_start = total_out;

		/* total_out now points just past the end of the buffer */
		total_out = workspace->inf_strm.total_out;

		working_bytes = total_out - buf_start;

		/*
		 * start_byte is the first byte of the page we're currently
		 * copying into, relative to the start of the uncompressed
		 * data.
		 */
		start_byte = page_offset(page_out) - disk_start;

		if (working_bytes == 0) {
			/* we didn't make progress in this inflate
			 * call, we're done
			 */
			if (ret != Z_STREAM_END) {
				ret = -1;
			}
			break;
		}

		/* we haven't yet hit data corresponding to this page */
		if (total_out <= start_byte) {
			goto next;
		}

		/*
		 * the start of the data we care about is offset into
		 * the middle of our working buffer
		 */
		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
			working_bytes -= buf_offset;
		} else {
			buf_offset = 0;
		}
		current_buf_start = buf_start;

		/* copy bytes from the working buffer into the pages */
		while (working_bytes > 0) {
			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, working_bytes);
			kaddr = kmap_atomic(page_out, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
			       bytes);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page_out);

			pg_offset += bytes;
			page_bytes_left -= bytes;
			buf_offset += bytes;
			working_bytes -= bytes;
			current_buf_start += bytes;

			/* check if we need to pick another page */
			if (page_bytes_left == 0) {
				page_out_index++;
				if (page_out_index >= vcnt) {
					ret = 0;
					goto done;
				}
				page_out = bvec[page_out_index].bv_page;
				pg_offset = 0;
				page_bytes_left = PAGE_CACHE_SIZE;
				start_byte = page_offset(page_out) -
					disk_start;

				/*
				 * make sure our new page is covered by this
				 * working buffer
				 */
				if (total_out <= start_byte) {
					goto next;
				}

				/* the next page in the biovec might not
				 * be adjacent to the last page, but it
				 * might still be found inside this working
				 * buffer.  bump our offset pointer
				 */
				if (total_out > start_byte &&
				    current_buf_start < start_byte) {
					buf_offset = start_byte - buf_start;
					working_bytes = total_out - start_byte;
					current_buf_start = buf_start +
						buf_offset;
				}
			}
		}
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

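		/* this compressed page is drained, map the next one */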
		if (workspace->inf_strm.avail_in == 0) {
			unsigned long tmp;

			kunmap(pages_in[page_in_index]);
			page_in_index++;
			if (page_in_index >= total_pages_in) {
				data_in = NULL;
				break;
			}
			data_in = kmap(pages_in[page_in_index]);
			workspace->inf_strm.next_in = data_in;
			tmp = srclen - workspace->inf_strm.total_in;
			workspace->inf_strm.avail_in = min(tmp,
							   PAGE_CACHE_SIZE);
		}
	}
	if (ret != Z_STREAM_END) {
		ret = -1;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->inf_strm);
	if (data_in)
		kunmap(pages_in[page_in_index]);
out:
	free_workspace(workspace);
	return ret;
}

/*
 * a less complex decompression routine.  Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * start_byte tells us the offset into the uncompressed data we're
 * interested in
 */
int btrfs_zlib_decompress(unsigned char *data_in,
			  struct page *dest_page,
			  unsigned long start_byte,
			  size_t srclen, size_t destlen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	unsigned long bytes_left = destlen;
	unsigned long total_out = 0;
	char *kaddr;

	if (destlen > PAGE_CACHE_SIZE)
		return -ENOMEM;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = srclen;
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->inf_strm.total_out = 0;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

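		/* same trick as the biovec path: negative windowBits means
		 * a raw inflate with the window size taken from the
		 * header's CINFO nibble
		 */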
		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}

	while (bytes_left > 0) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;
		unsigned long pg_offset = 0;

		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END) {
			break;
		}

		buf_start = total_out;
		total_out = workspace->inf_strm.total_out;

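		/* inflate made no progress, bail out rather than spin */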
		if (total_out == buf_start) {
			ret = -1;
			break;
		}

		if (total_out <= start_byte) {
			goto next;
		}

		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
		} else {
			buf_offset = 0;
		}

		bytes = min(PAGE_CACHE_SIZE - pg_offset,
			    PAGE_CACHE_SIZE - buf_offset);
		bytes = min(bytes, bytes_left);

		kaddr = kmap_atomic(dest_page, KM_USER0);
		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
		kunmap_atomic(kaddr, KM_USER0);

		pg_offset += bytes;
		bytes_left -= bytes;
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	}
	if (ret != Z_STREAM_END && bytes_left != 0) {
		ret = -1;
	} else {
		ret = 0;
	}
	zlib_inflateEnd(&workspace->inf_strm);
out:
	free_workspace(workspace);
	return ret;
}

void btrfs_zlib_exit(void)
{
	free_workspaces();
}