/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
static void zram_free_page(struct zram *zram, size_t index);
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}
static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}
static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}
static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}
/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}
static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}
static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}
static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
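/*
 * Illustrative note on the ->value packing used by zram_get_obj_size() and
 * zram_set_obj_size() above: the low ZRAM_FLAG_SHIFT bits hold the
 * compressed object size, and the bits above them hold the enum
 * zram_pageflags bits (including the ZRAM_ACCESS bit used as the per-slot
 * bit_spin_lock). The actual shift value is defined in zram_drv.h.
 */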
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
static void zram_revalidate_disk(struct zram *zram)
{
	revalidate_disk(zram->disk);
	/* revalidate_disk resets BDI_CAP_STABLE_WRITES, so set it again */
	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_STABLE_WRITES;
}
/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}
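/*
 * Worked example for valid_io_request() (illustrative, assuming 4096-byte
 * logical blocks): ZRAM_SECTOR_PER_LOGICAL_BLOCK is then 8, so a request
 * starting at sector 9 fails the alignment check (9 & 7 != 0), while one
 * starting at sector 16 with a 4096-byte size passes both alignment checks
 * and is then only bounds-checked against the disksize.
 */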
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
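/*
 * Worked example for update_position() (illustrative): with PAGE_SIZE ==
 * 4096, a segment with *offset == 3072 and bv_len == 2048 crosses a page
 * boundary, so *index advances by (3072 + 2048) / 4096 == 1 and the new
 * *offset is (3072 + 2048) % 4096 == 1024.
 */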
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
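/*
 * The cmpxchg loop in update_used_max() implements a lock-free "store
 * maximum": if another CPU raised max_used_pages between the read and the
 * cmpxchg, old_max != cur_max and the loop retries against the new value,
 * so a concurrently recorded maximum is never overwritten with a smaller
 * one.
 */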
static inline void zram_fill_page(char *ptr, unsigned long len,
					unsigned long value)
{
	int i;
	unsigned long *page = (unsigned long *)ptr;

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));

	if (likely(value == 0)) {
		memset(ptr, 0, len);
	} else {
		for (i = 0; i < len / sizeof(*page); i++)
			page[i] = value;
	}
}
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
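/*
 * Example for page_same_filled() (illustrative): a page whose every
 * unsigned long word is 0, or any other single repeated word value, is
 * detected here and later stored as a single element in the slot table
 * instead of being compressed; such pages are what the same_pages counter
 * tracks.
 */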
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}
/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}
static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[CRYPTO_MAX_ALG_NAME];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strlcpy(zram->compressor, compressor, sizeof(compressor));
	up_write(&zram->init_lock);
	return len;
}
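/*
 * Illustrative usage (assuming the lz4 backend is available):
 *   echo lz4 > /sys/block/zram0/comp_algorithm
 * The write is rejected with -EBUSY once the device has been initialized
 * by a disksize store.
 */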
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}
static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}
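/*
 * The mm_stat columns printed above are, in order: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max, same_pages and
 * pages_compacted. Sizes are reported in bytes (internal page counts are
 * shifted by PAGE_SHIFT); same_pages and pages_compacted are plain counts.
 */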
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}
static bool zram_same_page_read(struct zram *zram, u32 index,
				struct page *page,
				unsigned int offset, unsigned int len)
{
	zram_slot_lock(zram, index);
	if (unlikely(!zram_get_handle(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME))) {
		void *mem;

		zram_slot_unlock(zram, index);
		mem = kmap_atomic(page);
		zram_fill_page(mem + offset, len,
				zram_get_element(zram, index));
		kunmap_atomic(mem);
		return true;
	}
	zram_slot_unlock(zram, index);

	return false;
}
static bool zram_same_page_write(struct zram *zram, u32 index,
					struct page *page)
{
	unsigned long element;
	void *mem = kmap_atomic(page);

	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_set_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, element);
		zram_slot_unlock(zram, index);

		atomic64_inc(&zram->stats.same_pages);
		atomic64_inc(&zram->stats.pages_stored);
		return true;
	}
	kunmap_atomic(mem);

	return false;
}
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(num_pages * sizeof(*zram->table));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	return true;
}
/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock to indicate that
 * the entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle = zram_get_handle(zram, index);

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}
static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
		return 0;

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = zram_decompress_page(zram, page, index);
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
			struct page *page,
			unsigned long *out_handle, unsigned int *out_comp_len)
{
	int ret;
	unsigned int comp_len;
	void *src;
	unsigned long alloced_pages;
	unsigned long handle = 0;

compress_again:
	src = kmap_atomic(page);
	ret = zcomp_compress(*zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		if (handle)
			zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (unlikely(comp_len > max_zpage_size))
		comp_len = PAGE_SIZE;

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		*zstrm = zcomp_stream_get(zram->comp);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	*out_handle = handle;
	*out_comp_len = comp_len;
	return 0;
}
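/*
 * Summary of the two-path allocation in zram_compress() above: the first
 * zs_malloc() runs with the per-cpu stream held (preemption disabled) and
 * therefore without __GFP_DIRECT_RECLAIM; only if it fails is the stream
 * dropped so a sleeping GFP_NOIO allocation can run, after which the page
 * must be compressed again (compress_again) with the handle already in
 * hand.
 */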
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
	int ret;
	unsigned long handle;
	unsigned int comp_len;
	void *src, *dst;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;

	if (zram_same_page_write(zram, index, page))
		return 0;

	zstrm = zcomp_stream_get(zram->comp);
	ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
	if (ret) {
		zcomp_stream_put(zram->comp);
		return ret;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle);
	zram_set_obj_size(zram, index, comp_len);
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_add(comp_len, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
	return 0;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = zram_decompress_page(zram, page, index);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}
/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block. Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory. So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}
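/*
 * Worked example for zram_bio_discard() (illustrative, PAGE_SIZE == 4096):
 * a discard of n == 8192 bytes at offset == 512 skips the 3584 bytes that
 * finish the first page, frees exactly one fully covered page, and leaves
 * the final 512 bytes of the range untouched.
 */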
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, bool is_write)
{
	unsigned long start_time = jiffies;
	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
	int ret;

	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!is_write) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (!is_write)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					op_is_write(bio_op(bio))) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}
/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, bool is_write)
{
	int offset, err = -EIO;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		err = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	err = zram_bvec_rw(zram, &bv, index, offset, is_write);
out:
	/*
	 * If I/O fails, just return the error (i.e., non-zero) without
	 * calling page_endio.
	 * This causes the upper callers of rw_page (e.g., swap_readpage,
	 * __swap_writepage) to resubmit the I/O as a bio request, and
	 * bio->bi_end_io then handles the error
	 * (e.g., SetPageError, set_page_dirty and extra work).
	 */
	if (err == 0)
		page_endio(page, is_write, 0);
	return err;
}
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
}
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_revalidate_disk(zram);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
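/*
 * Illustrative usage: memparse() accepts size suffixes, so
 *   echo 1G > /sys/block/zram0/disksize
 * initializes the device with a 1 GiB (PAGE_ALIGNed) disksize.
 */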
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	zram_revalidate_disk(zram);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}
static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};
/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size (PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size. So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	blk_cleanup_queue(zram->disk->queue);
	del_gendisk(zram->disk);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}
/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns this
 * device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);
static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
*zram_control_class_attrs
[] = {
1332 &class_attr_hot_add
.attr
,
1333 &class_attr_hot_remove
.attr
,
1336 ATTRIBUTE_GROUPS(zram_control_class
);
1338 static struct class zram_control_class
= {
1339 .name
= "zram-control",
1340 .owner
= THIS_MODULE
,
1341 .class_groups
= zram_control_class_groups
,
static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}
static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}
static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");