/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>

#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;
static const struct block_device_operations zram_wb_devops;
static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio);
static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}
/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}
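
/*
 * Each table entry packs two things into one unsigned long: the low
 * ZRAM_FLAG_SHIFT bits hold the compressed object size, and the bits above
 * them hold the zram_pageflags (including the ZRAM_LOCK bit used by the
 * slot lock helpers). For example, with a hypothetical ZRAM_FLAG_SHIFT of
 * 24, a flags value of (BIT(ZRAM_SAME + 24) | 100) would describe a
 * 100-byte object with ZRAM_SAME set. The concrete shift value lives in
 * zram_drv.h; the numbers here are illustrative only.
 */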
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
			u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}
static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}
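
/*
 * update_position() below advances the (index, offset) cursor by one
 * bio_vec: e.g. with PAGE_SIZE == 4096, index 3/offset 3072 plus a
 * 2048-byte segment moves to index 4/offset 1024.
 */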
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
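
/*
 * Lock-free "max" update: re-read stats.max_used_pages and retry with
 * atomic_long_cmpxchg() until either the current maximum is already
 * >= @pages or our value has been successfully installed.
 */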
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}
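
/*
 * A page is "same filled" if every machine word in it holds the same
 * value (a zero page is the common case). The check below compares the
 * first word against the last and then against all words in between.
 */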
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}
static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	if (!sysfs_streq(buf, "all"))
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
				!zram_test_flag(zram, index, ZRAM_UNDER_WB))
			zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}

	up_read(&zram->init_lock);

	return len;
}
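
/*
 * CONFIG_ZRAM_WRITEBACK: pages can be written out to a backing block
 * device (set up via the backing_dev attribute), freeing the compressed
 * copy in memory. Slots that have been written back are flagged ZRAM_WB
 * and store the backing-device block index in their element field.
 */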
#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}
static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}
static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}
static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Support only block device in this moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = blkdev_get_by_dev(inode->i_rdev,
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (IS_ERR(bdev)) {
		err = PTR_ERR(bdev);
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	/*
	 * With the writeback feature, zram does asynchronous IO, so it is no
	 * longer a synchronous device: clear the synchronous io flag.
	 * Otherwise, the upper layer (e.g., swap) could wait for IO
	 * completion rather than submit-and-return, which would make the
	 * system sluggish. Furthermore, when the IO function returns (e.g.,
	 * swap_readpage), the upper layer expects the IO to be done and may
	 * free the page while the IO is in fact still in flight, which
	 * could cause a use-after-free when the IO really completes.
	 */
	zram->disk->fops = &zram_wb_devops;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 so a valid block index is never confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}
static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}
/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}
#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK 0
#define HUGE_WRITEBACK 1
#define IDLE_WRITEBACK 2
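
/*
 * writeback_store() accepts three trigger strings: "idle" (write back
 * ZRAM_IDLE slots), "huge" (write back ZRAM_HUGE, i.e. incompressible,
 * slots) and "page_index=<n>" (write back one specific page). For example:
 *	echo idle > /sys/block/zram0/writeback
 */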
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
		if (ret || index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	for (; nr_pages != 0; index++, nr_pages--) {
		struct bio_vec bvec;

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;

		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
				zram_test_flag(zram, index, ZRAM_SAME) ||
				zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode == IDLE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode == HUGE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		/*
		 * Clearing ZRAM_UNDER_WB is the caller's duty;
		 * IOW, zram_free_page() never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Needed for hugepage writeback racing (see the check below) */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		bio_init(&bio, &bio_vec, 1);
		bio_set_dev(&bio, zram->bdev);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;

		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset);
		/*
		 * XXX: one page per IO is inefficient for writes, but it is
		 * acceptable as a starting point.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			/*
			 * Remember the last IO error; it is returned
			 * even if later IOs succeed.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We dropped zram_slot_lock, so we must check whether the
		 * slot changed meanwhile. A freed slot is caught easily via
		 * zram_allocated.
		 * A subtle case is a slot that was freed/reallocated/marked
		 * ZRAM_IDLE again. To close that race, idle_store does not
		 * mark a slot ZRAM_IDLE once it finds it ZRAM_UNDER_WB.
		 * Thus, checking the ZRAM_IDLE bit here closes the race.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}
struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec bvec;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &zw->bvec, entry, bio);
}
/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it's a deadlock.
 * To avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.bvec = *bvec;
	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif
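
/*
 * CONFIG_ZRAM_MEMORY_TRACKING: expose per-slot state via debugfs at
 * <debugfs>/zram/<disk>/block_state. Each line shows the slot index,
 * last access time and the s(ame)/w(ritten back)/h(uge)/i(dle) flags;
 * see read_block_state() below for the exact format.
 */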
#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');

		if (count < copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}
static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}
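
/*
 * mm_stat columns, in order: orig_data_size, compr_data_size,
 * mem_used_total, mem_limit, mem_used_max, same_pages, pages_compacted,
 * huge_pages, huge_pages_since. The first five are byte counts; the last
 * four are page counts.
 */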
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}
#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}
/*
 * To protect against concurrent access to the same index entry, the
 * caller should hold the entry's bit_spinlock (ZRAM_LOCK) while the
 * entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret;

	zram_slot_lock(zram, index);
	if (zram_test_flag(zram, index, ZRAM_WB)) {
		struct bio_vec bvec;

		zram_slot_unlock(zram, index);

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;
		return read_from_bdev(zram, &bvec,
				zram_get_element(zram, index),
				bio, partial_io);
	}

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE)
		zstrm = zcomp_stream_get(zram->comp);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
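
/*
 * Write path for one full page: detect same-filled pages first (they are
 * stored as a single value, no allocation), otherwise compress, fall back
 * to storing the raw page (ZRAM_HUGE) when the result is not smaller than
 * huge_class_size, then copy into a zsmalloc object and update the slot
 * under the slot lock.
 */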
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = 0;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (comp_len >= huge_class_size)
		comp_len = PAGE_SIZE;
	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
		atomic64_inc(&zram->stats.huge_pages_since);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}
/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical
	 * block size isn't identical with physical block size on some arch,
	 * we could get a discard request pointing to a specific offset within
	 * a certain physical block. Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory. So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}
/*
 * Returns errno if it has some problem. Otherwise return 0 or 1.
 * Returns 0 if IO request was done synchronously
 * Returns 1 if IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, unsigned int op, struct bio *bio)
{
	int ret;

	if (!op_is_write(op)) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	zram_slot_lock(zram, index);
	zram_accessed(zram, index);
	zram_slot_unlock(zram, index);

	if (unlikely(ret < 0)) {
		if (!op_is_write(op))
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;
	unsigned long start_time;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	start_time = bio_start_io_acct(bio);
	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					bio_op(bio), bio) < 0) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}
	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}
/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_disk->private_data;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
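
/*
 * Called when a swap slot on this device is freed. Only trylock the slot
 * here: blocking on the slot lock in this context could deadlock, so a
 * contended slot is just accounted as miss_free and left for
 * zram_free_page() to reclaim on the next overwrite.
 */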
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;
	unsigned long start_time;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
	disk_end_io_acct(bdev->bd_disk, op, start_time);
out:
	/*
	 * If I/O fails, just return the error (i.e., non-zero) without
	 * calling page_endio. The callers of rw_page (e.g., swap_readpage,
	 * __swap_writepage) will then resubmit the I/O as a bio request,
	 * and bio->bi_end_io handles the error (e.g., SetPageError,
	 * set_page_dirty and extra works).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
		ret = -EIO;
	}

	return ret;
}
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* All in-flight I/O on every CPU is done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = zram->disk->part0;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

#ifdef CONFIG_ZRAM_WRITEBACK
static const struct block_device_operations zram_wb_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};
#endif

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};
/*
 * Allocate and initialize new zram device. The function returns
 * '>= 0' device_id upon success, and negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif
	queue = blk_alloc_queue(NUMA_NO_NODE);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZED sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size(PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size. So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);

	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev = zram->disk->part0;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);
	blk_cleanup_queue(zram->disk->queue);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}
/* zram-control sysfs attributes */

/*
 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
 * that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns that device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);
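
/*
 * Example (device IDs are illustrative):
 *	cat /sys/class/zram-control/hot_add	-> prints the new id, e.g. 1
 *	echo 1 > /sys/class/zram-control/hot_remove
 */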
static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};
static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}
static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
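
/*
 * Example (illustrative): pre-create two devices at module load time with
 *	modprobe zram num_devices=2
 */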