git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/block/zram/zram_drv.c
zram: remove obsolete sysfs attrs
306b0c95 1/*
f1e3cfff 2 * Compressed RAM block device
306b0c95 3 *
1130ebba 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
7bfb3de8 5 * 2012, 2013 Minchan Kim
306b0c95
NG
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
306b0c95
NG
13 */
14
f1e3cfff 15#define KMSG_COMPONENT "zram"
306b0c95
NG
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
8946a086 20#include <linux/bio.h>
306b0c95
NG
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/buffer_head.h>
24#include <linux/device.h>
25#include <linux/genhd.h>
26#include <linux/highmem.h>
5a0e3ad6 27#include <linux/slab.h>
b09ab054 28#include <linux/backing-dev.h>
306b0c95 29#include <linux/string.h>
306b0c95 30#include <linux/vmalloc.h>
fcfa8d95 31#include <linux/err.h>
85508ec6 32#include <linux/idr.h>
6566d1a3 33#include <linux/sysfs.h>
1dd6c834 34#include <linux/cpuhotplug.h>
306b0c95 35
16a4bfb9 36#include "zram_drv.h"
306b0c95 37
85508ec6 38static DEFINE_IDR(zram_index_idr);
6566d1a3
SS
39/* idr index must be protected */
40static DEFINE_MUTEX(zram_index_mutex);
41
f1e3cfff 42static int zram_major;
b7ca232e 43static const char *default_compressor = "lzo";
306b0c95 44
306b0c95 45/* Module params (documentation at end) */
ca3d70bd 46static unsigned int num_devices = 1;
33863c21 47
08eee69f 48static inline bool init_done(struct zram *zram)
be2d1d56 49{
08eee69f 50 return zram->disksize;
be2d1d56
SS
51}
52
9b3bb7ab
SS
53static inline struct zram *dev_to_zram(struct device *dev)
54{
55 return (struct zram *)dev_to_disk(dev)->private_data;
56}
57
b31177f2 58/* flag operations require table entry bit_spin_lock() to be held */
522698d7
SS
59static int zram_test_flag(struct zram_meta *meta, u32 index,
60 enum zram_pageflags flag)
99ebbd30 61{
522698d7
SS
62 return meta->table[index].value & BIT(flag);
63}
99ebbd30 64
522698d7
SS
65static void zram_set_flag(struct zram_meta *meta, u32 index,
66 enum zram_pageflags flag)
67{
68 meta->table[index].value |= BIT(flag);
69}
99ebbd30 70
522698d7
SS
71static void zram_clear_flag(struct zram_meta *meta, u32 index,
72 enum zram_pageflags flag)
73{
74 meta->table[index].value &= ~BIT(flag);
75}
99ebbd30 76
522698d7
SS
77static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
78{
79 return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
99ebbd30
AM
80}
81
522698d7
SS
82static void zram_set_obj_size(struct zram_meta *meta,
83 u32 index, size_t size)
9b3bb7ab 84{
522698d7 85 unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
9b3bb7ab 86
522698d7
SS
87 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
88}
89
1c53e0d2 90static inline bool is_partial_io(struct bio_vec *bvec)
522698d7
SS
91{
92 return bvec->bv_len != PAGE_SIZE;
93}
94
b09ab054
MK
95static void zram_revalidate_disk(struct zram *zram)
96{
97 revalidate_disk(zram->disk);
 98 /* revalidate_disk resets BDI_CAP_STABLE_WRITES, so set it again */
e1735496 99 zram->disk->queue->backing_dev_info->capabilities |=
b09ab054
MK
100 BDI_CAP_STABLE_WRITES;
101}
102
522698d7
SS
103/*
104 * Check if request is within bounds and aligned on zram logical blocks.
105 */
1c53e0d2 106static inline bool valid_io_request(struct zram *zram,
522698d7
SS
107 sector_t start, unsigned int size)
108{
109 u64 end, bound;
110
111 /* unaligned request */
112 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
1c53e0d2 113 return false;
522698d7 114 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
1c53e0d2 115 return false;
522698d7
SS
116
117 end = start + (size >> SECTOR_SHIFT);
118 bound = zram->disksize >> SECTOR_SHIFT;
 119 /* out of range */
120 if (unlikely(start >= bound || end > bound || start > end))
1c53e0d2 121 return false;
522698d7
SS
122
123 /* I/O request is valid */
1c53e0d2 124 return true;
522698d7
SS
125}
126
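/*
 * Advance the (index, offset) pair past this bio_vec segment: the offset
 * wraps at PAGE_SIZE and the index moves to the next zram page when the
 * segment reaches a page boundary.
 */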
127static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
128{
129 if (*offset + bvec->bv_len >= PAGE_SIZE)
130 (*index)++;
131 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
132}
133
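/*
 * Lock-free update of the max_used_pages watermark: retry the cmpxchg
 * until the stored maximum is at least 'pages'.
 */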
134static inline void update_used_max(struct zram *zram,
135 const unsigned long pages)
136{
137 unsigned long old_max, cur_max;
138
139 old_max = atomic_long_read(&zram->stats.max_used_pages);
140
141 do {
142 cur_max = old_max;
143 if (pages > cur_max)
144 old_max = atomic_long_cmpxchg(
145 &zram->stats.max_used_pages, cur_max, pages);
146 } while (old_max != cur_max);
147}
148
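/* Return true only if every word of the page is zero. */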
1c53e0d2 149static bool page_zero_filled(void *ptr)
522698d7
SS
150{
151 unsigned int pos;
152 unsigned long *page;
153
154 page = (unsigned long *)ptr;
155
156 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
157 if (page[pos])
1c53e0d2 158 return false;
522698d7
SS
159 }
160
1c53e0d2 161 return true;
522698d7
SS
162}
163
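/*
 * Zero-fill the requested part of the page; ZRAM_ZERO entries have no
 * backing allocation, so there is nothing to decompress.
 */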
164static void handle_zero_page(struct bio_vec *bvec)
165{
166 struct page *page = bvec->bv_page;
167 void *user_mem;
168
169 user_mem = kmap_atomic(page);
170 if (is_partial_io(bvec))
171 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
172 else
173 clear_page(user_mem);
174 kunmap_atomic(user_mem);
175
176 flush_dcache_page(page);
9b3bb7ab
SS
177}
178
179static ssize_t initstate_show(struct device *dev,
180 struct device_attribute *attr, char *buf)
181{
a68eb3b6 182 u32 val;
9b3bb7ab
SS
183 struct zram *zram = dev_to_zram(dev);
184
a68eb3b6
SS
185 down_read(&zram->init_lock);
186 val = init_done(zram);
187 up_read(&zram->init_lock);
9b3bb7ab 188
56b4e8cb 189 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
9b3bb7ab
SS
190}
191
522698d7
SS
192static ssize_t disksize_show(struct device *dev,
193 struct device_attribute *attr, char *buf)
194{
195 struct zram *zram = dev_to_zram(dev);
196
197 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
198}
199
9ada9da9
MK
200static ssize_t mem_limit_store(struct device *dev,
201 struct device_attribute *attr, const char *buf, size_t len)
202{
203 u64 limit;
204 char *tmp;
205 struct zram *zram = dev_to_zram(dev);
206
207 limit = memparse(buf, &tmp);
208 if (buf == tmp) /* no chars parsed, invalid input */
209 return -EINVAL;
210
211 down_write(&zram->init_lock);
212 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
213 up_write(&zram->init_lock);
214
215 return len;
216}
217
461a8eee
MK
218static ssize_t mem_used_max_store(struct device *dev,
219 struct device_attribute *attr, const char *buf, size_t len)
220{
221 int err;
222 unsigned long val;
223 struct zram *zram = dev_to_zram(dev);
461a8eee
MK
224
225 err = kstrtoul(buf, 10, &val);
226 if (err || val != 0)
227 return -EINVAL;
228
229 down_read(&zram->init_lock);
5a99e95b
WY
230 if (init_done(zram)) {
231 struct zram_meta *meta = zram->meta;
461a8eee
MK
232 atomic_long_set(&zram->stats.max_used_pages,
233 zs_get_total_pages(meta->mem_pool));
5a99e95b 234 }
461a8eee
MK
235 up_read(&zram->init_lock);
236
237 return len;
238}
239
43209ea2
SS
240/*
241 * We switched to per-cpu streams and this attr is not needed anymore.
242 * However, we will keep it around for some time, because:
243 * a) we may revert per-cpu streams in the future
 244 * b) it's visible to user space and we need to follow our 2-year
 245 * retirement rule; but we already have a number of 'soon to be
 246 * altered' attrs, so max_comp_streams needs to wait for the next
247 * layoff cycle.
248 */
522698d7
SS
249static ssize_t max_comp_streams_show(struct device *dev,
250 struct device_attribute *attr, char *buf)
251{
43209ea2 252 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
522698d7
SS
253}
254
beca3ec7
SS
255static ssize_t max_comp_streams_store(struct device *dev,
256 struct device_attribute *attr, const char *buf, size_t len)
257{
43209ea2 258 return len;
beca3ec7
SS
259}
260
e46b8a03
SS
261static ssize_t comp_algorithm_show(struct device *dev,
262 struct device_attribute *attr, char *buf)
263{
264 size_t sz;
265 struct zram *zram = dev_to_zram(dev);
266
267 down_read(&zram->init_lock);
268 sz = zcomp_available_show(zram->compressor, buf);
269 up_read(&zram->init_lock);
270
271 return sz;
272}
273
274static ssize_t comp_algorithm_store(struct device *dev,
275 struct device_attribute *attr, const char *buf, size_t len)
276{
277 struct zram *zram = dev_to_zram(dev);
415403be 278 char compressor[CRYPTO_MAX_ALG_NAME];
4bbacd51
SS
279 size_t sz;
280
415403be
SS
281 strlcpy(compressor, buf, sizeof(compressor));
282 /* ignore trailing newline */
283 sz = strlen(compressor);
284 if (sz > 0 && compressor[sz - 1] == '\n')
285 compressor[sz - 1] = 0x00;
286
287 if (!zcomp_available_algorithm(compressor))
1d5b43bf
LH
288 return -EINVAL;
289
e46b8a03
SS
290 down_write(&zram->init_lock);
291 if (init_done(zram)) {
292 up_write(&zram->init_lock);
293 pr_info("Can't change algorithm for initialized device\n");
294 return -EBUSY;
295 }
4bbacd51 296
415403be 297 strlcpy(zram->compressor, compressor, sizeof(compressor));
e46b8a03
SS
298 up_write(&zram->init_lock);
299 return len;
300}
301
522698d7
SS
302static ssize_t compact_store(struct device *dev,
303 struct device_attribute *attr, const char *buf, size_t len)
306b0c95 304{
522698d7
SS
305 struct zram *zram = dev_to_zram(dev);
306 struct zram_meta *meta;
306b0c95 307
522698d7
SS
308 down_read(&zram->init_lock);
309 if (!init_done(zram)) {
310 up_read(&zram->init_lock);
311 return -EINVAL;
312 }
306b0c95 313
522698d7 314 meta = zram->meta;
7d3f3938 315 zs_compact(meta->mem_pool);
522698d7 316 up_read(&zram->init_lock);
d2d5e762 317
522698d7 318 return len;
d2d5e762
WY
319}
320
522698d7
SS
321static ssize_t io_stat_show(struct device *dev,
322 struct device_attribute *attr, char *buf)
d2d5e762 323{
522698d7
SS
324 struct zram *zram = dev_to_zram(dev);
325 ssize_t ret;
d2d5e762 326
522698d7
SS
327 down_read(&zram->init_lock);
328 ret = scnprintf(buf, PAGE_SIZE,
329 "%8llu %8llu %8llu %8llu\n",
330 (u64)atomic64_read(&zram->stats.failed_reads),
331 (u64)atomic64_read(&zram->stats.failed_writes),
332 (u64)atomic64_read(&zram->stats.invalid_io),
333 (u64)atomic64_read(&zram->stats.notify_free));
334 up_read(&zram->init_lock);
306b0c95 335
522698d7 336 return ret;
9b3bb7ab
SS
337}
338
522698d7
SS
339static ssize_t mm_stat_show(struct device *dev,
340 struct device_attribute *attr, char *buf)
9b3bb7ab 341{
522698d7 342 struct zram *zram = dev_to_zram(dev);
7d3f3938 343 struct zs_pool_stats pool_stats;
522698d7
SS
344 u64 orig_size, mem_used = 0;
345 long max_used;
346 ssize_t ret;
a539c72a 347
7d3f3938
SS
348 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
349
522698d7 350 down_read(&zram->init_lock);
7d3f3938 351 if (init_done(zram)) {
522698d7 352 mem_used = zs_get_total_pages(zram->meta->mem_pool);
7d3f3938
SS
353 zs_pool_stats(zram->meta->mem_pool, &pool_stats);
354 }
9b3bb7ab 355
522698d7
SS
356 orig_size = atomic64_read(&zram->stats.pages_stored);
357 max_used = atomic_long_read(&zram->stats.max_used_pages);
9b3bb7ab 358
522698d7 359 ret = scnprintf(buf, PAGE_SIZE,
7d3f3938 360 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
522698d7
SS
361 orig_size << PAGE_SHIFT,
362 (u64)atomic64_read(&zram->stats.compr_data_size),
363 mem_used << PAGE_SHIFT,
364 zram->limit_pages << PAGE_SHIFT,
365 max_used << PAGE_SHIFT,
366 (u64)atomic64_read(&zram->stats.zero_pages),
860c707d 367 pool_stats.pages_compacted);
522698d7 368 up_read(&zram->init_lock);
9b3bb7ab 369
522698d7
SS
370 return ret;
371}
372
623e47fc
SS
373static ssize_t debug_stat_show(struct device *dev,
374 struct device_attribute *attr, char *buf)
375{
376 int version = 1;
377 struct zram *zram = dev_to_zram(dev);
378 ssize_t ret;
379
380 down_read(&zram->init_lock);
381 ret = scnprintf(buf, PAGE_SIZE,
382 "version: %d\n%8llu\n",
383 version,
384 (u64)atomic64_read(&zram->stats.writestall));
385 up_read(&zram->init_lock);
386
387 return ret;
388}
389
522698d7
SS
390static DEVICE_ATTR_RO(io_stat);
391static DEVICE_ATTR_RO(mm_stat);
623e47fc 392static DEVICE_ATTR_RO(debug_stat);
522698d7
SS
393
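/*
 * zram->refcount pins the metadata and compressor while I/O is in flight;
 * zram_reset_device() waits for it to drop to zero before freeing them.
 */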
394static inline bool zram_meta_get(struct zram *zram)
395{
396 if (atomic_inc_not_zero(&zram->refcount))
397 return true;
398 return false;
399}
400
401static inline void zram_meta_put(struct zram *zram)
402{
403 atomic_dec(&zram->refcount);
404}
405
406static void zram_meta_free(struct zram_meta *meta, u64 disksize)
407{
408 size_t num_pages = disksize >> PAGE_SHIFT;
409 size_t index;
1fec1172
GM
410
411 /* Free all pages that are still in this zram device */
412 for (index = 0; index < num_pages; index++) {
413 unsigned long handle = meta->table[index].handle;
414
415 if (!handle)
416 continue;
417
418 zs_free(meta->mem_pool, handle);
419 }
420
9b3bb7ab 421 zs_destroy_pool(meta->mem_pool);
9b3bb7ab
SS
422 vfree(meta->table);
423 kfree(meta);
424}
425
4ce321f5 426static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
9b3bb7ab
SS
427{
428 size_t num_pages;
429 struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
b8179958 430
9b3bb7ab 431 if (!meta)
b8179958 432 return NULL;
9b3bb7ab 433
9b3bb7ab
SS
434 num_pages = disksize >> PAGE_SHIFT;
435 meta->table = vzalloc(num_pages * sizeof(*meta->table));
436 if (!meta->table) {
437 pr_err("Error allocating zram address table\n");
b8179958 438 goto out_error;
9b3bb7ab
SS
439 }
440
d0d8da2d 441 meta->mem_pool = zs_create_pool(pool_name);
9b3bb7ab
SS
442 if (!meta->mem_pool) {
443 pr_err("Error creating memory pool\n");
b8179958 444 goto out_error;
9b3bb7ab
SS
445 }
446
447 return meta;
448
b8179958 449out_error:
9b3bb7ab 450 vfree(meta->table);
9b3bb7ab 451 kfree(meta);
b8179958 452 return NULL;
9b3bb7ab
SS
453}
454
d2d5e762
WY
455/*
456 * To protect concurrent access to the same index entry,
 457 * caller should hold this table index entry's bit_spinlock to
 458 * indicate that this index entry is being accessed.
459 */
f1e3cfff 460static void zram_free_page(struct zram *zram, size_t index)
306b0c95 461{
8b3cc3ed
MK
462 struct zram_meta *meta = zram->meta;
463 unsigned long handle = meta->table[index].handle;
306b0c95 464
fd1a30de 465 if (unlikely(!handle)) {
2e882281
NG
466 /*
467 * No memory is allocated for zero filled pages.
468 * Simply clear zero page flag.
469 */
8b3cc3ed
MK
470 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
471 zram_clear_flag(meta, index, ZRAM_ZERO);
90a7806e 472 atomic64_dec(&zram->stats.zero_pages);
306b0c95
NG
473 }
474 return;
475 }
476
8b3cc3ed 477 zs_free(meta->mem_pool, handle);
306b0c95 478
d2d5e762
WY
479 atomic64_sub(zram_get_obj_size(meta, index),
480 &zram->stats.compr_data_size);
90a7806e 481 atomic64_dec(&zram->stats.pages_stored);
306b0c95 482
8b3cc3ed 483 meta->table[index].handle = 0;
d2d5e762 484 zram_set_obj_size(meta, index, 0);
306b0c95
NG
485}
486
37b51fdd 487static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
306b0c95 488{
b7ca232e 489 int ret = 0;
37b51fdd 490 unsigned char *cmem;
8b3cc3ed 491 struct zram_meta *meta = zram->meta;
92967471 492 unsigned long handle;
ebaf9ab5 493 unsigned int size;
92967471 494
d2d5e762 495 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
92967471 496 handle = meta->table[index].handle;
d2d5e762 497 size = zram_get_obj_size(meta, index);
306b0c95 498
8b3cc3ed 499 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
d2d5e762 500 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
42e99bd9 501 clear_page(mem);
8c921b2b
JM
502 return 0;
503 }
306b0c95 504
8b3cc3ed 505 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
ebaf9ab5 506 if (size == PAGE_SIZE) {
42e99bd9 507 copy_page(mem, cmem);
ebaf9ab5
SS
508 } else {
509 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
510
511 ret = zcomp_decompress(zstrm, cmem, size, mem);
512 zcomp_stream_put(zram->comp);
513 }
8b3cc3ed 514 zs_unmap_object(meta->mem_pool, handle);
d2d5e762 515 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
a1dd52af 516
8c921b2b 517 /* Should NEVER happen. Return bio error if it does. */
b7ca232e 518 if (unlikely(ret)) {
8c921b2b 519 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
8c921b2b 520 return ret;
a1dd52af 521 }
306b0c95 522
8c921b2b 523 return 0;
306b0c95
NG
524}
525
37b51fdd 526static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
b627cff3 527 u32 index, int offset)
924bd88d
JM
528{
529 int ret;
37b51fdd
SS
530 struct page *page;
531 unsigned char *user_mem, *uncmem = NULL;
8b3cc3ed 532 struct zram_meta *meta = zram->meta;
37b51fdd
SS
533 page = bvec->bv_page;
534
d2d5e762 535 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
8b3cc3ed
MK
536 if (unlikely(!meta->table[index].handle) ||
537 zram_test_flag(meta, index, ZRAM_ZERO)) {
d2d5e762 538 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
37b51fdd 539 handle_zero_page(bvec);
924bd88d
JM
540 return 0;
541 }
d2d5e762 542 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
924bd88d 543
37b51fdd
SS
544 if (is_partial_io(bvec))
545 /* Use a temporary buffer to decompress the page */
7e5a5104
MK
546 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
547
548 user_mem = kmap_atomic(page);
549 if (!is_partial_io(bvec))
37b51fdd
SS
550 uncmem = user_mem;
551
552 if (!uncmem) {
70864969 553 pr_err("Unable to allocate temp memory\n");
37b51fdd
SS
554 ret = -ENOMEM;
555 goto out_cleanup;
556 }
924bd88d 557
37b51fdd 558 ret = zram_decompress_page(zram, uncmem, index);
924bd88d 559 /* Should NEVER happen. Return bio error if it does. */
b7ca232e 560 if (unlikely(ret))
37b51fdd 561 goto out_cleanup;
924bd88d 562
37b51fdd
SS
563 if (is_partial_io(bvec))
564 memcpy(user_mem + bvec->bv_offset, uncmem + offset,
565 bvec->bv_len);
566
567 flush_dcache_page(page);
568 ret = 0;
569out_cleanup:
570 kunmap_atomic(user_mem);
571 if (is_partial_io(bvec))
572 kfree(uncmem);
573 return ret;
924bd88d
JM
574}
575
576static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
577 int offset)
306b0c95 578{
397c6066 579 int ret = 0;
ebaf9ab5 580 unsigned int clen;
da9556a2 581 unsigned long handle = 0;
130f315a 582 struct page *page;
924bd88d 583 unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
8b3cc3ed 584 struct zram_meta *meta = zram->meta;
17162f41 585 struct zcomp_strm *zstrm = NULL;
461a8eee 586 unsigned long alloced_pages;
306b0c95 587
8c921b2b 588 page = bvec->bv_page;
924bd88d
JM
589 if (is_partial_io(bvec)) {
590 /*
591 * This is a partial IO. We need to read the full page
 592 * before writing the changes.
593 */
7e5a5104 594 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
924bd88d 595 if (!uncmem) {
924bd88d
JM
596 ret = -ENOMEM;
597 goto out;
598 }
37b51fdd 599 ret = zram_decompress_page(zram, uncmem, index);
397c6066 600 if (ret)
924bd88d 601 goto out;
924bd88d
JM
602 }
603
da9556a2 604compress_again:
ba82fe2e 605 user_mem = kmap_atomic(page);
397c6066 606 if (is_partial_io(bvec)) {
924bd88d
JM
607 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
608 bvec->bv_len);
397c6066
NG
609 kunmap_atomic(user_mem);
610 user_mem = NULL;
611 } else {
924bd88d 612 uncmem = user_mem;
397c6066 613 }
924bd88d
JM
614
615 if (page_zero_filled(uncmem)) {
c4065152
WY
616 if (user_mem)
617 kunmap_atomic(user_mem);
f40ac2ae 618 /* Free memory associated with this sector now. */
d2d5e762 619 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
f40ac2ae 620 zram_free_page(zram, index);
92967471 621 zram_set_flag(meta, index, ZRAM_ZERO);
d2d5e762 622 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
f40ac2ae 623
90a7806e 624 atomic64_inc(&zram->stats.zero_pages);
924bd88d
JM
625 ret = 0;
626 goto out;
8c921b2b 627 }
306b0c95 628
2aea8493 629 zstrm = zcomp_stream_get(zram->comp);
ebaf9ab5 630 ret = zcomp_compress(zstrm, uncmem, &clen);
397c6066
NG
631 if (!is_partial_io(bvec)) {
632 kunmap_atomic(user_mem);
633 user_mem = NULL;
634 uncmem = NULL;
635 }
306b0c95 636
b7ca232e 637 if (unlikely(ret)) {
8c921b2b 638 pr_err("Compression failed! err=%d\n", ret);
924bd88d 639 goto out;
8c921b2b 640 }
da9556a2 641
b7ca232e 642 src = zstrm->buffer;
c8f2f0db 643 if (unlikely(clen > max_zpage_size)) {
c8f2f0db 644 clen = PAGE_SIZE;
397c6066
NG
645 if (is_partial_io(bvec))
646 src = uncmem;
c8f2f0db 647 }
a1dd52af 648
da9556a2
SS
649 /*
650 * handle allocation has 2 paths:
651 * a) fast path is executed with preemption disabled (for
652 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
653 * since we can't sleep;
654 * b) slow path enables preemption and attempts to allocate
655 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
656 * put per-cpu compression stream and, thus, to re-do
657 * the compression once handle is allocated.
658 *
659 * if we have a 'non-null' handle here then we are coming
660 * from the slow path and handle has already been allocated.
661 */
662 if (!handle)
663 handle = zs_malloc(meta->mem_pool, clen,
664 __GFP_KSWAPD_RECLAIM |
665 __GFP_NOWARN |
9bc482d3
MK
666 __GFP_HIGHMEM |
667 __GFP_MOVABLE);
fd1a30de 668 if (!handle) {
2aea8493 669 zcomp_stream_put(zram->comp);
da9556a2
SS
670 zstrm = NULL;
671
623e47fc
SS
672 atomic64_inc(&zram->stats.writestall);
673
da9556a2 674 handle = zs_malloc(meta->mem_pool, clen,
9bc482d3
MK
675 GFP_NOIO | __GFP_HIGHMEM |
676 __GFP_MOVABLE);
da9556a2
SS
677 if (handle)
678 goto compress_again;
679
ebaf9ab5 680 pr_err("Error allocating memory for compressed page: %u, size=%u\n",
596b3dd4 681 index, clen);
924bd88d
JM
682 ret = -ENOMEM;
683 goto out;
8c921b2b 684 }
9ada9da9 685
461a8eee 686 alloced_pages = zs_get_total_pages(meta->mem_pool);
12372755
SS
687 update_used_max(zram, alloced_pages);
688
461a8eee 689 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
9ada9da9
MK
690 zs_free(meta->mem_pool, handle);
691 ret = -ENOMEM;
692 goto out;
693 }
694
8b3cc3ed 695 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
306b0c95 696
42e99bd9 697 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
397c6066 698 src = kmap_atomic(page);
42e99bd9 699 copy_page(cmem, src);
397c6066 700 kunmap_atomic(src);
42e99bd9
JL
701 } else {
702 memcpy(cmem, src, clen);
703 }
306b0c95 704
2aea8493 705 zcomp_stream_put(zram->comp);
17162f41 706 zstrm = NULL;
8b3cc3ed 707 zs_unmap_object(meta->mem_pool, handle);
fd1a30de 708
f40ac2ae
SS
709 /*
710 * Free memory associated with this sector
711 * before overwriting unused sectors.
712 */
d2d5e762 713 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
f40ac2ae
SS
714 zram_free_page(zram, index);
715
8b3cc3ed 716 meta->table[index].handle = handle;
d2d5e762
WY
717 zram_set_obj_size(meta, index, clen);
718 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
306b0c95 719
8c921b2b 720 /* Update stats */
90a7806e
SS
721 atomic64_add(clen, &zram->stats.compr_data_size);
722 atomic64_inc(&zram->stats.pages_stored);
924bd88d 723out:
17162f41 724 if (zstrm)
2aea8493 725 zcomp_stream_put(zram->comp);
397c6066
NG
726 if (is_partial_io(bvec))
727 kfree(uncmem);
924bd88d 728 return ret;
8c921b2b
JM
729}
730
f4659d8e
JK
731/*
732 * zram_bio_discard - handler on discard request
733 * @index: physical block index in PAGE_SIZE units
734 * @offset: byte offset within physical block
735 */
736static void zram_bio_discard(struct zram *zram, u32 index,
737 int offset, struct bio *bio)
738{
739 size_t n = bio->bi_iter.bi_size;
d2d5e762 740 struct zram_meta *meta = zram->meta;
f4659d8e
JK
741
742 /*
743 * zram manages data in physical block size units. Because logical block
 744 * size isn't identical to the physical block size on some architectures, we
745 * could get a discard request pointing to a specific offset within a
746 * certain physical block. Although we can handle this request by
 747 * reading that physical block, decompressing it, partially zeroing it,
 748 * re-compressing it and then re-storing it, this isn't reasonable
749 * because our intent with a discard request is to save memory. So
750 * skipping this logical block is appropriate here.
751 */
752 if (offset) {
38515c73 753 if (n <= (PAGE_SIZE - offset))
f4659d8e
JK
754 return;
755
38515c73 756 n -= (PAGE_SIZE - offset);
f4659d8e
JK
757 index++;
758 }
759
760 while (n >= PAGE_SIZE) {
d2d5e762 761 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
f4659d8e 762 zram_free_page(zram, index);
d2d5e762 763 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
015254da 764 atomic64_inc(&zram->stats.notify_free);
f4659d8e
JK
765 index++;
766 n -= PAGE_SIZE;
767 }
768}
769
522698d7 770static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
c11f0c0b 771 int offset, bool is_write)
9b3bb7ab 772{
522698d7 773 unsigned long start_time = jiffies;
c11f0c0b 774 int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
9b3bb7ab 775 int ret;
9b3bb7ab 776
c11f0c0b 777 generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
522698d7 778 &zram->disk->part0);
46a51c80 779
c11f0c0b 780 if (!is_write) {
522698d7
SS
781 atomic64_inc(&zram->stats.num_reads);
782 ret = zram_bvec_read(zram, bvec, index, offset);
783 } else {
784 atomic64_inc(&zram->stats.num_writes);
785 ret = zram_bvec_write(zram, bvec, index, offset);
1b672224 786 }
9b3bb7ab 787
c11f0c0b 788 generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
9b3bb7ab 789
522698d7 790 if (unlikely(ret)) {
c11f0c0b 791 if (!is_write)
522698d7
SS
792 atomic64_inc(&zram->stats.failed_reads);
793 else
794 atomic64_inc(&zram->stats.failed_writes);
1b672224 795 }
9b3bb7ab 796
1b672224 797 return ret;
8c921b2b
JM
798}
799
be257c61 800static void __zram_make_request(struct zram *zram, struct bio *bio)
8c921b2b 801{
abf54548 802 int offset;
8c921b2b 803 u32 index;
7988613b
KO
804 struct bio_vec bvec;
805 struct bvec_iter iter;
8c921b2b 806
4f024f37
KO
807 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
808 offset = (bio->bi_iter.bi_sector &
809 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
8c921b2b 810
95fe6c1a 811 if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
f4659d8e 812 zram_bio_discard(zram, index, offset, bio);
4246a0b6 813 bio_endio(bio);
f4659d8e
JK
814 return;
815 }
816
7988613b 817 bio_for_each_segment(bvec, bio, iter) {
924bd88d
JM
818 int max_transfer_size = PAGE_SIZE - offset;
819
7988613b 820 if (bvec.bv_len > max_transfer_size) {
924bd88d
JM
821 /*
 822 * zram_bvec_rw() can only operate on a single
823 * zram page. Split the bio vector.
824 */
825 struct bio_vec bv;
826
7988613b 827 bv.bv_page = bvec.bv_page;
924bd88d 828 bv.bv_len = max_transfer_size;
7988613b 829 bv.bv_offset = bvec.bv_offset;
924bd88d 830
abf54548 831 if (zram_bvec_rw(zram, &bv, index, offset,
c11f0c0b 832 op_is_write(bio_op(bio))) < 0)
924bd88d
JM
833 goto out;
834
7988613b 835 bv.bv_len = bvec.bv_len - max_transfer_size;
924bd88d 836 bv.bv_offset += max_transfer_size;
abf54548 837 if (zram_bvec_rw(zram, &bv, index + 1, 0,
c11f0c0b 838 op_is_write(bio_op(bio))) < 0)
924bd88d
JM
839 goto out;
840 } else
abf54548 841 if (zram_bvec_rw(zram, &bvec, index, offset,
c11f0c0b 842 op_is_write(bio_op(bio))) < 0)
924bd88d
JM
843 goto out;
844
7988613b 845 update_position(&index, &offset, &bvec);
a1dd52af 846 }
306b0c95 847
4246a0b6 848 bio_endio(bio);
7d7854b4 849 return;
306b0c95
NG
850
851out:
306b0c95 852 bio_io_error(bio);
306b0c95
NG
853}
854
306b0c95 855/*
f1e3cfff 856 * Handler function for all zram I/O requests.
306b0c95 857 */
dece1635 858static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
306b0c95 859{
f1e3cfff 860 struct zram *zram = queue->queuedata;
306b0c95 861
08eee69f 862 if (unlikely(!zram_meta_get(zram)))
3de738cd 863 goto error;
0900beae 864
54efd50b
KO
865 blk_queue_split(queue, &bio, queue->bio_split);
866
54850e73 867 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
868 bio->bi_iter.bi_size)) {
da5cc7d3 869 atomic64_inc(&zram->stats.invalid_io);
08eee69f 870 goto put_zram;
6642a67c
JM
871 }
872
be257c61 873 __zram_make_request(zram, bio);
08eee69f 874 zram_meta_put(zram);
dece1635 875 return BLK_QC_T_NONE;
08eee69f
MK
876put_zram:
877 zram_meta_put(zram);
0900beae
JM
878error:
879 bio_io_error(bio);
dece1635 880 return BLK_QC_T_NONE;
306b0c95
NG
881}
882
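/*
 * swap_slot_free_notify hook: the swap layer tells us this slot is no
 * longer in use, so drop the compressed object for the page right away.
 */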
2ccbec05
NG
883static void zram_slot_free_notify(struct block_device *bdev,
884 unsigned long index)
107c161b 885{
f1e3cfff 886 struct zram *zram;
f614a9f4 887 struct zram_meta *meta;
107c161b 888
f1e3cfff 889 zram = bdev->bd_disk->private_data;
f614a9f4 890 meta = zram->meta;
a0c516cb 891
d2d5e762 892 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
f614a9f4 893 zram_free_page(zram, index);
d2d5e762 894 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
f614a9f4 895 atomic64_inc(&zram->stats.notify_free);
107c161b
NG
896}
897
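/*
 * rw_page block_device operation: read or write a single page without
 * building a bio; the swap path uses this as a fast path.
 */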
8c7f0102 898static int zram_rw_page(struct block_device *bdev, sector_t sector,
c11f0c0b 899 struct page *page, bool is_write)
8c7f0102 900{
08eee69f 901 int offset, err = -EIO;
8c7f0102 902 u32 index;
903 struct zram *zram;
904 struct bio_vec bv;
905
906 zram = bdev->bd_disk->private_data;
08eee69f
MK
907 if (unlikely(!zram_meta_get(zram)))
908 goto out;
909
8c7f0102 910 if (!valid_io_request(zram, sector, PAGE_SIZE)) {
911 atomic64_inc(&zram->stats.invalid_io);
08eee69f
MK
912 err = -EINVAL;
913 goto put_zram;
8c7f0102 914 }
915
916 index = sector >> SECTORS_PER_PAGE_SHIFT;
917 offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
918
919 bv.bv_page = page;
920 bv.bv_len = PAGE_SIZE;
921 bv.bv_offset = 0;
922
c11f0c0b 923 err = zram_bvec_rw(zram, &bv, index, offset, is_write);
08eee69f
MK
924put_zram:
925 zram_meta_put(zram);
926out:
8c7f0102 927 /*
 928 * If the I/O fails, just return an error (i.e., non-zero) without
 929 * calling page_endio.
 930 * This causes the upper callers of rw_page (e.g., swap_readpage,
 931 * __swap_writepage) to resubmit the I/O as a bio request, and
 932 * bio->bi_end_io then does the error handling
 933 * (e.g., SetPageError, set_page_dirty and extra work).
934 */
935 if (err == 0)
c11f0c0b 936 page_endio(page, is_write, 0);
8c7f0102 937 return err;
938}
939
522698d7
SS
940static void zram_reset_device(struct zram *zram)
941{
942 struct zram_meta *meta;
943 struct zcomp *comp;
944 u64 disksize;
306b0c95 945
522698d7 946 down_write(&zram->init_lock);
9b3bb7ab 947
522698d7
SS
948 zram->limit_pages = 0;
949
950 if (!init_done(zram)) {
951 up_write(&zram->init_lock);
952 return;
953 }
954
955 meta = zram->meta;
956 comp = zram->comp;
957 disksize = zram->disksize;
958 /*
959 * Refcount will go down to 0 eventually and r/w handler
 960 * cannot handle further I/O, so it will bail out by
 961 * checking zram_meta_get().
962 */
963 zram_meta_put(zram);
964 /*
965 * We want to free zram_meta in process context to avoid
966 * deadlock between reclaim path and any other locks.
967 */
968 wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
969
970 /* Reset stats */
971 memset(&zram->stats, 0, sizeof(zram->stats));
972 zram->disksize = 0;
522698d7
SS
973
974 set_capacity(zram->disk, 0);
975 part_stat_set_all(&zram->disk->part0, 0);
976
977 up_write(&zram->init_lock);
 978 /* I/O operations on all CPUs are done, so free the resources */
979 zram_meta_free(meta, disksize);
980 zcomp_destroy(comp);
981}
982
983static ssize_t disksize_store(struct device *dev,
984 struct device_attribute *attr, const char *buf, size_t len)
2f6a3bed 985{
522698d7
SS
986 u64 disksize;
987 struct zcomp *comp;
988 struct zram_meta *meta;
2f6a3bed 989 struct zram *zram = dev_to_zram(dev);
522698d7 990 int err;
2f6a3bed 991
522698d7
SS
992 disksize = memparse(buf, NULL);
993 if (!disksize)
994 return -EINVAL;
2f6a3bed 995
522698d7 996 disksize = PAGE_ALIGN(disksize);
4ce321f5 997 meta = zram_meta_alloc(zram->disk->disk_name, disksize);
522698d7
SS
998 if (!meta)
999 return -ENOMEM;
1000
da9556a2 1001 comp = zcomp_create(zram->compressor);
522698d7 1002 if (IS_ERR(comp)) {
70864969 1003 pr_err("Cannot initialise %s compressing backend\n",
522698d7
SS
1004 zram->compressor);
1005 err = PTR_ERR(comp);
1006 goto out_free_meta;
1007 }
1008
1009 down_write(&zram->init_lock);
1010 if (init_done(zram)) {
1011 pr_info("Cannot change disksize for initialized device\n");
1012 err = -EBUSY;
1013 goto out_destroy_comp;
1014 }
1015
1016 init_waitqueue_head(&zram->io_done);
1017 atomic_set(&zram->refcount, 1);
1018 zram->meta = meta;
1019 zram->comp = comp;
1020 zram->disksize = disksize;
1021 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
b09ab054 1022 zram_revalidate_disk(zram);
e7ccfc4c 1023 up_write(&zram->init_lock);
522698d7
SS
1024
1025 return len;
1026
1027out_destroy_comp:
1028 up_write(&zram->init_lock);
1029 zcomp_destroy(comp);
1030out_free_meta:
1031 zram_meta_free(meta, disksize);
1032 return err;
2f6a3bed
SS
1033}
1034
522698d7
SS
1035static ssize_t reset_store(struct device *dev,
1036 struct device_attribute *attr, const char *buf, size_t len)
4f2109f6 1037{
522698d7
SS
1038 int ret;
1039 unsigned short do_reset;
1040 struct zram *zram;
1041 struct block_device *bdev;
4f2109f6 1042
f405c445
SS
1043 ret = kstrtou16(buf, 10, &do_reset);
1044 if (ret)
1045 return ret;
1046
1047 if (!do_reset)
1048 return -EINVAL;
1049
522698d7
SS
1050 zram = dev_to_zram(dev);
1051 bdev = bdget_disk(zram->disk, 0);
522698d7
SS
1052 if (!bdev)
1053 return -ENOMEM;
4f2109f6 1054
522698d7 1055 mutex_lock(&bdev->bd_mutex);
f405c445
SS
1056 /* Do not reset an active device or claimed device */
1057 if (bdev->bd_openers || zram->claim) {
1058 mutex_unlock(&bdev->bd_mutex);
1059 bdput(bdev);
1060 return -EBUSY;
522698d7
SS
1061 }
1062
f405c445
SS
 1063 /* From now on, no one can open /dev/zram[0-9] */
1064 zram->claim = true;
1065 mutex_unlock(&bdev->bd_mutex);
522698d7 1066
f405c445 1067 /* Make sure all pending I/O is finished */
522698d7
SS
1068 fsync_bdev(bdev);
1069 zram_reset_device(zram);
b09ab054 1070 zram_revalidate_disk(zram);
522698d7
SS
1071 bdput(bdev);
1072
f405c445
SS
1073 mutex_lock(&bdev->bd_mutex);
1074 zram->claim = false;
1075 mutex_unlock(&bdev->bd_mutex);
1076
522698d7 1077 return len;
f405c445
SS
1078}
1079
1080static int zram_open(struct block_device *bdev, fmode_t mode)
1081{
1082 int ret = 0;
1083 struct zram *zram;
1084
1085 WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1086
1087 zram = bdev->bd_disk->private_data;
 1088 /* zram was claimed for reset, so the open request fails */
1089 if (zram->claim)
1090 ret = -EBUSY;
4f2109f6
SS
1091
1092 return ret;
1093}
1094
522698d7 1095static const struct block_device_operations zram_devops = {
f405c445 1096 .open = zram_open,
522698d7
SS
1097 .swap_slot_free_notify = zram_slot_free_notify,
1098 .rw_page = zram_rw_page,
1099 .owner = THIS_MODULE
1100};
1101
1102static DEVICE_ATTR_WO(compact);
1103static DEVICE_ATTR_RW(disksize);
1104static DEVICE_ATTR_RO(initstate);
1105static DEVICE_ATTR_WO(reset);
c87d1655
SS
1106static DEVICE_ATTR_WO(mem_limit);
1107static DEVICE_ATTR_WO(mem_used_max);
522698d7
SS
1108static DEVICE_ATTR_RW(max_comp_streams);
1109static DEVICE_ATTR_RW(comp_algorithm);
a68eb3b6 1110
9b3bb7ab
SS
1111static struct attribute *zram_disk_attrs[] = {
1112 &dev_attr_disksize.attr,
1113 &dev_attr_initstate.attr,
1114 &dev_attr_reset.attr,
99ebbd30 1115 &dev_attr_compact.attr,
9ada9da9 1116 &dev_attr_mem_limit.attr,
461a8eee 1117 &dev_attr_mem_used_max.attr,
beca3ec7 1118 &dev_attr_max_comp_streams.attr,
e46b8a03 1119 &dev_attr_comp_algorithm.attr,
2f6a3bed 1120 &dev_attr_io_stat.attr,
4f2109f6 1121 &dev_attr_mm_stat.attr,
623e47fc 1122 &dev_attr_debug_stat.attr,
9b3bb7ab
SS
1123 NULL,
1124};
1125
1126static struct attribute_group zram_disk_attr_group = {
1127 .attrs = zram_disk_attrs,
1128};
1129
92ff1528
SS
1130/*
 1131 * Allocate and initialize a new zram device. The function returns
 1132 * a device_id ('>= 0') upon success, and a negative value otherwise.
1133 */
1134static int zram_add(void)
306b0c95 1135{
85508ec6 1136 struct zram *zram;
ee980160 1137 struct request_queue *queue;
92ff1528 1138 int ret, device_id;
85508ec6
SS
1139
1140 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1141 if (!zram)
1142 return -ENOMEM;
1143
92ff1528 1144 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
85508ec6
SS
1145 if (ret < 0)
1146 goto out_free_dev;
92ff1528 1147 device_id = ret;
de1a21a0 1148
0900beae 1149 init_rwsem(&zram->init_lock);
306b0c95 1150
ee980160
SS
1151 queue = blk_alloc_queue(GFP_KERNEL);
1152 if (!queue) {
306b0c95
NG
1153 pr_err("Error allocating disk queue for device %d\n",
1154 device_id);
85508ec6
SS
1155 ret = -ENOMEM;
1156 goto out_free_idr;
306b0c95
NG
1157 }
1158
ee980160 1159 blk_queue_make_request(queue, zram_make_request);
306b0c95 1160
85508ec6 1161 /* gendisk structure */
f1e3cfff
NG
1162 zram->disk = alloc_disk(1);
1163 if (!zram->disk) {
70864969 1164 pr_err("Error allocating disk structure for device %d\n",
306b0c95 1165 device_id);
201c7b72 1166 ret = -ENOMEM;
39a9b8ac 1167 goto out_free_queue;
306b0c95
NG
1168 }
1169
f1e3cfff
NG
1170 zram->disk->major = zram_major;
1171 zram->disk->first_minor = device_id;
1172 zram->disk->fops = &zram_devops;
ee980160
SS
1173 zram->disk->queue = queue;
1174 zram->disk->queue->queuedata = zram;
f1e3cfff
NG
1175 zram->disk->private_data = zram;
1176 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
306b0c95 1177
33863c21 1178 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
f1e3cfff 1179 set_capacity(zram->disk, 0);
b67d1ec1
SS
 1180 /* zram devices sort of resemble non-rotational disks */
1181 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
b277da0a 1182 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
a1dd52af
NG
1183 /*
1184 * To ensure that we always get PAGE_SIZE aligned
 1185 * and n*PAGE_SIZE-sized I/O requests.
1186 */
f1e3cfff 1187 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
7b19b8d4
RJ
1188 blk_queue_logical_block_size(zram->disk->queue,
1189 ZRAM_LOGICAL_BLOCK_SIZE);
f1e3cfff
NG
1190 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1191 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
f4659d8e 1192 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
2bb4cd5c 1193 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
f4659d8e
JK
1194 /*
1195 * zram_bio_discard() will clear all logical blocks if logical block
 1196 * size is identical to the physical block size (PAGE_SIZE). But if it is
1197 * different, we will skip discarding some parts of logical blocks in
1198 * the part of the request range which isn't aligned to physical block
1199 * size. So we can't ensure that all discarded logical blocks are
1200 * zeroed.
1201 */
1202 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1203 zram->disk->queue->limits.discard_zeroes_data = 1;
1204 else
1205 zram->disk->queue->limits.discard_zeroes_data = 0;
1206 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
5d83d5a0 1207
f1e3cfff 1208 add_disk(zram->disk);
306b0c95 1209
33863c21
NG
1210 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1211 &zram_disk_attr_group);
1212 if (ret < 0) {
70864969
SS
1213 pr_err("Error creating sysfs group for device %d\n",
1214 device_id);
39a9b8ac 1215 goto out_free_disk;
33863c21 1216 }
e46b8a03 1217 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
be2d1d56 1218 zram->meta = NULL;
d12b63c9
SS
1219
1220 pr_info("Added device: %s\n", zram->disk->disk_name);
92ff1528 1221 return device_id;
de1a21a0 1222
39a9b8ac
JL
1223out_free_disk:
1224 del_gendisk(zram->disk);
1225 put_disk(zram->disk);
1226out_free_queue:
ee980160 1227 blk_cleanup_queue(queue);
85508ec6
SS
1228out_free_idr:
1229 idr_remove(&zram_index_idr, device_id);
1230out_free_dev:
1231 kfree(zram);
de1a21a0 1232 return ret;
306b0c95
NG
1233}
1234
6566d1a3 1235static int zram_remove(struct zram *zram)
306b0c95 1236{
6566d1a3
SS
1237 struct block_device *bdev;
1238
1239 bdev = bdget_disk(zram->disk, 0);
1240 if (!bdev)
1241 return -ENOMEM;
1242
1243 mutex_lock(&bdev->bd_mutex);
1244 if (bdev->bd_openers || zram->claim) {
1245 mutex_unlock(&bdev->bd_mutex);
1246 bdput(bdev);
1247 return -EBUSY;
1248 }
1249
1250 zram->claim = true;
1251 mutex_unlock(&bdev->bd_mutex);
1252
85508ec6
SS
1253 /*
1254 * Remove sysfs first, so no one will perform a disksize
6566d1a3
SS
1255 * store while we destroy the devices. This also helps during
1256 * hot_remove -- zram_reset_device() is the last holder of
1257 * ->init_lock, no later/concurrent disksize_store() or any
1258 * other sysfs handlers are possible.
85508ec6
SS
1259 */
1260 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1261 &zram_disk_attr_group);
306b0c95 1262
6566d1a3
SS
 1263 /* Make sure all pending I/O is finished */
1264 fsync_bdev(bdev);
85508ec6 1265 zram_reset_device(zram);
6566d1a3
SS
1266 bdput(bdev);
1267
1268 pr_info("Removed device: %s\n", zram->disk->disk_name);
1269
85508ec6
SS
1270 blk_cleanup_queue(zram->disk->queue);
1271 del_gendisk(zram->disk);
1272 put_disk(zram->disk);
1273 kfree(zram);
6566d1a3
SS
1274 return 0;
1275}
1276
1277/* zram-control sysfs attributes */
1278static ssize_t hot_add_show(struct class *class,
1279 struct class_attribute *attr,
1280 char *buf)
1281{
1282 int ret;
1283
1284 mutex_lock(&zram_index_mutex);
1285 ret = zram_add();
1286 mutex_unlock(&zram_index_mutex);
1287
1288 if (ret < 0)
1289 return ret;
1290 return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1291}
1292
1293static ssize_t hot_remove_store(struct class *class,
1294 struct class_attribute *attr,
1295 const char *buf,
1296 size_t count)
1297{
1298 struct zram *zram;
1299 int ret, dev_id;
1300
1301 /* dev_id is gendisk->first_minor, which is `int' */
1302 ret = kstrtoint(buf, 10, &dev_id);
1303 if (ret)
1304 return ret;
1305 if (dev_id < 0)
1306 return -EINVAL;
1307
1308 mutex_lock(&zram_index_mutex);
1309
1310 zram = idr_find(&zram_index_idr, dev_id);
17ec4cd9 1311 if (zram) {
6566d1a3 1312 ret = zram_remove(zram);
529e71e1
TI
1313 if (!ret)
1314 idr_remove(&zram_index_idr, dev_id);
17ec4cd9 1315 } else {
6566d1a3 1316 ret = -ENODEV;
17ec4cd9 1317 }
6566d1a3
SS
1318
1319 mutex_unlock(&zram_index_mutex);
1320 return ret ? ret : count;
85508ec6 1321}
a096cafc 1322
5c7e9ccd
SS
1323/*
 1324 * NOTE: hot_add is not the usual read-only sysfs attribute, in the sense
 1325 * that reading from this file does alter the state of your system -- it
1326 * creates a new un-initialized zram device and returns back this device's
1327 * device_id (or an error code if it fails to create a new device).
1328 */
6566d1a3 1329static struct class_attribute zram_control_class_attrs[] = {
5c7e9ccd 1330 __ATTR(hot_add, 0400, hot_add_show, NULL),
6566d1a3
SS
1331 __ATTR_WO(hot_remove),
1332 __ATTR_NULL,
1333};
1334
1335static struct class zram_control_class = {
1336 .name = "zram-control",
1337 .owner = THIS_MODULE,
1338 .class_attrs = zram_control_class_attrs,
1339};
1340
85508ec6
SS
1341static int zram_remove_cb(int id, void *ptr, void *data)
1342{
1343 zram_remove(ptr);
1344 return 0;
1345}
a096cafc 1346
85508ec6
SS
1347static void destroy_devices(void)
1348{
6566d1a3 1349 class_unregister(&zram_control_class);
85508ec6
SS
1350 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1351 idr_destroy(&zram_index_idr);
a096cafc 1352 unregister_blkdev(zram_major, "zram");
1dd6c834 1353 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
306b0c95
NG
1354}
1355
f1e3cfff 1356static int __init zram_init(void)
306b0c95 1357{
92ff1528 1358 int ret;
306b0c95 1359
1dd6c834
AMG
1360 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
1361 zcomp_cpu_up_prepare, zcomp_cpu_dead);
1362 if (ret < 0)
1363 return ret;
1364
6566d1a3
SS
1365 ret = class_register(&zram_control_class);
1366 if (ret) {
70864969 1367 pr_err("Unable to register zram-control class\n");
1dd6c834 1368 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
6566d1a3
SS
1369 return ret;
1370 }
1371
f1e3cfff
NG
1372 zram_major = register_blkdev(0, "zram");
1373 if (zram_major <= 0) {
70864969 1374 pr_err("Unable to get major number\n");
6566d1a3 1375 class_unregister(&zram_control_class);
1dd6c834 1376 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
a096cafc 1377 return -EBUSY;
306b0c95
NG
1378 }
1379
92ff1528 1380 while (num_devices != 0) {
6566d1a3 1381 mutex_lock(&zram_index_mutex);
92ff1528 1382 ret = zram_add();
6566d1a3 1383 mutex_unlock(&zram_index_mutex);
92ff1528 1384 if (ret < 0)
a096cafc 1385 goto out_error;
92ff1528 1386 num_devices--;
de1a21a0
NG
1387 }
1388
306b0c95 1389 return 0;
de1a21a0 1390
a096cafc 1391out_error:
85508ec6 1392 destroy_devices();
306b0c95
NG
1393 return ret;
1394}
1395
f1e3cfff 1396static void __exit zram_exit(void)
306b0c95 1397{
85508ec6 1398 destroy_devices();
306b0c95
NG
1399}
1400
f1e3cfff
NG
1401module_init(zram_init);
1402module_exit(zram_exit);
306b0c95 1403
9b3bb7ab 1404module_param(num_devices, uint, 0);
c3cdb40e 1405MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
9b3bb7ab 1406
306b0c95
NG
1407MODULE_LICENSE("Dual BSD/GPL");
1408MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
f1e3cfff 1409MODULE_DESCRIPTION("Compressed RAM Block Device");