]>
Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG |
6 | * |
7 | * This code is released using a dual license strategy: BSD/GPL | |
8 | * You can choose the licence that better fits your requirements. | |
9 | * | |
10 | * Released under the terms of 3-clause BSD License | |
11 | * Released under the terms of GNU General Public License Version 2.0 | |
12 | * | |
306b0c95 NG |
13 | */ |
14 | ||
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG |
21 | #include <linux/bitops.h> |
22 | #include <linux/blkdev.h> | |
23 | #include <linux/buffer_head.h> | |
24 | #include <linux/device.h> | |
25 | #include <linux/genhd.h> | |
26 | #include <linux/highmem.h> | |
5a0e3ad6 | 27 | #include <linux/slab.h> |
b09ab054 | 28 | #include <linux/backing-dev.h> |
306b0c95 | 29 | #include <linux/string.h> |
306b0c95 | 30 | #include <linux/vmalloc.h> |
fcfa8d95 | 31 | #include <linux/err.h> |
85508ec6 | 32 | #include <linux/idr.h> |
6566d1a3 | 33 | #include <linux/sysfs.h> |
1dd6c834 | 34 | #include <linux/cpuhotplug.h> |
306b0c95 | 35 | |
16a4bfb9 | 36 | #include "zram_drv.h" |
306b0c95 | 37 | |
/* IDR instance declared for this driver; it is not used in the part of the file reviewed here. */
85508ec6 | 38 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS |
39 | /* idr index must be protected */ |
40 | static DEFINE_MUTEX(zram_index_mutex); | |
41 | ||
/* NOTE(review): presumably the registered block major number; it is assigned outside this view - confirm. */
f1e3cfff | 42 | static int zram_major; |
/* NOTE(review): presumably the algorithm name given to new devices; its use is outside this view - confirm. */
b7ca232e | 43 | static const char *default_compressor = "lzo"; |
306b0c95 | 44 | |
306b0c95 | 45 | /* Module params (documentation at end) */ |
ca3d70bd | 46 | static unsigned int num_devices = 1; |
33863c21 | 47 | |
08eee69f | 48 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 49 | { |
08eee69f | 50 | return zram->disksize; |
be2d1d56 SS |
51 | } |
52 | ||
9b3bb7ab SS |
53 | static inline struct zram *dev_to_zram(struct device *dev) |
54 | { | |
55 | return (struct zram *)dev_to_disk(dev)->private_data; | |
56 | } | |
57 | ||
b31177f2 | 58 | /* flag operations require table entry bit_spin_lock() being held */ |
522698d7 SS |
59 | static int zram_test_flag(struct zram_meta *meta, u32 index, |
60 | enum zram_pageflags flag) | |
99ebbd30 | 61 | { |
522698d7 SS |
62 | return meta->table[index].value & BIT(flag); |
63 | } | |
99ebbd30 | 64 | |
522698d7 SS |
65 | static void zram_set_flag(struct zram_meta *meta, u32 index, |
66 | enum zram_pageflags flag) | |
67 | { | |
68 | meta->table[index].value |= BIT(flag); | |
69 | } | |
99ebbd30 | 70 | |
522698d7 SS |
71 | static void zram_clear_flag(struct zram_meta *meta, u32 index, |
72 | enum zram_pageflags flag) | |
73 | { | |
74 | meta->table[index].value &= ~BIT(flag); | |
75 | } | |
99ebbd30 | 76 | |
522698d7 SS |
77 | static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) |
78 | { | |
79 | return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); | |
99ebbd30 AM |
80 | } |
81 | ||
522698d7 SS |
82 | static void zram_set_obj_size(struct zram_meta *meta, |
83 | u32 index, size_t size) | |
9b3bb7ab | 84 | { |
522698d7 | 85 | unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 86 | |
522698d7 SS |
87 | meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; |
88 | } | |
89 | ||
1c53e0d2 | 90 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
91 | { |
92 | return bvec->bv_len != PAGE_SIZE; | |
93 | } | |
94 | ||
b09ab054 MK |
95 | static void zram_revalidate_disk(struct zram *zram) |
96 | { | |
97 | revalidate_disk(zram->disk); | |
98 | /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ | |
e1735496 | 99 | zram->disk->queue->backing_dev_info->capabilities |= |
b09ab054 MK |
100 | BDI_CAP_STABLE_WRITES; |
101 | } | |
102 | ||
522698d7 SS |
103 | /* |
104 | * Check if request is within bounds and aligned on zram logical blocks. | |
105 | */ | |
1c53e0d2 | 106 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
107 | sector_t start, unsigned int size) |
108 | { | |
109 | u64 end, bound; | |
110 | ||
111 | /* unaligned request */ | |
112 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 113 | return false; |
522698d7 | 114 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 115 | return false; |
522698d7 SS |
116 | |
117 | end = start + (size >> SECTOR_SHIFT); | |
118 | bound = zram->disksize >> SECTOR_SHIFT; | |
119 | /* out of range range */ | |
120 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 121 | return false; |
522698d7 SS |
122 | |
123 | /* I/O request is valid */ | |
1c53e0d2 | 124 | return true; |
522698d7 SS |
125 | } |
126 | ||
127 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
128 | { | |
129 | if (*offset + bvec->bv_len >= PAGE_SIZE) | |
130 | (*index)++; | |
131 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; | |
132 | } | |
133 | ||
134 | static inline void update_used_max(struct zram *zram, | |
135 | const unsigned long pages) | |
136 | { | |
137 | unsigned long old_max, cur_max; | |
138 | ||
139 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
140 | ||
141 | do { | |
142 | cur_max = old_max; | |
143 | if (pages > cur_max) | |
144 | old_max = atomic_long_cmpxchg( | |
145 | &zram->stats.max_used_pages, cur_max, pages); | |
146 | } while (old_max != cur_max); | |
147 | } | |
148 | ||
1c53e0d2 | 149 | static bool page_zero_filled(void *ptr) |
522698d7 SS |
150 | { |
151 | unsigned int pos; | |
152 | unsigned long *page; | |
153 | ||
154 | page = (unsigned long *)ptr; | |
155 | ||
156 | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { | |
157 | if (page[pos]) | |
1c53e0d2 | 158 | return false; |
522698d7 SS |
159 | } |
160 | ||
1c53e0d2 | 161 | return true; |
522698d7 SS |
162 | } |
163 | ||
164 | static void handle_zero_page(struct bio_vec *bvec) | |
165 | { | |
166 | struct page *page = bvec->bv_page; | |
167 | void *user_mem; | |
168 | ||
169 | user_mem = kmap_atomic(page); | |
170 | if (is_partial_io(bvec)) | |
171 | memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); | |
172 | else | |
173 | clear_page(user_mem); | |
174 | kunmap_atomic(user_mem); | |
175 | ||
176 | flush_dcache_page(page); | |
9b3bb7ab SS |
177 | } |
178 | ||
179 | static ssize_t initstate_show(struct device *dev, | |
180 | struct device_attribute *attr, char *buf) | |
181 | { | |
a68eb3b6 | 182 | u32 val; |
9b3bb7ab SS |
183 | struct zram *zram = dev_to_zram(dev); |
184 | ||
a68eb3b6 SS |
185 | down_read(&zram->init_lock); |
186 | val = init_done(zram); | |
187 | up_read(&zram->init_lock); | |
9b3bb7ab | 188 | |
56b4e8cb | 189 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
190 | } |
191 | ||
522698d7 SS |
192 | static ssize_t disksize_show(struct device *dev, |
193 | struct device_attribute *attr, char *buf) | |
194 | { | |
195 | struct zram *zram = dev_to_zram(dev); | |
196 | ||
197 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
198 | } | |
199 | ||
9ada9da9 MK |
200 | static ssize_t mem_limit_store(struct device *dev, |
201 | struct device_attribute *attr, const char *buf, size_t len) | |
202 | { | |
203 | u64 limit; | |
204 | char *tmp; | |
205 | struct zram *zram = dev_to_zram(dev); | |
206 | ||
207 | limit = memparse(buf, &tmp); | |
208 | if (buf == tmp) /* no chars parsed, invalid input */ | |
209 | return -EINVAL; | |
210 | ||
211 | down_write(&zram->init_lock); | |
212 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
213 | up_write(&zram->init_lock); | |
214 | ||
215 | return len; | |
216 | } | |
217 | ||
461a8eee MK |
218 | static ssize_t mem_used_max_store(struct device *dev, |
219 | struct device_attribute *attr, const char *buf, size_t len) | |
220 | { | |
221 | int err; | |
222 | unsigned long val; | |
223 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
224 | |
225 | err = kstrtoul(buf, 10, &val); | |
226 | if (err || val != 0) | |
227 | return -EINVAL; | |
228 | ||
229 | down_read(&zram->init_lock); | |
5a99e95b WY |
230 | if (init_done(zram)) { |
231 | struct zram_meta *meta = zram->meta; | |
461a8eee MK |
232 | atomic_long_set(&zram->stats.max_used_pages, |
233 | zs_get_total_pages(meta->mem_pool)); | |
5a99e95b | 234 | } |
461a8eee MK |
235 | up_read(&zram->init_lock); |
236 | ||
237 | return len; | |
238 | } | |
239 | ||
43209ea2 SS |
240 | /* |
241 | * We switched to per-cpu streams and this attr is not needed anymore. | |
242 | * However, we will keep it around for some time, because: | |
243 | * a) we may revert per-cpu streams in the future | |
244 | * b) it's visible to user space and we need to follow our 2 years | |
245 | * retirement rule; but we already have a number of 'soon to be | |
246 | * altered' attrs, so max_comp_streams need to wait for the next | |
247 | * layoff cycle. | |
248 | */ | |
522698d7 SS |
249 | static ssize_t max_comp_streams_show(struct device *dev, |
250 | struct device_attribute *attr, char *buf) | |
251 | { | |
43209ea2 | 252 | return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); |
522698d7 SS |
253 | } |
254 | ||
beca3ec7 SS |
255 | static ssize_t max_comp_streams_store(struct device *dev, |
256 | struct device_attribute *attr, const char *buf, size_t len) | |
257 | { | |
43209ea2 | 258 | return len; |
beca3ec7 SS |
259 | } |
260 | ||
e46b8a03 SS |
261 | static ssize_t comp_algorithm_show(struct device *dev, |
262 | struct device_attribute *attr, char *buf) | |
263 | { | |
264 | size_t sz; | |
265 | struct zram *zram = dev_to_zram(dev); | |
266 | ||
267 | down_read(&zram->init_lock); | |
268 | sz = zcomp_available_show(zram->compressor, buf); | |
269 | up_read(&zram->init_lock); | |
270 | ||
271 | return sz; | |
272 | } | |
273 | ||
274 | static ssize_t comp_algorithm_store(struct device *dev, | |
275 | struct device_attribute *attr, const char *buf, size_t len) | |
276 | { | |
277 | struct zram *zram = dev_to_zram(dev); | |
415403be | 278 | char compressor[CRYPTO_MAX_ALG_NAME]; |
4bbacd51 SS |
279 | size_t sz; |
280 | ||
415403be SS |
281 | strlcpy(compressor, buf, sizeof(compressor)); |
282 | /* ignore trailing newline */ | |
283 | sz = strlen(compressor); | |
284 | if (sz > 0 && compressor[sz - 1] == '\n') | |
285 | compressor[sz - 1] = 0x00; | |
286 | ||
287 | if (!zcomp_available_algorithm(compressor)) | |
1d5b43bf LH |
288 | return -EINVAL; |
289 | ||
e46b8a03 SS |
290 | down_write(&zram->init_lock); |
291 | if (init_done(zram)) { | |
292 | up_write(&zram->init_lock); | |
293 | pr_info("Can't change algorithm for initialized device\n"); | |
294 | return -EBUSY; | |
295 | } | |
4bbacd51 | 296 | |
415403be | 297 | strlcpy(zram->compressor, compressor, sizeof(compressor)); |
e46b8a03 SS |
298 | up_write(&zram->init_lock); |
299 | return len; | |
300 | } | |
301 | ||
522698d7 SS |
302 | static ssize_t compact_store(struct device *dev, |
303 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 304 | { |
522698d7 SS |
305 | struct zram *zram = dev_to_zram(dev); |
306 | struct zram_meta *meta; | |
306b0c95 | 307 | |
522698d7 SS |
308 | down_read(&zram->init_lock); |
309 | if (!init_done(zram)) { | |
310 | up_read(&zram->init_lock); | |
311 | return -EINVAL; | |
312 | } | |
306b0c95 | 313 | |
522698d7 | 314 | meta = zram->meta; |
7d3f3938 | 315 | zs_compact(meta->mem_pool); |
522698d7 | 316 | up_read(&zram->init_lock); |
d2d5e762 | 317 | |
522698d7 | 318 | return len; |
d2d5e762 WY |
319 | } |
320 | ||
522698d7 SS |
321 | static ssize_t io_stat_show(struct device *dev, |
322 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 323 | { |
522698d7 SS |
324 | struct zram *zram = dev_to_zram(dev); |
325 | ssize_t ret; | |
d2d5e762 | 326 | |
522698d7 SS |
327 | down_read(&zram->init_lock); |
328 | ret = scnprintf(buf, PAGE_SIZE, | |
329 | "%8llu %8llu %8llu %8llu\n", | |
330 | (u64)atomic64_read(&zram->stats.failed_reads), | |
331 | (u64)atomic64_read(&zram->stats.failed_writes), | |
332 | (u64)atomic64_read(&zram->stats.invalid_io), | |
333 | (u64)atomic64_read(&zram->stats.notify_free)); | |
334 | up_read(&zram->init_lock); | |
306b0c95 | 335 | |
522698d7 | 336 | return ret; |
9b3bb7ab SS |
337 | } |
338 | ||
522698d7 SS |
339 | static ssize_t mm_stat_show(struct device *dev, |
340 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 341 | { |
522698d7 | 342 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 343 | struct zs_pool_stats pool_stats; |
522698d7 SS |
344 | u64 orig_size, mem_used = 0; |
345 | long max_used; | |
346 | ssize_t ret; | |
a539c72a | 347 | |
7d3f3938 SS |
348 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
349 | ||
522698d7 | 350 | down_read(&zram->init_lock); |
7d3f3938 | 351 | if (init_done(zram)) { |
522698d7 | 352 | mem_used = zs_get_total_pages(zram->meta->mem_pool); |
7d3f3938 SS |
353 | zs_pool_stats(zram->meta->mem_pool, &pool_stats); |
354 | } | |
9b3bb7ab | 355 | |
522698d7 SS |
356 | orig_size = atomic64_read(&zram->stats.pages_stored); |
357 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 358 | |
522698d7 | 359 | ret = scnprintf(buf, PAGE_SIZE, |
7d3f3938 | 360 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", |
522698d7 SS |
361 | orig_size << PAGE_SHIFT, |
362 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
363 | mem_used << PAGE_SHIFT, | |
364 | zram->limit_pages << PAGE_SHIFT, | |
365 | max_used << PAGE_SHIFT, | |
366 | (u64)atomic64_read(&zram->stats.zero_pages), | |
860c707d | 367 | pool_stats.pages_compacted); |
522698d7 | 368 | up_read(&zram->init_lock); |
9b3bb7ab | 369 | |
522698d7 SS |
370 | return ret; |
371 | } | |
372 | ||
623e47fc SS |
373 | static ssize_t debug_stat_show(struct device *dev, |
374 | struct device_attribute *attr, char *buf) | |
375 | { | |
376 | int version = 1; | |
377 | struct zram *zram = dev_to_zram(dev); | |
378 | ssize_t ret; | |
379 | ||
380 | down_read(&zram->init_lock); | |
381 | ret = scnprintf(buf, PAGE_SIZE, | |
382 | "version: %d\n%8llu\n", | |
383 | version, | |
384 | (u64)atomic64_read(&zram->stats.writestall)); | |
385 | up_read(&zram->init_lock); | |
386 | ||
387 | return ret; | |
388 | } | |
389 | ||
/*
 * Read-only device attributes for the three statistics files; each one is
 * served by the matching io_stat_show()/mm_stat_show()/debug_stat_show()
 * handler defined above.
 */
522698d7 SS |
390 | static DEVICE_ATTR_RO(io_stat); |
391 | static DEVICE_ATTR_RO(mm_stat); | |
623e47fc | 392 | static DEVICE_ATTR_RO(debug_stat); |
522698d7 SS |
393 | |
394 | static inline bool zram_meta_get(struct zram *zram) | |
395 | { | |
396 | if (atomic_inc_not_zero(&zram->refcount)) | |
397 | return true; | |
398 | return false; | |
399 | } | |
400 | ||
401 | static inline void zram_meta_put(struct zram *zram) | |
402 | { | |
403 | atomic_dec(&zram->refcount); | |
404 | } | |
405 | ||
406 | static void zram_meta_free(struct zram_meta *meta, u64 disksize) | |
407 | { | |
408 | size_t num_pages = disksize >> PAGE_SHIFT; | |
409 | size_t index; | |
1fec1172 GM |
410 | |
411 | /* Free all pages that are still in this zram device */ | |
412 | for (index = 0; index < num_pages; index++) { | |
413 | unsigned long handle = meta->table[index].handle; | |
414 | ||
415 | if (!handle) | |
416 | continue; | |
417 | ||
418 | zs_free(meta->mem_pool, handle); | |
419 | } | |
420 | ||
9b3bb7ab | 421 | zs_destroy_pool(meta->mem_pool); |
9b3bb7ab SS |
422 | vfree(meta->table); |
423 | kfree(meta); | |
424 | } | |
425 | ||
4ce321f5 | 426 | static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) |
9b3bb7ab SS |
427 | { |
428 | size_t num_pages; | |
429 | struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); | |
b8179958 | 430 | |
9b3bb7ab | 431 | if (!meta) |
b8179958 | 432 | return NULL; |
9b3bb7ab | 433 | |
9b3bb7ab SS |
434 | num_pages = disksize >> PAGE_SHIFT; |
435 | meta->table = vzalloc(num_pages * sizeof(*meta->table)); | |
436 | if (!meta->table) { | |
437 | pr_err("Error allocating zram address table\n"); | |
b8179958 | 438 | goto out_error; |
9b3bb7ab SS |
439 | } |
440 | ||
d0d8da2d | 441 | meta->mem_pool = zs_create_pool(pool_name); |
9b3bb7ab SS |
442 | if (!meta->mem_pool) { |
443 | pr_err("Error creating memory pool\n"); | |
b8179958 | 444 | goto out_error; |
9b3bb7ab SS |
445 | } |
446 | ||
447 | return meta; | |
448 | ||
b8179958 | 449 | out_error: |
9b3bb7ab | 450 | vfree(meta->table); |
9b3bb7ab | 451 | kfree(meta); |
b8179958 | 452 | return NULL; |
9b3bb7ab SS |
453 | } |
454 | ||
d2d5e762 WY |
455 | /* |
456 | * To protect concurrent access to the same index entry, | |
457 | * caller should hold this table index entry's bit_spinlock to | |
458 | * indicate this index entry is accessing. | |
459 | */ | |
f1e3cfff | 460 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 461 | { |
8b3cc3ed MK |
462 | struct zram_meta *meta = zram->meta; |
463 | unsigned long handle = meta->table[index].handle; | |
306b0c95 | 464 | |
fd1a30de | 465 | if (unlikely(!handle)) { |
2e882281 NG |
466 | /* |
467 | * No memory is allocated for zero filled pages. | |
468 | * Simply clear zero page flag. | |
469 | */ | |
8b3cc3ed MK |
470 | if (zram_test_flag(meta, index, ZRAM_ZERO)) { |
471 | zram_clear_flag(meta, index, ZRAM_ZERO); | |
90a7806e | 472 | atomic64_dec(&zram->stats.zero_pages); |
306b0c95 NG |
473 | } |
474 | return; | |
475 | } | |
476 | ||
8b3cc3ed | 477 | zs_free(meta->mem_pool, handle); |
306b0c95 | 478 | |
d2d5e762 WY |
479 | atomic64_sub(zram_get_obj_size(meta, index), |
480 | &zram->stats.compr_data_size); | |
90a7806e | 481 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 | 482 | |
8b3cc3ed | 483 | meta->table[index].handle = 0; |
d2d5e762 | 484 | zram_set_obj_size(meta, index, 0); |
306b0c95 NG |
485 | } |
486 | ||
37b51fdd | 487 | static int zram_decompress_page(struct zram *zram, char *mem, u32 index) |
306b0c95 | 488 | { |
b7ca232e | 489 | int ret = 0; |
37b51fdd | 490 | unsigned char *cmem; |
8b3cc3ed | 491 | struct zram_meta *meta = zram->meta; |
92967471 | 492 | unsigned long handle; |
ebaf9ab5 | 493 | unsigned int size; |
92967471 | 494 | |
d2d5e762 | 495 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
92967471 | 496 | handle = meta->table[index].handle; |
d2d5e762 | 497 | size = zram_get_obj_size(meta, index); |
306b0c95 | 498 | |
8b3cc3ed | 499 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { |
d2d5e762 | 500 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
42e99bd9 | 501 | clear_page(mem); |
8c921b2b JM |
502 | return 0; |
503 | } | |
306b0c95 | 504 | |
8b3cc3ed | 505 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); |
ebaf9ab5 | 506 | if (size == PAGE_SIZE) { |
42e99bd9 | 507 | copy_page(mem, cmem); |
ebaf9ab5 SS |
508 | } else { |
509 | struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
510 | ||
511 | ret = zcomp_decompress(zstrm, cmem, size, mem); | |
512 | zcomp_stream_put(zram->comp); | |
513 | } | |
8b3cc3ed | 514 | zs_unmap_object(meta->mem_pool, handle); |
d2d5e762 | 515 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
a1dd52af | 516 | |
8c921b2b | 517 | /* Should NEVER happen. Return bio error if it does. */ |
b7ca232e | 518 | if (unlikely(ret)) { |
8c921b2b | 519 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
8c921b2b | 520 | return ret; |
a1dd52af | 521 | } |
306b0c95 | 522 | |
8c921b2b | 523 | return 0; |
306b0c95 NG |
524 | } |
525 | ||
37b51fdd | 526 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
b627cff3 | 527 | u32 index, int offset) |
924bd88d JM |
528 | { |
529 | int ret; | |
37b51fdd SS |
530 | struct page *page; |
531 | unsigned char *user_mem, *uncmem = NULL; | |
8b3cc3ed | 532 | struct zram_meta *meta = zram->meta; |
37b51fdd SS |
533 | page = bvec->bv_page; |
534 | ||
d2d5e762 | 535 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
8b3cc3ed MK |
536 | if (unlikely(!meta->table[index].handle) || |
537 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
d2d5e762 | 538 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
37b51fdd | 539 | handle_zero_page(bvec); |
924bd88d JM |
540 | return 0; |
541 | } | |
d2d5e762 | 542 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
924bd88d | 543 | |
37b51fdd SS |
544 | if (is_partial_io(bvec)) |
545 | /* Use a temporary buffer to decompress the page */ | |
7e5a5104 MK |
546 | uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); |
547 | ||
548 | user_mem = kmap_atomic(page); | |
549 | if (!is_partial_io(bvec)) | |
37b51fdd SS |
550 | uncmem = user_mem; |
551 | ||
552 | if (!uncmem) { | |
70864969 | 553 | pr_err("Unable to allocate temp memory\n"); |
37b51fdd SS |
554 | ret = -ENOMEM; |
555 | goto out_cleanup; | |
556 | } | |
924bd88d | 557 | |
37b51fdd | 558 | ret = zram_decompress_page(zram, uncmem, index); |
924bd88d | 559 | /* Should NEVER happen. Return bio error if it does. */ |
b7ca232e | 560 | if (unlikely(ret)) |
37b51fdd | 561 | goto out_cleanup; |
924bd88d | 562 | |
37b51fdd SS |
563 | if (is_partial_io(bvec)) |
564 | memcpy(user_mem + bvec->bv_offset, uncmem + offset, | |
565 | bvec->bv_len); | |
566 | ||
567 | flush_dcache_page(page); | |
568 | ret = 0; | |
569 | out_cleanup: | |
570 | kunmap_atomic(user_mem); | |
571 | if (is_partial_io(bvec)) | |
572 | kfree(uncmem); | |
573 | return ret; | |
924bd88d JM |
574 | } |
575 | ||
576 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
577 | int offset) | |
306b0c95 | 578 | { |
397c6066 | 579 | int ret = 0; |
ebaf9ab5 | 580 | unsigned int clen; |
da9556a2 | 581 | unsigned long handle = 0; |
130f315a | 582 | struct page *page; |
924bd88d | 583 | unsigned char *user_mem, *cmem, *src, *uncmem = NULL; |
8b3cc3ed | 584 | struct zram_meta *meta = zram->meta; |
17162f41 | 585 | struct zcomp_strm *zstrm = NULL; |
461a8eee | 586 | unsigned long alloced_pages; |
306b0c95 | 587 | |
8c921b2b | 588 | page = bvec->bv_page; |
924bd88d JM |
589 | if (is_partial_io(bvec)) { |
590 | /* | |
591 | * This is a partial IO. We need to read the full page | |
592 | * before to write the changes. | |
593 | */ | |
7e5a5104 | 594 | uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); |
924bd88d | 595 | if (!uncmem) { |
924bd88d JM |
596 | ret = -ENOMEM; |
597 | goto out; | |
598 | } | |
37b51fdd | 599 | ret = zram_decompress_page(zram, uncmem, index); |
397c6066 | 600 | if (ret) |
924bd88d | 601 | goto out; |
924bd88d JM |
602 | } |
603 | ||
da9556a2 | 604 | compress_again: |
ba82fe2e | 605 | user_mem = kmap_atomic(page); |
397c6066 | 606 | if (is_partial_io(bvec)) { |
924bd88d JM |
607 | memcpy(uncmem + offset, user_mem + bvec->bv_offset, |
608 | bvec->bv_len); | |
397c6066 NG |
609 | kunmap_atomic(user_mem); |
610 | user_mem = NULL; | |
611 | } else { | |
924bd88d | 612 | uncmem = user_mem; |
397c6066 | 613 | } |
924bd88d JM |
614 | |
615 | if (page_zero_filled(uncmem)) { | |
c4065152 WY |
616 | if (user_mem) |
617 | kunmap_atomic(user_mem); | |
f40ac2ae | 618 | /* Free memory associated with this sector now. */ |
d2d5e762 | 619 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae | 620 | zram_free_page(zram, index); |
92967471 | 621 | zram_set_flag(meta, index, ZRAM_ZERO); |
d2d5e762 | 622 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae | 623 | |
90a7806e | 624 | atomic64_inc(&zram->stats.zero_pages); |
924bd88d JM |
625 | ret = 0; |
626 | goto out; | |
8c921b2b | 627 | } |
306b0c95 | 628 | |
2aea8493 | 629 | zstrm = zcomp_stream_get(zram->comp); |
ebaf9ab5 | 630 | ret = zcomp_compress(zstrm, uncmem, &clen); |
397c6066 NG |
631 | if (!is_partial_io(bvec)) { |
632 | kunmap_atomic(user_mem); | |
633 | user_mem = NULL; | |
634 | uncmem = NULL; | |
635 | } | |
306b0c95 | 636 | |
b7ca232e | 637 | if (unlikely(ret)) { |
8c921b2b | 638 | pr_err("Compression failed! err=%d\n", ret); |
924bd88d | 639 | goto out; |
8c921b2b | 640 | } |
da9556a2 | 641 | |
b7ca232e | 642 | src = zstrm->buffer; |
c8f2f0db | 643 | if (unlikely(clen > max_zpage_size)) { |
c8f2f0db | 644 | clen = PAGE_SIZE; |
397c6066 NG |
645 | if (is_partial_io(bvec)) |
646 | src = uncmem; | |
c8f2f0db | 647 | } |
a1dd52af | 648 | |
da9556a2 SS |
649 | /* |
650 | * handle allocation has 2 paths: | |
651 | * a) fast path is executed with preemption disabled (for | |
652 | * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, | |
653 | * since we can't sleep; | |
654 | * b) slow path enables preemption and attempts to allocate | |
655 | * the page with __GFP_DIRECT_RECLAIM bit set. we have to | |
656 | * put per-cpu compression stream and, thus, to re-do | |
657 | * the compression once handle is allocated. | |
658 | * | |
659 | * if we have a 'non-null' handle here then we are coming | |
660 | * from the slow path and handle has already been allocated. | |
661 | */ | |
662 | if (!handle) | |
663 | handle = zs_malloc(meta->mem_pool, clen, | |
664 | __GFP_KSWAPD_RECLAIM | | |
665 | __GFP_NOWARN | | |
9bc482d3 MK |
666 | __GFP_HIGHMEM | |
667 | __GFP_MOVABLE); | |
fd1a30de | 668 | if (!handle) { |
2aea8493 | 669 | zcomp_stream_put(zram->comp); |
da9556a2 SS |
670 | zstrm = NULL; |
671 | ||
623e47fc SS |
672 | atomic64_inc(&zram->stats.writestall); |
673 | ||
da9556a2 | 674 | handle = zs_malloc(meta->mem_pool, clen, |
9bc482d3 MK |
675 | GFP_NOIO | __GFP_HIGHMEM | |
676 | __GFP_MOVABLE); | |
da9556a2 SS |
677 | if (handle) |
678 | goto compress_again; | |
679 | ||
ebaf9ab5 | 680 | pr_err("Error allocating memory for compressed page: %u, size=%u\n", |
596b3dd4 | 681 | index, clen); |
924bd88d JM |
682 | ret = -ENOMEM; |
683 | goto out; | |
8c921b2b | 684 | } |
9ada9da9 | 685 | |
461a8eee | 686 | alloced_pages = zs_get_total_pages(meta->mem_pool); |
12372755 SS |
687 | update_used_max(zram, alloced_pages); |
688 | ||
461a8eee | 689 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
9ada9da9 MK |
690 | zs_free(meta->mem_pool, handle); |
691 | ret = -ENOMEM; | |
692 | goto out; | |
693 | } | |
694 | ||
8b3cc3ed | 695 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); |
306b0c95 | 696 | |
42e99bd9 | 697 | if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { |
397c6066 | 698 | src = kmap_atomic(page); |
42e99bd9 | 699 | copy_page(cmem, src); |
397c6066 | 700 | kunmap_atomic(src); |
42e99bd9 JL |
701 | } else { |
702 | memcpy(cmem, src, clen); | |
703 | } | |
306b0c95 | 704 | |
2aea8493 | 705 | zcomp_stream_put(zram->comp); |
17162f41 | 706 | zstrm = NULL; |
8b3cc3ed | 707 | zs_unmap_object(meta->mem_pool, handle); |
fd1a30de | 708 | |
f40ac2ae SS |
709 | /* |
710 | * Free memory associated with this sector | |
711 | * before overwriting unused sectors. | |
712 | */ | |
d2d5e762 | 713 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae SS |
714 | zram_free_page(zram, index); |
715 | ||
8b3cc3ed | 716 | meta->table[index].handle = handle; |
d2d5e762 WY |
717 | zram_set_obj_size(meta, index, clen); |
718 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
306b0c95 | 719 | |
8c921b2b | 720 | /* Update stats */ |
90a7806e SS |
721 | atomic64_add(clen, &zram->stats.compr_data_size); |
722 | atomic64_inc(&zram->stats.pages_stored); | |
924bd88d | 723 | out: |
17162f41 | 724 | if (zstrm) |
2aea8493 | 725 | zcomp_stream_put(zram->comp); |
397c6066 NG |
726 | if (is_partial_io(bvec)) |
727 | kfree(uncmem); | |
924bd88d | 728 | return ret; |
8c921b2b JM |
729 | } |
730 | ||
f4659d8e JK |
731 | /* |
732 | * zram_bio_discard - handler on discard request | |
733 | * @index: physical block index in PAGE_SIZE units | |
734 | * @offset: byte offset within physical block | |
735 | */ | |
736 | static void zram_bio_discard(struct zram *zram, u32 index, | |
737 | int offset, struct bio *bio) | |
738 | { | |
739 | size_t n = bio->bi_iter.bi_size; | |
d2d5e762 | 740 | struct zram_meta *meta = zram->meta; |
f4659d8e JK |
741 | |
742 | /* | |
743 | * zram manages data in physical block size units. Because logical block | |
744 | * size isn't identical with physical block size on some arch, we | |
745 | * could get a discard request pointing to a specific offset within a | |
746 | * certain physical block. Although we can handle this request by | |
747 | * reading that physiclal block and decompressing and partially zeroing | |
748 | * and re-compressing and then re-storing it, this isn't reasonable | |
749 | * because our intent with a discard request is to save memory. So | |
750 | * skipping this logical block is appropriate here. | |
751 | */ | |
752 | if (offset) { | |
38515c73 | 753 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
754 | return; |
755 | ||
38515c73 | 756 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
757 | index++; |
758 | } | |
759 | ||
760 | while (n >= PAGE_SIZE) { | |
d2d5e762 | 761 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f4659d8e | 762 | zram_free_page(zram, index); |
d2d5e762 | 763 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
015254da | 764 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
765 | index++; |
766 | n -= PAGE_SIZE; | |
767 | } | |
768 | } | |
769 | ||
522698d7 | 770 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
c11f0c0b | 771 | int offset, bool is_write) |
9b3bb7ab | 772 | { |
522698d7 | 773 | unsigned long start_time = jiffies; |
c11f0c0b | 774 | int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; |
9b3bb7ab | 775 | int ret; |
9b3bb7ab | 776 | |
c11f0c0b | 777 | generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT, |
522698d7 | 778 | &zram->disk->part0); |
46a51c80 | 779 | |
c11f0c0b | 780 | if (!is_write) { |
522698d7 SS |
781 | atomic64_inc(&zram->stats.num_reads); |
782 | ret = zram_bvec_read(zram, bvec, index, offset); | |
783 | } else { | |
784 | atomic64_inc(&zram->stats.num_writes); | |
785 | ret = zram_bvec_write(zram, bvec, index, offset); | |
1b672224 | 786 | } |
9b3bb7ab | 787 | |
c11f0c0b | 788 | generic_end_io_acct(rw_acct, &zram->disk->part0, start_time); |
9b3bb7ab | 789 | |
522698d7 | 790 | if (unlikely(ret)) { |
c11f0c0b | 791 | if (!is_write) |
522698d7 SS |
792 | atomic64_inc(&zram->stats.failed_reads); |
793 | else | |
794 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 795 | } |
9b3bb7ab | 796 | |
1b672224 | 797 | return ret; |
8c921b2b JM |
798 | } |
799 | ||
be257c61 | 800 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 801 | { |
abf54548 | 802 | int offset; |
8c921b2b | 803 | u32 index; |
7988613b KO |
804 | struct bio_vec bvec; |
805 | struct bvec_iter iter; | |
8c921b2b | 806 | |
4f024f37 KO |
807 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
808 | offset = (bio->bi_iter.bi_sector & | |
809 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 810 | |
95fe6c1a | 811 | if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { |
f4659d8e | 812 | zram_bio_discard(zram, index, offset, bio); |
4246a0b6 | 813 | bio_endio(bio); |
f4659d8e JK |
814 | return; |
815 | } | |
816 | ||
7988613b | 817 | bio_for_each_segment(bvec, bio, iter) { |
924bd88d JM |
818 | int max_transfer_size = PAGE_SIZE - offset; |
819 | ||
7988613b | 820 | if (bvec.bv_len > max_transfer_size) { |
924bd88d JM |
821 | /* |
822 | * zram_bvec_rw() can only make operation on a single | |
823 | * zram page. Split the bio vector. | |
824 | */ | |
825 | struct bio_vec bv; | |
826 | ||
7988613b | 827 | bv.bv_page = bvec.bv_page; |
924bd88d | 828 | bv.bv_len = max_transfer_size; |
7988613b | 829 | bv.bv_offset = bvec.bv_offset; |
924bd88d | 830 | |
abf54548 | 831 | if (zram_bvec_rw(zram, &bv, index, offset, |
c11f0c0b | 832 | op_is_write(bio_op(bio))) < 0) |
924bd88d JM |
833 | goto out; |
834 | ||
7988613b | 835 | bv.bv_len = bvec.bv_len - max_transfer_size; |
924bd88d | 836 | bv.bv_offset += max_transfer_size; |
abf54548 | 837 | if (zram_bvec_rw(zram, &bv, index + 1, 0, |
c11f0c0b | 838 | op_is_write(bio_op(bio))) < 0) |
924bd88d JM |
839 | goto out; |
840 | } else | |
abf54548 | 841 | if (zram_bvec_rw(zram, &bvec, index, offset, |
c11f0c0b | 842 | op_is_write(bio_op(bio))) < 0) |
924bd88d JM |
843 | goto out; |
844 | ||
7988613b | 845 | update_position(&index, &offset, &bvec); |
a1dd52af | 846 | } |
306b0c95 | 847 | |
4246a0b6 | 848 | bio_endio(bio); |
7d7854b4 | 849 | return; |
306b0c95 NG |
850 | |
851 | out: | |
306b0c95 | 852 | bio_io_error(bio); |
306b0c95 NG |
853 | } |
854 | ||
306b0c95 | 855 | /* |
f1e3cfff | 856 | * Handler function for all zram I/O requests. |
306b0c95 | 857 | */ |
dece1635 | 858 | static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) |
306b0c95 | 859 | { |
f1e3cfff | 860 | struct zram *zram = queue->queuedata; |
306b0c95 | 861 | |
08eee69f | 862 | if (unlikely(!zram_meta_get(zram))) |
3de738cd | 863 | goto error; |
0900beae | 864 | |
54efd50b KO |
865 | blk_queue_split(queue, &bio, queue->bio_split); |
866 | ||
54850e73 | 867 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
868 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 869 | atomic64_inc(&zram->stats.invalid_io); |
08eee69f | 870 | goto put_zram; |
6642a67c JM |
871 | } |
872 | ||
be257c61 | 873 | __zram_make_request(zram, bio); |
08eee69f | 874 | zram_meta_put(zram); |
dece1635 | 875 | return BLK_QC_T_NONE; |
08eee69f MK |
876 | put_zram: |
877 | zram_meta_put(zram); | |
0900beae JM |
878 | error: |
879 | bio_io_error(bio); | |
dece1635 | 880 | return BLK_QC_T_NONE; |
306b0c95 NG |
881 | } |
882 | ||
2ccbec05 NG |
883 | static void zram_slot_free_notify(struct block_device *bdev, |
884 | unsigned long index) | |
107c161b | 885 | { |
f1e3cfff | 886 | struct zram *zram; |
f614a9f4 | 887 | struct zram_meta *meta; |
107c161b | 888 | |
f1e3cfff | 889 | zram = bdev->bd_disk->private_data; |
f614a9f4 | 890 | meta = zram->meta; |
a0c516cb | 891 | |
d2d5e762 | 892 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f614a9f4 | 893 | zram_free_page(zram, index); |
d2d5e762 | 894 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
f614a9f4 | 895 | atomic64_inc(&zram->stats.notify_free); |
107c161b NG |
896 | } |
897 | ||
8c7f0102 | 898 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
c11f0c0b | 899 | struct page *page, bool is_write) |
8c7f0102 | 900 | { |
08eee69f | 901 | int offset, err = -EIO; |
8c7f0102 | 902 | u32 index; |
903 | struct zram *zram; | |
904 | struct bio_vec bv; | |
905 | ||
906 | zram = bdev->bd_disk->private_data; | |
08eee69f MK |
907 | if (unlikely(!zram_meta_get(zram))) |
908 | goto out; | |
909 | ||
8c7f0102 | 910 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
911 | atomic64_inc(&zram->stats.invalid_io); | |
08eee69f MK |
912 | err = -EINVAL; |
913 | goto put_zram; | |
8c7f0102 | 914 | } |
915 | ||
916 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
917 | offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; | |
918 | ||
919 | bv.bv_page = page; | |
920 | bv.bv_len = PAGE_SIZE; | |
921 | bv.bv_offset = 0; | |
922 | ||
c11f0c0b | 923 | err = zram_bvec_rw(zram, &bv, index, offset, is_write); |
08eee69f MK |
924 | put_zram: |
925 | zram_meta_put(zram); | |
926 | out: | |
8c7f0102 | 927 | /* |
928 | * If I/O fails, just return error(ie, non-zero) without | |
929 | * calling page_endio. | |
930 | * It causes resubmit the I/O with bio request by upper functions | |
931 | * of rw_page(e.g., swap_readpage, __swap_writepage) and | |
932 | * bio->bi_end_io does things to handle the error | |
933 | * (e.g., SetPageError, set_page_dirty and extra works). | |
934 | */ | |
935 | if (err == 0) | |
c11f0c0b | 936 | page_endio(page, is_write, 0); |
8c7f0102 | 937 | return err; |
938 | } | |
939 | ||
522698d7 SS |
940 | static void zram_reset_device(struct zram *zram) |
941 | { | |
942 | struct zram_meta *meta; | |
943 | struct zcomp *comp; | |
944 | u64 disksize; | |
306b0c95 | 945 | |
522698d7 | 946 | down_write(&zram->init_lock); |
9b3bb7ab | 947 | |
522698d7 SS |
948 | zram->limit_pages = 0; |
949 | ||
950 | if (!init_done(zram)) { | |
951 | up_write(&zram->init_lock); | |
952 | return; | |
953 | } | |
954 | ||
955 | meta = zram->meta; | |
956 | comp = zram->comp; | |
957 | disksize = zram->disksize; | |
958 | /* | |
959 | * Refcount will go down to 0 eventually and r/w handler | |
960 | * cannot handle further I/O so it will bail out by | |
961 | * check zram_meta_get. | |
962 | */ | |
963 | zram_meta_put(zram); | |
964 | /* | |
965 | * We want to free zram_meta in process context to avoid | |
966 | * deadlock between reclaim path and any other locks. | |
967 | */ | |
968 | wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); | |
969 | ||
970 | /* Reset stats */ | |
971 | memset(&zram->stats, 0, sizeof(zram->stats)); | |
972 | zram->disksize = 0; | |
522698d7 SS |
973 | |
974 | set_capacity(zram->disk, 0); | |
975 | part_stat_set_all(&zram->disk->part0, 0); | |
976 | ||
977 | up_write(&zram->init_lock); | |
978 | /* I/O operation under all of CPU are done so let's free */ | |
979 | zram_meta_free(meta, disksize); | |
980 | zcomp_destroy(comp); | |
981 | } | |
982 | ||
983 | static ssize_t disksize_store(struct device *dev, | |
984 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 985 | { |
522698d7 SS |
986 | u64 disksize; |
987 | struct zcomp *comp; | |
988 | struct zram_meta *meta; | |
2f6a3bed | 989 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 990 | int err; |
2f6a3bed | 991 | |
522698d7 SS |
992 | disksize = memparse(buf, NULL); |
993 | if (!disksize) | |
994 | return -EINVAL; | |
2f6a3bed | 995 | |
522698d7 | 996 | disksize = PAGE_ALIGN(disksize); |
4ce321f5 | 997 | meta = zram_meta_alloc(zram->disk->disk_name, disksize); |
522698d7 SS |
998 | if (!meta) |
999 | return -ENOMEM; | |
1000 | ||
da9556a2 | 1001 | comp = zcomp_create(zram->compressor); |
522698d7 | 1002 | if (IS_ERR(comp)) { |
70864969 | 1003 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1004 | zram->compressor); |
1005 | err = PTR_ERR(comp); | |
1006 | goto out_free_meta; | |
1007 | } | |
1008 | ||
1009 | down_write(&zram->init_lock); | |
1010 | if (init_done(zram)) { | |
1011 | pr_info("Cannot change disksize for initialized device\n"); | |
1012 | err = -EBUSY; | |
1013 | goto out_destroy_comp; | |
1014 | } | |
1015 | ||
1016 | init_waitqueue_head(&zram->io_done); | |
1017 | atomic_set(&zram->refcount, 1); | |
1018 | zram->meta = meta; | |
1019 | zram->comp = comp; | |
1020 | zram->disksize = disksize; | |
1021 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | |
b09ab054 | 1022 | zram_revalidate_disk(zram); |
e7ccfc4c | 1023 | up_write(&zram->init_lock); |
522698d7 SS |
1024 | |
1025 | return len; | |
1026 | ||
1027 | out_destroy_comp: | |
1028 | up_write(&zram->init_lock); | |
1029 | zcomp_destroy(comp); | |
1030 | out_free_meta: | |
1031 | zram_meta_free(meta, disksize); | |
1032 | return err; | |
2f6a3bed SS |
1033 | } |
1034 | ||
522698d7 SS |
1035 | static ssize_t reset_store(struct device *dev, |
1036 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1037 | { |
522698d7 SS |
1038 | int ret; |
1039 | unsigned short do_reset; | |
1040 | struct zram *zram; | |
1041 | struct block_device *bdev; | |
4f2109f6 | 1042 | |
f405c445 SS |
1043 | ret = kstrtou16(buf, 10, &do_reset); |
1044 | if (ret) | |
1045 | return ret; | |
1046 | ||
1047 | if (!do_reset) | |
1048 | return -EINVAL; | |
1049 | ||
522698d7 SS |
1050 | zram = dev_to_zram(dev); |
1051 | bdev = bdget_disk(zram->disk, 0); | |
522698d7 SS |
1052 | if (!bdev) |
1053 | return -ENOMEM; | |
4f2109f6 | 1054 | |
522698d7 | 1055 | mutex_lock(&bdev->bd_mutex); |
f405c445 SS |
1056 | /* Do not reset an active device or claimed device */ |
1057 | if (bdev->bd_openers || zram->claim) { | |
1058 | mutex_unlock(&bdev->bd_mutex); | |
1059 | bdput(bdev); | |
1060 | return -EBUSY; | |
522698d7 SS |
1061 | } |
1062 | ||
f405c445 SS |
1063 | /* From now on, anyone can't open /dev/zram[0-9] */ |
1064 | zram->claim = true; | |
1065 | mutex_unlock(&bdev->bd_mutex); | |
522698d7 | 1066 | |
f405c445 | 1067 | /* Make sure all the pending I/O are finished */ |
522698d7 SS |
1068 | fsync_bdev(bdev); |
1069 | zram_reset_device(zram); | |
b09ab054 | 1070 | zram_revalidate_disk(zram); |
522698d7 SS |
1071 | bdput(bdev); |
1072 | ||
f405c445 SS |
1073 | mutex_lock(&bdev->bd_mutex); |
1074 | zram->claim = false; | |
1075 | mutex_unlock(&bdev->bd_mutex); | |
1076 | ||
522698d7 | 1077 | return len; |
f405c445 SS |
1078 | } |
1079 | ||
1080 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1081 | { | |
1082 | int ret = 0; | |
1083 | struct zram *zram; | |
1084 | ||
1085 | WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); | |
1086 | ||
1087 | zram = bdev->bd_disk->private_data; | |
1088 | /* zram was claimed to reset so open request fails */ | |
1089 | if (zram->claim) | |
1090 | ret = -EBUSY; | |
4f2109f6 SS |
1091 | |
1092 | return ret; | |
1093 | } | |
1094 | ||
522698d7 | 1095 | static const struct block_device_operations zram_devops = { |
f405c445 | 1096 | .open = zram_open, |
522698d7 SS |
1097 | .swap_slot_free_notify = zram_slot_free_notify, |
1098 | .rw_page = zram_rw_page, | |
1099 | .owner = THIS_MODULE | |
1100 | }; | |
1101 | ||
1102 | static DEVICE_ATTR_WO(compact); | |
1103 | static DEVICE_ATTR_RW(disksize); | |
1104 | static DEVICE_ATTR_RO(initstate); | |
1105 | static DEVICE_ATTR_WO(reset); | |
c87d1655 SS |
1106 | static DEVICE_ATTR_WO(mem_limit); |
1107 | static DEVICE_ATTR_WO(mem_used_max); | |
522698d7 SS |
1108 | static DEVICE_ATTR_RW(max_comp_streams); |
1109 | static DEVICE_ATTR_RW(comp_algorithm); | |
a68eb3b6 | 1110 | |
9b3bb7ab SS |
1111 | static struct attribute *zram_disk_attrs[] = { |
1112 | &dev_attr_disksize.attr, | |
1113 | &dev_attr_initstate.attr, | |
1114 | &dev_attr_reset.attr, | |
99ebbd30 | 1115 | &dev_attr_compact.attr, |
9ada9da9 | 1116 | &dev_attr_mem_limit.attr, |
461a8eee | 1117 | &dev_attr_mem_used_max.attr, |
beca3ec7 | 1118 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1119 | &dev_attr_comp_algorithm.attr, |
2f6a3bed | 1120 | &dev_attr_io_stat.attr, |
4f2109f6 | 1121 | &dev_attr_mm_stat.attr, |
623e47fc | 1122 | &dev_attr_debug_stat.attr, |
9b3bb7ab SS |
1123 | NULL, |
1124 | }; | |
1125 | ||
1126 | static struct attribute_group zram_disk_attr_group = { | |
1127 | .attrs = zram_disk_attrs, | |
1128 | }; | |
1129 | ||
92ff1528 SS |
1130 | /* |
1131 | * Allocate and initialize new zram device. the function returns | |
1132 | * '>= 0' device_id upon success, and negative value otherwise. | |
1133 | */ | |
1134 | static int zram_add(void) | |
306b0c95 | 1135 | { |
85508ec6 | 1136 | struct zram *zram; |
ee980160 | 1137 | struct request_queue *queue; |
92ff1528 | 1138 | int ret, device_id; |
85508ec6 SS |
1139 | |
1140 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1141 | if (!zram) | |
1142 | return -ENOMEM; | |
1143 | ||
92ff1528 | 1144 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1145 | if (ret < 0) |
1146 | goto out_free_dev; | |
92ff1528 | 1147 | device_id = ret; |
de1a21a0 | 1148 | |
0900beae | 1149 | init_rwsem(&zram->init_lock); |
306b0c95 | 1150 | |
ee980160 SS |
1151 | queue = blk_alloc_queue(GFP_KERNEL); |
1152 | if (!queue) { | |
306b0c95 NG |
1153 | pr_err("Error allocating disk queue for device %d\n", |
1154 | device_id); | |
85508ec6 SS |
1155 | ret = -ENOMEM; |
1156 | goto out_free_idr; | |
306b0c95 NG |
1157 | } |
1158 | ||
ee980160 | 1159 | blk_queue_make_request(queue, zram_make_request); |
306b0c95 | 1160 | |
85508ec6 | 1161 | /* gendisk structure */ |
f1e3cfff NG |
1162 | zram->disk = alloc_disk(1); |
1163 | if (!zram->disk) { | |
70864969 | 1164 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1165 | device_id); |
201c7b72 | 1166 | ret = -ENOMEM; |
39a9b8ac | 1167 | goto out_free_queue; |
306b0c95 NG |
1168 | } |
1169 | ||
f1e3cfff NG |
1170 | zram->disk->major = zram_major; |
1171 | zram->disk->first_minor = device_id; | |
1172 | zram->disk->fops = &zram_devops; | |
ee980160 SS |
1173 | zram->disk->queue = queue; |
1174 | zram->disk->queue->queuedata = zram; | |
f1e3cfff NG |
1175 | zram->disk->private_data = zram; |
1176 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1177 | |
33863c21 | 1178 | /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ |
f1e3cfff | 1179 | set_capacity(zram->disk, 0); |
b67d1ec1 SS |
1180 | /* zram devices sort of resembles non-rotational disks */ |
1181 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); | |
b277da0a | 1182 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); |
a1dd52af NG |
1183 | /* |
1184 | * To ensure that we always get PAGE_SIZE aligned | |
1185 | * and n*PAGE_SIZED sized I/O requests. | |
1186 | */ | |
f1e3cfff | 1187 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1188 | blk_queue_logical_block_size(zram->disk->queue, |
1189 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1190 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1191 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1192 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1193 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
f4659d8e JK |
1194 | /* |
1195 | * zram_bio_discard() will clear all logical blocks if logical block | |
1196 | * size is identical with physical block size(PAGE_SIZE). But if it is | |
1197 | * different, we will skip discarding some parts of logical blocks in | |
1198 | * the part of the request range which isn't aligned to physical block | |
1199 | * size. So we can't ensure that all discarded logical blocks are | |
1200 | * zeroed. | |
1201 | */ | |
1202 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
1203 | zram->disk->queue->limits.discard_zeroes_data = 1; | |
1204 | else | |
1205 | zram->disk->queue->limits.discard_zeroes_data = 0; | |
1206 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); | |
5d83d5a0 | 1207 | |
f1e3cfff | 1208 | add_disk(zram->disk); |
306b0c95 | 1209 | |
33863c21 NG |
1210 | ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, |
1211 | &zram_disk_attr_group); | |
1212 | if (ret < 0) { | |
70864969 SS |
1213 | pr_err("Error creating sysfs group for device %d\n", |
1214 | device_id); | |
39a9b8ac | 1215 | goto out_free_disk; |
33863c21 | 1216 | } |
e46b8a03 | 1217 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
be2d1d56 | 1218 | zram->meta = NULL; |
d12b63c9 SS |
1219 | |
1220 | pr_info("Added device: %s\n", zram->disk->disk_name); | |
92ff1528 | 1221 | return device_id; |
de1a21a0 | 1222 | |
39a9b8ac JL |
1223 | out_free_disk: |
1224 | del_gendisk(zram->disk); | |
1225 | put_disk(zram->disk); | |
1226 | out_free_queue: | |
ee980160 | 1227 | blk_cleanup_queue(queue); |
85508ec6 SS |
1228 | out_free_idr: |
1229 | idr_remove(&zram_index_idr, device_id); | |
1230 | out_free_dev: | |
1231 | kfree(zram); | |
de1a21a0 | 1232 | return ret; |
306b0c95 NG |
1233 | } |
1234 | ||
6566d1a3 | 1235 | static int zram_remove(struct zram *zram) |
306b0c95 | 1236 | { |
6566d1a3 SS |
1237 | struct block_device *bdev; |
1238 | ||
1239 | bdev = bdget_disk(zram->disk, 0); | |
1240 | if (!bdev) | |
1241 | return -ENOMEM; | |
1242 | ||
1243 | mutex_lock(&bdev->bd_mutex); | |
1244 | if (bdev->bd_openers || zram->claim) { | |
1245 | mutex_unlock(&bdev->bd_mutex); | |
1246 | bdput(bdev); | |
1247 | return -EBUSY; | |
1248 | } | |
1249 | ||
1250 | zram->claim = true; | |
1251 | mutex_unlock(&bdev->bd_mutex); | |
1252 | ||
85508ec6 SS |
1253 | /* |
1254 | * Remove sysfs first, so no one will perform a disksize | |
6566d1a3 SS |
1255 | * store while we destroy the devices. This also helps during |
1256 | * hot_remove -- zram_reset_device() is the last holder of | |
1257 | * ->init_lock, no later/concurrent disksize_store() or any | |
1258 | * other sysfs handlers are possible. | |
85508ec6 SS |
1259 | */ |
1260 | sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, | |
1261 | &zram_disk_attr_group); | |
306b0c95 | 1262 | |
6566d1a3 SS |
1263 | /* Make sure all the pending I/O are finished */ |
1264 | fsync_bdev(bdev); | |
85508ec6 | 1265 | zram_reset_device(zram); |
6566d1a3 SS |
1266 | bdput(bdev); |
1267 | ||
1268 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
1269 | ||
85508ec6 SS |
1270 | blk_cleanup_queue(zram->disk->queue); |
1271 | del_gendisk(zram->disk); | |
1272 | put_disk(zram->disk); | |
1273 | kfree(zram); | |
6566d1a3 SS |
1274 | return 0; |
1275 | } | |
1276 | ||
1277 | /* zram-control sysfs attributes */ | |
1278 | static ssize_t hot_add_show(struct class *class, | |
1279 | struct class_attribute *attr, | |
1280 | char *buf) | |
1281 | { | |
1282 | int ret; | |
1283 | ||
1284 | mutex_lock(&zram_index_mutex); | |
1285 | ret = zram_add(); | |
1286 | mutex_unlock(&zram_index_mutex); | |
1287 | ||
1288 | if (ret < 0) | |
1289 | return ret; | |
1290 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
1291 | } | |
1292 | ||
1293 | static ssize_t hot_remove_store(struct class *class, | |
1294 | struct class_attribute *attr, | |
1295 | const char *buf, | |
1296 | size_t count) | |
1297 | { | |
1298 | struct zram *zram; | |
1299 | int ret, dev_id; | |
1300 | ||
1301 | /* dev_id is gendisk->first_minor, which is `int' */ | |
1302 | ret = kstrtoint(buf, 10, &dev_id); | |
1303 | if (ret) | |
1304 | return ret; | |
1305 | if (dev_id < 0) | |
1306 | return -EINVAL; | |
1307 | ||
1308 | mutex_lock(&zram_index_mutex); | |
1309 | ||
1310 | zram = idr_find(&zram_index_idr, dev_id); | |
17ec4cd9 | 1311 | if (zram) { |
6566d1a3 | 1312 | ret = zram_remove(zram); |
529e71e1 TI |
1313 | if (!ret) |
1314 | idr_remove(&zram_index_idr, dev_id); | |
17ec4cd9 | 1315 | } else { |
6566d1a3 | 1316 | ret = -ENODEV; |
17ec4cd9 | 1317 | } |
6566d1a3 SS |
1318 | |
1319 | mutex_unlock(&zram_index_mutex); | |
1320 | return ret ? ret : count; | |
85508ec6 | 1321 | } |
a096cafc | 1322 | |
5c7e9ccd SS |
1323 | /* |
1324 | * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a | |
1325 | * sense that reading from this file does alter the state of your system -- it | |
1326 | * creates a new un-initialized zram device and returns back this device's | |
1327 | * device_id (or an error code if it fails to create a new device). | |
1328 | */ | |
6566d1a3 | 1329 | static struct class_attribute zram_control_class_attrs[] = { |
5c7e9ccd | 1330 | __ATTR(hot_add, 0400, hot_add_show, NULL), |
6566d1a3 SS |
1331 | __ATTR_WO(hot_remove), |
1332 | __ATTR_NULL, | |
1333 | }; | |
1334 | ||
1335 | static struct class zram_control_class = { | |
1336 | .name = "zram-control", | |
1337 | .owner = THIS_MODULE, | |
1338 | .class_attrs = zram_control_class_attrs, | |
1339 | }; | |
1340 | ||
/*
 * idr_for_each() callback: remove the zram device stored in @ptr. The
 * result of zram_remove() is ignored and 0 is always returned.
 */
static int zram_remove_cb(int id, void *ptr, void *data)
{
	struct zram *zram = ptr;

	zram_remove(zram);
	return 0;
}
a096cafc | 1346 | |
85508ec6 SS |
1347 | static void destroy_devices(void) |
1348 | { | |
6566d1a3 | 1349 | class_unregister(&zram_control_class); |
85508ec6 SS |
1350 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
1351 | idr_destroy(&zram_index_idr); | |
a096cafc | 1352 | unregister_blkdev(zram_major, "zram"); |
1dd6c834 | 1353 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
306b0c95 NG |
1354 | } |
1355 | ||
f1e3cfff | 1356 | static int __init zram_init(void) |
306b0c95 | 1357 | { |
92ff1528 | 1358 | int ret; |
306b0c95 | 1359 | |
1dd6c834 AMG |
1360 | ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", |
1361 | zcomp_cpu_up_prepare, zcomp_cpu_dead); | |
1362 | if (ret < 0) | |
1363 | return ret; | |
1364 | ||
6566d1a3 SS |
1365 | ret = class_register(&zram_control_class); |
1366 | if (ret) { | |
70864969 | 1367 | pr_err("Unable to register zram-control class\n"); |
1dd6c834 | 1368 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
6566d1a3 SS |
1369 | return ret; |
1370 | } | |
1371 | ||
f1e3cfff NG |
1372 | zram_major = register_blkdev(0, "zram"); |
1373 | if (zram_major <= 0) { | |
70864969 | 1374 | pr_err("Unable to get major number\n"); |
6566d1a3 | 1375 | class_unregister(&zram_control_class); |
1dd6c834 | 1376 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
a096cafc | 1377 | return -EBUSY; |
306b0c95 NG |
1378 | } |
1379 | ||
92ff1528 | 1380 | while (num_devices != 0) { |
6566d1a3 | 1381 | mutex_lock(&zram_index_mutex); |
92ff1528 | 1382 | ret = zram_add(); |
6566d1a3 | 1383 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 1384 | if (ret < 0) |
a096cafc | 1385 | goto out_error; |
92ff1528 | 1386 | num_devices--; |
de1a21a0 NG |
1387 | } |
1388 | ||
306b0c95 | 1389 | return 0; |
de1a21a0 | 1390 | |
a096cafc | 1391 | out_error: |
85508ec6 | 1392 | destroy_devices(); |
306b0c95 NG |
1393 | return ret; |
1394 | } | |
1395 | ||
f1e3cfff | 1396 | static void __exit zram_exit(void) |
306b0c95 | 1397 | { |
85508ec6 | 1398 | destroy_devices(); |
306b0c95 NG |
1399 | } |
1400 | ||
f1e3cfff NG |
1401 | module_init(zram_init); |
1402 | module_exit(zram_exit); | |
306b0c95 | 1403 | |
9b3bb7ab | 1404 | module_param(num_devices, uint, 0); |
c3cdb40e | 1405 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 1406 | |
306b0c95 NG |
1407 | MODULE_LICENSE("Dual BSD/GPL"); |
1408 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
f1e3cfff | 1409 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |