]>
Commit | Line | Data |
---|---|---|
306b0c95 NG |
1 | /* |
2 | * Compressed RAM based swap device | |
3 | * | |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
306b0c95 NG |
5 | * |
6 | * This code is released using a dual license strategy: BSD/GPL | |
7 | * You can choose the licence that better fits your requirements. | |
8 | * | |
9 | * Released under the terms of 3-clause BSD License | |
10 | * Released under the terms of GNU General Public License Version 2.0 | |
11 | * | |
12 | * Project home: http://compcache.googlecode.com | |
13 | */ | |
14 | ||
15 | #define KMSG_COMPONENT "ramzswap" | |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
20 | #include <linux/bitops.h> | |
21 | #include <linux/blkdev.h> | |
22 | #include <linux/buffer_head.h> | |
23 | #include <linux/device.h> | |
24 | #include <linux/genhd.h> | |
25 | #include <linux/highmem.h> | |
5a0e3ad6 | 26 | #include <linux/slab.h> |
306b0c95 | 27 | #include <linux/lzo.h> |
306b0c95 NG |
28 | #include <linux/string.h> |
29 | #include <linux/swap.h> | |
30 | #include <linux/swapops.h> | |
31 | #include <linux/vmalloc.h> | |
306b0c95 NG |
32 | |
33 | #include "ramzswap_drv.h" | |
34 | ||
/* Globals */
static int ramzswap_major;		/* block major, allocated dynamically at init */
static struct ramzswap *devices;	/* array of per-device state, num_devices long */

/* Module params (documentation at end) */
static unsigned int num_devices;	/* how many ramzswapN disks to create */
/* Return non-zero iff @flag is set in the table entry for page @index. */
static int rzs_test_flag(struct ramzswap *rzs, u32 index,
			enum rzs_pageflags flag)
{
	return rzs->table[index].flags & BIT(flag);
}
47 | ||
/* Set @flag in the table entry for page @index. */
static void rzs_set_flag(struct ramzswap *rzs, u32 index,
			enum rzs_pageflags flag)
{
	rzs->table[index].flags |= BIT(flag);
}
53 | ||
/* Clear @flag in the table entry for page @index. */
static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
			enum rzs_pageflags flag)
{
	rzs->table[index].flags &= ~BIT(flag);
}
59 | ||
60 | static int page_zero_filled(void *ptr) | |
61 | { | |
62 | unsigned int pos; | |
63 | unsigned long *page; | |
64 | ||
65 | page = (unsigned long *)ptr; | |
66 | ||
67 | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { | |
68 | if (page[pos]) | |
69 | return 0; | |
70 | } | |
71 | ||
72 | return 1; | |
73 | } | |
74 | ||
306b0c95 NG |
75 | static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) |
76 | { | |
77 | if (!rzs->disksize) { | |
78 | pr_info( | |
79 | "disk size not provided. You can use disksize_kb module " | |
80 | "param to specify size.\nUsing default: (%u%% of RAM).\n", | |
81 | default_disksize_perc_ram | |
82 | ); | |
83 | rzs->disksize = default_disksize_perc_ram * | |
84 | (totalram_bytes / 100); | |
85 | } | |
86 | ||
87 | if (rzs->disksize > 2 * (totalram_bytes)) { | |
88 | pr_info( | |
89 | "There is little point creating a ramzswap of greater than " | |
90 | "twice the size of memory since we expect a 2:1 compression " | |
91 | "ratio. Note that ramzswap uses about 0.1%% of the size of " | |
92 | "the swap device when not in use so a huge ramzswap is " | |
93 | "wasteful.\n" | |
94 | "\tMemory Size: %zu kB\n" | |
95 | "\tSize you selected: %zu kB\n" | |
96 | "Continuing anyway ...\n", | |
97 | totalram_bytes >> 10, rzs->disksize | |
98 | ); | |
99 | } | |
100 | ||
101 | rzs->disksize &= PAGE_MASK; | |
102 | } | |
103 | ||
104 | /* | |
105 | * Swap header (1st page of swap device) contains information | |
97a06382 NG |
106 | * about a swap file/partition. Prepare such a header for the |
107 | * given ramzswap device so that swapon can identify it as a | |
108 | * swap partition. | |
306b0c95 | 109 | */ |
97a06382 | 110 | static void setup_swap_header(struct ramzswap *rzs, union swap_header *s) |
306b0c95 | 111 | { |
97a06382 | 112 | s->info.version = 1; |
306b0c95 | 113 | s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1; |
97a06382 NG |
114 | s->info.nr_badpages = 0; |
115 | memcpy(s->magic.magic, "SWAPSPACE2", 10); | |
306b0c95 NG |
116 | } |
117 | ||
/*
 * Fill @s with a snapshot of device statistics for the RZSIO_GET_STATS
 * ioctl. disksize is always reported; all other fields are populated
 * only when CONFIG_RAMZSWAP_STATS is enabled (otherwise they keep
 * whatever the caller put in @s — callers zero-allocate it).
 */
static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
			struct ramzswap_ioctl_stats *s)
{
	s->disksize = rzs->disksize;

#if defined(CONFIG_RAMZSWAP_STATS)
	{
	struct ramzswap_stats *rs = &rzs->stats;
	size_t succ_writes, mem_used;
	unsigned int good_compress_perc = 0, no_compress_perc = 0;

	/*
	 * Total memory consumed: xv pool usage plus whole pages kept
	 * uncompressed for incompressible data.
	 */
	mem_used = xv_get_total_size_bytes(rzs->mem_pool)
			+ (rs->pages_expand << PAGE_SHIFT);
	succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
			rzs_stat64_read(rzs, &rs->failed_writes);

	/* Percentages are meaningful only after a successful write. */
	if (succ_writes && rs->pages_stored) {
		good_compress_perc = rs->good_compress * 100
					/ rs->pages_stored;
		no_compress_perc = rs->pages_expand * 100
					/ rs->pages_stored;
	}

	s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
	s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
	s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
	s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
	s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
	s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
	s->pages_zero = rs->pages_zero;

	s->good_compress_pct = good_compress_perc;
	s->pages_expand_pct = no_compress_perc;

	s->pages_stored = rs->pages_stored;
	s->pages_used = mem_used >> PAGE_SHIFT;
	s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
	s->compr_data_size = rs->compr_size;
	s->mem_used_total = mem_used;
	}
#endif /* CONFIG_RAMZSWAP_STATS */
}
160 | ||
306b0c95 NG |
/*
 * Release the storage backing table entry @index and update the stats.
 *
 * Three storage cases are handled:
 *  - zero-filled page: no memory was allocated, only the flag and the
 *    pages_zero counter are cleared;
 *  - incompressible page (RZS_UNCOMPRESSED): the whole backing page is
 *    freed;
 *  - normal case: the compressed object is returned to the xv pool.
 */
static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
{
	u32 clen;
	void *obj;

	struct page *page = rzs->table[index].page;
	u32 offset = rzs->table[index].offset;

	if (unlikely(!page)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (rzs_test_flag(rzs, index, RZS_ZERO)) {
			rzs_clear_flag(rzs, index, RZS_ZERO);
			rzs_stat_dec(&rzs->stats.pages_zero);
		}
		return;
	}

	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
		clen = PAGE_SIZE;
		__free_page(page);
		rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
		rzs_stat_dec(&rzs->stats.pages_expand);
		goto out;
	}

	/* Read the stored object size before freeing it from the pool. */
	obj = kmap_atomic(page, KM_USER0) + offset;
	clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
	kunmap_atomic(obj, KM_USER0);

	xv_free(rzs->mem_pool, page, offset);
	if (clen <= PAGE_SIZE / 2)
		rzs_stat_dec(&rzs->stats.good_compress);

out:
	rzs->stats.compr_size -= clen;
	rzs_stat_dec(&rzs->stats.pages_stored);

	rzs->table[index].page = NULL;
	rzs->table[index].offset = 0;
}
204 | ||
205 | static int handle_zero_page(struct bio *bio) | |
206 | { | |
207 | void *user_mem; | |
208 | struct page *page = bio->bi_io_vec[0].bv_page; | |
209 | ||
210 | user_mem = kmap_atomic(page, KM_USER0); | |
211 | memset(user_mem, 0, PAGE_SIZE); | |
212 | kunmap_atomic(user_mem, KM_USER0); | |
213 | ||
30fb8a71 | 214 | flush_dcache_page(page); |
306b0c95 NG |
215 | |
216 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
217 | bio_endio(bio, 0); | |
218 | return 0; | |
219 | } | |
220 | ||
/*
 * Service a read of a page that was stored uncompressed (it did not
 * compress well enough at write time). Copies the stored page directly
 * into the bio's page and completes the bio with success.
 */
static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
{
	u32 index;
	struct page *page;
	unsigned char *user_mem, *cmem;

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	/*
	 * Two simultaneous atomic mappings: destination page in slot
	 * KM_USER0, stored source page in KM_USER1.
	 */
	user_mem = kmap_atomic(page, KM_USER0);
	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
		rzs->table[index].offset;

	memcpy(user_mem, cmem, PAGE_SIZE);
	kunmap_atomic(user_mem, KM_USER0);
	kunmap_atomic(cmem, KM_USER1);

	flush_dcache_page(page);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;
}
244 | ||
306b0c95 NG |
245 | /* |
246 | * Called when request page is not present in ramzswap. | |
97a06382 | 247 | * This is an attempt to read before any previous write |
306b0c95 NG |
248 | * to this location - this happens due to readahead when |
249 | * swap device is read from user-space (e.g. during swapon) | |
250 | */ | |
251 | static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio) | |
252 | { | |
306b0c95 NG |
253 | pr_debug("Read before write on swap device: " |
254 | "sector=%lu, size=%u, offset=%u\n", | |
255 | (ulong)(bio->bi_sector), bio->bi_size, | |
256 | bio->bi_io_vec[0].bv_offset); | |
257 | ||
258 | /* Do nothing. Just return success */ | |
259 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
260 | bio_endio(bio, 0); | |
261 | return 0; | |
262 | } | |
263 | ||
/*
 * Service a READ bio. The target page may be (a) zero-filled,
 * (b) absent (read-before-write, e.g. swapon readahead), (c) stored
 * uncompressed, or (d) LZO-compressed in the xv pool. Completes the
 * bio itself (success or error) and always returns 0.
 */
static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
{
	int ret;
	u32 index;
	size_t clen;
	struct page *page;
	struct zobj_header *zheader;
	unsigned char *user_mem, *cmem;

	rzs_stat64_inc(rzs, &rzs->stats.num_reads);

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	if (rzs_test_flag(rzs, index, RZS_ZERO))
		return handle_zero_page(bio);

	/* Requested page is not present in compressed area */
	if (!rzs->table[index].page)
		return handle_ramzswap_fault(rzs, bio);

	/* Page is stored uncompressed since it's incompressible */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
		return handle_uncompressed_page(rzs, bio);

	user_mem = kmap_atomic(page, KM_USER0);
	clen = PAGE_SIZE;

	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
		rzs->table[index].offset;

	/* Compressed data is preceded by a zobj_header; skip it. */
	ret = lzo1x_decompress_safe(
		cmem + sizeof(*zheader),
		xv_get_object_size(cmem) - sizeof(*zheader),
		user_mem, &clen);

	kunmap_atomic(user_mem, KM_USER0);
	kunmap_atomic(cmem, KM_USER1);

	/* should NEVER happen */
	if (unlikely(ret != LZO_E_OK)) {
		pr_err("Decompression failed! err=%d, page=%u\n",
			ret, index);
		rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
		goto out;
	}

	flush_dcache_page(page);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;

out:
	bio_io_error(bio);
	return 0;
}
321 | ||
/*
 * Service a WRITE bio. The page is stored as one of:
 *  - nothing at all, if it is zero-filled (only a flag is set);
 *  - a whole uncompressed page, if LZO output exceeds max_zpage_size;
 *  - an LZO-compressed object in the xv pool, otherwise.
 *
 * rzs->lock serializes writers and protects compress_buffer/workmem.
 * Completes the bio itself (success or error) and always returns 0.
 */
static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
{
	int ret;
	u32 offset, index;
	size_t clen;
	struct zobj_header *zheader;
	struct page *page, *page_store;
	unsigned char *user_mem, *cmem, *src;

	rzs_stat64_inc(rzs, &rzs->stats.num_writes);

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	src = rzs->compress_buffer;

	/*
	 * System swaps to same sector again when the stored page
	 * is no longer referenced by any process. So, its now safe
	 * to free the memory that was allocated for this page.
	 */
	if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
		ramzswap_free_page(rzs, index);

	mutex_lock(&rzs->lock);

	user_mem = kmap_atomic(page, KM_USER0);
	if (page_zero_filled(user_mem)) {
		/* Zero page: record the flag only, no storage needed. */
		kunmap_atomic(user_mem, KM_USER0);
		mutex_unlock(&rzs->lock);
		rzs_stat_inc(&rzs->stats.pages_zero);
		rzs_set_flag(rzs, index, RZS_ZERO);

		set_bit(BIO_UPTODATE, &bio->bi_flags);
		bio_endio(bio, 0);
		return 0;
	}

	ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
				rzs->compress_workmem);

	kunmap_atomic(user_mem, KM_USER0);

	if (unlikely(ret != LZO_E_OK)) {
		mutex_unlock(&rzs->lock);
		pr_err("Compression failed! err=%d\n", ret);
		rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
		goto out;
	}

	/*
	 * Page is incompressible. Store it as-is (uncompressed)
	 * since we do not want to return too many swap write
	 * errors which has side effect of hanging the system.
	 */
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
		if (unlikely(!page_store)) {
			mutex_unlock(&rzs->lock);
			pr_info("Error allocating memory for incompressible "
				"page: %u\n", index);
			rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
			goto out;
		}

		offset = 0;
		rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
		rzs_stat_inc(&rzs->stats.pages_expand);
		rzs->table[index].page = page_store;
		/* Source becomes the original page, mapped in KM_USER0. */
		src = kmap_atomic(page, KM_USER0);
		goto memstore;
	}

	if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
			&rzs->table[index].page, &offset,
			GFP_NOIO | __GFP_HIGHMEM)) {
		mutex_unlock(&rzs->lock);
		pr_info("Error allocating memory for compressed "
			"page: %u, size=%zu\n", index, clen);
		rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
		goto out;
	}

memstore:
	rzs->table[index].offset = offset;

	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
		rzs->table[index].offset;

#if 0
	/* Back-reference needed for memory defragmentation */
	if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
		zheader = (struct zobj_header *)cmem;
		zheader->table_idx = index;
		cmem += sizeof(*zheader);
	}
#endif

	memcpy(cmem, src, clen);

	kunmap_atomic(cmem, KM_USER1);
	/* Uncompressed path mapped the source page above; unmap it. */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
		kunmap_atomic(src, KM_USER0);

	/* Update stats */
	rzs->stats.compr_size += clen;
	rzs_stat_inc(&rzs->stats.pages_stored);
	if (clen <= PAGE_SIZE / 2)
		rzs_stat_inc(&rzs->stats.good_compress);

	mutex_unlock(&rzs->lock);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;

out:
	bio_io_error(bio);
	return 0;
}
443 | ||
306b0c95 NG |
444 | /* |
445 | * Check if request is within bounds and page aligned. | |
446 | */ | |
447 | static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio) | |
448 | { | |
449 | if (unlikely( | |
450 | (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) || | |
451 | (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || | |
452 | (bio->bi_vcnt != 1) || | |
453 | (bio->bi_size != PAGE_SIZE) || | |
454 | (bio->bi_io_vec[0].bv_offset != 0))) { | |
455 | ||
456 | return 0; | |
457 | } | |
458 | ||
459 | /* swap request is valid */ | |
460 | return 1; | |
461 | } | |
462 | ||
463 | /* | |
464 | * Handler function for all ramzswap I/O requests. | |
465 | */ | |
466 | static int ramzswap_make_request(struct request_queue *queue, struct bio *bio) | |
467 | { | |
468 | int ret = 0; | |
469 | struct ramzswap *rzs = queue->queuedata; | |
470 | ||
471 | if (unlikely(!rzs->init_done)) { | |
472 | bio_io_error(bio); | |
473 | return 0; | |
474 | } | |
475 | ||
476 | if (!valid_swap_request(rzs, bio)) { | |
6a907728 | 477 | rzs_stat64_inc(rzs, &rzs->stats.invalid_io); |
306b0c95 NG |
478 | bio_io_error(bio); |
479 | return 0; | |
480 | } | |
481 | ||
482 | switch (bio_data_dir(bio)) { | |
483 | case READ: | |
484 | ret = ramzswap_read(rzs, bio); | |
485 | break; | |
486 | ||
487 | case WRITE: | |
488 | ret = ramzswap_write(rzs, bio); | |
489 | break; | |
490 | } | |
491 | ||
492 | return ret; | |
493 | } | |
494 | ||
/*
 * Return the device to its pristine, uninitialized state: free the
 * compression buffers, every stored page, the mapping table and the
 * xv pool, and clear the stats. Also used to unwind a partially
 * failed init (note: on table-allocation failure disksize was set to
 * 0, so the page-freeing loop is safely skipped; vfree(NULL) and
 * kfree(NULL) are no-ops).
 */
static void reset_device(struct ramzswap *rzs)
{
	size_t index;

	/* Do not accept any new I/O request */
	rzs->init_done = 0;

	/* Free various per-device buffers */
	kfree(rzs->compress_workmem);
	free_pages((unsigned long)rzs->compress_buffer, 1);

	rzs->compress_workmem = NULL;
	rzs->compress_buffer = NULL;

	/* Free all pages that are still in this ramzswap device */
	for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) {
		struct page *page;
		u16 offset;

		page = rzs->table[index].page;
		offset = rzs->table[index].offset;

		if (!page)
			continue;

		if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
			__free_page(page);
		else
			xv_free(rzs->mem_pool, page, offset);
	}

	vfree(rzs->table);
	rzs->table = NULL;

	xv_destroy_pool(rzs->mem_pool);
	rzs->mem_pool = NULL;

	/* Reset stats */
	memset(&rzs->stats, 0, sizeof(rzs->stats));

	rzs->disksize = 0;
}
537 | ||
538 | static int ramzswap_ioctl_init_device(struct ramzswap *rzs) | |
539 | { | |
540 | int ret; | |
541 | size_t num_pages; | |
542 | struct page *page; | |
543 | union swap_header *swap_header; | |
544 | ||
545 | if (rzs->init_done) { | |
546 | pr_info("Device already initialized!\n"); | |
547 | return -EBUSY; | |
548 | } | |
549 | ||
97a06382 | 550 | ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT); |
306b0c95 NG |
551 | |
552 | rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); | |
553 | if (!rzs->compress_workmem) { | |
554 | pr_err("Error allocating compressor working memory!\n"); | |
555 | ret = -ENOMEM; | |
556 | goto fail; | |
557 | } | |
558 | ||
559 | rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); | |
560 | if (!rzs->compress_buffer) { | |
561 | pr_err("Error allocating compressor buffer space\n"); | |
562 | ret = -ENOMEM; | |
563 | goto fail; | |
564 | } | |
565 | ||
566 | num_pages = rzs->disksize >> PAGE_SHIFT; | |
567 | rzs->table = vmalloc(num_pages * sizeof(*rzs->table)); | |
568 | if (!rzs->table) { | |
569 | pr_err("Error allocating ramzswap address table\n"); | |
570 | /* To prevent accessing table entries during cleanup */ | |
571 | rzs->disksize = 0; | |
572 | ret = -ENOMEM; | |
573 | goto fail; | |
574 | } | |
575 | memset(rzs->table, 0, num_pages * sizeof(*rzs->table)); | |
576 | ||
306b0c95 NG |
577 | page = alloc_page(__GFP_ZERO); |
578 | if (!page) { | |
579 | pr_err("Error allocating swap header page\n"); | |
580 | ret = -ENOMEM; | |
581 | goto fail; | |
582 | } | |
583 | rzs->table[0].page = page; | |
584 | rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED); | |
585 | ||
586 | swap_header = kmap(page); | |
97a06382 | 587 | setup_swap_header(rzs, swap_header); |
306b0c95 | 588 | kunmap(page); |
306b0c95 NG |
589 | |
590 | set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT); | |
591 | ||
97a06382 NG |
592 | /* ramzswap devices sort of resembles non-rotational disks */ |
593 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue); | |
306b0c95 NG |
594 | |
595 | rzs->mem_pool = xv_create_pool(); | |
596 | if (!rzs->mem_pool) { | |
597 | pr_err("Error creating memory pool\n"); | |
598 | ret = -ENOMEM; | |
599 | goto fail; | |
600 | } | |
601 | ||
306b0c95 NG |
602 | rzs->init_done = 1; |
603 | ||
604 | pr_debug("Initialization done!\n"); | |
605 | return 0; | |
606 | ||
607 | fail: | |
608 | reset_device(rzs); | |
609 | ||
610 | pr_err("Initialization failed: err=%d\n", ret); | |
611 | return ret; | |
612 | } | |
613 | ||
614 | static int ramzswap_ioctl_reset_device(struct ramzswap *rzs) | |
615 | { | |
616 | if (rzs->init_done) | |
617 | reset_device(rzs); | |
618 | ||
619 | return 0; | |
620 | } | |
621 | ||
622 | static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode, | |
623 | unsigned int cmd, unsigned long arg) | |
624 | { | |
625 | int ret = 0; | |
97a06382 | 626 | size_t disksize_kb; |
306b0c95 NG |
627 | |
628 | struct ramzswap *rzs = bdev->bd_disk->private_data; | |
629 | ||
630 | switch (cmd) { | |
631 | case RZSIO_SET_DISKSIZE_KB: | |
632 | if (rzs->init_done) { | |
633 | ret = -EBUSY; | |
634 | goto out; | |
635 | } | |
636 | if (copy_from_user(&disksize_kb, (void *)arg, | |
637 | _IOC_SIZE(cmd))) { | |
638 | ret = -EFAULT; | |
639 | goto out; | |
640 | } | |
641 | rzs->disksize = disksize_kb << 10; | |
642 | pr_info("Disk size set to %zu kB\n", disksize_kb); | |
643 | break; | |
644 | ||
306b0c95 NG |
645 | case RZSIO_GET_STATS: |
646 | { | |
647 | struct ramzswap_ioctl_stats *stats; | |
648 | if (!rzs->init_done) { | |
649 | ret = -ENOTTY; | |
650 | goto out; | |
651 | } | |
652 | stats = kzalloc(sizeof(*stats), GFP_KERNEL); | |
653 | if (!stats) { | |
654 | ret = -ENOMEM; | |
655 | goto out; | |
656 | } | |
657 | ramzswap_ioctl_get_stats(rzs, stats); | |
658 | if (copy_to_user((void *)arg, stats, sizeof(*stats))) { | |
659 | kfree(stats); | |
660 | ret = -EFAULT; | |
661 | goto out; | |
662 | } | |
663 | kfree(stats); | |
664 | break; | |
665 | } | |
666 | case RZSIO_INIT: | |
667 | ret = ramzswap_ioctl_init_device(rzs); | |
668 | break; | |
669 | ||
670 | case RZSIO_RESET: | |
671 | /* Do not reset an active device! */ | |
672 | if (bdev->bd_holders) { | |
673 | ret = -EBUSY; | |
674 | goto out; | |
675 | } | |
7eef7533 NG |
676 | |
677 | /* Make sure all pending I/O is finished */ | |
678 | if (bdev) | |
679 | fsync_bdev(bdev); | |
680 | ||
306b0c95 NG |
681 | ret = ramzswap_ioctl_reset_device(rzs); |
682 | break; | |
683 | ||
684 | default: | |
685 | pr_info("Invalid ioctl %u\n", cmd); | |
686 | ret = -ENOTTY; | |
687 | } | |
688 | ||
689 | out: | |
690 | return ret; | |
691 | } | |
692 | ||
693 | static struct block_device_operations ramzswap_devops = { | |
694 | .ioctl = ramzswap_ioctl, | |
695 | .owner = THIS_MODULE, | |
696 | }; | |
697 | ||
/*
 * Allocate and register the request queue and gendisk for one
 * ramzswap device. Capacity starts at zero and becomes real only
 * after the RZSIO_SET_DISKSIZE_KB / RZSIO_INIT ioctls.
 *
 * Returns 0 on success, -ENOMEM on allocation failure (the queue is
 * cleaned up if disk allocation fails after it).
 */
static int create_device(struct ramzswap *rzs, int device_id)
{
	int ret = 0;

	mutex_init(&rzs->lock);
	spin_lock_init(&rzs->stat64_lock);

	rzs->queue = blk_alloc_queue(GFP_KERNEL);
	if (!rzs->queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out;
	}

	blk_queue_make_request(rzs->queue, ramzswap_make_request);
	rzs->queue->queuedata = rzs;

	/* gendisk structure */
	rzs->disk = alloc_disk(1);
	if (!rzs->disk) {
		blk_cleanup_queue(rzs->queue);
		pr_warning("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out;
	}

	rzs->disk->major = ramzswap_major;
	rzs->disk->first_minor = device_id;
	rzs->disk->fops = &ramzswap_devops;
	rzs->disk->queue = rzs->queue;
	rzs->disk->private_data = rzs;
	snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);

	/* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
	set_capacity(rzs->disk, 0);

	/* All I/O on this device is done in whole pages. */
	blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);

	add_disk(rzs->disk);

	rzs->init_done = 0;

out:
	return ret;
}
746 | ||
747 | static void destroy_device(struct ramzswap *rzs) | |
748 | { | |
749 | if (rzs->disk) { | |
750 | del_gendisk(rzs->disk); | |
751 | put_disk(rzs->disk); | |
752 | } | |
753 | ||
754 | if (rzs->queue) | |
755 | blk_cleanup_queue(rzs->queue); | |
756 | } | |
757 | ||
/*
 * Module init: validate num_devices, register the block major, then
 * allocate and create each device. On failure, already-created
 * devices are destroyed and the major is unregistered via the goto
 * unwind chain (free_devices -> unregister -> out).
 */
static int __init ramzswap_init(void)
{
	int ret, dev_id;

	if (num_devices > max_num_devices) {
		pr_warning("Invalid value for num_devices: %u\n",
				num_devices);
		ret = -EINVAL;
		goto out;
	}

	ramzswap_major = register_blkdev(0, "ramzswap");
	if (ramzswap_major <= 0) {
		pr_warning("Unable to get major number\n");
		ret = -EBUSY;
		goto out;
	}

	if (!num_devices) {
		pr_info("num_devices not specified. Using default: 1\n");
		num_devices = 1;
	}

	/* Allocate the device array and initialize each one */
	pr_info("Creating %u devices ...\n", num_devices);
	devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
	if (!devices) {
		ret = -ENOMEM;
		goto unregister;
	}

	for (dev_id = 0; dev_id < num_devices; dev_id++) {
		ret = create_device(&devices[dev_id], dev_id);
		if (ret)
			goto free_devices;
	}

	return 0;

free_devices:
	/* Destroy only the devices created before the failure. */
	while (dev_id)
		destroy_device(&devices[--dev_id]);
unregister:
	unregister_blkdev(ramzswap_major, "ramzswap");
out:
	return ret;
}
805 | ||
806 | static void __exit ramzswap_exit(void) | |
807 | { | |
808 | int i; | |
809 | struct ramzswap *rzs; | |
810 | ||
811 | for (i = 0; i < num_devices; i++) { | |
812 | rzs = &devices[i]; | |
813 | ||
814 | destroy_device(rzs); | |
815 | if (rzs->init_done) | |
816 | reset_device(rzs); | |
817 | } | |
818 | ||
819 | unregister_blkdev(ramzswap_major, "ramzswap"); | |
820 | ||
821 | kfree(devices); | |
822 | pr_debug("Cleanup done!\n"); | |
823 | } | |
824 | ||
/* Read-only module parameter: number of ramzswap devices to create. */
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");

module_init(ramzswap_init);
module_exit(ramzswap_exit);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Based Swap Device");