]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/um/drivers/ubd_kern.c
Linux 4.9
[mirror_ubuntu-artful-kernel.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
1da177e4
LT
20#define UBD_SHIFT 4
21
8ea3c06a
AV
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/ata.h>
26#include <linux/hdreg.h>
27#include <linux/cdrom.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/ctype.h>
31#include <linux/slab.h>
32#include <linux/vmalloc.h>
33#include <linux/platform_device.h>
34#include <linux/scatterlist.h>
35#include <asm/tlbflush.h>
37185b33 36#include <kern_util.h>
1da177e4 37#include "mconsole_kern.h"
37185b33
AV
38#include <init.h>
39#include <irq_kern.h>
8ea3c06a 40#include "ubd.h"
37185b33 41#include <os.h>
1da177e4
LT
42#include "cow.h"
43
805f11a0 44enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
1da177e4
LT
45
46struct io_thread_req {
62f96cb0 47 struct request *req;
91acb21f 48 enum ubd_req op;
1da177e4
LT
49 int fds[2];
50 unsigned long offsets[2];
51 unsigned long long offset;
52 unsigned long length;
53 char *buffer;
54 int sectorsize;
91acb21f
JD
55 unsigned long sector_mask;
56 unsigned long long cow_offset;
57 unsigned long bitmap_words[2];
1da177e4
LT
58 int error;
59};
60
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
71
/*
 * Set bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0] upwards; other bits are untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
82/*End stuff from ubd_user.h*/
83
84#define DRIVER_NAME "uml-blkdev"
85
d7fb2c38 86static DEFINE_MUTEX(ubd_lock);
9a181c58 87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
1da177e4 88
a625c998 89static int ubd_open(struct block_device *bdev, fmode_t mode);
db2a144b 90static void ubd_release(struct gendisk *disk, fmode_t mode);
a625c998 91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4 92 unsigned int cmd, unsigned long arg);
a885c8c4 93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 94
97d88ac8 95#define MAX_DEV (16)
1da177e4 96
83d5cde4 97static const struct block_device_operations ubd_blops = {
1da177e4 98 .owner = THIS_MODULE,
a625c998
AV
99 .open = ubd_open,
100 .release = ubd_release,
101 .ioctl = ubd_ioctl,
a885c8c4 102 .getgeo = ubd_getgeo,
1da177e4
LT
103};
104
1da177e4 105/* Protected by ubd_lock */
792dd4fc 106static int fake_major = UBD_MAJOR;
1da177e4
LT
107static struct gendisk *ubd_gendisk[MAX_DEV];
108static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 109
1da177e4
LT
110#ifdef CONFIG_BLK_DEV_UBD_SYNC
111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
112 .cl = 1 })
113#else
114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
115 .cl = 1 })
116#endif
1da177e4
LT
117static struct openflags global_openflags = OPEN_FLAGS;
118
/* Copy-on-write state of one ubd device; unused when cow.file is NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* size passed to vmalloc() and to the bitmap read */
	unsigned long bitmap_len;
	/* offset the bitmap is read from in the device's own file */
	int bitmap_offset;
	/* filled in together with bitmap_len by cow_sizes()/init_cow_file() */
	int data_offset;
};
129
a0044bdf
JD
130#define MAX_SG 64
131
1da177e4 132struct ubd {
a0044bdf 133 struct list_head restart;
2a9d32f6
PBG
134 /* name (and fd, below) of the file opened for writing, either the
135 * backing or the cow file. */
1da177e4
LT
136 char *file;
137 int count;
138 int fd;
139 __u64 size;
140 struct openflags boot_openflags;
141 struct openflags openflags;
84e945e3
PBG
142 unsigned shared:1;
143 unsigned no_cow:1;
1da177e4
LT
144 struct cow cow;
145 struct platform_device pdev;
62f96cb0
JD
146 struct request_queue *queue;
147 spinlock_t lock;
a0044bdf
JD
148 struct scatterlist sg[MAX_SG];
149 struct request *request;
150 int start_sg, end_sg;
47526903 151 sector_t rq_pos;
1da177e4
LT
152};
153
154#define DEFAULT_COW { \
155 .file = NULL, \
dc764e50
JD
156 .fd = -1, \
157 .bitmap = NULL, \
1da177e4 158 .bitmap_offset = 0, \
dc764e50 159 .data_offset = 0, \
1da177e4
LT
160}
161
162#define DEFAULT_UBD { \
163 .file = NULL, \
164 .count = 0, \
165 .fd = -1, \
166 .size = -1, \
167 .boot_openflags = OPEN_FLAGS, \
168 .openflags = OPEN_FLAGS, \
dc764e50 169 .no_cow = 0, \
6c29256c 170 .shared = 0, \
dc764e50 171 .cow = DEFAULT_COW, \
22e65004 172 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
a0044bdf
JD
173 .request = NULL, \
174 .start_sg = 0, \
175 .end_sg = 0, \
47526903 176 .rq_pos = 0, \
1da177e4
LT
177}
178
b8831a1d 179/* Protected by ubd_lock */
5dc62b1b 180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 181
1da177e4
LT
182/* Only changed by fake_ide_setup which is a setup */
183static int fake_ide = 0;
184static struct proc_dir_entry *proc_ide_root = NULL;
185static struct proc_dir_entry *proc_ide = NULL;
186
187static void make_proc_ide(void)
188{
189 proc_ide_root = proc_mkdir("ide", NULL);
190 proc_ide = proc_mkdir("ide0", proc_ide_root);
191}
192
/* seq_file show callback for the fake "media" entry: always emits "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
198
199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, fake_ide_media_proc_show, NULL);
1da177e4
LT
202}
203
6613c5e8
AD
204static const struct file_operations fake_ide_media_proc_fops = {
205 .owner = THIS_MODULE,
206 .open = fake_ide_media_proc_open,
207 .read = seq_read,
208 .llseek = seq_lseek,
209 .release = single_release,
210};
211
c0a9290e 212static void make_ide_entries(const char *dev_name)
1da177e4
LT
213{
214 struct proc_dir_entry *dir, *ent;
215 char name[64];
216
217 if(proc_ide_root == NULL) make_proc_ide();
218
219 dir = proc_mkdir(dev_name, proc_ide);
220 if(!dir) return;
221
6613c5e8 222 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
1da177e4 223 if(!ent) return;
c0a9290e 224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
dc764e50 231 return 1;
1da177e4
LT
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit designator at *ptr: either a number (as understood by
 * simple_strtoul() with base 0) or a single lower-case letter, where 'a'
 * is unit 0.  On success *ptr is advanced past the designator and the
 * unit is returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		unit = *cur - 'a';
		*ptr = cur + 1;
		return unit;
	}

	return -1;
}
259
d8d7c28e
PBG
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
f28169d2 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 265{
7d314e34 266 struct ubd *ubd_dev;
1da177e4
LT
267 struct openflags flags = global_openflags;
268 char *backing_file;
b8831a1d 269 int n, err = 0, i;
1da177e4
LT
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
1da177e4
LT
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
b8831a1d 280 goto out1;
1da177e4 281 }
b8831a1d
JD
282
283 err = -EINVAL;
1da177e4
LT
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
f28169d2 286 *error_out = "Didn't parse major number";
b8831a1d 287 goto out1;
1da177e4
LT
288 }
289
f28169d2 290 mutex_lock(&ubd_lock);
792dd4fc 291 if (fake_major != UBD_MAJOR) {
f28169d2
JD
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
6c29256c 295
f28169d2 296 fake_major = major;
1da177e4
LT
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
f28169d2
JD
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
1da177e4
LT
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
f28169d2
JD
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
1da177e4
LT
310 }
311 if(n >= MAX_DEV){
f28169d2
JD
312 *error_out = "Device number out of range";
313 return 1;
1da177e4
LT
314 }
315
f28169d2 316 err = -EBUSY;
d7fb2c38 317 mutex_lock(&ubd_lock);
1da177e4 318
7d314e34
PBG
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
f28169d2 321 *error_out = "Device is already configured";
1da177e4
LT
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
f28169d2 328 err = -EINVAL;
6c29256c 329 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
7d314e34 338 ubd_dev->no_cow = 1;
1da177e4 339 break;
6c29256c 340 case 'c':
7d314e34 341 ubd_dev->shared = 1;
6c29256c 342 break;
1da177e4
LT
343 case '=':
344 str++;
345 goto break_loop;
346 default:
f28169d2
JD
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
1da177e4
LT
349 goto out;
350 }
351 str++;
352 }
353
f28169d2
JD
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
1da177e4
LT
358 goto out;
359
360break_loop:
1da177e4
LT
361 backing_file = strchr(str, ',');
362
f28169d2 363 if (backing_file == NULL)
1da177e4 364 backing_file = strchr(str, ':');
1da177e4 365
f28169d2
JD
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
1da177e4
LT
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
f28169d2 376 err = 0;
7d314e34
PBG
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
1da177e4 380out:
d7fb2c38 381 mutex_unlock(&ubd_lock);
f28169d2 382 return err;
1da177e4
LT
383}
384
385static int ubd_setup(char *str)
386{
f28169d2
JD
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
1da177e4
LT
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 406" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
421);
422
/*
 * Command-line handler for options starting with "udb": such an option is
 * assumed to be a misspelt "ubd", so only a warning is printed.
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n",
	       str);

	return 1;
}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
0894e27e
JD
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
436" in the boot output.\n\n"
437);
438
165125e1 439static void do_ubd_request(struct request_queue * q);
91acb21f
JD
440
441/* Only changed by ubd_init, which is an initcall. */
5dc62b1b 442static int thread_fd = -1;
a0044bdf
JD
443static LIST_HEAD(restart);
444
2fe30a34 445/* XXX - move this inside ubd_intr. */
62f96cb0 446/* Called without dev->lock held, and only in interrupt context. */
91acb21f 447static void ubd_handler(void)
1da177e4 448{
2adcec21 449 struct io_thread_req *req;
a0044bdf
JD
450 struct ubd *ubd;
451 struct list_head *list, *next_ele;
452 unsigned long flags;
91acb21f
JD
453 int n;
454
a0044bdf 455 while(1){
a6ea4cce
JD
456 n = os_read_file(thread_fd, &req,
457 sizeof(struct io_thread_req *));
a0044bdf
JD
458 if(n != sizeof(req)){
459 if(n == -EAGAIN)
460 break;
461 printk(KERN_ERR "spurious interrupt in ubd_handler, "
462 "err = %d\n", -n);
463 return;
464 }
62f96cb0 465
4d6c84d9 466 blk_end_request(req->req, 0, req->length);
2adcec21 467 kfree(req);
a0044bdf 468 }
62f96cb0 469 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
470
471 list_for_each_safe(list, next_ele, &restart){
472 ubd = container_of(list, struct ubd, restart);
473 list_del_init(&ubd->restart);
474 spin_lock_irqsave(&ubd->lock, flags);
475 do_ubd_request(ubd->queue);
476 spin_unlock_irqrestore(&ubd->lock, flags);
477 }
1da177e4
LT
478}
479
7bea96fd 480static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 481{
91acb21f 482 ubd_handler();
dc764e50 483 return IRQ_HANDLED;
91acb21f 484}
09ace81c 485
91acb21f
JD
486/* Only changed by ubd_init, which is an initcall. */
487static int io_pid = -1;
09ace81c 488
5dc62b1b 489static void kill_io_thread(void)
91acb21f 490{
6c29256c 491 if(io_pid != -1)
91acb21f 492 os_kill_process(io_pid, 1);
09ace81c 493}
1da177e4 494
91acb21f
JD
495__uml_exitcall(kill_io_thread);
496
d8d7c28e 497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
498{
499 char *file;
85356398
RW
500 int fd;
501 int err;
502
503 __u32 version;
504 __u32 align;
505 char *backing_file;
506 time_t mtime;
507 unsigned long long size;
508 int sector_size;
509 int bitmap_offset;
510
511 if (ubd_dev->file && ubd_dev->cow.file) {
512 file = ubd_dev->cow.file;
513
514 goto out;
515 }
516
d4afcba9 517 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
518 if (fd < 0)
519 return fd;
520
521 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
522 &mtime, &size, &sector_size, &align, &bitmap_offset);
523 os_close_file(fd);
1da177e4 524
85356398
RW
525 if(err == -EINVAL)
526 file = ubd_dev->file;
527 else
528 file = backing_file;
529
530out:
dc764e50 531 return os_file_size(file, size_out);
1da177e4
LT
532}
533
5dc62b1b
WC
/*
 * Read len bytes at the given offset of fd into buf.
 * Returns 0 unless os_pread_file() reports a negative error, which is
 * passed through.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int n = os_pread_file(fd, buf, len, offset);

	return (n < 0) ? n : 0;
}
544
545static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
546{
547 unsigned long modtime;
548 unsigned long long actual;
549 int err;
550
551 err = os_file_modtime(file, &modtime);
552 if (err < 0) {
553 printk(KERN_ERR "Failed to get modification time of backing "
554 "file \"%s\", err = %d\n", file, -err);
555 return err;
556 }
557
558 err = os_file_size(file, &actual);
559 if (err < 0) {
560 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
561 "err = %d\n", file, -err);
562 return err;
563 }
564
565 if (actual != size) {
566 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
567 * the typecast.*/
568 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
569 "vs backing file\n", (unsigned long long) size, actual);
570 return -EINVAL;
571 }
572 if (modtime != mtime) {
573 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
574 "backing file\n", mtime, modtime);
575 return -EINVAL;
576 }
577 return 0;
578}
579
580static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
581{
582 struct uml_stat buf1, buf2;
583 int err;
584
585 if (from_cmdline == NULL)
586 return 0;
587 if (!strcmp(from_cmdline, from_cow))
588 return 0;
589
590 err = os_stat_file(from_cmdline, &buf1);
591 if (err < 0) {
592 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
593 -err);
594 return 0;
595 }
596 err = os_stat_file(from_cow, &buf2);
597 if (err < 0) {
598 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
599 -err);
600 return 1;
601 }
602 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
603 return 0;
604
605 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
606 "\"%s\" specified in COW header of \"%s\"\n",
607 from_cmdline, from_cow, cow);
608 return 1;
609}
610
611static int open_ubd_file(char *file, struct openflags *openflags, int shared,
612 char **backing_file_out, int *bitmap_offset_out,
613 unsigned long *bitmap_len_out, int *data_offset_out,
614 int *create_cow_out)
615{
616 time_t mtime;
617 unsigned long long size;
618 __u32 version, align;
619 char *backing_file;
620 int fd, err, sectorsize, asked_switch, mode = 0644;
621
622 fd = os_open_file(file, *openflags, mode);
623 if (fd < 0) {
624 if ((fd == -ENOENT) && (create_cow_out != NULL))
625 *create_cow_out = 1;
626 if (!openflags->w ||
627 ((fd != -EROFS) && (fd != -EACCES)))
628 return fd;
629 openflags->w = 0;
630 fd = os_open_file(file, *openflags, mode);
631 if (fd < 0)
632 return fd;
633 }
634
635 if (shared)
636 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
637 else {
638 err = os_lock_file(fd, openflags->w);
639 if (err < 0) {
640 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
641 file, -err);
642 goto out_close;
643 }
644 }
645
646 /* Successful return case! */
647 if (backing_file_out == NULL)
648 return fd;
649
650 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
651 &size, &sectorsize, &align, bitmap_offset_out);
652 if (err && (*backing_file_out != NULL)) {
653 printk(KERN_ERR "Failed to read COW header from COW file "
654 "\"%s\", errno = %d\n", file, -err);
655 goto out_close;
656 }
657 if (err)
658 return fd;
659
660 asked_switch = path_requires_switch(*backing_file_out, backing_file,
661 file);
662
663 /* Allow switching only if no mismatch. */
664 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
665 mtime)) {
666 printk(KERN_ERR "Switching backing file to '%s'\n",
667 *backing_file_out);
668 err = write_cow_header(file, fd, *backing_file_out,
669 sectorsize, align, &size);
670 if (err) {
671 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
672 goto out_close;
673 }
674 } else {
675 *backing_file_out = backing_file;
676 err = backing_file_mismatch(*backing_file_out, size, mtime);
677 if (err)
678 goto out_close;
679 }
680
681 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
682 bitmap_len_out, data_offset_out);
683
684 return fd;
685 out_close:
686 os_close_file(fd);
687 return err;
688}
689
690static int create_cow_file(char *cow_file, char *backing_file,
691 struct openflags flags,
692 int sectorsize, int alignment, int *bitmap_offset_out,
693 unsigned long *bitmap_len_out, int *data_offset_out)
694{
695 int err, fd;
696
697 flags.c = 1;
698 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
699 if (fd < 0) {
700 err = fd;
701 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
702 cow_file, -err);
703 goto out;
704 }
705
706 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
707 bitmap_offset_out, bitmap_len_out,
708 data_offset_out);
709 if (!err)
710 return fd;
711 os_close_file(fd);
712 out:
713 return err;
714}
715
5f75a4f8 716static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 717{
7d314e34
PBG
718 os_close_file(ubd_dev->fd);
719 if(ubd_dev->cow.file == NULL)
1da177e4
LT
720 return;
721
7d314e34
PBG
722 os_close_file(ubd_dev->cow.fd);
723 vfree(ubd_dev->cow.bitmap);
724 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
725}
726
7d314e34 727static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
728{
729 struct openflags flags;
730 char **back_ptr;
731 int err, create_cow, *create_ptr;
0bf16bff 732 int fd;
1da177e4 733
7d314e34 734 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 735 create_cow = 0;
7d314e34
PBG
736 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
737 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
738
739 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
740 back_ptr, &ubd_dev->cow.bitmap_offset,
741 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 742 create_ptr);
1da177e4 743
0bf16bff
PBG
744 if((fd == -ENOENT) && create_cow){
745 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
746 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
747 &ubd_dev->cow.bitmap_offset,
748 &ubd_dev->cow.bitmap_len,
749 &ubd_dev->cow.data_offset);
0bf16bff 750 if(fd >= 0){
1da177e4 751 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 752 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
753 }
754 }
755
0bf16bff 756 if(fd < 0){
7d314e34 757 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
758 -fd);
759 return fd;
1da177e4 760 }
0bf16bff 761 ubd_dev->fd = fd;
1da177e4 762
7d314e34 763 if(ubd_dev->cow.file != NULL){
086fa5ff 764 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
f4768ffd 765
1da177e4 766 err = -ENOMEM;
da2486ba 767 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 768 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
769 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
770 goto error;
771 }
772 flush_tlb_kernel_vm();
773
7d314e34
PBG
774 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
775 ubd_dev->cow.bitmap_offset,
776 ubd_dev->cow.bitmap_len);
1da177e4
LT
777 if(err < 0)
778 goto error;
779
7d314e34 780 flags = ubd_dev->openflags;
1da177e4 781 flags.w = 0;
7d314e34 782 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 783 NULL, NULL, NULL, NULL);
1da177e4 784 if(err < 0) goto error;
7d314e34 785 ubd_dev->cow.fd = err;
1da177e4 786 }
dc764e50 787 return 0;
1da177e4 788 error:
7d314e34 789 os_close_file(ubd_dev->fd);
dc764e50 790 return err;
1da177e4
LT
791}
792
2e3f5251
JD
793static void ubd_device_release(struct device *dev)
794{
8691b97b 795 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251
JD
796
797 blk_cleanup_queue(ubd_dev->queue);
798 *ubd_dev = ((struct ubd) DEFAULT_UBD);
799}
800
5f75a4f8 801static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 802 struct gendisk **disk_out)
1da177e4 803{
d72a5783 804 struct device *parent = NULL;
1da177e4 805 struct gendisk *disk;
1da177e4
LT
806
807 disk = alloc_disk(1 << UBD_SHIFT);
808 if(disk == NULL)
dc764e50 809 return -ENOMEM;
1da177e4
LT
810
811 disk->major = major;
812 disk->first_minor = unit << UBD_SHIFT;
813 disk->fops = &ubd_blops;
814 set_capacity(disk, size / 512);
792dd4fc 815 if (major == UBD_MAJOR)
1da177e4 816 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 817 else
1da177e4 818 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
819
820 /* sysfs register (not for ide fake devices) */
792dd4fc 821 if (major == UBD_MAJOR) {
7d314e34
PBG
822 ubd_devs[unit].pdev.id = unit;
823 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251 824 ubd_devs[unit].pdev.dev.release = ubd_device_release;
8691b97b 825 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
7d314e34 826 platform_device_register(&ubd_devs[unit].pdev);
d72a5783 827 parent = &ubd_devs[unit].pdev.dev;
1da177e4
LT
828 }
829
7d314e34 830 disk->private_data = &ubd_devs[unit];
62f96cb0 831 disk->queue = ubd_devs[unit].queue;
d72a5783 832 device_add_disk(parent, disk);
1da177e4
LT
833
834 *disk_out = disk;
835 return 0;
836}
837
/*
 * Round n up to the next multiple of 512.  The mask is built with ~ rather
 * than by left-shifting -1, which is undefined behaviour in C, and the
 * argument is parenthesized so that any expression can be passed.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
839
f28169d2 840static int ubd_add(int n, char **error_out)
1da177e4 841{
7d314e34 842 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 843 int err = 0;
1da177e4 844
7d314e34 845 if(ubd_dev->file == NULL)
ec7cf783 846 goto out;
1da177e4 847
7d314e34 848 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
849 if(err < 0){
850 *error_out = "Couldn't determine size of device's file";
80c13749 851 goto out;
f28169d2 852 }
1da177e4 853
7d314e34 854 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 855
a0044bdf 856 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 857 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 858
62f96cb0
JD
859 err = -ENOMEM;
860 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
861 if (ubd_dev->queue == NULL) {
862 *error_out = "Failed to initialize device queue";
80c13749 863 goto out;
62f96cb0
JD
864 }
865 ubd_dev->queue->queuedata = ubd_dev;
f935a8ce 866 blk_queue_write_cache(ubd_dev->queue, true, false);
62f96cb0 867
8a78362c 868 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
792dd4fc 869 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
62f96cb0
JD
870 if(err){
871 *error_out = "Failed to register device";
872 goto out_cleanup;
873 }
6c29256c 874
792dd4fc 875 if (fake_major != UBD_MAJOR)
5f75a4f8 876 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 877 &fake_gendisk[n]);
1da177e4 878
83380cc1
JD
879 /*
880 * Perhaps this should also be under the "if (fake_major)" above
881 * using the fake_disk->disk_name
882 */
1da177e4
LT
883 if (fake_ide)
884 make_ide_entries(ubd_gendisk[n]->disk_name);
885
ec7cf783 886 err = 0;
ec7cf783
JD
887out:
888 return err;
62f96cb0
JD
889
890out_cleanup:
891 blk_cleanup_queue(ubd_dev->queue);
892 goto out;
1da177e4
LT
893}
894
f28169d2 895static int ubd_config(char *str, char **error_out)
1da177e4 896{
e7f6552f 897 int n, ret;
1da177e4 898
f28169d2
JD
899 /* This string is possibly broken up and stored, so it's only
900 * freed if ubd_setup_common fails, or if only general options
901 * were set.
902 */
970d6e3a 903 str = kstrdup(str, GFP_KERNEL);
e7f6552f 904 if (str == NULL) {
f28169d2
JD
905 *error_out = "Failed to allocate memory";
906 return -ENOMEM;
1da177e4 907 }
f28169d2
JD
908
909 ret = ubd_setup_common(str, &n, error_out);
910 if (ret)
e7f6552f 911 goto err_free;
f28169d2 912
e7f6552f
PBG
913 if (n == -1) {
914 ret = 0;
d8d7c28e 915 goto err_free;
1da177e4 916 }
1da177e4 917
dc764e50 918 mutex_lock(&ubd_lock);
f28169d2 919 ret = ubd_add(n, error_out);
e7f6552f 920 if (ret)
7d314e34 921 ubd_devs[n].file = NULL;
dc764e50 922 mutex_unlock(&ubd_lock);
1da177e4 923
e7f6552f 924out:
dc764e50 925 return ret;
e7f6552f
PBG
926
927err_free:
928 kfree(str);
929 goto out;
1da177e4
LT
930}
931
932static int ubd_get_config(char *name, char *str, int size, char **error_out)
933{
7d314e34 934 struct ubd *ubd_dev;
1da177e4
LT
935 int n, len = 0;
936
937 n = parse_unit(&name);
938 if((n >= MAX_DEV) || (n < 0)){
939 *error_out = "ubd_get_config : device number out of range";
dc764e50 940 return -1;
1da177e4
LT
941 }
942
7d314e34 943 ubd_dev = &ubd_devs[n];
d7fb2c38 944 mutex_lock(&ubd_lock);
1da177e4 945
7d314e34 946 if(ubd_dev->file == NULL){
1da177e4
LT
947 CONFIG_CHUNK(str, size, len, "", 1);
948 goto out;
949 }
950
7d314e34 951 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 952
7d314e34 953 if(ubd_dev->cow.file != NULL){
1da177e4 954 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 955 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
956 }
957 else CONFIG_CHUNK(str, size, len, "", 1);
958
959 out:
d7fb2c38 960 mutex_unlock(&ubd_lock);
dc764e50 961 return len;
1da177e4
LT
962}
963
29d56cfe
JD
964static int ubd_id(char **str, int *start_out, int *end_out)
965{
dc764e50 966 int n;
29d56cfe
JD
967
968 n = parse_unit(str);
dc764e50
JD
969 *start_out = 0;
970 *end_out = MAX_DEV - 1;
971 return n;
29d56cfe
JD
972}
973
f28169d2 974static int ubd_remove(int n, char **error_out)
1da177e4 975{
2e3f5251 976 struct gendisk *disk = ubd_gendisk[n];
7d314e34 977 struct ubd *ubd_dev;
29d56cfe 978 int err = -ENODEV;
1da177e4 979
d7fb2c38 980 mutex_lock(&ubd_lock);
1da177e4 981
7d314e34 982 ubd_dev = &ubd_devs[n];
1da177e4 983
7d314e34 984 if(ubd_dev->file == NULL)
29d56cfe 985 goto out;
1da177e4 986
29d56cfe
JD
987 /* you cannot remove a open disk */
988 err = -EBUSY;
7d314e34 989 if(ubd_dev->count > 0)
1da177e4
LT
990 goto out;
991
dc764e50 992 ubd_gendisk[n] = NULL;
b47d2deb
JD
993 if(disk != NULL){
994 del_gendisk(disk);
995 put_disk(disk);
996 }
1da177e4
LT
997
998 if(fake_gendisk[n] != NULL){
999 del_gendisk(fake_gendisk[n]);
1000 put_disk(fake_gendisk[n]);
1001 fake_gendisk[n] = NULL;
1002 }
1003
1da177e4 1004 err = 0;
2e3f5251 1005 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1006out:
d7fb2c38 1007 mutex_unlock(&ubd_lock);
29d56cfe 1008 return err;
1da177e4
LT
1009}
1010
f28169d2 1011/* All these are called by mconsole in process context and without
b8831a1d 1012 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1013 */
1da177e4 1014static struct mc_device ubd_mc = {
84f48d4f 1015 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1016 .name = "ubd",
1017 .config = ubd_config,
dc764e50 1018 .get_config = ubd_get_config,
29d56cfe 1019 .id = ubd_id,
1da177e4
LT
1020 .remove = ubd_remove,
1021};
1022
d8d7c28e 1023static int __init ubd_mc_init(void)
1da177e4
LT
1024{
1025 mconsole_register_dev(&ubd_mc);
1026 return 0;
1027}
1028
1029__initcall(ubd_mc_init);
1030
d8d7c28e
PBG
1031static int __init ubd0_init(void)
1032{
1033 struct ubd *ubd_dev = &ubd_devs[0];
1034
b8831a1d 1035 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1036 if(ubd_dev->file == NULL)
1037 ubd_dev->file = "root_fs";
b8831a1d
JD
1038 mutex_unlock(&ubd_lock);
1039
dc764e50 1040 return 0;
d8d7c28e
PBG
1041}
1042
1043__initcall(ubd0_init);
1044
b8831a1d 1045/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1046static struct platform_driver ubd_driver = {
1047 .driver = {
1048 .name = DRIVER_NAME,
1049 },
1da177e4
LT
1050};
1051
d8d7c28e 1052static int __init ubd_init(void)
1da177e4 1053{
f28169d2
JD
1054 char *error;
1055 int i, err;
1da177e4 1056
792dd4fc 1057 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1058 return -1;
1059
792dd4fc 1060 if (fake_major != UBD_MAJOR) {
1da177e4
LT
1061 char name[sizeof("ubd_nnn\0")];
1062
1063 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
1064 if (register_blkdev(fake_major, "ubd"))
1065 return -1;
1066 }
3ae5eaec 1067 platform_driver_register(&ubd_driver);
dc764e50 1068 mutex_lock(&ubd_lock);
f28169d2
JD
1069 for (i = 0; i < MAX_DEV; i++){
1070 err = ubd_add(i, &error);
1071 if(err)
1072 printk(KERN_ERR "Failed to initialize ubd device %d :"
1073 "%s\n", i, error);
1074 }
dc764e50 1075 mutex_unlock(&ubd_lock);
1da177e4
LT
1076 return 0;
1077}
1078
1079late_initcall(ubd_init);
1080
d8d7c28e 1081static int __init ubd_driver_init(void){
91acb21f
JD
1082 unsigned long stack;
1083 int err;
1084
1085 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1086 if(global_openflags.s){
1087 printk(KERN_INFO "ubd: Synchronous mode\n");
1088 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1089 * enough. So use anyway the io thread. */
1090 }
1091 stack = alloc_stack(0, 0);
6c29256c 1092 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
1093 &thread_fd);
1094 if(io_pid < 0){
6c29256c 1095 printk(KERN_ERR
91acb21f
JD
1096 "ubd : Failed to start I/O thread (errno = %d) - "
1097 "falling back to synchronous I/O\n", -io_pid);
1098 io_pid = -1;
dc764e50 1099 return 0;
91acb21f 1100 }
6c29256c 1101 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1102 0, "ubd", ubd_devs);
91acb21f
JD
1103 if(err != 0)
1104 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1105 return 0;
91acb21f
JD
1106}
1107
1108device_initcall(ubd_driver_init);
1109
a625c998 1110static int ubd_open(struct block_device *bdev, fmode_t mode)
1da177e4 1111{
a625c998 1112 struct gendisk *disk = bdev->bd_disk;
7d314e34 1113 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
1114 int err = 0;
1115
9a181c58 1116 mutex_lock(&ubd_mutex);
7d314e34
PBG
1117 if(ubd_dev->count == 0){
1118 err = ubd_open_dev(ubd_dev);
1da177e4
LT
1119 if(err){
1120 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 1121 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
1122 goto out;
1123 }
1124 }
7d314e34
PBG
1125 ubd_dev->count++;
1126 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
1127
1128 /* This should no more be needed. And it didn't work anyway to exclude
1129 * read-write remounting of filesystems.*/
a625c998 1130 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 1131 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 1132 err = -EROFS;
2c49be99 1133 }*/
6e9624b8 1134out:
9a181c58 1135 mutex_unlock(&ubd_mutex);
dc764e50 1136 return err;
1da177e4
LT
1137}
1138
db2a144b 1139static void ubd_release(struct gendisk *disk, fmode_t mode)
1da177e4 1140{
7d314e34 1141 struct ubd *ubd_dev = disk->private_data;
1da177e4 1142
9a181c58 1143 mutex_lock(&ubd_mutex);
7d314e34 1144 if(--ubd_dev->count == 0)
5f75a4f8 1145 ubd_close_dev(ubd_dev);
9a181c58 1146 mutex_unlock(&ubd_mutex);
1da177e4
LT
1147}
1148
91acb21f
JD
1149static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1150 __u64 *cow_offset, unsigned long *bitmap,
1151 __u64 bitmap_offset, unsigned long *bitmap_words,
1152 __u64 bitmap_len)
1da177e4 1153{
91acb21f
JD
1154 __u64 sector = io_offset >> 9;
1155 int i, update_bitmap = 0;
1156
1157 for(i = 0; i < length >> 9; i++){
1158 if(cow_mask != NULL)
1159 ubd_set_bit(i, (unsigned char *) cow_mask);
1160 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1161 continue;
1da177e4 1162
91acb21f
JD
1163 update_bitmap = 1;
1164 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1165 }
1166
1167 if(!update_bitmap)
1168 return;
1da177e4 1169
91acb21f 1170 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1171
91acb21f
JD
1172 /* This takes care of the case where we're exactly at the end of the
1173 * device, and *cow_offset + 1 is off the end. So, just back it up
1174 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1175 * for the original diagnosis.
1176 */
6d074242
JO
1177 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1178 sizeof(unsigned long)) - 1))
91acb21f
JD
1179 (*cow_offset)--;
1180
1181 bitmap_words[0] = bitmap[*cow_offset];
1182 bitmap_words[1] = bitmap[*cow_offset + 1];
1183
1184 *cow_offset *= sizeof(unsigned long);
1185 *cow_offset += bitmap_offset;
1186}
1187
1188static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1189 __u64 bitmap_offset, __u64 bitmap_len)
1190{
1191 __u64 sector = req->offset >> 9;
1192 int i;
1193
1194 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1195 panic("Operation too long");
1196
1197 if(req->op == UBD_READ) {
1198 for(i = 0; i < req->length >> 9; i++){
1199 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1200 ubd_set_bit(i, (unsigned char *)
91acb21f 1201 &req->sector_mask);
dc764e50 1202 }
91acb21f
JD
1203 }
1204 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1205 &req->cow_offset, bitmap, bitmap_offset,
1206 req->bitmap_words, bitmap_len);
1da177e4
LT
1207}
1208
62f96cb0 1209/* Called with dev->lock held */
a0044bdf
JD
1210static void prepare_request(struct request *req, struct io_thread_req *io_req,
1211 unsigned long long offset, int page_offset,
1212 int len, struct page *page)
1da177e4
LT
1213{
1214 struct gendisk *disk = req->rq_disk;
7d314e34 1215 struct ubd *ubd_dev = disk->private_data;
91acb21f 1216
62f96cb0 1217 io_req->req = req;
a0044bdf
JD
1218 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1219 ubd_dev->fd;
7d314e34 1220 io_req->fds[1] = ubd_dev->fd;
91acb21f 1221 io_req->cow_offset = -1;
1da177e4
LT
1222 io_req->offset = offset;
1223 io_req->length = len;
1224 io_req->error = 0;
91acb21f
JD
1225 io_req->sector_mask = 0;
1226
1227 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1228 io_req->offsets[0] = 0;
7d314e34 1229 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1230 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1231 io_req->sectorsize = 1 << 9;
1232
7d314e34 1233 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1234 cowify_req(io_req, ubd_dev->cow.bitmap,
1235 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1236
1da177e4
LT
1237}
1238
805f11a0
RW
1239/* Called with dev->lock held */
1240static void prepare_flush_request(struct request *req,
1241 struct io_thread_req *io_req)
1242{
1243 struct gendisk *disk = req->rq_disk;
1244 struct ubd *ubd_dev = disk->private_data;
1245
1246 io_req->req = req;
1247 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1248 ubd_dev->fd;
1249 io_req->op = UBD_FLUSH;
1250}
1251
bc1d72e7
RW
1252static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1253{
1254 int n = os_write_file(thread_fd, &io_req,
1255 sizeof(io_req));
1256 if (n != sizeof(io_req)) {
1257 if (n != -EAGAIN)
1258 printk("write to io thread failed, "
1259 "errno = %d\n", -n);
1260 else if (list_empty(&dev->restart))
1261 list_add(&dev->restart, &restart);
1262
1263 kfree(io_req);
1264 return false;
1265 }
1266 return true;
1267}
1268
62f96cb0 1269/* Called with dev->lock held */
165125e1 1270static void do_ubd_request(struct request_queue *q)
1da177e4 1271{
2adcec21 1272 struct io_thread_req *io_req;
1da177e4 1273 struct request *req;
a0044bdf
JD
1274
1275 while(1){
2a9529a0 1276 struct ubd *dev = q->queuedata;
2a236122 1277 if(dev->request == NULL){
9934c8c0 1278 struct request *req = blk_fetch_request(q);
a0044bdf
JD
1279 if(req == NULL)
1280 return;
1281
1282 dev->request = req;
47526903 1283 dev->rq_pos = blk_rq_pos(req);
a0044bdf
JD
1284 dev->start_sg = 0;
1285 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1286 }
1287
1288 req = dev->request;
805f11a0 1289
3a5e02ce 1290 if (req_op(req) == REQ_OP_FLUSH) {
805f11a0
RW
1291 io_req = kmalloc(sizeof(struct io_thread_req),
1292 GFP_ATOMIC);
1293 if (io_req == NULL) {
1294 if (list_empty(&dev->restart))
1295 list_add(&dev->restart, &restart);
1296 return;
1297 }
1298 prepare_flush_request(req, io_req);
2a236122
TK
1299 if (submit_request(io_req, dev) == false)
1300 return;
805f11a0
RW
1301 }
1302
a0044bdf
JD
1303 while(dev->start_sg < dev->end_sg){
1304 struct scatterlist *sg = &dev->sg[dev->start_sg];
1305
2adcec21 1306 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1307 GFP_ATOMIC);
2adcec21
JD
1308 if(io_req == NULL){
1309 if(list_empty(&dev->restart))
1310 list_add(&dev->restart, &restart);
1311 return;
1312 }
1313 prepare_request(req, io_req,
47526903 1314 (unsigned long long)dev->rq_pos << 9,
45711f1a 1315 sg->offset, sg->length, sg_page(sg));
a0044bdf 1316
bc1d72e7 1317 if (submit_request(io_req, dev) == false)
a0044bdf 1318 return;
a0044bdf 1319
47526903 1320 dev->rq_pos += sg->length >> 9;
a0044bdf 1321 dev->start_sg++;
1da177e4 1322 }
a0044bdf
JD
1323 dev->end_sg = 0;
1324 dev->request = NULL;
1da177e4
LT
1325 }
1326}
1327
a885c8c4
CH
1328static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1329{
7d314e34 1330 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1331
1332 geo->heads = 128;
1333 geo->sectors = 32;
7d314e34 1334 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1335 return 0;
1336}
1337
a625c998 1338static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4
LT
1339 unsigned int cmd, unsigned long arg)
1340{
a625c998 1341 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1342 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1343
1344 switch (cmd) {
1da177e4 1345 struct cdrom_volctrl volume;
1da177e4 1346 case HDIO_GET_IDENTITY:
73855e13
BZ
1347 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1348 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1349 ubd_id[ATA_ID_HEADS] = 128;
1350 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1351 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1352 sizeof(ubd_id)))
dc764e50
JD
1353 return -EFAULT;
1354 return 0;
b8831a1d 1355
1da177e4
LT
1356 case CDROMVOLREAD:
1357 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1358 return -EFAULT;
1da177e4
LT
1359 volume.channel0 = 255;
1360 volume.channel1 = 255;
1361 volume.channel2 = 255;
1362 volume.channel3 = 255;
1363 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1364 return -EFAULT;
1365 return 0;
1da177e4 1366 }
dc764e50 1367 return -EINVAL;
1da177e4
LT
1368}
1369
91acb21f 1370static int update_bitmap(struct io_thread_req *req)
1da177e4 1371{
91acb21f 1372 int n;
1da177e4 1373
91acb21f 1374 if(req->cow_offset == -1)
dc764e50 1375 return 0;
1da177e4 1376
8c6157b6
AI
1377 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1378 sizeof(req->bitmap_words), req->cow_offset);
91acb21f
JD
1379 if(n != sizeof(req->bitmap_words)){
1380 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1381 req->fds[1]);
dc764e50 1382 return 1;
91acb21f 1383 }
1da177e4 1384
dc764e50 1385 return 0;
91acb21f 1386}
1da177e4 1387
5dc62b1b 1388static void do_io(struct io_thread_req *req)
91acb21f
JD
1389{
1390 char *buf;
1391 unsigned long len;
1392 int n, nsectors, start, end, bit;
91acb21f
JD
1393 __u64 off;
1394
805f11a0
RW
1395 if (req->op == UBD_FLUSH) {
1396 /* fds[0] is always either the rw image or our cow file */
1397 n = os_sync_file(req->fds[0]);
1398 if (n != 0) {
1399 printk("do_io - sync failed err = %d "
1400 "fd = %d\n", -n, req->fds[0]);
1401 req->error = 1;
1402 }
1403 return;
1404 }
1405
91acb21f
JD
1406 nsectors = req->length / req->sectorsize;
1407 start = 0;
1408 do {
1409 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1410 end = start;
1411 while((end < nsectors) &&
1412 (ubd_test_bit(end, (unsigned char *)
1413 &req->sector_mask) == bit))
1414 end++;
1415
1416 off = req->offset + req->offsets[bit] +
1417 start * req->sectorsize;
1418 len = (end - start) * req->sectorsize;
1419 buf = &req->buffer[start * req->sectorsize];
1420
91acb21f
JD
1421 if(req->op == UBD_READ){
1422 n = 0;
1423 do {
1424 buf = &buf[n];
1425 len -= n;
8c6157b6 1426 n = os_pread_file(req->fds[bit], buf, len, off);
91acb21f
JD
1427 if (n < 0) {
1428 printk("do_io - read failed, err = %d "
1429 "fd = %d\n", -n, req->fds[bit]);
1430 req->error = 1;
1431 return;
1432 }
1433 } while((n < len) && (n != 0));
1434 if (n < len) memset(&buf[n], 0, len - n);
1435 } else {
8c6157b6 1436 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f
JD
1437 if(n != len){
1438 printk("do_io - write failed err = %d "
1439 "fd = %d\n", -n, req->fds[bit]);
1440 req->error = 1;
1441 return;
1442 }
1443 }
1444
1445 start = end;
1446 } while(start < nsectors);
1da177e4 1447
91acb21f 1448 req->error = update_bitmap(req);
1da177e4 1449}
91acb21f
JD
1450
/*
 * Descriptor on which the I/O thread receives request pointers and sends
 * them back once processed.  Per the original note it is changed in
 * start_io_thread; the only call to that function visible here is made
 * from ubd_driver_init, which runs as a device_initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
91acb21f
JD
1458
1459int io_thread(void *arg)
1460{
2adcec21 1461 struct io_thread_req *req;
91acb21f
JD
1462 int n;
1463
91d44ff8
RW
1464 os_fix_helper_signals();
1465
91acb21f 1466 while(1){
a6ea4cce 1467 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1468 sizeof(struct io_thread_req *));
1469 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1470 if(n < 0)
1471 printk("io_thread - read failed, fd = %d, "
1472 "err = %d\n", kernel_fd, -n);
1473 else {
1474 printk("io_thread - short read, fd = %d, "
1475 "length = %d\n", kernel_fd, n);
1476 }
1477 continue;
1478 }
1479 io_count++;
2adcec21 1480 do_io(req);
a6ea4cce 1481 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1482 sizeof(struct io_thread_req *));
1483 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1484 printk("io_thread - write failed, fd = %d, err = %d\n",
1485 kernel_fd, -n);
1486 }
91acb21f 1487
1b57e9c2
JD
1488 return 0;
1489}