]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/um/drivers/ubd_kern.c
block, drivers: add REQ_OP_FLUSH operation
[mirror_ubuntu-hirsute-kernel.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
1da177e4
LT
20#define UBD_SHIFT 4
21
8ea3c06a
AV
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/ata.h>
26#include <linux/hdreg.h>
27#include <linux/cdrom.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/ctype.h>
31#include <linux/slab.h>
32#include <linux/vmalloc.h>
33#include <linux/platform_device.h>
34#include <linux/scatterlist.h>
35#include <asm/tlbflush.h>
37185b33 36#include <kern_util.h>
1da177e4 37#include "mconsole_kern.h"
37185b33
AV
38#include <init.h>
39#include <irq_kern.h>
8ea3c06a 40#include "ubd.h"
37185b33 41#include <os.h>
1da177e4
LT
42#include "cow.h"
43
/* Operation carried by an io_thread_req. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_FLUSH,
};
1da177e4
LT
45
46struct io_thread_req {
62f96cb0 47 struct request *req;
91acb21f 48 enum ubd_req op;
1da177e4
LT
49 int fds[2];
50 unsigned long offsets[2];
51 unsigned long long offset;
52 unsigned long length;
53 char *buffer;
54 int sectorsize;
91acb21f
JD
55 unsigned long sector_mask;
56 unsigned long long cow_offset;
57 unsigned long bitmap_words[2];
1da177e4
LT
58 int error;
59};
60
/*
 * Test a single bit in a byte-addressed bitmap.
 *
 * @bit:  zero-based bit number; bit 0 is the least significant bit of data[0]
 * @data: bitmap storage, only read by this function
 *
 * Returns 1 if the bit is set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, const unsigned char *data)
{
	const int bits = sizeof(data[0]) * 8;	/* bits per bitmap element */
	__u64 n = bit / bits;			/* element holding the bit */
	int off = bit % bits;			/* position inside that element */

	return (data[n] & (1 << off)) != 0;
}
71
/*
 * Set a single bit in a byte-addressed bitmap.
 *
 * @bit:  zero-based bit number; bit 0 is the least significant bit of data[0]
 * @data: bitmap storage, modified in place
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
82/*End stuff from ubd_user.h*/
83
84#define DRIVER_NAME "uml-blkdev"
85
d7fb2c38 86static DEFINE_MUTEX(ubd_lock);
9a181c58 87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
1da177e4 88
a625c998 89static int ubd_open(struct block_device *bdev, fmode_t mode);
db2a144b 90static void ubd_release(struct gendisk *disk, fmode_t mode);
a625c998 91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4 92 unsigned int cmd, unsigned long arg);
a885c8c4 93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 94
97d88ac8 95#define MAX_DEV (16)
1da177e4 96
83d5cde4 97static const struct block_device_operations ubd_blops = {
1da177e4 98 .owner = THIS_MODULE,
a625c998
AV
99 .open = ubd_open,
100 .release = ubd_release,
101 .ioctl = ubd_ioctl,
a885c8c4 102 .getgeo = ubd_getgeo,
1da177e4
LT
103};
104
1da177e4 105/* Protected by ubd_lock */
792dd4fc 106static int fake_major = UBD_MAJOR;
1da177e4
LT
107static struct gendisk *ubd_gendisk[MAX_DEV];
108static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 109
1da177e4
LT
110#ifdef CONFIG_BLK_DEV_UBD_SYNC
111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
112 .cl = 1 })
113#else
114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
115 .cl = 1 })
116#endif
1da177e4
LT
117static struct openflags global_openflags = OPEN_FLAGS;
118
/* Copy-on-write state attached to a ubd device. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed by ubd_open_dev() */
	unsigned long bitmap_len;	/* size handed to vmalloc() and the bitmap read */
	int bitmap_offset;		/* file offset the bitmap is read from */
	int data_offset;
};
129
a0044bdf
JD
130#define MAX_SG 64
131
1da177e4 132struct ubd {
a0044bdf 133 struct list_head restart;
2a9d32f6
PBG
134 /* name (and fd, below) of the file opened for writing, either the
135 * backing or the cow file. */
1da177e4
LT
136 char *file;
137 int count;
138 int fd;
139 __u64 size;
140 struct openflags boot_openflags;
141 struct openflags openflags;
84e945e3
PBG
142 unsigned shared:1;
143 unsigned no_cow:1;
1da177e4
LT
144 struct cow cow;
145 struct platform_device pdev;
62f96cb0
JD
146 struct request_queue *queue;
147 spinlock_t lock;
a0044bdf
JD
148 struct scatterlist sg[MAX_SG];
149 struct request *request;
150 int start_sg, end_sg;
47526903 151 sector_t rq_pos;
1da177e4
LT
152};
153
/* Initializer for an unconfigured struct cow. */
#define DEFAULT_COW {			\
	.file		= NULL,		\
	.fd		= -1,		\
	.bitmap		= NULL,		\
	.bitmap_offset	= 0,		\
	.data_offset	= 0,		\
}
161
/* Initializer for an unconfigured struct ubd (also used to reset one). */
#define DEFAULT_UBD {							\
	.file		= NULL,						\
	.count		= 0,						\
	.fd		= -1,						\
	.size		= -1,						\
	.boot_openflags	= OPEN_FLAGS,					\
	.openflags	= OPEN_FLAGS,					\
	.no_cow		= 0,						\
	.shared		= 0,						\
	.cow		= DEFAULT_COW,					\
	.lock		= __SPIN_LOCK_UNLOCKED(ubd_devs.lock),		\
	.request	= NULL,						\
	.start_sg	= 0,						\
	.end_sg		= 0,						\
	.rq_pos		= 0,						\
}
178
b8831a1d 179/* Protected by ubd_lock */
5dc62b1b 180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 181
1da177e4
LT
182/* Only changed by fake_ide_setup which is a setup */
183static int fake_ide = 0;
184static struct proc_dir_entry *proc_ide_root = NULL;
185static struct proc_dir_entry *proc_ide = NULL;
186
187static void make_proc_ide(void)
188{
189 proc_ide_root = proc_mkdir("ide", NULL);
190 proc_ide = proc_mkdir("ide0", proc_ide_root);
191}
192
6613c5e8 193static int fake_ide_media_proc_show(struct seq_file *m, void *v)
1da177e4 194{
6613c5e8
AD
195 seq_puts(m, "disk\n");
196 return 0;
197}
198
199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, fake_ide_media_proc_show, NULL);
1da177e4
LT
202}
203
6613c5e8
AD
204static const struct file_operations fake_ide_media_proc_fops = {
205 .owner = THIS_MODULE,
206 .open = fake_ide_media_proc_open,
207 .read = seq_read,
208 .llseek = seq_lseek,
209 .release = single_release,
210};
211
c0a9290e 212static void make_ide_entries(const char *dev_name)
1da177e4
LT
213{
214 struct proc_dir_entry *dir, *ent;
215 char name[64];
216
217 if(proc_ide_root == NULL) make_proc_ide();
218
219 dir = proc_mkdir(dev_name, proc_ide);
220 if(!dir) return;
221
6613c5e8 222 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
1da177e4 223 if(!ent) return;
c0a9290e 224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
dc764e50 231 return 1;
1da177e4
LT
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit designator at *ptr: either a number (any base understood by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being 0.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned; otherwise -1 is returned and *ptr is left untouched.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
259
d8d7c28e
PBG
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
f28169d2 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 265{
7d314e34 266 struct ubd *ubd_dev;
1da177e4
LT
267 struct openflags flags = global_openflags;
268 char *backing_file;
b8831a1d 269 int n, err = 0, i;
1da177e4
LT
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
1da177e4
LT
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
b8831a1d 280 goto out1;
1da177e4 281 }
b8831a1d
JD
282
283 err = -EINVAL;
1da177e4
LT
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
f28169d2 286 *error_out = "Didn't parse major number";
b8831a1d 287 goto out1;
1da177e4
LT
288 }
289
f28169d2 290 mutex_lock(&ubd_lock);
792dd4fc 291 if (fake_major != UBD_MAJOR) {
f28169d2
JD
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
6c29256c 295
f28169d2 296 fake_major = major;
1da177e4
LT
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
f28169d2
JD
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
1da177e4
LT
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
f28169d2
JD
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
1da177e4
LT
310 }
311 if(n >= MAX_DEV){
f28169d2
JD
312 *error_out = "Device number out of range";
313 return 1;
1da177e4
LT
314 }
315
f28169d2 316 err = -EBUSY;
d7fb2c38 317 mutex_lock(&ubd_lock);
1da177e4 318
7d314e34
PBG
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
f28169d2 321 *error_out = "Device is already configured";
1da177e4
LT
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
f28169d2 328 err = -EINVAL;
6c29256c 329 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
7d314e34 338 ubd_dev->no_cow = 1;
1da177e4 339 break;
6c29256c 340 case 'c':
7d314e34 341 ubd_dev->shared = 1;
6c29256c 342 break;
1da177e4
LT
343 case '=':
344 str++;
345 goto break_loop;
346 default:
f28169d2
JD
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
1da177e4
LT
349 goto out;
350 }
351 str++;
352 }
353
f28169d2
JD
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
1da177e4
LT
358 goto out;
359
360break_loop:
1da177e4
LT
361 backing_file = strchr(str, ',');
362
f28169d2 363 if (backing_file == NULL)
1da177e4 364 backing_file = strchr(str, ':');
1da177e4 365
f28169d2
JD
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
1da177e4
LT
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
f28169d2 376 err = 0;
7d314e34
PBG
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
1da177e4 380out:
d7fb2c38 381 mutex_unlock(&ubd_lock);
f28169d2 382 return err;
1da177e4
LT
383}
384
385static int ubd_setup(char *str)
386{
f28169d2
JD
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
1da177e4
LT
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 406" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
421);
422
/* Catches the common "udb" misspelling of "ubd" and complains about it. */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);

	return 1;
}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
0894e27e
JD
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
436" in the boot output.\n\n"
437);
438
165125e1 439static void do_ubd_request(struct request_queue * q);
91acb21f
JD
440
441/* Only changed by ubd_init, which is an initcall. */
5dc62b1b 442static int thread_fd = -1;
a0044bdf
JD
443static LIST_HEAD(restart);
444
2fe30a34 445/* XXX - move this inside ubd_intr. */
62f96cb0 446/* Called without dev->lock held, and only in interrupt context. */
91acb21f 447static void ubd_handler(void)
1da177e4 448{
2adcec21 449 struct io_thread_req *req;
a0044bdf
JD
450 struct ubd *ubd;
451 struct list_head *list, *next_ele;
452 unsigned long flags;
91acb21f
JD
453 int n;
454
a0044bdf 455 while(1){
a6ea4cce
JD
456 n = os_read_file(thread_fd, &req,
457 sizeof(struct io_thread_req *));
a0044bdf
JD
458 if(n != sizeof(req)){
459 if(n == -EAGAIN)
460 break;
461 printk(KERN_ERR "spurious interrupt in ubd_handler, "
462 "err = %d\n", -n);
463 return;
464 }
62f96cb0 465
4d6c84d9 466 blk_end_request(req->req, 0, req->length);
2adcec21 467 kfree(req);
a0044bdf 468 }
62f96cb0 469 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
470
471 list_for_each_safe(list, next_ele, &restart){
472 ubd = container_of(list, struct ubd, restart);
473 list_del_init(&ubd->restart);
474 spin_lock_irqsave(&ubd->lock, flags);
475 do_ubd_request(ubd->queue);
476 spin_unlock_irqrestore(&ubd->lock, flags);
477 }
1da177e4
LT
478}
479
7bea96fd 480static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 481{
91acb21f 482 ubd_handler();
dc764e50 483 return IRQ_HANDLED;
91acb21f 484}
09ace81c 485
91acb21f
JD
486/* Only changed by ubd_init, which is an initcall. */
487static int io_pid = -1;
09ace81c 488
5dc62b1b 489static void kill_io_thread(void)
91acb21f 490{
6c29256c 491 if(io_pid != -1)
91acb21f 492 os_kill_process(io_pid, 1);
09ace81c 493}
1da177e4 494
91acb21f
JD
495__uml_exitcall(kill_io_thread);
496
d8d7c28e 497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
498{
499 char *file;
85356398
RW
500 int fd;
501 int err;
502
503 __u32 version;
504 __u32 align;
505 char *backing_file;
506 time_t mtime;
507 unsigned long long size;
508 int sector_size;
509 int bitmap_offset;
510
511 if (ubd_dev->file && ubd_dev->cow.file) {
512 file = ubd_dev->cow.file;
513
514 goto out;
515 }
516
d4afcba9 517 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
518 if (fd < 0)
519 return fd;
520
521 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
522 &mtime, &size, &sector_size, &align, &bitmap_offset);
523 os_close_file(fd);
1da177e4 524
85356398
RW
525 if(err == -EINVAL)
526 file = ubd_dev->file;
527 else
528 file = backing_file;
529
530out:
dc764e50 531 return os_file_size(file, size_out);
1da177e4
LT
532}
533
5dc62b1b
WC
/*
 * Read len bytes of COW bitmap from fd at the given offset into buf.
 * Returns 0 on success or the negative value reported by os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int n = os_pread_file(fd, buf, len, offset);

	return (n < 0) ? n : 0;
}
544
545static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
546{
547 unsigned long modtime;
548 unsigned long long actual;
549 int err;
550
551 err = os_file_modtime(file, &modtime);
552 if (err < 0) {
553 printk(KERN_ERR "Failed to get modification time of backing "
554 "file \"%s\", err = %d\n", file, -err);
555 return err;
556 }
557
558 err = os_file_size(file, &actual);
559 if (err < 0) {
560 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
561 "err = %d\n", file, -err);
562 return err;
563 }
564
565 if (actual != size) {
566 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
567 * the typecast.*/
568 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
569 "vs backing file\n", (unsigned long long) size, actual);
570 return -EINVAL;
571 }
572 if (modtime != mtime) {
573 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
574 "backing file\n", mtime, modtime);
575 return -EINVAL;
576 }
577 return 0;
578}
579
580static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
581{
582 struct uml_stat buf1, buf2;
583 int err;
584
585 if (from_cmdline == NULL)
586 return 0;
587 if (!strcmp(from_cmdline, from_cow))
588 return 0;
589
590 err = os_stat_file(from_cmdline, &buf1);
591 if (err < 0) {
592 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
593 -err);
594 return 0;
595 }
596 err = os_stat_file(from_cow, &buf2);
597 if (err < 0) {
598 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
599 -err);
600 return 1;
601 }
602 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
603 return 0;
604
605 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
606 "\"%s\" specified in COW header of \"%s\"\n",
607 from_cmdline, from_cow, cow);
608 return 1;
609}
610
611static int open_ubd_file(char *file, struct openflags *openflags, int shared,
612 char **backing_file_out, int *bitmap_offset_out,
613 unsigned long *bitmap_len_out, int *data_offset_out,
614 int *create_cow_out)
615{
616 time_t mtime;
617 unsigned long long size;
618 __u32 version, align;
619 char *backing_file;
620 int fd, err, sectorsize, asked_switch, mode = 0644;
621
622 fd = os_open_file(file, *openflags, mode);
623 if (fd < 0) {
624 if ((fd == -ENOENT) && (create_cow_out != NULL))
625 *create_cow_out = 1;
626 if (!openflags->w ||
627 ((fd != -EROFS) && (fd != -EACCES)))
628 return fd;
629 openflags->w = 0;
630 fd = os_open_file(file, *openflags, mode);
631 if (fd < 0)
632 return fd;
633 }
634
635 if (shared)
636 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
637 else {
638 err = os_lock_file(fd, openflags->w);
639 if (err < 0) {
640 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
641 file, -err);
642 goto out_close;
643 }
644 }
645
646 /* Successful return case! */
647 if (backing_file_out == NULL)
648 return fd;
649
650 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
651 &size, &sectorsize, &align, bitmap_offset_out);
652 if (err && (*backing_file_out != NULL)) {
653 printk(KERN_ERR "Failed to read COW header from COW file "
654 "\"%s\", errno = %d\n", file, -err);
655 goto out_close;
656 }
657 if (err)
658 return fd;
659
660 asked_switch = path_requires_switch(*backing_file_out, backing_file,
661 file);
662
663 /* Allow switching only if no mismatch. */
664 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
665 mtime)) {
666 printk(KERN_ERR "Switching backing file to '%s'\n",
667 *backing_file_out);
668 err = write_cow_header(file, fd, *backing_file_out,
669 sectorsize, align, &size);
670 if (err) {
671 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
672 goto out_close;
673 }
674 } else {
675 *backing_file_out = backing_file;
676 err = backing_file_mismatch(*backing_file_out, size, mtime);
677 if (err)
678 goto out_close;
679 }
680
681 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
682 bitmap_len_out, data_offset_out);
683
684 return fd;
685 out_close:
686 os_close_file(fd);
687 return err;
688}
689
690static int create_cow_file(char *cow_file, char *backing_file,
691 struct openflags flags,
692 int sectorsize, int alignment, int *bitmap_offset_out,
693 unsigned long *bitmap_len_out, int *data_offset_out)
694{
695 int err, fd;
696
697 flags.c = 1;
698 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
699 if (fd < 0) {
700 err = fd;
701 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
702 cow_file, -err);
703 goto out;
704 }
705
706 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
707 bitmap_offset_out, bitmap_len_out,
708 data_offset_out);
709 if (!err)
710 return fd;
711 os_close_file(fd);
712 out:
713 return err;
714}
715
5f75a4f8 716static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 717{
7d314e34
PBG
718 os_close_file(ubd_dev->fd);
719 if(ubd_dev->cow.file == NULL)
1da177e4
LT
720 return;
721
7d314e34
PBG
722 os_close_file(ubd_dev->cow.fd);
723 vfree(ubd_dev->cow.bitmap);
724 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
725}
726
7d314e34 727static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
728{
729 struct openflags flags;
730 char **back_ptr;
731 int err, create_cow, *create_ptr;
0bf16bff 732 int fd;
1da177e4 733
7d314e34 734 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 735 create_cow = 0;
7d314e34
PBG
736 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
737 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
738
739 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
740 back_ptr, &ubd_dev->cow.bitmap_offset,
741 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 742 create_ptr);
1da177e4 743
0bf16bff
PBG
744 if((fd == -ENOENT) && create_cow){
745 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
746 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
747 &ubd_dev->cow.bitmap_offset,
748 &ubd_dev->cow.bitmap_len,
749 &ubd_dev->cow.data_offset);
0bf16bff 750 if(fd >= 0){
1da177e4 751 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 752 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
753 }
754 }
755
0bf16bff 756 if(fd < 0){
7d314e34 757 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
758 -fd);
759 return fd;
1da177e4 760 }
0bf16bff 761 ubd_dev->fd = fd;
1da177e4 762
7d314e34 763 if(ubd_dev->cow.file != NULL){
086fa5ff 764 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
f4768ffd 765
1da177e4 766 err = -ENOMEM;
da2486ba 767 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 768 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
769 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
770 goto error;
771 }
772 flush_tlb_kernel_vm();
773
7d314e34
PBG
774 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
775 ubd_dev->cow.bitmap_offset,
776 ubd_dev->cow.bitmap_len);
1da177e4
LT
777 if(err < 0)
778 goto error;
779
7d314e34 780 flags = ubd_dev->openflags;
1da177e4 781 flags.w = 0;
7d314e34 782 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 783 NULL, NULL, NULL, NULL);
1da177e4 784 if(err < 0) goto error;
7d314e34 785 ubd_dev->cow.fd = err;
1da177e4 786 }
dc764e50 787 return 0;
1da177e4 788 error:
7d314e34 789 os_close_file(ubd_dev->fd);
dc764e50 790 return err;
1da177e4
LT
791}
792
2e3f5251
JD
793static void ubd_device_release(struct device *dev)
794{
8691b97b 795 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251
JD
796
797 blk_cleanup_queue(ubd_dev->queue);
798 *ubd_dev = ((struct ubd) DEFAULT_UBD);
799}
800
5f75a4f8 801static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 802 struct gendisk **disk_out)
1da177e4
LT
803{
804 struct gendisk *disk;
1da177e4
LT
805
806 disk = alloc_disk(1 << UBD_SHIFT);
807 if(disk == NULL)
dc764e50 808 return -ENOMEM;
1da177e4
LT
809
810 disk->major = major;
811 disk->first_minor = unit << UBD_SHIFT;
812 disk->fops = &ubd_blops;
813 set_capacity(disk, size / 512);
792dd4fc 814 if (major == UBD_MAJOR)
1da177e4 815 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 816 else
1da177e4 817 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
818
819 /* sysfs register (not for ide fake devices) */
792dd4fc 820 if (major == UBD_MAJOR) {
7d314e34
PBG
821 ubd_devs[unit].pdev.id = unit;
822 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251 823 ubd_devs[unit].pdev.dev.release = ubd_device_release;
8691b97b 824 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
7d314e34
PBG
825 platform_device_register(&ubd_devs[unit].pdev);
826 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
827 }
828
7d314e34 829 disk->private_data = &ubd_devs[unit];
62f96cb0 830 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
831 add_disk(disk);
832
833 *disk_out = disk;
834 return 0;
835}
836
/*
 * Round n up to the next multiple of 512 bytes.  The argument is
 * parenthesised so that compound expressions expand correctly, and the mask
 * is built from an unsigned constant instead of left-shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
838
f28169d2 839static int ubd_add(int n, char **error_out)
1da177e4 840{
7d314e34 841 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 842 int err = 0;
1da177e4 843
7d314e34 844 if(ubd_dev->file == NULL)
ec7cf783 845 goto out;
1da177e4 846
7d314e34 847 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
848 if(err < 0){
849 *error_out = "Couldn't determine size of device's file";
80c13749 850 goto out;
f28169d2 851 }
1da177e4 852
7d314e34 853 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 854
a0044bdf 855 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 856 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 857
62f96cb0
JD
858 err = -ENOMEM;
859 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
860 if (ubd_dev->queue == NULL) {
861 *error_out = "Failed to initialize device queue";
80c13749 862 goto out;
62f96cb0
JD
863 }
864 ubd_dev->queue->queuedata = ubd_dev;
f935a8ce 865 blk_queue_write_cache(ubd_dev->queue, true, false);
62f96cb0 866
8a78362c 867 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
792dd4fc 868 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
62f96cb0
JD
869 if(err){
870 *error_out = "Failed to register device";
871 goto out_cleanup;
872 }
6c29256c 873
792dd4fc 874 if (fake_major != UBD_MAJOR)
5f75a4f8 875 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 876 &fake_gendisk[n]);
1da177e4 877
83380cc1
JD
878 /*
879 * Perhaps this should also be under the "if (fake_major)" above
880 * using the fake_disk->disk_name
881 */
1da177e4
LT
882 if (fake_ide)
883 make_ide_entries(ubd_gendisk[n]->disk_name);
884
ec7cf783 885 err = 0;
ec7cf783
JD
886out:
887 return err;
62f96cb0
JD
888
889out_cleanup:
890 blk_cleanup_queue(ubd_dev->queue);
891 goto out;
1da177e4
LT
892}
893
f28169d2 894static int ubd_config(char *str, char **error_out)
1da177e4 895{
e7f6552f 896 int n, ret;
1da177e4 897
f28169d2
JD
898 /* This string is possibly broken up and stored, so it's only
899 * freed if ubd_setup_common fails, or if only general options
900 * were set.
901 */
970d6e3a 902 str = kstrdup(str, GFP_KERNEL);
e7f6552f 903 if (str == NULL) {
f28169d2
JD
904 *error_out = "Failed to allocate memory";
905 return -ENOMEM;
1da177e4 906 }
f28169d2
JD
907
908 ret = ubd_setup_common(str, &n, error_out);
909 if (ret)
e7f6552f 910 goto err_free;
f28169d2 911
e7f6552f
PBG
912 if (n == -1) {
913 ret = 0;
d8d7c28e 914 goto err_free;
1da177e4 915 }
1da177e4 916
dc764e50 917 mutex_lock(&ubd_lock);
f28169d2 918 ret = ubd_add(n, error_out);
e7f6552f 919 if (ret)
7d314e34 920 ubd_devs[n].file = NULL;
dc764e50 921 mutex_unlock(&ubd_lock);
1da177e4 922
e7f6552f 923out:
dc764e50 924 return ret;
e7f6552f
PBG
925
926err_free:
927 kfree(str);
928 goto out;
1da177e4
LT
929}
930
931static int ubd_get_config(char *name, char *str, int size, char **error_out)
932{
7d314e34 933 struct ubd *ubd_dev;
1da177e4
LT
934 int n, len = 0;
935
936 n = parse_unit(&name);
937 if((n >= MAX_DEV) || (n < 0)){
938 *error_out = "ubd_get_config : device number out of range";
dc764e50 939 return -1;
1da177e4
LT
940 }
941
7d314e34 942 ubd_dev = &ubd_devs[n];
d7fb2c38 943 mutex_lock(&ubd_lock);
1da177e4 944
7d314e34 945 if(ubd_dev->file == NULL){
1da177e4
LT
946 CONFIG_CHUNK(str, size, len, "", 1);
947 goto out;
948 }
949
7d314e34 950 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 951
7d314e34 952 if(ubd_dev->cow.file != NULL){
1da177e4 953 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 954 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
955 }
956 else CONFIG_CHUNK(str, size, len, "", 1);
957
958 out:
d7fb2c38 959 mutex_unlock(&ubd_lock);
dc764e50 960 return len;
1da177e4
LT
961}
962
29d56cfe
JD
963static int ubd_id(char **str, int *start_out, int *end_out)
964{
dc764e50 965 int n;
29d56cfe
JD
966
967 n = parse_unit(str);
dc764e50
JD
968 *start_out = 0;
969 *end_out = MAX_DEV - 1;
970 return n;
29d56cfe
JD
971}
972
f28169d2 973static int ubd_remove(int n, char **error_out)
1da177e4 974{
2e3f5251 975 struct gendisk *disk = ubd_gendisk[n];
7d314e34 976 struct ubd *ubd_dev;
29d56cfe 977 int err = -ENODEV;
1da177e4 978
d7fb2c38 979 mutex_lock(&ubd_lock);
1da177e4 980
7d314e34 981 ubd_dev = &ubd_devs[n];
1da177e4 982
7d314e34 983 if(ubd_dev->file == NULL)
29d56cfe 984 goto out;
1da177e4 985
29d56cfe
JD
986 /* you cannot remove a open disk */
987 err = -EBUSY;
7d314e34 988 if(ubd_dev->count > 0)
1da177e4
LT
989 goto out;
990
dc764e50 991 ubd_gendisk[n] = NULL;
b47d2deb
JD
992 if(disk != NULL){
993 del_gendisk(disk);
994 put_disk(disk);
995 }
1da177e4
LT
996
997 if(fake_gendisk[n] != NULL){
998 del_gendisk(fake_gendisk[n]);
999 put_disk(fake_gendisk[n]);
1000 fake_gendisk[n] = NULL;
1001 }
1002
1da177e4 1003 err = 0;
2e3f5251 1004 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1005out:
d7fb2c38 1006 mutex_unlock(&ubd_lock);
29d56cfe 1007 return err;
1da177e4
LT
1008}
1009
f28169d2 1010/* All these are called by mconsole in process context and without
b8831a1d 1011 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1012 */
1da177e4 1013static struct mc_device ubd_mc = {
84f48d4f 1014 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1015 .name = "ubd",
1016 .config = ubd_config,
dc764e50 1017 .get_config = ubd_get_config,
29d56cfe 1018 .id = ubd_id,
1da177e4
LT
1019 .remove = ubd_remove,
1020};
1021
d8d7c28e 1022static int __init ubd_mc_init(void)
1da177e4
LT
1023{
1024 mconsole_register_dev(&ubd_mc);
1025 return 0;
1026}
1027
1028__initcall(ubd_mc_init);
1029
d8d7c28e
PBG
1030static int __init ubd0_init(void)
1031{
1032 struct ubd *ubd_dev = &ubd_devs[0];
1033
b8831a1d 1034 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1035 if(ubd_dev->file == NULL)
1036 ubd_dev->file = "root_fs";
b8831a1d
JD
1037 mutex_unlock(&ubd_lock);
1038
dc764e50 1039 return 0;
d8d7c28e
PBG
1040}
1041
1042__initcall(ubd0_init);
1043
b8831a1d 1044/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1045static struct platform_driver ubd_driver = {
1046 .driver = {
1047 .name = DRIVER_NAME,
1048 },
1da177e4
LT
1049};
1050
d8d7c28e 1051static int __init ubd_init(void)
1da177e4 1052{
f28169d2
JD
1053 char *error;
1054 int i, err;
1da177e4 1055
792dd4fc 1056 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1057 return -1;
1058
792dd4fc 1059 if (fake_major != UBD_MAJOR) {
1da177e4
LT
1060 char name[sizeof("ubd_nnn\0")];
1061
1062 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
1063 if (register_blkdev(fake_major, "ubd"))
1064 return -1;
1065 }
3ae5eaec 1066 platform_driver_register(&ubd_driver);
dc764e50 1067 mutex_lock(&ubd_lock);
f28169d2
JD
1068 for (i = 0; i < MAX_DEV; i++){
1069 err = ubd_add(i, &error);
1070 if(err)
1071 printk(KERN_ERR "Failed to initialize ubd device %d :"
1072 "%s\n", i, error);
1073 }
dc764e50 1074 mutex_unlock(&ubd_lock);
1da177e4
LT
1075 return 0;
1076}
1077
1078late_initcall(ubd_init);
1079
d8d7c28e 1080static int __init ubd_driver_init(void){
91acb21f
JD
1081 unsigned long stack;
1082 int err;
1083
1084 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1085 if(global_openflags.s){
1086 printk(KERN_INFO "ubd: Synchronous mode\n");
1087 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1088 * enough. So use anyway the io thread. */
1089 }
1090 stack = alloc_stack(0, 0);
6c29256c 1091 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
1092 &thread_fd);
1093 if(io_pid < 0){
6c29256c 1094 printk(KERN_ERR
91acb21f
JD
1095 "ubd : Failed to start I/O thread (errno = %d) - "
1096 "falling back to synchronous I/O\n", -io_pid);
1097 io_pid = -1;
dc764e50 1098 return 0;
91acb21f 1099 }
6c29256c 1100 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1101 0, "ubd", ubd_devs);
91acb21f
JD
1102 if(err != 0)
1103 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1104 return 0;
91acb21f
JD
1105}
1106
1107device_initcall(ubd_driver_init);
1108
a625c998 1109static int ubd_open(struct block_device *bdev, fmode_t mode)
1da177e4 1110{
a625c998 1111 struct gendisk *disk = bdev->bd_disk;
7d314e34 1112 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
1113 int err = 0;
1114
9a181c58 1115 mutex_lock(&ubd_mutex);
7d314e34
PBG
1116 if(ubd_dev->count == 0){
1117 err = ubd_open_dev(ubd_dev);
1da177e4
LT
1118 if(err){
1119 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 1120 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
1121 goto out;
1122 }
1123 }
7d314e34
PBG
1124 ubd_dev->count++;
1125 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
1126
1127 /* This should no more be needed. And it didn't work anyway to exclude
1128 * read-write remounting of filesystems.*/
a625c998 1129 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 1130 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 1131 err = -EROFS;
2c49be99 1132 }*/
6e9624b8 1133out:
9a181c58 1134 mutex_unlock(&ubd_mutex);
dc764e50 1135 return err;
1da177e4
LT
1136}
1137
db2a144b 1138static void ubd_release(struct gendisk *disk, fmode_t mode)
1da177e4 1139{
7d314e34 1140 struct ubd *ubd_dev = disk->private_data;
1da177e4 1141
9a181c58 1142 mutex_lock(&ubd_mutex);
7d314e34 1143 if(--ubd_dev->count == 0)
5f75a4f8 1144 ubd_close_dev(ubd_dev);
9a181c58 1145 mutex_unlock(&ubd_mutex);
1da177e4
LT
1146}
1147
91acb21f
JD
1148static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1149 __u64 *cow_offset, unsigned long *bitmap,
1150 __u64 bitmap_offset, unsigned long *bitmap_words,
1151 __u64 bitmap_len)
1da177e4 1152{
91acb21f
JD
1153 __u64 sector = io_offset >> 9;
1154 int i, update_bitmap = 0;
1155
1156 for(i = 0; i < length >> 9; i++){
1157 if(cow_mask != NULL)
1158 ubd_set_bit(i, (unsigned char *) cow_mask);
1159 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1160 continue;
1da177e4 1161
91acb21f
JD
1162 update_bitmap = 1;
1163 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1164 }
1165
1166 if(!update_bitmap)
1167 return;
1da177e4 1168
91acb21f 1169 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1170
91acb21f
JD
1171 /* This takes care of the case where we're exactly at the end of the
1172 * device, and *cow_offset + 1 is off the end. So, just back it up
1173 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1174 * for the original diagnosis.
1175 */
6d074242
JO
1176 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1177 sizeof(unsigned long)) - 1))
91acb21f
JD
1178 (*cow_offset)--;
1179
1180 bitmap_words[0] = bitmap[*cow_offset];
1181 bitmap_words[1] = bitmap[*cow_offset + 1];
1182
1183 *cow_offset *= sizeof(unsigned long);
1184 *cow_offset += bitmap_offset;
1185}
1186
1187static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1188 __u64 bitmap_offset, __u64 bitmap_len)
1189{
1190 __u64 sector = req->offset >> 9;
1191 int i;
1192
1193 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1194 panic("Operation too long");
1195
1196 if(req->op == UBD_READ) {
1197 for(i = 0; i < req->length >> 9; i++){
1198 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1199 ubd_set_bit(i, (unsigned char *)
91acb21f 1200 &req->sector_mask);
dc764e50 1201 }
91acb21f
JD
1202 }
1203 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1204 &req->cow_offset, bitmap, bitmap_offset,
1205 req->bitmap_words, bitmap_len);
1da177e4
LT
1206}
1207
62f96cb0 1208/* Called with dev->lock held */
a0044bdf
JD
1209static void prepare_request(struct request *req, struct io_thread_req *io_req,
1210 unsigned long long offset, int page_offset,
1211 int len, struct page *page)
1da177e4
LT
1212{
1213 struct gendisk *disk = req->rq_disk;
7d314e34 1214 struct ubd *ubd_dev = disk->private_data;
91acb21f 1215
62f96cb0 1216 io_req->req = req;
a0044bdf
JD
1217 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1218 ubd_dev->fd;
7d314e34 1219 io_req->fds[1] = ubd_dev->fd;
91acb21f 1220 io_req->cow_offset = -1;
1da177e4
LT
1221 io_req->offset = offset;
1222 io_req->length = len;
1223 io_req->error = 0;
91acb21f
JD
1224 io_req->sector_mask = 0;
1225
1226 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1227 io_req->offsets[0] = 0;
7d314e34 1228 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1229 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1230 io_req->sectorsize = 1 << 9;
1231
7d314e34 1232 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1233 cowify_req(io_req, ubd_dev->cow.bitmap,
1234 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1235
1da177e4
LT
1236}
1237
805f11a0
RW
1238/* Called with dev->lock held */
1239static void prepare_flush_request(struct request *req,
1240 struct io_thread_req *io_req)
1241{
1242 struct gendisk *disk = req->rq_disk;
1243 struct ubd *ubd_dev = disk->private_data;
1244
1245 io_req->req = req;
1246 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1247 ubd_dev->fd;
1248 io_req->op = UBD_FLUSH;
1249}
1250
bc1d72e7
RW
1251static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1252{
1253 int n = os_write_file(thread_fd, &io_req,
1254 sizeof(io_req));
1255 if (n != sizeof(io_req)) {
1256 if (n != -EAGAIN)
1257 printk("write to io thread failed, "
1258 "errno = %d\n", -n);
1259 else if (list_empty(&dev->restart))
1260 list_add(&dev->restart, &restart);
1261
1262 kfree(io_req);
1263 return false;
1264 }
1265 return true;
1266}
1267
62f96cb0 1268/* Called with dev->lock held */
165125e1 1269static void do_ubd_request(struct request_queue *q)
1da177e4 1270{
2adcec21 1271 struct io_thread_req *io_req;
1da177e4 1272 struct request *req;
a0044bdf
JD
1273
1274 while(1){
2a9529a0 1275 struct ubd *dev = q->queuedata;
2a236122 1276 if(dev->request == NULL){
9934c8c0 1277 struct request *req = blk_fetch_request(q);
a0044bdf
JD
1278 if(req == NULL)
1279 return;
1280
1281 dev->request = req;
47526903 1282 dev->rq_pos = blk_rq_pos(req);
a0044bdf
JD
1283 dev->start_sg = 0;
1284 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1285 }
1286
1287 req = dev->request;
805f11a0 1288
3a5e02ce 1289 if (req_op(req) == REQ_OP_FLUSH) {
805f11a0
RW
1290 io_req = kmalloc(sizeof(struct io_thread_req),
1291 GFP_ATOMIC);
1292 if (io_req == NULL) {
1293 if (list_empty(&dev->restart))
1294 list_add(&dev->restart, &restart);
1295 return;
1296 }
1297 prepare_flush_request(req, io_req);
2a236122
TK
1298 if (submit_request(io_req, dev) == false)
1299 return;
805f11a0
RW
1300 }
1301
a0044bdf
JD
1302 while(dev->start_sg < dev->end_sg){
1303 struct scatterlist *sg = &dev->sg[dev->start_sg];
1304
2adcec21 1305 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1306 GFP_ATOMIC);
2adcec21
JD
1307 if(io_req == NULL){
1308 if(list_empty(&dev->restart))
1309 list_add(&dev->restart, &restart);
1310 return;
1311 }
1312 prepare_request(req, io_req,
47526903 1313 (unsigned long long)dev->rq_pos << 9,
45711f1a 1314 sg->offset, sg->length, sg_page(sg));
a0044bdf 1315
bc1d72e7 1316 if (submit_request(io_req, dev) == false)
a0044bdf 1317 return;
a0044bdf 1318
47526903 1319 dev->rq_pos += sg->length >> 9;
a0044bdf 1320 dev->start_sg++;
1da177e4 1321 }
a0044bdf
JD
1322 dev->end_sg = 0;
1323 dev->request = NULL;
1da177e4
LT
1324 }
1325}
1326
a885c8c4
CH
1327static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1328{
7d314e34 1329 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1330
1331 geo->heads = 128;
1332 geo->sectors = 32;
7d314e34 1333 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1334 return 0;
1335}
1336
a625c998 1337static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4
LT
1338 unsigned int cmd, unsigned long arg)
1339{
a625c998 1340 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1341 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1342
1343 switch (cmd) {
1da177e4 1344 struct cdrom_volctrl volume;
1da177e4 1345 case HDIO_GET_IDENTITY:
73855e13
BZ
1346 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1347 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1348 ubd_id[ATA_ID_HEADS] = 128;
1349 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1350 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1351 sizeof(ubd_id)))
dc764e50
JD
1352 return -EFAULT;
1353 return 0;
b8831a1d 1354
1da177e4
LT
1355 case CDROMVOLREAD:
1356 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1357 return -EFAULT;
1da177e4
LT
1358 volume.channel0 = 255;
1359 volume.channel1 = 255;
1360 volume.channel2 = 255;
1361 volume.channel3 = 255;
1362 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1363 return -EFAULT;
1364 return 0;
1da177e4 1365 }
dc764e50 1366 return -EINVAL;
1da177e4
LT
1367}
1368
91acb21f 1369static int update_bitmap(struct io_thread_req *req)
1da177e4 1370{
91acb21f 1371 int n;
1da177e4 1372
91acb21f 1373 if(req->cow_offset == -1)
dc764e50 1374 return 0;
1da177e4 1375
8c6157b6
AI
1376 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1377 sizeof(req->bitmap_words), req->cow_offset);
91acb21f
JD
1378 if(n != sizeof(req->bitmap_words)){
1379 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1380 req->fds[1]);
dc764e50 1381 return 1;
91acb21f 1382 }
1da177e4 1383
dc764e50 1384 return 0;
91acb21f 1385}
1da177e4 1386
5dc62b1b 1387static void do_io(struct io_thread_req *req)
91acb21f
JD
1388{
1389 char *buf;
1390 unsigned long len;
1391 int n, nsectors, start, end, bit;
91acb21f
JD
1392 __u64 off;
1393
805f11a0
RW
1394 if (req->op == UBD_FLUSH) {
1395 /* fds[0] is always either the rw image or our cow file */
1396 n = os_sync_file(req->fds[0]);
1397 if (n != 0) {
1398 printk("do_io - sync failed err = %d "
1399 "fd = %d\n", -n, req->fds[0]);
1400 req->error = 1;
1401 }
1402 return;
1403 }
1404
91acb21f
JD
1405 nsectors = req->length / req->sectorsize;
1406 start = 0;
1407 do {
1408 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1409 end = start;
1410 while((end < nsectors) &&
1411 (ubd_test_bit(end, (unsigned char *)
1412 &req->sector_mask) == bit))
1413 end++;
1414
1415 off = req->offset + req->offsets[bit] +
1416 start * req->sectorsize;
1417 len = (end - start) * req->sectorsize;
1418 buf = &req->buffer[start * req->sectorsize];
1419
91acb21f
JD
1420 if(req->op == UBD_READ){
1421 n = 0;
1422 do {
1423 buf = &buf[n];
1424 len -= n;
8c6157b6 1425 n = os_pread_file(req->fds[bit], buf, len, off);
91acb21f
JD
1426 if (n < 0) {
1427 printk("do_io - read failed, err = %d "
1428 "fd = %d\n", -n, req->fds[bit]);
1429 req->error = 1;
1430 return;
1431 }
1432 } while((n < len) && (n != 0));
1433 if (n < len) memset(&buf[n], 0, len - n);
1434 } else {
8c6157b6 1435 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f
JD
1436 if(n != len){
1437 printk("do_io - write failed err = %d "
1438 "fd = %d\n", -n, req->fds[bit]);
1439 req->error = 1;
1440 return;
1441 }
1442 }
1443
1444 start = end;
1445 } while(start < nsectors);
1da177e4 1446
91acb21f 1447 req->error = update_bitmap(req);
1da177e4 1448}
91acb21f
JD
1449
/*
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Incremented by the io thread for every request it picks up and changed
 * by nothing else.  XXX: currently unused.
 */
static int io_count;
91acb21f
JD
1457
1458int io_thread(void *arg)
1459{
2adcec21 1460 struct io_thread_req *req;
91acb21f
JD
1461 int n;
1462
91d44ff8
RW
1463 os_fix_helper_signals();
1464
91acb21f 1465 while(1){
a6ea4cce 1466 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1467 sizeof(struct io_thread_req *));
1468 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1469 if(n < 0)
1470 printk("io_thread - read failed, fd = %d, "
1471 "err = %d\n", kernel_fd, -n);
1472 else {
1473 printk("io_thread - short read, fd = %d, "
1474 "length = %d\n", kernel_fd, n);
1475 }
1476 continue;
1477 }
1478 io_count++;
2adcec21 1479 do_io(req);
a6ea4cce 1480 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1481 sizeof(struct io_thread_req *));
1482 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1483 printk("io_thread - write failed, fd = %d, err = %d\n",
1484 kernel_fd, -n);
1485 }
91acb21f 1486
1b57e9c2
JD
1487 return 0;
1488}