]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/um/drivers/ubd_kern.c
[PATCH] uml: add per-device queues and locks to ubd driver
[mirror_ubuntu-artful-kernel.git] / arch / um / drivers / ubd_kern.c
CommitLineData
/*
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */

/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN this version also has lots of
 * clean ups preparing for ubd-many.
 * James McMechan
 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
1da177e4
LT
23#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/hdreg.h"
26#include "linux/init.h"
1da177e4
LT
27#include "linux/cdrom.h"
28#include "linux/proc_fs.h"
29#include "linux/ctype.h"
30#include "linux/capability.h"
31#include "linux/mm.h"
32#include "linux/vmalloc.h"
33#include "linux/blkpg.h"
34#include "linux/genhd.h"
35#include "linux/spinlock.h"
d052d1be 36#include "linux/platform_device.h"
1da177e4
LT
37#include "asm/segment.h"
38#include "asm/uaccess.h"
39#include "asm/irq.h"
40#include "asm/types.h"
41#include "asm/tlbflush.h"
42#include "user_util.h"
43#include "mem_user.h"
44#include "kern_util.h"
45#include "kern.h"
46#include "mconsole_kern.h"
47#include "init.h"
48#include "irq_user.h"
49#include "irq_kern.h"
50#include "ubd_user.h"
1da177e4
LT
51#include "os.h"
52#include "mem.h"
53#include "mem_kern.h"
54#include "cow.h"
55
/* Direction of an operation handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};
1da177e4
LT
57
58struct io_thread_req {
62f96cb0 59 struct request *req;
91acb21f 60 enum ubd_req op;
1da177e4
LT
61 int fds[2];
62 unsigned long offsets[2];
63 unsigned long long offset;
64 unsigned long length;
65 char *buffer;
66 int sectorsize;
91acb21f
JD
67 unsigned long sector_mask;
68 unsigned long long cow_offset;
69 unsigned long bitmap_words[2];
1da177e4
LT
70 int error;
71};
72
/* Helpers implemented outside this file. */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
1da177e4 84
/*
 * Test a bit in a byte-addressed bitmap.
 *
 * bit:  index of the bit; bit 0 is the least-significant bit of data[0].
 * data: bitmap to inspect.  It is only read, hence const.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, const unsigned char *data)
{
	const int bits = sizeof(data[0]) * 8;	/* bits per element */
	__u64 n = bit / bits;			/* element holding the bit */
	int off = bit % bits;			/* position inside it */

	return (data[n] & (1 << off)) != 0;
}
95
/*
 * Set bit number 'bit' in the byte-addressed bitmap 'data'; bit 0 is the
 * least-significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
106/*End stuff from ubd_user.h*/
107
/* Name used for the platform driver and the per-device platform devices. */
#define DRIVER_NAME "uml-blkdev"

static DEFINE_MUTEX(ubd_lock);

/* XXX - this made sense in 2.4 days, now it's only used as a boolean, and
 * probably it doesn't make sense even for that. */
static int do_ubd;

/* Block device operations, defined further down. */
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of slots in ubd_devs[] and the gendisk tables. */
#define MAX_DEV (16)
1da177e4 123
1da177e4
LT
124static struct block_device_operations ubd_blops = {
125 .owner = THIS_MODULE,
126 .open = ubd_open,
127 .release = ubd_release,
128 .ioctl = ubd_ioctl,
a885c8c4 129 .getgeo = ubd_getgeo,
1da177e4
LT
130};
131
1da177e4
LT
132/* Protected by ubd_lock */
133static int fake_major = MAJOR_NR;
134
135static struct gendisk *ubd_gendisk[MAX_DEV];
136static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 137
1da177e4
LT
138#ifdef CONFIG_BLK_DEV_UBD_SYNC
139#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
140 .cl = 1 })
141#else
142#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
143 .cl = 1 })
144#endif
145
146/* Not protected - changed only in ubd_setup_common and then only to
147 * to enable O_SYNC.
148 */
149static struct openflags global_openflags = OPEN_FLAGS;
150
/* Copy-on-write state of a device; only meaningful when file != NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev(). */
	unsigned long *bitmap;
	/* Size passed to vmalloc() and read_cow_bitmap() for the bitmap. */
	unsigned long bitmap_len;
	/* Offset of the bitmap, as reported by open_ubd_file(). */
	int bitmap_offset;
	/* Offset of the data, as reported by open_ubd_file(). */
	int data_offset;
};
161
162struct ubd {
2a9d32f6
PBG
163 /* name (and fd, below) of the file opened for writing, either the
164 * backing or the cow file. */
1da177e4
LT
165 char *file;
166 int count;
167 int fd;
168 __u64 size;
169 struct openflags boot_openflags;
170 struct openflags openflags;
84e945e3
PBG
171 unsigned shared:1;
172 unsigned no_cow:1;
1da177e4
LT
173 struct cow cow;
174 struct platform_device pdev;
62f96cb0
JD
175 struct request_queue *queue;
176 spinlock_t lock;
1da177e4
LT
177};
178
179#define DEFAULT_COW { \
180 .file = NULL, \
181 .fd = -1, \
182 .bitmap = NULL, \
183 .bitmap_offset = 0, \
184 .data_offset = 0, \
185}
186
187#define DEFAULT_UBD { \
188 .file = NULL, \
189 .count = 0, \
190 .fd = -1, \
191 .size = -1, \
192 .boot_openflags = OPEN_FLAGS, \
193 .openflags = OPEN_FLAGS, \
194 .no_cow = 0, \
6c29256c 195 .shared = 0, \
1da177e4 196 .cow = DEFAULT_COW, \
62f96cb0 197 .lock = SPIN_LOCK_UNLOCKED, \
1da177e4
LT
198}
199
7d314e34 200struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
1da177e4 201
1da177e4
LT
202/* Only changed by fake_ide_setup which is a setup */
203static int fake_ide = 0;
204static struct proc_dir_entry *proc_ide_root = NULL;
205static struct proc_dir_entry *proc_ide = NULL;
206
207static void make_proc_ide(void)
208{
209 proc_ide_root = proc_mkdir("ide", NULL);
210 proc_ide = proc_mkdir("ide0", proc_ide_root);
211}
212
/*
 * read_proc handler for the fake "media" entry: always reports "disk\n".
 *
 * The text is copied to 'page'; the return value is the number of bytes
 * available from offset 'off', capped at 'count'.  *eof is set when fewer
 * than 'count' bytes remain, and *start is left untouched when nothing
 * remains.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining = sizeof(media) - 1;

	strcpy(page, media);
	remaining -= off;

	if (remaining >= count) {
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
229
230static void make_ide_entries(char *dev_name)
231{
232 struct proc_dir_entry *dir, *ent;
233 char name[64];
234
235 if(proc_ide_root == NULL) make_proc_ide();
236
237 dir = proc_mkdir(dev_name, proc_ide);
238 if(!dir) return;
239
240 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
241 if(!ent) return;
242 ent->nlink = 1;
243 ent->data = NULL;
244 ent->read_proc = proc_ide_read_media;
245 ent->write_proc = NULL;
246 sprintf(name,"ide0/%s", dev_name);
247 proc_symlink(dev_name, proc_ide_root, name);
248}
249
250static int fake_ide_setup(char *str)
251{
252 fake_ide = 1;
253 return(1);
254}
255
256__setup("fake_ide", fake_ide_setup);
257
258__uml_help(fake_ide_setup,
259"fake_ide\n"
260" Create ide0 entries that map onto ubd devices.\n\n"
261);
262
/*
 * Parse a unit specifier at *ptr: either a number or a single lower-case
 * letter ('a' is unit 0).  On success *ptr is advanced past the specifier
 * and the unit is returned; otherwise -1 is returned and *ptr is unchanged.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	int n;

	if (isdigit(*str)) {
		n = simple_strtoul(str, &end, 0);
		if (end == str)
			return -1;
		*ptr = end;
		return n;
	}

	if ((*str < 'a') || (*str > 'z'))
		return -1;

	*ptr = str + 1;
	return *str - 'a';
}
281
d8d7c28e
PBG
282/* If *index_out == -1 at exit, the passed option was a general one;
283 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
284 * should not be freed on exit.
285 */
f28169d2 286static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 287{
7d314e34 288 struct ubd *ubd_dev;
1da177e4
LT
289 struct openflags flags = global_openflags;
290 char *backing_file;
291 int n, err, i;
292
293 if(index_out) *index_out = -1;
294 n = *str;
295 if(n == '='){
296 char *end;
297 int major;
298
299 str++;
1da177e4
LT
300 if(!strcmp(str, "sync")){
301 global_openflags = of_sync(global_openflags);
f28169d2 302 return 0;
1da177e4
LT
303 }
304 major = simple_strtoul(str, &end, 0);
305 if((*end != '\0') || (end == str)){
f28169d2
JD
306 *error_out = "Didn't parse major number";
307 return -EINVAL;
1da177e4
LT
308 }
309
f28169d2
JD
310 err = -EINVAL;
311 mutex_lock(&ubd_lock);
312 if(fake_major != MAJOR_NR){
313 *error_out = "Can't assign a fake major twice";
314 goto out1;
315 }
6c29256c 316
f28169d2 317 fake_major = major;
1da177e4
LT
318
319 printk(KERN_INFO "Setting extra ubd major number to %d\n",
320 major);
f28169d2
JD
321 err = 0;
322 out1:
323 mutex_unlock(&ubd_lock);
324 return err;
1da177e4
LT
325 }
326
327 n = parse_unit(&str);
328 if(n < 0){
f28169d2
JD
329 *error_out = "Couldn't parse device number";
330 return -EINVAL;
1da177e4
LT
331 }
332 if(n >= MAX_DEV){
f28169d2
JD
333 *error_out = "Device number out of range";
334 return 1;
1da177e4
LT
335 }
336
f28169d2 337 err = -EBUSY;
d7fb2c38 338 mutex_lock(&ubd_lock);
1da177e4 339
7d314e34
PBG
340 ubd_dev = &ubd_devs[n];
341 if(ubd_dev->file != NULL){
f28169d2 342 *error_out = "Device is already configured";
1da177e4
LT
343 goto out;
344 }
345
346 if (index_out)
347 *index_out = n;
348
f28169d2 349 err = -EINVAL;
6c29256c 350 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
351 switch (*str) {
352 case 'r':
353 flags.w = 0;
354 break;
355 case 's':
356 flags.s = 1;
357 break;
358 case 'd':
7d314e34 359 ubd_dev->no_cow = 1;
1da177e4 360 break;
6c29256c 361 case 'c':
7d314e34 362 ubd_dev->shared = 1;
6c29256c 363 break;
1da177e4
LT
364 case '=':
365 str++;
366 goto break_loop;
367 default:
f28169d2
JD
368 *error_out = "Expected '=' or flag letter "
369 "(r, s, c, or d)";
1da177e4
LT
370 goto out;
371 }
372 str++;
373 }
374
f28169d2
JD
375 if (*str == '=')
376 *error_out = "Too many flags specified";
377 else
378 *error_out = "Missing '='";
1da177e4
LT
379 goto out;
380
381break_loop:
1da177e4
LT
382 backing_file = strchr(str, ',');
383
f28169d2 384 if (backing_file == NULL)
1da177e4 385 backing_file = strchr(str, ':');
1da177e4 386
f28169d2
JD
387 if(backing_file != NULL){
388 if(ubd_dev->no_cow){
389 *error_out = "Can't specify both 'd' and a cow file";
390 goto out;
391 }
1da177e4
LT
392 else {
393 *backing_file = '\0';
394 backing_file++;
395 }
396 }
f28169d2 397 err = 0;
7d314e34
PBG
398 ubd_dev->file = str;
399 ubd_dev->cow.file = backing_file;
400 ubd_dev->boot_openflags = flags;
1da177e4 401out:
d7fb2c38 402 mutex_unlock(&ubd_lock);
f28169d2 403 return err;
1da177e4
LT
404}
405
406static int ubd_setup(char *str)
407{
f28169d2
JD
408 char *error;
409 int err;
410
411 err = ubd_setup_common(str, NULL, &error);
412 if(err)
413 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
414 "%s\n", str, error);
415 return 1;
1da177e4
LT
416}
417
418__setup("ubd", ubd_setup);
419__uml_help(ubd_setup,
420"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
421" This is used to associate a device with a file in the underlying\n"
422" filesystem. When specifying two filenames, the first one is the\n"
423" COW name and the second is the backing file name. As separator you can\n"
424" use either a ':' or a ',': the first one allows writing things like;\n"
425" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
426" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 427" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
428" a COW file or a backing file. To override this detection, add the 'd'\n"
429" flag:\n"
430" ubd0d=BackingFile\n"
431" Usually, there is a filesystem in the file, but \n"
432" that's not required. Swap devices containing swap files can be\n"
433" specified like this. Also, a file which doesn't contain a\n"
434" filesystem can have its contents read in the virtual \n"
435" machine by running 'dd' on the device. <n> must be in the range\n"
436" 0 to 7. Appending an 'r' to the number will cause that device\n"
437" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
438" an 's' will cause data to be written to disk on the host immediately.\n\n"
439);
440
441static int udb_setup(char *str)
442{
443 printk("udb%s specified on command line is almost certainly a ubd -> "
444 "udb TYPO\n", str);
445 return(1);
446}
447
448__setup("udb", udb_setup);
449__uml_help(udb_setup,
450"udb\n"
0894e27e
JD
451" This option is here solely to catch ubd -> udb typos, which can be\n"
452" to impossible to catch visually unless you specifically look for\n"
453" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
454" in the boot output.\n\n"
455);
456
457static int fakehd_set = 0;
458static int fakehd(char *str)
459{
460 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
461 fakehd_set = 1;
462 return 1;
463}
464
465__setup("fakehd", fakehd);
466__uml_help(fakehd,
467"fakehd\n"
468" Change the ubd device name to \"hd\".\n\n"
469);
470
471static void do_ubd_request(request_queue_t * q);
91acb21f
JD
472
473/* Only changed by ubd_init, which is an initcall. */
474int thread_fd = -1;
1da177e4
LT
475
476/* Changed by ubd_handler, which is serialized because interrupts only
477 * happen on CPU 0.
d8d7c28e 478 * XXX: currently unused.
1da177e4 479 */
d8d7c28e 480static int intr_count = 0;
1da177e4 481
91acb21f
JD
482/* call ubd_finish if you need to serialize */
483static void __ubd_finish(struct request *req, int error)
1da177e4 484{
91acb21f
JD
485 int nsect;
486
487 if(error){
488 end_request(req, 0);
489 return;
1da177e4 490 }
91acb21f
JD
491 nsect = req->current_nr_sectors;
492 req->sector += nsect;
493 req->buffer += nsect << 9;
494 req->errors = 0;
495 req->nr_sectors -= nsect;
496 req->current_nr_sectors = 0;
497 end_request(req, 1);
1da177e4
LT
498}
499
33f775ee
PBG
500/* Callable only from interrupt context - otherwise you need to do
501 * spin_lock_irq()/spin_lock_irqsave() */
91acb21f 502static inline void ubd_finish(struct request *req, int error)
1da177e4 503{
62f96cb0
JD
504 struct ubd *dev = req->rq_disk->private_data;
505
506 spin_lock(&dev->lock);
91acb21f 507 __ubd_finish(req, error);
62f96cb0 508 spin_unlock(&dev->lock);
1da177e4
LT
509}
510
2fe30a34 511/* XXX - move this inside ubd_intr. */
62f96cb0 512/* Called without dev->lock held, and only in interrupt context. */
91acb21f 513static void ubd_handler(void)
1da177e4 514{
91acb21f 515 struct io_thread_req req;
62f96cb0
JD
516 struct request *rq;
517 struct ubd *dev;
91acb21f
JD
518 int n;
519
2fe30a34 520 do_ubd = 0;
91acb21f
JD
521 intr_count++;
522 n = os_read_file(thread_fd, &req, sizeof(req));
523 if(n != sizeof(req)){
524 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
525 "err = %d\n", os_getpid(), -n);
91acb21f
JD
526 return;
527 }
6c29256c 528
62f96cb0
JD
529 rq = req.req;
530 dev = rq->rq_disk->private_data;
531
91acb21f 532 ubd_finish(rq, req.error);
62f96cb0
JD
533 reactivate_fd(thread_fd, UBD_IRQ);
534 spin_lock(&dev->lock);
535 do_ubd_request(dev->queue);
536 spin_unlock(&dev->lock);
1da177e4
LT
537}
538
7bea96fd 539static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 540{
91acb21f
JD
541 ubd_handler();
542 return(IRQ_HANDLED);
543}
09ace81c 544
91acb21f
JD
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
555
d8d7c28e 556static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
557{
558 char *file;
559
7d314e34 560 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
1da177e4
LT
561 return(os_file_size(file, size_out));
562}
563
5f75a4f8 564static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 565{
7d314e34
PBG
566 os_close_file(ubd_dev->fd);
567 if(ubd_dev->cow.file == NULL)
1da177e4
LT
568 return;
569
7d314e34
PBG
570 os_close_file(ubd_dev->cow.fd);
571 vfree(ubd_dev->cow.bitmap);
572 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
573}
574
7d314e34 575static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
576{
577 struct openflags flags;
578 char **back_ptr;
579 int err, create_cow, *create_ptr;
0bf16bff 580 int fd;
1da177e4 581
7d314e34 582 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 583 create_cow = 0;
7d314e34
PBG
584 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
585 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
586
587 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
588 back_ptr, &ubd_dev->cow.bitmap_offset,
589 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 590 create_ptr);
1da177e4 591
0bf16bff
PBG
592 if((fd == -ENOENT) && create_cow){
593 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
594 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
595 &ubd_dev->cow.bitmap_offset,
596 &ubd_dev->cow.bitmap_len,
597 &ubd_dev->cow.data_offset);
0bf16bff 598 if(fd >= 0){
1da177e4 599 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 600 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
601 }
602 }
603
0bf16bff 604 if(fd < 0){
7d314e34 605 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
606 -fd);
607 return fd;
1da177e4 608 }
0bf16bff 609 ubd_dev->fd = fd;
1da177e4 610
7d314e34 611 if(ubd_dev->cow.file != NULL){
1da177e4 612 err = -ENOMEM;
7d314e34
PBG
613 ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len);
614 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
615 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
616 goto error;
617 }
618 flush_tlb_kernel_vm();
619
7d314e34
PBG
620 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
621 ubd_dev->cow.bitmap_offset,
622 ubd_dev->cow.bitmap_len);
1da177e4
LT
623 if(err < 0)
624 goto error;
625
7d314e34 626 flags = ubd_dev->openflags;
1da177e4 627 flags.w = 0;
7d314e34 628 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 629 NULL, NULL, NULL, NULL);
1da177e4 630 if(err < 0) goto error;
7d314e34 631 ubd_dev->cow.fd = err;
1da177e4
LT
632 }
633 return(0);
634 error:
7d314e34 635 os_close_file(ubd_dev->fd);
1da177e4
LT
636 return(err);
637}
638
5f75a4f8 639static int ubd_disk_register(int major, u64 size, int unit,
1da177e4
LT
640 struct gendisk **disk_out)
641
642{
643 struct gendisk *disk;
1da177e4
LT
644
645 disk = alloc_disk(1 << UBD_SHIFT);
646 if(disk == NULL)
647 return(-ENOMEM);
648
649 disk->major = major;
650 disk->first_minor = unit << UBD_SHIFT;
651 disk->fops = &ubd_blops;
652 set_capacity(disk, size / 512);
ce7b0f46 653 if(major == MAJOR_NR)
1da177e4 654 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 655 else
1da177e4 656 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
657
658 /* sysfs register (not for ide fake devices) */
659 if (major == MAJOR_NR) {
7d314e34
PBG
660 ubd_devs[unit].pdev.id = unit;
661 ubd_devs[unit].pdev.name = DRIVER_NAME;
662 platform_device_register(&ubd_devs[unit].pdev);
663 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
664 }
665
7d314e34 666 disk->private_data = &ubd_devs[unit];
62f96cb0 667 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
668 add_disk(disk);
669
670 *disk_out = disk;
671 return 0;
672}
673
/*
 * Round n up to the next multiple of 512 bytes.  The argument is
 * parenthesized so that expressions can be passed safely, and the mask is
 * built without left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
675
f28169d2 676static int ubd_add(int n, char **error_out)
1da177e4 677{
7d314e34 678 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 679 int err = 0;
1da177e4 680
7d314e34 681 if(ubd_dev->file == NULL)
ec7cf783 682 goto out;
1da177e4 683
7d314e34 684 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
685 if(err < 0){
686 *error_out = "Couldn't determine size of device's file";
80c13749 687 goto out;
f28169d2 688 }
1da177e4 689
7d314e34 690 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 691
62f96cb0
JD
692 err = -ENOMEM;
693 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
694 if (ubd_dev->queue == NULL) {
695 *error_out = "Failed to initialize device queue";
80c13749 696 goto out;
62f96cb0
JD
697 }
698 ubd_dev->queue->queuedata = ubd_dev;
699
700 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
701 if(err){
702 *error_out = "Failed to register device";
703 goto out_cleanup;
704 }
6c29256c 705
1da177e4 706 if(fake_major != MAJOR_NR)
5f75a4f8 707 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 708 &fake_gendisk[n]);
1da177e4
LT
709
710 /* perhaps this should also be under the "if (fake_major)" above */
711 /* using the fake_disk->disk_name and also the fakehd_set name */
712 if (fake_ide)
713 make_ide_entries(ubd_gendisk[n]->disk_name);
714
ec7cf783 715 err = 0;
ec7cf783
JD
716out:
717 return err;
62f96cb0
JD
718
719out_cleanup:
720 blk_cleanup_queue(ubd_dev->queue);
721 goto out;
1da177e4
LT
722}
723
f28169d2 724static int ubd_config(char *str, char **error_out)
1da177e4 725{
e7f6552f 726 int n, ret;
1da177e4 727
f28169d2
JD
728 /* This string is possibly broken up and stored, so it's only
729 * freed if ubd_setup_common fails, or if only general options
730 * were set.
731 */
970d6e3a 732 str = kstrdup(str, GFP_KERNEL);
e7f6552f 733 if (str == NULL) {
f28169d2
JD
734 *error_out = "Failed to allocate memory";
735 return -ENOMEM;
1da177e4 736 }
f28169d2
JD
737
738 ret = ubd_setup_common(str, &n, error_out);
739 if (ret)
e7f6552f 740 goto err_free;
f28169d2 741
e7f6552f
PBG
742 if (n == -1) {
743 ret = 0;
d8d7c28e 744 goto err_free;
1da177e4 745 }
1da177e4 746
d7fb2c38 747 mutex_lock(&ubd_lock);
f28169d2 748 ret = ubd_add(n, error_out);
e7f6552f 749 if (ret)
7d314e34 750 ubd_devs[n].file = NULL;
d7fb2c38 751 mutex_unlock(&ubd_lock);
1da177e4 752
e7f6552f
PBG
753out:
754 return ret;
755
756err_free:
757 kfree(str);
758 goto out;
1da177e4
LT
759}
760
761static int ubd_get_config(char *name, char *str, int size, char **error_out)
762{
7d314e34 763 struct ubd *ubd_dev;
1da177e4
LT
764 int n, len = 0;
765
766 n = parse_unit(&name);
767 if((n >= MAX_DEV) || (n < 0)){
768 *error_out = "ubd_get_config : device number out of range";
769 return(-1);
770 }
771
7d314e34 772 ubd_dev = &ubd_devs[n];
d7fb2c38 773 mutex_lock(&ubd_lock);
1da177e4 774
7d314e34 775 if(ubd_dev->file == NULL){
1da177e4
LT
776 CONFIG_CHUNK(str, size, len, "", 1);
777 goto out;
778 }
779
7d314e34 780 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 781
7d314e34 782 if(ubd_dev->cow.file != NULL){
1da177e4 783 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 784 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
785 }
786 else CONFIG_CHUNK(str, size, len, "", 1);
787
788 out:
d7fb2c38 789 mutex_unlock(&ubd_lock);
1da177e4
LT
790 return(len);
791}
792
29d56cfe
JD
793static int ubd_id(char **str, int *start_out, int *end_out)
794{
795 int n;
796
797 n = parse_unit(str);
798 *start_out = 0;
799 *end_out = MAX_DEV - 1;
800 return n;
801}
802
f28169d2 803static int ubd_remove(int n, char **error_out)
1da177e4 804{
7d314e34 805 struct ubd *ubd_dev;
29d56cfe 806 int err = -ENODEV;
1da177e4 807
d7fb2c38 808 mutex_lock(&ubd_lock);
1da177e4 809
29d56cfe
JD
810 if(ubd_gendisk[n] == NULL)
811 goto out;
1da177e4 812
7d314e34 813 ubd_dev = &ubd_devs[n];
1da177e4 814
7d314e34 815 if(ubd_dev->file == NULL)
29d56cfe 816 goto out;
1da177e4 817
29d56cfe
JD
818 /* you cannot remove a open disk */
819 err = -EBUSY;
7d314e34 820 if(ubd_dev->count > 0)
1da177e4
LT
821 goto out;
822
823 del_gendisk(ubd_gendisk[n]);
824 put_disk(ubd_gendisk[n]);
825 ubd_gendisk[n] = NULL;
826
827 if(fake_gendisk[n] != NULL){
828 del_gendisk(fake_gendisk[n]);
829 put_disk(fake_gendisk[n]);
830 fake_gendisk[n] = NULL;
831 }
832
62f96cb0 833 blk_cleanup_queue(ubd_dev->queue);
7d314e34
PBG
834 platform_device_unregister(&ubd_dev->pdev);
835 *ubd_dev = ((struct ubd) DEFAULT_UBD);
1da177e4 836 err = 0;
29d56cfe 837out:
d7fb2c38 838 mutex_unlock(&ubd_lock);
29d56cfe 839 return err;
1da177e4
LT
840}
841
f28169d2
JD
842/* All these are called by mconsole in process context and without
843 * ubd-specific locks.
844 */
1da177e4 845static struct mc_device ubd_mc = {
84f48d4f 846 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
847 .name = "ubd",
848 .config = ubd_config,
849 .get_config = ubd_get_config,
29d56cfe 850 .id = ubd_id,
1da177e4
LT
851 .remove = ubd_remove,
852};
853
d8d7c28e 854static int __init ubd_mc_init(void)
1da177e4
LT
855{
856 mconsole_register_dev(&ubd_mc);
857 return 0;
858}
859
860__initcall(ubd_mc_init);
861
d8d7c28e
PBG
862static int __init ubd0_init(void)
863{
864 struct ubd *ubd_dev = &ubd_devs[0];
865
866 if(ubd_dev->file == NULL)
867 ubd_dev->file = "root_fs";
868 return(0);
869}
870
871__initcall(ubd0_init);
872
3ae5eaec
RK
873static struct platform_driver ubd_driver = {
874 .driver = {
875 .name = DRIVER_NAME,
876 },
1da177e4
LT
877};
878
d8d7c28e 879static int __init ubd_init(void)
1da177e4 880{
f28169d2
JD
881 char *error;
882 int i, err;
1da177e4 883
1da177e4
LT
884 if (register_blkdev(MAJOR_NR, "ubd"))
885 return -1;
886
1da177e4
LT
887 if (fake_major != MAJOR_NR) {
888 char name[sizeof("ubd_nnn\0")];
889
890 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
891 if (register_blkdev(fake_major, "ubd"))
892 return -1;
893 }
3ae5eaec 894 platform_driver_register(&ubd_driver);
f28169d2
JD
895 for (i = 0; i < MAX_DEV; i++){
896 err = ubd_add(i, &error);
897 if(err)
898 printk(KERN_ERR "Failed to initialize ubd device %d :"
899 "%s\n", i, error);
900 }
1da177e4
LT
901 return 0;
902}
903
904late_initcall(ubd_init);
905
d8d7c28e 906static int __init ubd_driver_init(void){
91acb21f
JD
907 unsigned long stack;
908 int err;
909
910 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
911 if(global_openflags.s){
912 printk(KERN_INFO "ubd: Synchronous mode\n");
913 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
914 * enough. So use anyway the io thread. */
915 }
916 stack = alloc_stack(0, 0);
6c29256c 917 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
918 &thread_fd);
919 if(io_pid < 0){
6c29256c 920 printk(KERN_ERR
91acb21f
JD
921 "ubd : Failed to start I/O thread (errno = %d) - "
922 "falling back to synchronous I/O\n", -io_pid);
923 io_pid = -1;
924 return(0);
925 }
6c29256c 926 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
7d314e34 927 IRQF_DISABLED, "ubd", ubd_devs);
91acb21f
JD
928 if(err != 0)
929 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 930 return 0;
91acb21f
JD
931}
932
933device_initcall(ubd_driver_init);
934
1da177e4
LT
935static int ubd_open(struct inode *inode, struct file *filp)
936{
937 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 938 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
939 int err = 0;
940
7d314e34
PBG
941 if(ubd_dev->count == 0){
942 err = ubd_open_dev(ubd_dev);
1da177e4
LT
943 if(err){
944 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 945 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
946 goto out;
947 }
948 }
7d314e34
PBG
949 ubd_dev->count++;
950 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
951
952 /* This should no more be needed. And it didn't work anyway to exclude
953 * read-write remounting of filesystems.*/
7d314e34 954 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 955 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 956 err = -EROFS;
2c49be99 957 }*/
1da177e4
LT
958 out:
959 return(err);
960}
961
962static int ubd_release(struct inode * inode, struct file * file)
963{
964 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 965 struct ubd *ubd_dev = disk->private_data;
1da177e4 966
7d314e34 967 if(--ubd_dev->count == 0)
5f75a4f8 968 ubd_close_dev(ubd_dev);
1da177e4
LT
969 return(0);
970}
971
91acb21f
JD
972static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
973 __u64 *cow_offset, unsigned long *bitmap,
974 __u64 bitmap_offset, unsigned long *bitmap_words,
975 __u64 bitmap_len)
1da177e4 976{
91acb21f
JD
977 __u64 sector = io_offset >> 9;
978 int i, update_bitmap = 0;
979
980 for(i = 0; i < length >> 9; i++){
981 if(cow_mask != NULL)
982 ubd_set_bit(i, (unsigned char *) cow_mask);
983 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
984 continue;
1da177e4 985
91acb21f
JD
986 update_bitmap = 1;
987 ubd_set_bit(sector + i, (unsigned char *) bitmap);
988 }
989
990 if(!update_bitmap)
991 return;
1da177e4 992
91acb21f 993 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 994
91acb21f
JD
995 /* This takes care of the case where we're exactly at the end of the
996 * device, and *cow_offset + 1 is off the end. So, just back it up
997 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
998 * for the original diagnosis.
999 */
1000 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
1001 sizeof(unsigned long) - 1))
1002 (*cow_offset)--;
1003
1004 bitmap_words[0] = bitmap[*cow_offset];
1005 bitmap_words[1] = bitmap[*cow_offset + 1];
1006
1007 *cow_offset *= sizeof(unsigned long);
1008 *cow_offset += bitmap_offset;
1009}
1010
1011static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1012 __u64 bitmap_offset, __u64 bitmap_len)
1013{
1014 __u64 sector = req->offset >> 9;
1015 int i;
1016
1017 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1018 panic("Operation too long");
1019
1020 if(req->op == UBD_READ) {
1021 for(i = 0; i < req->length >> 9; i++){
1022 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1023 ubd_set_bit(i, (unsigned char *)
91acb21f
JD
1024 &req->sector_mask);
1025 }
1026 }
1027 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1028 &req->cow_offset, bitmap, bitmap_offset,
1029 req->bitmap_words, bitmap_len);
1da177e4
LT
1030}
1031
62f96cb0 1032/* Called with dev->lock held */
91acb21f 1033static int prepare_request(struct request *req, struct io_thread_req *io_req)
1da177e4
LT
1034{
1035 struct gendisk *disk = req->rq_disk;
7d314e34 1036 struct ubd *ubd_dev = disk->private_data;
91acb21f
JD
1037 __u64 offset;
1038 int len;
1039
2c49be99 1040 /* This should be impossible now */
7d314e34 1041 if((rq_data_dir(req) == WRITE) && !ubd_dev->openflags.w){
6c29256c 1042 printk("Write attempted on readonly ubd device %s\n",
1da177e4 1043 disk->disk_name);
91acb21f 1044 end_request(req, 0);
1da177e4
LT
1045 return(1);
1046 }
1047
91acb21f
JD
1048 offset = ((__u64) req->sector) << 9;
1049 len = req->current_nr_sectors << 9;
1050
62f96cb0 1051 io_req->req = req;
7d314e34
PBG
1052 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : ubd_dev->fd;
1053 io_req->fds[1] = ubd_dev->fd;
91acb21f 1054 io_req->cow_offset = -1;
1da177e4
LT
1055 io_req->offset = offset;
1056 io_req->length = len;
1057 io_req->error = 0;
91acb21f
JD
1058 io_req->sector_mask = 0;
1059
1060 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1061 io_req->offsets[0] = 0;
7d314e34 1062 io_req->offsets[1] = ubd_dev->cow.data_offset;
91acb21f 1063 io_req->buffer = req->buffer;
1da177e4
LT
1064 io_req->sectorsize = 1 << 9;
1065
7d314e34
PBG
1066 if(ubd_dev->cow.file != NULL)
1067 cowify_req(io_req, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset,
1068 ubd_dev->cow.bitmap_len);
91acb21f 1069
1da177e4
LT
1070 return(0);
1071}
1072
62f96cb0 1073/* Called with dev->lock held */
1da177e4
LT
1074static void do_ubd_request(request_queue_t *q)
1075{
1076 struct io_thread_req io_req;
1077 struct request *req;
91acb21f
JD
1078 int err, n;
1079
1080 if(thread_fd == -1){
1081 while((req = elv_next_request(q)) != NULL){
1082 err = prepare_request(req, &io_req);
1083 if(!err){
1084 do_io(&io_req);
1085 __ubd_finish(req, io_req.error);
1086 }
1087 }
1088 }
1089 else {
1090 if(do_ubd || (req = elv_next_request(q)) == NULL)
1091 return;
1092 err = prepare_request(req, &io_req);
1093 if(!err){
2fe30a34 1094 do_ubd = 1;
91acb21f
JD
1095 n = os_write_file(thread_fd, (char *) &io_req,
1096 sizeof(io_req));
1097 if(n != sizeof(io_req))
1098 printk("write to io thread failed, "
1099 "errno = %d\n", -n);
1da177e4
LT
1100 }
1101 }
1102}
1103
a885c8c4
CH
1104static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1105{
7d314e34 1106 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1107
1108 geo->heads = 128;
1109 geo->sectors = 32;
7d314e34 1110 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1111 return 0;
1112}
1113
1da177e4
LT
1114static int ubd_ioctl(struct inode * inode, struct file * file,
1115 unsigned int cmd, unsigned long arg)
1116{
7d314e34 1117 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1da177e4
LT
1118 struct hd_driveid ubd_id = {
1119 .cyls = 0,
1120 .heads = 128,
1121 .sectors = 32,
1122 };
1123
1124 switch (cmd) {
1da177e4 1125 struct cdrom_volctrl volume;
1da177e4 1126 case HDIO_GET_IDENTITY:
7d314e34 1127 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1da177e4
LT
1128 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1129 sizeof(ubd_id)))
1130 return(-EFAULT);
1131 return(0);
1132
1133 case CDROMVOLREAD:
1134 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1135 return(-EFAULT);
1136 volume.channel0 = 255;
1137 volume.channel1 = 255;
1138 volume.channel2 = 255;
1139 volume.channel3 = 255;
1140 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1141 return(-EFAULT);
1142 return(0);
1143 }
1144 return(-EINVAL);
1145}
1146
4833aff7 1147static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1da177e4
LT
1148{
1149 struct uml_stat buf1, buf2;
1150 int err;
1151
4833aff7
PBG
1152 if(from_cmdline == NULL)
1153 return 0;
1154 if(!strcmp(from_cmdline, from_cow))
1155 return 0;
1da177e4
LT
1156
1157 err = os_stat_file(from_cmdline, &buf1);
1158 if(err < 0){
1159 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
4833aff7 1160 return 0;
1da177e4
LT
1161 }
1162 err = os_stat_file(from_cow, &buf2);
1163 if(err < 0){
1164 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
4833aff7 1165 return 1;
1da177e4
LT
1166 }
1167 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
4833aff7 1168 return 0;
1da177e4
LT
1169
1170 printk("Backing file mismatch - \"%s\" requested,\n"
1171 "\"%s\" specified in COW header of \"%s\"\n",
1172 from_cmdline, from_cow, cow);
4833aff7 1173 return 1;
1da177e4
LT
1174}
1175
1176static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1177{
1178 unsigned long modtime;
fe1db50c 1179 unsigned long long actual;
1da177e4
LT
1180 int err;
1181
1182 err = os_file_modtime(file, &modtime);
1183 if(err < 0){
1184 printk("Failed to get modification time of backing file "
1185 "\"%s\", err = %d\n", file, -err);
1186 return(err);
1187 }
1188
1189 err = os_file_size(file, &actual);
1190 if(err < 0){
1191 printk("Failed to get size of backing file \"%s\", "
1192 "err = %d\n", file, -err);
1193 return(err);
1194 }
1195
1196 if(actual != size){
1197 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1198 * the typecast.*/
1199 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1200 "file\n", (unsigned long long) size, actual);
1201 return(-EINVAL);
1202 }
1203 if(modtime != mtime){
1204 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1205 "file\n", mtime, modtime);
1206 return(-EINVAL);
1207 }
1208 return(0);
1209}
1210
/*
 * Seek fd to offset and read up to len bytes into buf.
 *
 * Returns 0 on success or the negative value returned by os_seek_file()
 * or os_read_file().
 * NOTE(review): any non-negative read result, including one shorter than
 * len, is reported as success - confirm callers are fine with that.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1225
6c29256c 1226int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
1227 char **backing_file_out, int *bitmap_offset_out,
1228 unsigned long *bitmap_len_out, int *data_offset_out,
1229 int *create_cow_out)
1230{
1231 time_t mtime;
1232 unsigned long long size;
1233 __u32 version, align;
1234 char *backing_file;
4833aff7 1235 int fd, err, sectorsize, asked_switch, mode = 0644;
1da177e4
LT
1236
1237 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1238 if (fd < 0) {
1239 if ((fd == -ENOENT) && (create_cow_out != NULL))
1da177e4 1240 *create_cow_out = 1;
a374a48f
PBG
1241 if (!openflags->w ||
1242 ((fd != -EROFS) && (fd != -EACCES)))
1243 return fd;
1da177e4
LT
1244 openflags->w = 0;
1245 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1246 if (fd < 0)
1247 return fd;
1da177e4
LT
1248 }
1249
6c29256c
JD
1250 if(shared)
1251 printk("Not locking \"%s\" on the host\n", file);
1252 else {
1253 err = os_lock_file(fd, openflags->w);
1254 if(err < 0){
1255 printk("Failed to lock '%s', err = %d\n", file, -err);
1256 goto out_close;
1257 }
1da177e4
LT
1258 }
1259
d6e05edc 1260 /* Successful return case! */
a374a48f
PBG
1261 if(backing_file_out == NULL)
1262 return(fd);
1da177e4
LT
1263
1264 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1265 &size, &sectorsize, &align, bitmap_offset_out);
1266 if(err && (*backing_file_out != NULL)){
1267 printk("Failed to read COW header from COW file \"%s\", "
1268 "errno = %d\n", file, -err);
1269 goto out_close;
1270 }
a374a48f
PBG
1271 if(err)
1272 return(fd);
1da177e4 1273
4833aff7 1274 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1da177e4 1275
4833aff7
PBG
1276 /* Allow switching only if no mismatch. */
1277 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1da177e4
LT
1278 printk("Switching backing file to '%s'\n", *backing_file_out);
1279 err = write_cow_header(file, fd, *backing_file_out,
1280 sectorsize, align, &size);
a374a48f 1281 if (err) {
1da177e4 1282 printk("Switch failed, errno = %d\n", -err);
4833aff7 1283 goto out_close;
1da177e4 1284 }
a374a48f 1285 } else {
1da177e4
LT
1286 *backing_file_out = backing_file;
1287 err = backing_file_mismatch(*backing_file_out, size, mtime);
a374a48f
PBG
1288 if (err)
1289 goto out_close;
1da177e4
LT
1290 }
1291
1292 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1293 bitmap_len_out, data_offset_out);
1294
a374a48f 1295 return fd;
1da177e4
LT
1296 out_close:
1297 os_close_file(fd);
a374a48f 1298 return err;
1da177e4
LT
1299}
1300
1301int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1302 int sectorsize, int alignment, int *bitmap_offset_out,
1303 unsigned long *bitmap_len_out, int *data_offset_out)
1304{
1305 int err, fd;
1306
1307 flags.c = 1;
6c29256c 1308 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1da177e4
LT
1309 if(fd < 0){
1310 err = fd;
1311 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1312 -err);
1313 goto out;
1314 }
1315
1316 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1317 bitmap_offset_out, bitmap_len_out,
1318 data_offset_out);
1319 if(!err)
1320 return(fd);
1321 os_close_file(fd);
1322 out:
1323 return(err);
1324}
1325
91acb21f 1326static int update_bitmap(struct io_thread_req *req)
1da177e4 1327{
91acb21f 1328 int n;
1da177e4 1329
91acb21f
JD
1330 if(req->cow_offset == -1)
1331 return(0);
1da177e4 1332
91acb21f
JD
1333 n = os_seek_file(req->fds[1], req->cow_offset);
1334 if(n < 0){
1335 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1336 return(1);
1337 }
1da177e4 1338
91acb21f
JD
1339 n = os_write_file(req->fds[1], &req->bitmap_words,
1340 sizeof(req->bitmap_words));
1341 if(n != sizeof(req->bitmap_words)){
1342 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1343 req->fds[1]);
1344 return(1);
1345 }
1da177e4 1346
91acb21f
JD
1347 return(0);
1348}
1da177e4 1349
91acb21f
JD
1350void do_io(struct io_thread_req *req)
1351{
1352 char *buf;
1353 unsigned long len;
1354 int n, nsectors, start, end, bit;
1355 int err;
1356 __u64 off;
1357
1358 nsectors = req->length / req->sectorsize;
1359 start = 0;
1360 do {
1361 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1362 end = start;
1363 while((end < nsectors) &&
1364 (ubd_test_bit(end, (unsigned char *)
1365 &req->sector_mask) == bit))
1366 end++;
1367
1368 off = req->offset + req->offsets[bit] +
1369 start * req->sectorsize;
1370 len = (end - start) * req->sectorsize;
1371 buf = &req->buffer[start * req->sectorsize];
1372
1373 err = os_seek_file(req->fds[bit], off);
1374 if(err < 0){
1375 printk("do_io - lseek failed : err = %d\n", -err);
1376 req->error = 1;
1377 return;
1378 }
1379 if(req->op == UBD_READ){
1380 n = 0;
1381 do {
1382 buf = &buf[n];
1383 len -= n;
1384 n = os_read_file(req->fds[bit], buf, len);
1385 if (n < 0) {
1386 printk("do_io - read failed, err = %d "
1387 "fd = %d\n", -n, req->fds[bit]);
1388 req->error = 1;
1389 return;
1390 }
1391 } while((n < len) && (n != 0));
1392 if (n < len) memset(&buf[n], 0, len - n);
1393 } else {
1394 n = os_write_file(req->fds[bit], buf, len);
1395 if(n != len){
1396 printk("do_io - write failed err = %d "
1397 "fd = %d\n", -n, req->fds[bit]);
1398 req->error = 1;
1399 return;
1400 }
1401 }
1402
1403 start = end;
1404 } while(start < nsectors);
1da177e4 1405
91acb21f 1406 req->error = update_bitmap(req);
1da177e4 1407}
91acb21f
JD
1408
/*
 * Descriptor io_thread() reads requests from and writes results to.
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Incremented by the io thread for each request it receives and changed
 * nowhere else.  XXX: currently unused.
 */
static int io_count;
91acb21f
JD
1416
1417int io_thread(void *arg)
1418{
1419 struct io_thread_req req;
1420 int n;
1421
1422 ignore_sigwinch_sig();
1423 while(1){
1424 n = os_read_file(kernel_fd, &req, sizeof(req));
1425 if(n != sizeof(req)){
1426 if(n < 0)
1427 printk("io_thread - read failed, fd = %d, "
1428 "err = %d\n", kernel_fd, -n);
1429 else {
1430 printk("io_thread - short read, fd = %d, "
1431 "length = %d\n", kernel_fd, n);
1432 }
1433 continue;
1434 }
1435 io_count++;
1436 do_io(&req);
1437 n = os_write_file(kernel_fd, &req, sizeof(req));
1438 if(n != sizeof(req))
1439 printk("io_thread - write failed, fd = %d, err = %d\n",
1440 kernel_fd, -n);
1441 }
91acb21f 1442
1b57e9c2
JD
1443 return 0;
1444}