]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/um/drivers/ubd_kern.c
arch/um/os-Linux/sys-i386/task_size.c: improve a bit
[mirror_ubuntu-hirsute-kernel.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
e16f5350 23#include "linux/kernel.h"
1da177e4
LT
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
1da177e4
LT
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
d052d1be 37#include "linux/platform_device.h"
23464ffa 38#include "linux/scatterlist.h"
1da177e4
LT
39#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
1da177e4
LT
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
edea1385 52#include "kern_util.h"
1da177e4
LT
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Direction of a request handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};
1da177e4
LT
59
60struct io_thread_req {
62f96cb0 61 struct request *req;
91acb21f 62 enum ubd_req op;
1da177e4
LT
63 int fds[2];
64 unsigned long offsets[2];
65 unsigned long long offset;
66 unsigned long length;
67 char *buffer;
68 int sectorsize;
91acb21f
JD
69 unsigned long sector_mask;
70 unsigned long long cow_offset;
71 unsigned long bitmap_words[2];
1da177e4
LT
72 int error;
73};
74
6c29256c 75extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
76 char **backing_file_out, int *bitmap_offset_out,
77 unsigned long *bitmap_len_out, int *data_offset_out,
78 int *create_cow_out);
79extern int create_cow_file(char *cow_file, char *backing_file,
80 struct openflags flags, int sectorsize,
81 int alignment, int *bitmap_offset_out,
82 unsigned long *bitmap_len_out,
83 int *data_offset_out);
84extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
91acb21f 85extern void do_io(struct io_thread_req *req);
1da177e4 86
/*
 * Test bit number @bit in the byte array @data (bit 0 is the least
 * significant bit of data[0]).  Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) != 0;
}
97
/*
 * Set bit number @bit in the byte array @data (bit 0 is the least
 * significant bit of data[0]).
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] |= (1 << shift);
}
108/*End stuff from ubd_user.h*/
109
110#define DRIVER_NAME "uml-blkdev"
111
d7fb2c38 112static DEFINE_MUTEX(ubd_lock);
1da177e4 113
1da177e4
LT
114static int ubd_open(struct inode * inode, struct file * filp);
115static int ubd_release(struct inode * inode, struct file * file);
116static int ubd_ioctl(struct inode * inode, struct file * file,
117 unsigned int cmd, unsigned long arg);
a885c8c4 118static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 119
97d88ac8 120#define MAX_DEV (16)
1da177e4 121
1da177e4
LT
122static struct block_device_operations ubd_blops = {
123 .owner = THIS_MODULE,
124 .open = ubd_open,
125 .release = ubd_release,
126 .ioctl = ubd_ioctl,
a885c8c4 127 .getgeo = ubd_getgeo,
1da177e4
LT
128};
129
1da177e4
LT
130/* Protected by ubd_lock */
131static int fake_major = MAJOR_NR;
1da177e4
LT
132static struct gendisk *ubd_gendisk[MAX_DEV];
133static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 134
1da177e4
LT
135#ifdef CONFIG_BLK_DEV_UBD_SYNC
136#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
137 .cl = 1 })
138#else
139#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
140 .cl = 1 })
141#endif
1da177e4
LT
142static struct openflags global_openflags = OPEN_FLAGS;
143
/* Per-device copy-on-write state; only meaningful when file != NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'd by ubd_open_dev(). */
	unsigned long *bitmap;
	/* Size of the bitmap in bytes (it is the vmalloc() size). */
	unsigned long bitmap_len;
	/* Offset of the bitmap within the COW file. */
	int bitmap_offset;
	/* Offset of the data area within the COW file. */
	int data_offset;
};
154
a0044bdf
JD
155#define MAX_SG 64
156
1da177e4 157struct ubd {
a0044bdf 158 struct list_head restart;
2a9d32f6
PBG
159 /* name (and fd, below) of the file opened for writing, either the
160 * backing or the cow file. */
1da177e4
LT
161 char *file;
162 int count;
163 int fd;
164 __u64 size;
165 struct openflags boot_openflags;
166 struct openflags openflags;
84e945e3
PBG
167 unsigned shared:1;
168 unsigned no_cow:1;
1da177e4
LT
169 struct cow cow;
170 struct platform_device pdev;
62f96cb0
JD
171 struct request_queue *queue;
172 spinlock_t lock;
a0044bdf
JD
173 struct scatterlist sg[MAX_SG];
174 struct request *request;
175 int start_sg, end_sg;
1da177e4
LT
176};
177
178#define DEFAULT_COW { \
179 .file = NULL, \
dc764e50
JD
180 .fd = -1, \
181 .bitmap = NULL, \
1da177e4 182 .bitmap_offset = 0, \
dc764e50 183 .data_offset = 0, \
1da177e4
LT
184}
185
186#define DEFAULT_UBD { \
187 .file = NULL, \
188 .count = 0, \
189 .fd = -1, \
190 .size = -1, \
191 .boot_openflags = OPEN_FLAGS, \
192 .openflags = OPEN_FLAGS, \
dc764e50 193 .no_cow = 0, \
6c29256c 194 .shared = 0, \
dc764e50 195 .cow = DEFAULT_COW, \
62f96cb0 196 .lock = SPIN_LOCK_UNLOCKED, \
a0044bdf
JD
197 .request = NULL, \
198 .start_sg = 0, \
199 .end_sg = 0, \
1da177e4
LT
200}
201
b8831a1d 202/* Protected by ubd_lock */
7d314e34 203struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
1da177e4 204
1da177e4
LT
205/* Only changed by fake_ide_setup which is a setup */
206static int fake_ide = 0;
207static struct proc_dir_entry *proc_ide_root = NULL;
208static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc handler for the fake IDE "media" entry: always reports "disk\n".
 *
 * Copies the string to @page and returns how many bytes, starting at @off,
 * fit in @count.  *eof is set when fewer than @count bytes remain; *start
 * is set to page + off unless nothing is left to return.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
232
c0a9290e 233static void make_ide_entries(const char *dev_name)
1da177e4
LT
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
1da177e4
LT
245 ent->data = NULL;
246 ent->read_proc = proc_ide_read_media;
247 ent->write_proc = NULL;
c0a9290e 248 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
249 proc_symlink(dev_name, proc_ide_root, name);
250}
251
252static int fake_ide_setup(char *str)
253{
254 fake_ide = 1;
dc764e50 255 return 1;
1da177e4
LT
256}
257
258__setup("fake_ide", fake_ide_setup);
259
260__uml_help(fake_ide_setup,
261"fake_ide\n"
262" Create ide0 entries that map onto ubd devices.\n\n"
263);
264
/*
 * Parse a device unit from the front of *ptr.
 *
 * A unit is either a number (parsed by simple_strtoul() with base 0) or a
 * single lower-case letter, where 'a' is 0.  On success *ptr is advanced
 * past the unit and the unit number is returned; otherwise *ptr is left
 * alone and -1 is returned.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
283
d8d7c28e
PBG
284/* If *index_out == -1 at exit, the passed option was a general one;
285 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
286 * should not be freed on exit.
287 */
f28169d2 288static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 289{
7d314e34 290 struct ubd *ubd_dev;
1da177e4
LT
291 struct openflags flags = global_openflags;
292 char *backing_file;
b8831a1d 293 int n, err = 0, i;
1da177e4
LT
294
295 if(index_out) *index_out = -1;
296 n = *str;
297 if(n == '='){
298 char *end;
299 int major;
300
301 str++;
1da177e4
LT
302 if(!strcmp(str, "sync")){
303 global_openflags = of_sync(global_openflags);
b8831a1d 304 goto out1;
1da177e4 305 }
b8831a1d
JD
306
307 err = -EINVAL;
1da177e4
LT
308 major = simple_strtoul(str, &end, 0);
309 if((*end != '\0') || (end == str)){
f28169d2 310 *error_out = "Didn't parse major number";
b8831a1d 311 goto out1;
1da177e4
LT
312 }
313
f28169d2
JD
314 mutex_lock(&ubd_lock);
315 if(fake_major != MAJOR_NR){
316 *error_out = "Can't assign a fake major twice";
317 goto out1;
318 }
6c29256c 319
f28169d2 320 fake_major = major;
1da177e4
LT
321
322 printk(KERN_INFO "Setting extra ubd major number to %d\n",
323 major);
f28169d2
JD
324 err = 0;
325 out1:
326 mutex_unlock(&ubd_lock);
327 return err;
1da177e4
LT
328 }
329
330 n = parse_unit(&str);
331 if(n < 0){
f28169d2
JD
332 *error_out = "Couldn't parse device number";
333 return -EINVAL;
1da177e4
LT
334 }
335 if(n >= MAX_DEV){
f28169d2
JD
336 *error_out = "Device number out of range";
337 return 1;
1da177e4
LT
338 }
339
f28169d2 340 err = -EBUSY;
d7fb2c38 341 mutex_lock(&ubd_lock);
1da177e4 342
7d314e34
PBG
343 ubd_dev = &ubd_devs[n];
344 if(ubd_dev->file != NULL){
f28169d2 345 *error_out = "Device is already configured";
1da177e4
LT
346 goto out;
347 }
348
349 if (index_out)
350 *index_out = n;
351
f28169d2 352 err = -EINVAL;
6c29256c 353 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
354 switch (*str) {
355 case 'r':
356 flags.w = 0;
357 break;
358 case 's':
359 flags.s = 1;
360 break;
361 case 'd':
7d314e34 362 ubd_dev->no_cow = 1;
1da177e4 363 break;
6c29256c 364 case 'c':
7d314e34 365 ubd_dev->shared = 1;
6c29256c 366 break;
1da177e4
LT
367 case '=':
368 str++;
369 goto break_loop;
370 default:
f28169d2
JD
371 *error_out = "Expected '=' or flag letter "
372 "(r, s, c, or d)";
1da177e4
LT
373 goto out;
374 }
375 str++;
376 }
377
f28169d2
JD
378 if (*str == '=')
379 *error_out = "Too many flags specified";
380 else
381 *error_out = "Missing '='";
1da177e4
LT
382 goto out;
383
384break_loop:
1da177e4
LT
385 backing_file = strchr(str, ',');
386
f28169d2 387 if (backing_file == NULL)
1da177e4 388 backing_file = strchr(str, ':');
1da177e4 389
f28169d2
JD
390 if(backing_file != NULL){
391 if(ubd_dev->no_cow){
392 *error_out = "Can't specify both 'd' and a cow file";
393 goto out;
394 }
1da177e4
LT
395 else {
396 *backing_file = '\0';
397 backing_file++;
398 }
399 }
f28169d2 400 err = 0;
7d314e34
PBG
401 ubd_dev->file = str;
402 ubd_dev->cow.file = backing_file;
403 ubd_dev->boot_openflags = flags;
1da177e4 404out:
d7fb2c38 405 mutex_unlock(&ubd_lock);
f28169d2 406 return err;
1da177e4
LT
407}
408
409static int ubd_setup(char *str)
410{
f28169d2
JD
411 char *error;
412 int err;
413
414 err = ubd_setup_common(str, NULL, &error);
415 if(err)
416 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
417 "%s\n", str, error);
418 return 1;
1da177e4
LT
419}
420
421__setup("ubd", ubd_setup);
422__uml_help(ubd_setup,
423"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
424" This is used to associate a device with a file in the underlying\n"
425" filesystem. When specifying two filenames, the first one is the\n"
426" COW name and the second is the backing file name. As separator you can\n"
427" use either a ':' or a ',': the first one allows writing things like;\n"
428" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
429" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 430" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
431" a COW file or a backing file. To override this detection, add the 'd'\n"
432" flag:\n"
433" ubd0d=BackingFile\n"
434" Usually, there is a filesystem in the file, but \n"
435" that's not required. Swap devices containing swap files can be\n"
436" specified like this. Also, a file which doesn't contain a\n"
437" filesystem can have its contents read in the virtual \n"
438" machine by running 'dd' on the device. <n> must be in the range\n"
439" 0 to 7. Appending an 'r' to the number will cause that device\n"
440" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
441" an 's' will cause data to be written to disk on the host immediately.\n"
442" 'c' will cause the device to be treated as being shared between multiple\n"
443" UMLs and file locking will be turned off - this is appropriate for a\n"
444" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
445);
446
/*
 * Boot-time handler for options starting with "udb": prints a warning
 * that the option is probably a misspelt "ubd".  Always returns 1.
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n",
	       str);

	return 1;
}
453
454__setup("udb", udb_setup);
455__uml_help(udb_setup,
456"udb\n"
0894e27e
JD
457" This option is here solely to catch ubd -> udb typos, which can be\n"
458" to impossible to catch visually unless you specifically look for\n"
459" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
460" in the boot output.\n\n"
461);
462
165125e1 463static void do_ubd_request(struct request_queue * q);
91acb21f
JD
464
465/* Only changed by ubd_init, which is an initcall. */
466int thread_fd = -1;
1da177e4 467
/*
 * Complete @bytes bytes of @req with status @error.  Thin wrapper that
 * passes the arguments to blk_end_request() in that function's order.
 */
static void ubd_end_request(struct request *req, int bytes, int error)
{
	blk_end_request(req, error, bytes);
}
472
33f775ee
PBG
473/* Callable only from interrupt context - otherwise you need to do
474 * spin_lock_irq()/spin_lock_irqsave() */
a0044bdf 475static inline void ubd_finish(struct request *req, int bytes)
1da177e4 476{
a0044bdf 477 if(bytes < 0){
4898b53a 478 ubd_end_request(req, 0, -EIO);
a0044bdf
JD
479 return;
480 }
4898b53a 481 ubd_end_request(req, bytes, 0);
1da177e4
LT
482}
483
a0044bdf
JD
484static LIST_HEAD(restart);
485
2fe30a34 486/* XXX - move this inside ubd_intr. */
62f96cb0 487/* Called without dev->lock held, and only in interrupt context. */
91acb21f 488static void ubd_handler(void)
1da177e4 489{
2adcec21 490 struct io_thread_req *req;
62f96cb0 491 struct request *rq;
a0044bdf
JD
492 struct ubd *ubd;
493 struct list_head *list, *next_ele;
494 unsigned long flags;
91acb21f
JD
495 int n;
496
a0044bdf 497 while(1){
a6ea4cce
JD
498 n = os_read_file(thread_fd, &req,
499 sizeof(struct io_thread_req *));
a0044bdf
JD
500 if(n != sizeof(req)){
501 if(n == -EAGAIN)
502 break;
503 printk(KERN_ERR "spurious interrupt in ubd_handler, "
504 "err = %d\n", -n);
505 return;
506 }
62f96cb0 507
2adcec21
JD
508 rq = req->req;
509 rq->nr_sectors -= req->length >> 9;
a0044bdf
JD
510 if(rq->nr_sectors == 0)
511 ubd_finish(rq, rq->hard_nr_sectors << 9);
2adcec21 512 kfree(req);
a0044bdf 513 }
62f96cb0 514 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
515
516 list_for_each_safe(list, next_ele, &restart){
517 ubd = container_of(list, struct ubd, restart);
518 list_del_init(&ubd->restart);
519 spin_lock_irqsave(&ubd->lock, flags);
520 do_ubd_request(ubd->queue);
521 spin_unlock_irqrestore(&ubd->lock, flags);
522 }
1da177e4
LT
523}
524
7bea96fd 525static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 526{
91acb21f 527 ubd_handler();
dc764e50 528 return IRQ_HANDLED;
91acb21f 529}
09ace81c 530
91acb21f
JD
531/* Only changed by ubd_init, which is an initcall. */
532static int io_pid = -1;
09ace81c 533
91acb21f
JD
534void kill_io_thread(void)
535{
6c29256c 536 if(io_pid != -1)
91acb21f 537 os_kill_process(io_pid, 1);
09ace81c 538}
1da177e4 539
91acb21f
JD
540__uml_exitcall(kill_io_thread);
541
d8d7c28e 542static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
543{
544 char *file;
545
7d314e34 546 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
dc764e50 547 return os_file_size(file, size_out);
1da177e4
LT
548}
549
5f75a4f8 550static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 551{
7d314e34
PBG
552 os_close_file(ubd_dev->fd);
553 if(ubd_dev->cow.file == NULL)
1da177e4
LT
554 return;
555
7d314e34
PBG
556 os_close_file(ubd_dev->cow.fd);
557 vfree(ubd_dev->cow.bitmap);
558 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
559}
560
7d314e34 561static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
562{
563 struct openflags flags;
564 char **back_ptr;
565 int err, create_cow, *create_ptr;
0bf16bff 566 int fd;
1da177e4 567
7d314e34 568 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 569 create_cow = 0;
7d314e34
PBG
570 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
571 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
572
573 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
574 back_ptr, &ubd_dev->cow.bitmap_offset,
575 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 576 create_ptr);
1da177e4 577
0bf16bff
PBG
578 if((fd == -ENOENT) && create_cow){
579 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
580 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
581 &ubd_dev->cow.bitmap_offset,
582 &ubd_dev->cow.bitmap_len,
583 &ubd_dev->cow.data_offset);
0bf16bff 584 if(fd >= 0){
1da177e4 585 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 586 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
587 }
588 }
589
0bf16bff 590 if(fd < 0){
7d314e34 591 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
592 -fd);
593 return fd;
1da177e4 594 }
0bf16bff 595 ubd_dev->fd = fd;
1da177e4 596
7d314e34 597 if(ubd_dev->cow.file != NULL){
f4768ffd
JD
598 blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
599
1da177e4 600 err = -ENOMEM;
da2486ba 601 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 602 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
603 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
604 goto error;
605 }
606 flush_tlb_kernel_vm();
607
7d314e34
PBG
608 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
609 ubd_dev->cow.bitmap_offset,
610 ubd_dev->cow.bitmap_len);
1da177e4
LT
611 if(err < 0)
612 goto error;
613
7d314e34 614 flags = ubd_dev->openflags;
1da177e4 615 flags.w = 0;
7d314e34 616 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 617 NULL, NULL, NULL, NULL);
1da177e4 618 if(err < 0) goto error;
7d314e34 619 ubd_dev->cow.fd = err;
1da177e4 620 }
dc764e50 621 return 0;
1da177e4 622 error:
7d314e34 623 os_close_file(ubd_dev->fd);
dc764e50 624 return err;
1da177e4
LT
625}
626
2e3f5251
JD
627static void ubd_device_release(struct device *dev)
628{
629 struct ubd *ubd_dev = dev->driver_data;
630
631 blk_cleanup_queue(ubd_dev->queue);
632 *ubd_dev = ((struct ubd) DEFAULT_UBD);
633}
634
5f75a4f8 635static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 636 struct gendisk **disk_out)
1da177e4
LT
637{
638 struct gendisk *disk;
1da177e4
LT
639
640 disk = alloc_disk(1 << UBD_SHIFT);
641 if(disk == NULL)
dc764e50 642 return -ENOMEM;
1da177e4
LT
643
644 disk->major = major;
645 disk->first_minor = unit << UBD_SHIFT;
646 disk->fops = &ubd_blops;
647 set_capacity(disk, size / 512);
ce7b0f46 648 if(major == MAJOR_NR)
1da177e4 649 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 650 else
1da177e4 651 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
652
653 /* sysfs register (not for ide fake devices) */
654 if (major == MAJOR_NR) {
7d314e34
PBG
655 ubd_devs[unit].pdev.id = unit;
656 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251
JD
657 ubd_devs[unit].pdev.dev.release = ubd_device_release;
658 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
7d314e34
PBG
659 platform_device_register(&ubd_devs[unit].pdev);
660 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
661 }
662
7d314e34 663 disk->private_data = &ubd_devs[unit];
62f96cb0 664 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
665 add_disk(disk);
666
667 *disk_out = disk;
668 return 0;
669}
670
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so any expression may be passed, and the mask is built
 * from an unsigned constant rather than by shifting -1.  The result has
 * type unsigned long long.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
672
f28169d2 673static int ubd_add(int n, char **error_out)
1da177e4 674{
7d314e34 675 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 676 int err = 0;
1da177e4 677
7d314e34 678 if(ubd_dev->file == NULL)
ec7cf783 679 goto out;
1da177e4 680
7d314e34 681 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
682 if(err < 0){
683 *error_out = "Couldn't determine size of device's file";
80c13749 684 goto out;
f28169d2 685 }
1da177e4 686
7d314e34 687 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 688
a0044bdf 689 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 690 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 691
62f96cb0
JD
692 err = -ENOMEM;
693 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
694 if (ubd_dev->queue == NULL) {
695 *error_out = "Failed to initialize device queue";
80c13749 696 goto out;
62f96cb0
JD
697 }
698 ubd_dev->queue->queuedata = ubd_dev;
699
a0044bdf 700 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
62f96cb0
JD
701 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
702 if(err){
703 *error_out = "Failed to register device";
704 goto out_cleanup;
705 }
6c29256c 706
1da177e4 707 if(fake_major != MAJOR_NR)
5f75a4f8 708 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 709 &fake_gendisk[n]);
1da177e4 710
83380cc1
JD
711 /*
712 * Perhaps this should also be under the "if (fake_major)" above
713 * using the fake_disk->disk_name
714 */
1da177e4
LT
715 if (fake_ide)
716 make_ide_entries(ubd_gendisk[n]->disk_name);
717
ec7cf783 718 err = 0;
ec7cf783
JD
719out:
720 return err;
62f96cb0
JD
721
722out_cleanup:
723 blk_cleanup_queue(ubd_dev->queue);
724 goto out;
1da177e4
LT
725}
726
f28169d2 727static int ubd_config(char *str, char **error_out)
1da177e4 728{
e7f6552f 729 int n, ret;
1da177e4 730
f28169d2
JD
731 /* This string is possibly broken up and stored, so it's only
732 * freed if ubd_setup_common fails, or if only general options
733 * were set.
734 */
970d6e3a 735 str = kstrdup(str, GFP_KERNEL);
e7f6552f 736 if (str == NULL) {
f28169d2
JD
737 *error_out = "Failed to allocate memory";
738 return -ENOMEM;
1da177e4 739 }
f28169d2
JD
740
741 ret = ubd_setup_common(str, &n, error_out);
742 if (ret)
e7f6552f 743 goto err_free;
f28169d2 744
e7f6552f
PBG
745 if (n == -1) {
746 ret = 0;
d8d7c28e 747 goto err_free;
1da177e4 748 }
1da177e4 749
dc764e50 750 mutex_lock(&ubd_lock);
f28169d2 751 ret = ubd_add(n, error_out);
e7f6552f 752 if (ret)
7d314e34 753 ubd_devs[n].file = NULL;
dc764e50 754 mutex_unlock(&ubd_lock);
1da177e4 755
e7f6552f 756out:
dc764e50 757 return ret;
e7f6552f
PBG
758
759err_free:
760 kfree(str);
761 goto out;
1da177e4
LT
762}
763
764static int ubd_get_config(char *name, char *str, int size, char **error_out)
765{
7d314e34 766 struct ubd *ubd_dev;
1da177e4
LT
767 int n, len = 0;
768
769 n = parse_unit(&name);
770 if((n >= MAX_DEV) || (n < 0)){
771 *error_out = "ubd_get_config : device number out of range";
dc764e50 772 return -1;
1da177e4
LT
773 }
774
7d314e34 775 ubd_dev = &ubd_devs[n];
d7fb2c38 776 mutex_lock(&ubd_lock);
1da177e4 777
7d314e34 778 if(ubd_dev->file == NULL){
1da177e4
LT
779 CONFIG_CHUNK(str, size, len, "", 1);
780 goto out;
781 }
782
7d314e34 783 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 784
7d314e34 785 if(ubd_dev->cow.file != NULL){
1da177e4 786 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 787 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
788 }
789 else CONFIG_CHUNK(str, size, len, "", 1);
790
791 out:
d7fb2c38 792 mutex_unlock(&ubd_lock);
dc764e50 793 return len;
1da177e4
LT
794}
795
29d56cfe
JD
796static int ubd_id(char **str, int *start_out, int *end_out)
797{
dc764e50 798 int n;
29d56cfe
JD
799
800 n = parse_unit(str);
dc764e50
JD
801 *start_out = 0;
802 *end_out = MAX_DEV - 1;
803 return n;
29d56cfe
JD
804}
805
f28169d2 806static int ubd_remove(int n, char **error_out)
1da177e4 807{
2e3f5251 808 struct gendisk *disk = ubd_gendisk[n];
7d314e34 809 struct ubd *ubd_dev;
29d56cfe 810 int err = -ENODEV;
1da177e4 811
d7fb2c38 812 mutex_lock(&ubd_lock);
1da177e4 813
7d314e34 814 ubd_dev = &ubd_devs[n];
1da177e4 815
7d314e34 816 if(ubd_dev->file == NULL)
29d56cfe 817 goto out;
1da177e4 818
29d56cfe
JD
819 /* you cannot remove a open disk */
820 err = -EBUSY;
7d314e34 821 if(ubd_dev->count > 0)
1da177e4
LT
822 goto out;
823
dc764e50 824 ubd_gendisk[n] = NULL;
b47d2deb
JD
825 if(disk != NULL){
826 del_gendisk(disk);
827 put_disk(disk);
828 }
1da177e4
LT
829
830 if(fake_gendisk[n] != NULL){
831 del_gendisk(fake_gendisk[n]);
832 put_disk(fake_gendisk[n]);
833 fake_gendisk[n] = NULL;
834 }
835
1da177e4 836 err = 0;
2e3f5251 837 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 838out:
d7fb2c38 839 mutex_unlock(&ubd_lock);
29d56cfe 840 return err;
1da177e4
LT
841}
842
f28169d2 843/* All these are called by mconsole in process context and without
b8831a1d 844 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 845 */
1da177e4 846static struct mc_device ubd_mc = {
84f48d4f 847 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
848 .name = "ubd",
849 .config = ubd_config,
dc764e50 850 .get_config = ubd_get_config,
29d56cfe 851 .id = ubd_id,
1da177e4
LT
852 .remove = ubd_remove,
853};
854
d8d7c28e 855static int __init ubd_mc_init(void)
1da177e4
LT
856{
857 mconsole_register_dev(&ubd_mc);
858 return 0;
859}
860
861__initcall(ubd_mc_init);
862
d8d7c28e
PBG
863static int __init ubd0_init(void)
864{
865 struct ubd *ubd_dev = &ubd_devs[0];
866
b8831a1d 867 mutex_lock(&ubd_lock);
d8d7c28e
PBG
868 if(ubd_dev->file == NULL)
869 ubd_dev->file = "root_fs";
b8831a1d
JD
870 mutex_unlock(&ubd_lock);
871
dc764e50 872 return 0;
d8d7c28e
PBG
873}
874
875__initcall(ubd0_init);
876
b8831a1d 877/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
878static struct platform_driver ubd_driver = {
879 .driver = {
880 .name = DRIVER_NAME,
881 },
1da177e4
LT
882};
883
d8d7c28e 884static int __init ubd_init(void)
1da177e4 885{
f28169d2
JD
886 char *error;
887 int i, err;
1da177e4 888
1da177e4
LT
889 if (register_blkdev(MAJOR_NR, "ubd"))
890 return -1;
891
1da177e4
LT
892 if (fake_major != MAJOR_NR) {
893 char name[sizeof("ubd_nnn\0")];
894
895 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
896 if (register_blkdev(fake_major, "ubd"))
897 return -1;
898 }
3ae5eaec 899 platform_driver_register(&ubd_driver);
dc764e50 900 mutex_lock(&ubd_lock);
f28169d2
JD
901 for (i = 0; i < MAX_DEV; i++){
902 err = ubd_add(i, &error);
903 if(err)
904 printk(KERN_ERR "Failed to initialize ubd device %d :"
905 "%s\n", i, error);
906 }
dc764e50 907 mutex_unlock(&ubd_lock);
1da177e4
LT
908 return 0;
909}
910
911late_initcall(ubd_init);
912
d8d7c28e 913static int __init ubd_driver_init(void){
91acb21f
JD
914 unsigned long stack;
915 int err;
916
917 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
918 if(global_openflags.s){
919 printk(KERN_INFO "ubd: Synchronous mode\n");
920 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
921 * enough. So use anyway the io thread. */
922 }
923 stack = alloc_stack(0, 0);
6c29256c 924 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
925 &thread_fd);
926 if(io_pid < 0){
6c29256c 927 printk(KERN_ERR
91acb21f
JD
928 "ubd : Failed to start I/O thread (errno = %d) - "
929 "falling back to synchronous I/O\n", -io_pid);
930 io_pid = -1;
dc764e50 931 return 0;
91acb21f 932 }
6c29256c 933 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
7d314e34 934 IRQF_DISABLED, "ubd", ubd_devs);
91acb21f
JD
935 if(err != 0)
936 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 937 return 0;
91acb21f
JD
938}
939
940device_initcall(ubd_driver_init);
941
1da177e4
LT
942static int ubd_open(struct inode *inode, struct file *filp)
943{
944 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 945 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
946 int err = 0;
947
7d314e34
PBG
948 if(ubd_dev->count == 0){
949 err = ubd_open_dev(ubd_dev);
1da177e4
LT
950 if(err){
951 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 952 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
953 goto out;
954 }
955 }
7d314e34
PBG
956 ubd_dev->count++;
957 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
958
959 /* This should no more be needed. And it didn't work anyway to exclude
960 * read-write remounting of filesystems.*/
7d314e34 961 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 962 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 963 err = -EROFS;
2c49be99 964 }*/
1da177e4 965 out:
dc764e50 966 return err;
1da177e4
LT
967}
968
969static int ubd_release(struct inode * inode, struct file * file)
970{
971 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 972 struct ubd *ubd_dev = disk->private_data;
1da177e4 973
7d314e34 974 if(--ubd_dev->count == 0)
5f75a4f8 975 ubd_close_dev(ubd_dev);
dc764e50 976 return 0;
1da177e4
LT
977}
978
91acb21f
JD
979static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
980 __u64 *cow_offset, unsigned long *bitmap,
981 __u64 bitmap_offset, unsigned long *bitmap_words,
982 __u64 bitmap_len)
1da177e4 983{
91acb21f
JD
984 __u64 sector = io_offset >> 9;
985 int i, update_bitmap = 0;
986
987 for(i = 0; i < length >> 9; i++){
988 if(cow_mask != NULL)
989 ubd_set_bit(i, (unsigned char *) cow_mask);
990 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
991 continue;
1da177e4 992
91acb21f
JD
993 update_bitmap = 1;
994 ubd_set_bit(sector + i, (unsigned char *) bitmap);
995 }
996
997 if(!update_bitmap)
998 return;
1da177e4 999
91acb21f 1000 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1001
91acb21f
JD
1002 /* This takes care of the case where we're exactly at the end of the
1003 * device, and *cow_offset + 1 is off the end. So, just back it up
1004 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1005 * for the original diagnosis.
1006 */
1007 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
1008 sizeof(unsigned long) - 1))
1009 (*cow_offset)--;
1010
1011 bitmap_words[0] = bitmap[*cow_offset];
1012 bitmap_words[1] = bitmap[*cow_offset + 1];
1013
1014 *cow_offset *= sizeof(unsigned long);
1015 *cow_offset += bitmap_offset;
1016}
1017
1018static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1019 __u64 bitmap_offset, __u64 bitmap_len)
1020{
1021 __u64 sector = req->offset >> 9;
1022 int i;
1023
1024 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1025 panic("Operation too long");
1026
1027 if(req->op == UBD_READ) {
1028 for(i = 0; i < req->length >> 9; i++){
1029 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1030 ubd_set_bit(i, (unsigned char *)
91acb21f 1031 &req->sector_mask);
dc764e50 1032 }
91acb21f
JD
1033 }
1034 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1035 &req->cow_offset, bitmap, bitmap_offset,
1036 req->bitmap_words, bitmap_len);
1da177e4
LT
1037}
1038
62f96cb0 1039/* Called with dev->lock held */
a0044bdf
JD
1040static void prepare_request(struct request *req, struct io_thread_req *io_req,
1041 unsigned long long offset, int page_offset,
1042 int len, struct page *page)
1da177e4
LT
1043{
1044 struct gendisk *disk = req->rq_disk;
7d314e34 1045 struct ubd *ubd_dev = disk->private_data;
91acb21f 1046
62f96cb0 1047 io_req->req = req;
a0044bdf
JD
1048 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1049 ubd_dev->fd;
7d314e34 1050 io_req->fds[1] = ubd_dev->fd;
91acb21f 1051 io_req->cow_offset = -1;
1da177e4
LT
1052 io_req->offset = offset;
1053 io_req->length = len;
1054 io_req->error = 0;
91acb21f
JD
1055 io_req->sector_mask = 0;
1056
1057 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1058 io_req->offsets[0] = 0;
7d314e34 1059 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1060 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1061 io_req->sectorsize = 1 << 9;
1062
7d314e34 1063 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1064 cowify_req(io_req, ubd_dev->cow.bitmap,
1065 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1066
1da177e4
LT
1067}
1068
62f96cb0 1069/* Called with dev->lock held */
165125e1 1070static void do_ubd_request(struct request_queue *q)
1da177e4 1071{
2adcec21 1072 struct io_thread_req *io_req;
1da177e4 1073 struct request *req;
0a6d3a2a 1074 int n, last_sectors;
a0044bdf
JD
1075
1076 while(1){
2a9529a0 1077 struct ubd *dev = q->queuedata;
a0044bdf
JD
1078 if(dev->end_sg == 0){
1079 struct request *req = elv_next_request(q);
1080 if(req == NULL)
1081 return;
1082
1083 dev->request = req;
1084 blkdev_dequeue_request(req);
1085 dev->start_sg = 0;
1086 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1087 }
1088
1089 req = dev->request;
0a6d3a2a 1090 last_sectors = 0;
a0044bdf
JD
1091 while(dev->start_sg < dev->end_sg){
1092 struct scatterlist *sg = &dev->sg[dev->start_sg];
1093
0a6d3a2a 1094 req->sector += last_sectors;
2adcec21 1095 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1096 GFP_ATOMIC);
2adcec21
JD
1097 if(io_req == NULL){
1098 if(list_empty(&dev->restart))
1099 list_add(&dev->restart, &restart);
1100 return;
1101 }
1102 prepare_request(req, io_req,
a0044bdf 1103 (unsigned long long) req->sector << 9,
45711f1a 1104 sg->offset, sg->length, sg_page(sg));
a0044bdf 1105
0a6d3a2a 1106 last_sectors = sg->length >> 9;
a6ea4cce
JD
1107 n = os_write_file(thread_fd, &io_req,
1108 sizeof(struct io_thread_req *));
2adcec21 1109 if(n != sizeof(struct io_thread_req *)){
a0044bdf
JD
1110 if(n != -EAGAIN)
1111 printk("write to io thread failed, "
1112 "errno = %d\n", -n);
1113 else if(list_empty(&dev->restart))
1114 list_add(&dev->restart, &restart);
12429bf9 1115 kfree(io_req);
a0044bdf
JD
1116 return;
1117 }
1118
a0044bdf 1119 dev->start_sg++;
1da177e4 1120 }
a0044bdf
JD
1121 dev->end_sg = 0;
1122 dev->request = NULL;
1da177e4
LT
1123 }
1124}
1125
a885c8c4
CH
1126static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1127{
7d314e34 1128 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1129
1130 geo->heads = 128;
1131 geo->sectors = 32;
7d314e34 1132 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1133 return 0;
1134}
1135
1da177e4
LT
1136static int ubd_ioctl(struct inode * inode, struct file * file,
1137 unsigned int cmd, unsigned long arg)
1138{
7d314e34 1139 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1da177e4
LT
1140 struct hd_driveid ubd_id = {
1141 .cyls = 0,
1142 .heads = 128,
1143 .sectors = 32,
1144 };
1145
1146 switch (cmd) {
1da177e4 1147 struct cdrom_volctrl volume;
1da177e4 1148 case HDIO_GET_IDENTITY:
7d314e34 1149 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1da177e4
LT
1150 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1151 sizeof(ubd_id)))
dc764e50
JD
1152 return -EFAULT;
1153 return 0;
b8831a1d 1154
1da177e4
LT
1155 case CDROMVOLREAD:
1156 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1157 return -EFAULT;
1da177e4
LT
1158 volume.channel0 = 255;
1159 volume.channel1 = 255;
1160 volume.channel2 = 255;
1161 volume.channel3 = 255;
1162 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1163 return -EFAULT;
1164 return 0;
1da177e4 1165 }
dc764e50 1166 return -EINVAL;
1da177e4
LT
1167}
1168
4833aff7 1169static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1da177e4
LT
1170{
1171 struct uml_stat buf1, buf2;
1172 int err;
1173
4833aff7
PBG
1174 if(from_cmdline == NULL)
1175 return 0;
1176 if(!strcmp(from_cmdline, from_cow))
1177 return 0;
1da177e4
LT
1178
1179 err = os_stat_file(from_cmdline, &buf1);
1180 if(err < 0){
1181 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
4833aff7 1182 return 0;
1da177e4
LT
1183 }
1184 err = os_stat_file(from_cow, &buf2);
1185 if(err < 0){
1186 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
4833aff7 1187 return 1;
1da177e4
LT
1188 }
1189 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
4833aff7 1190 return 0;
1da177e4
LT
1191
1192 printk("Backing file mismatch - \"%s\" requested,\n"
1193 "\"%s\" specified in COW header of \"%s\"\n",
1194 from_cmdline, from_cow, cow);
4833aff7 1195 return 1;
1da177e4
LT
1196}
1197
1198static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1199{
1200 unsigned long modtime;
fe1db50c 1201 unsigned long long actual;
1da177e4
LT
1202 int err;
1203
1204 err = os_file_modtime(file, &modtime);
1205 if(err < 0){
1206 printk("Failed to get modification time of backing file "
1207 "\"%s\", err = %d\n", file, -err);
dc764e50 1208 return err;
1da177e4
LT
1209 }
1210
1211 err = os_file_size(file, &actual);
1212 if(err < 0){
1213 printk("Failed to get size of backing file \"%s\", "
1214 "err = %d\n", file, -err);
dc764e50 1215 return err;
1da177e4
LT
1216 }
1217
dc764e50 1218 if(actual != size){
1da177e4
LT
1219 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1220 * the typecast.*/
1221 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1222 "file\n", (unsigned long long) size, actual);
dc764e50 1223 return -EINVAL;
1da177e4
LT
1224 }
1225 if(modtime != mtime){
1226 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1227 "file\n", mtime, modtime);
dc764e50 1228 return -EINVAL;
1da177e4 1229 }
dc764e50 1230 return 0;
1da177e4
LT
1231}
1232
/*
 * Read @len bytes of COW bitmap from @fd, starting at byte @offset,
 * into @buf.
 *
 * Returns 0 on success or the negative error from os_seek_file() /
 * os_read_file().  A non-negative result from os_read_file() is treated as
 * success whatever its value, so a short read is not reported.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1247
6c29256c 1248int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
1249 char **backing_file_out, int *bitmap_offset_out,
1250 unsigned long *bitmap_len_out, int *data_offset_out,
1251 int *create_cow_out)
1252{
1253 time_t mtime;
1254 unsigned long long size;
1255 __u32 version, align;
1256 char *backing_file;
4833aff7 1257 int fd, err, sectorsize, asked_switch, mode = 0644;
1da177e4
LT
1258
1259 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1260 if (fd < 0) {
1261 if ((fd == -ENOENT) && (create_cow_out != NULL))
1da177e4 1262 *create_cow_out = 1;
dc764e50
JD
1263 if (!openflags->w ||
1264 ((fd != -EROFS) && (fd != -EACCES)))
a374a48f 1265 return fd;
1da177e4
LT
1266 openflags->w = 0;
1267 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1268 if (fd < 0)
1269 return fd;
dc764e50 1270 }
1da177e4 1271
6c29256c
JD
1272 if(shared)
1273 printk("Not locking \"%s\" on the host\n", file);
1274 else {
1275 err = os_lock_file(fd, openflags->w);
1276 if(err < 0){
1277 printk("Failed to lock '%s', err = %d\n", file, -err);
1278 goto out_close;
1279 }
1da177e4
LT
1280 }
1281
d6e05edc 1282 /* Successful return case! */
a374a48f 1283 if(backing_file_out == NULL)
dc764e50 1284 return fd;
1da177e4
LT
1285
1286 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1287 &size, &sectorsize, &align, bitmap_offset_out);
1288 if(err && (*backing_file_out != NULL)){
1289 printk("Failed to read COW header from COW file \"%s\", "
1290 "errno = %d\n", file, -err);
1291 goto out_close;
1292 }
a374a48f 1293 if(err)
dc764e50 1294 return fd;
1da177e4 1295
4833aff7 1296 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1da177e4 1297
4833aff7
PBG
1298 /* Allow switching only if no mismatch. */
1299 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1da177e4
LT
1300 printk("Switching backing file to '%s'\n", *backing_file_out);
1301 err = write_cow_header(file, fd, *backing_file_out,
1302 sectorsize, align, &size);
a374a48f 1303 if (err) {
1da177e4 1304 printk("Switch failed, errno = %d\n", -err);
4833aff7 1305 goto out_close;
1da177e4 1306 }
a374a48f 1307 } else {
1da177e4
LT
1308 *backing_file_out = backing_file;
1309 err = backing_file_mismatch(*backing_file_out, size, mtime);
a374a48f
PBG
1310 if (err)
1311 goto out_close;
1da177e4
LT
1312 }
1313
1314 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1315 bitmap_len_out, data_offset_out);
1316
dc764e50 1317 return fd;
1da177e4
LT
1318 out_close:
1319 os_close_file(fd);
a374a48f 1320 return err;
1da177e4
LT
1321}
1322
1323int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1324 int sectorsize, int alignment, int *bitmap_offset_out,
1325 unsigned long *bitmap_len_out, int *data_offset_out)
1326{
1327 int err, fd;
1328
1329 flags.c = 1;
6c29256c 1330 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1da177e4
LT
1331 if(fd < 0){
1332 err = fd;
1333 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1334 -err);
1335 goto out;
1336 }
1337
1338 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1339 bitmap_offset_out, bitmap_len_out,
1340 data_offset_out);
1341 if(!err)
dc764e50 1342 return fd;
1da177e4
LT
1343 os_close_file(fd);
1344 out:
dc764e50 1345 return err;
1da177e4
LT
1346}
1347
91acb21f 1348static int update_bitmap(struct io_thread_req *req)
1da177e4 1349{
91acb21f 1350 int n;
1da177e4 1351
91acb21f 1352 if(req->cow_offset == -1)
dc764e50 1353 return 0;
1da177e4 1354
91acb21f
JD
1355 n = os_seek_file(req->fds[1], req->cow_offset);
1356 if(n < 0){
1357 printk("do_io - bitmap lseek failed : err = %d\n", -n);
dc764e50 1358 return 1;
91acb21f 1359 }
1da177e4 1360
a6ea4cce
JD
1361 n = os_write_file(req->fds[1], &req->bitmap_words,
1362 sizeof(req->bitmap_words));
91acb21f
JD
1363 if(n != sizeof(req->bitmap_words)){
1364 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1365 req->fds[1]);
dc764e50 1366 return 1;
91acb21f 1367 }
1da177e4 1368
dc764e50 1369 return 0;
91acb21f 1370}
1da177e4 1371
91acb21f
JD
1372void do_io(struct io_thread_req *req)
1373{
1374 char *buf;
1375 unsigned long len;
1376 int n, nsectors, start, end, bit;
1377 int err;
1378 __u64 off;
1379
1380 nsectors = req->length / req->sectorsize;
1381 start = 0;
1382 do {
1383 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1384 end = start;
1385 while((end < nsectors) &&
1386 (ubd_test_bit(end, (unsigned char *)
1387 &req->sector_mask) == bit))
1388 end++;
1389
1390 off = req->offset + req->offsets[bit] +
1391 start * req->sectorsize;
1392 len = (end - start) * req->sectorsize;
1393 buf = &req->buffer[start * req->sectorsize];
1394
1395 err = os_seek_file(req->fds[bit], off);
1396 if(err < 0){
1397 printk("do_io - lseek failed : err = %d\n", -err);
1398 req->error = 1;
1399 return;
1400 }
1401 if(req->op == UBD_READ){
1402 n = 0;
1403 do {
1404 buf = &buf[n];
1405 len -= n;
a6ea4cce 1406 n = os_read_file(req->fds[bit], buf, len);
91acb21f
JD
1407 if (n < 0) {
1408 printk("do_io - read failed, err = %d "
1409 "fd = %d\n", -n, req->fds[bit]);
1410 req->error = 1;
1411 return;
1412 }
1413 } while((n < len) && (n != 0));
1414 if (n < len) memset(&buf[n], 0, len - n);
1415 } else {
a6ea4cce 1416 n = os_write_file(req->fds[bit], buf, len);
91acb21f
JD
1417 if(n != len){
1418 printk("do_io - write failed err = %d "
1419 "fd = %d\n", -n, req->fds[bit]);
1420 req->error = 1;
1421 return;
1422 }
1423 }
1424
1425 start = end;
1426 } while(start < nsectors);
1da177e4 1427
91acb21f 1428 req->error = update_bitmap(req);
1da177e4 1429}
91acb21f
JD
1430
/*
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
91acb21f
JD
1438
1439int io_thread(void *arg)
1440{
2adcec21 1441 struct io_thread_req *req;
91acb21f
JD
1442 int n;
1443
1444 ignore_sigwinch_sig();
1445 while(1){
a6ea4cce 1446 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1447 sizeof(struct io_thread_req *));
1448 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1449 if(n < 0)
1450 printk("io_thread - read failed, fd = %d, "
1451 "err = %d\n", kernel_fd, -n);
1452 else {
1453 printk("io_thread - short read, fd = %d, "
1454 "length = %d\n", kernel_fd, n);
1455 }
1456 continue;
1457 }
1458 io_count++;
2adcec21 1459 do_io(req);
a6ea4cce 1460 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1461 sizeof(struct io_thread_req *));
1462 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1463 printk("io_thread - write failed, fd = %d, err = %d\n",
1464 kernel_fd, -n);
1465 }
91acb21f 1466
1b57e9c2
JD
1467 return 0;
1468}