X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=859edbf8c9b0a72679f86ff035c23e5e2949cb6c;hb=43a705076e51c5af21ec4260a35699775ea298f5;hp=5f154ef1e4befeeef26358d9ae37e424c62cee47;hpb=eae6fa9b0c3e2cb49cc157e906dd0ac52cfd7ca5;p=mirror_ubuntu-artful-kernel.git diff --git a/drivers/md/md.c b/drivers/md/md.c index 5f154ef1e4be..859edbf8c9b0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -213,12 +213,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) return 0; } rcu_read_lock(); - if (mddev->suspended) { + if (mddev->suspended || mddev->barrier) { DEFINE_WAIT(__wait); for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) + if (!mddev->suspended && !mddev->barrier) break; rcu_read_unlock(); schedule(); @@ -260,10 +260,110 @@ static void mddev_resume(mddev_t *mddev) int mddev_congested(mddev_t *mddev, int bits) { + if (mddev->barrier) + return 1; return mddev->suspended; } EXPORT_SYMBOL(mddev_congested); +/* + * Generic barrier handling for md + */ + +#define POST_REQUEST_BARRIER ((void*)1) + +static void md_end_barrier(struct bio *bio, int err) +{ + mdk_rdev_t *rdev = bio->bi_private; + mddev_t *mddev = rdev->mddev; + if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) + set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); + + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + if (mddev->barrier == POST_REQUEST_BARRIER) { + /* This was a post-request barrier */ + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } else + /* The pre-request barrier has finished */ + schedule_work(&mddev->barrier_work); + } + bio_put(bio); +} + +static void submit_barriers(mddev_t *mddev) +{ + mdk_rdev_t *rdev; + + rcu_read_lock(); + list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Faulty, &rdev->flags)) { + /* Take two references, one is dropped + * when request finishes, one after + * we reclaim rcu_read_lock + */ + struct bio *bi; + atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + bi = bio_alloc(GFP_KERNEL, 0); + bi->bi_end_io = md_end_barrier; + bi->bi_private = rdev; + bi->bi_bdev = rdev->bdev; + atomic_inc(&mddev->flush_pending); + submit_bio(WRITE_BARRIER, bi); + rcu_read_lock(); + rdev_dec_pending(rdev, mddev); + } + rcu_read_unlock(); +} + +static void md_submit_barrier(struct work_struct *ws) +{ + mddev_t *mddev = container_of(ws, mddev_t, barrier_work); + struct bio *bio = mddev->barrier; + + atomic_set(&mddev->flush_pending, 1); + + if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) + bio_endio(bio, -EOPNOTSUPP); + else if (bio->bi_size == 0) + /* an empty barrier - all done */ + bio_endio(bio, 0); + else { + bio->bi_rw &= ~(1<pers->make_request(mddev->queue, bio)) + generic_make_request(bio); + mddev->barrier = POST_REQUEST_BARRIER; + submit_barriers(mddev); + } + if (atomic_dec_and_test(&mddev->flush_pending)) { + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } +} + +void md_barrier_request(mddev_t *mddev, struct bio *bio) +{ + spin_lock_irq(&mddev->write_lock); + wait_event_lock_irq(mddev->sb_wait, + !mddev->barrier, + mddev->write_lock, /*nothing*/); + mddev->barrier = bio; + spin_unlock_irq(&mddev->write_lock); + + atomic_set(&mddev->flush_pending, 1); + INIT_WORK(&mddev->barrier_work, md_submit_barrier); + + submit_barriers(mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) + schedule_work(&mddev->barrier_work); +} +EXPORT_SYMBOL(md_barrier_request); static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -363,6 +463,7 @@ static mddev_t * mddev_find(dev_t unit) mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); + mutex_init(&new->bitmap_info.mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); @@ -370,6 +471,7 @@ static mddev_t * mddev_find(dev_t unit) atomic_set(&new->openers, 0); atomic_set(&new->active_io, 0); spin_lock_init(&new->write_lock); + atomic_set(&new->flush_pending, 0); init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->recovery_wait); new->reshape_position = MaxSector; @@ -748,7 +850,7 @@ struct super_type { */ int md_check_no_bitmap(mddev_t *mddev) { - if (!mddev->bitmap_file && !mddev->bitmap_offset) + if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset) return 0; printk(KERN_ERR "%s: bitmaps are not supported for %s\n", mdname(mddev), mddev->pers->name); @@ -876,8 +978,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->raid_disks = sb->raid_disks; mddev->dev_sectors = sb->size * 2; mddev->events = ev1; - mddev->bitmap_offset = 0; - mddev->default_bitmap_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.offset = 0; + mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; if (mddev->minor_version >= 91) { mddev->reshape_position = sb->reshape_position; @@ -911,8 +1013,9 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->max_disks = MD_SB_DISKS; if (sb->state & (1<bitmap_file == NULL) - mddev->bitmap_offset = mddev->default_bitmap_offset; + mddev->bitmap_info.file == NULL) + mddev->bitmap_info.offset = + mddev->bitmap_info.default_offset; } else if (mddev->pers == NULL) { /* Insist on good event counter while assembling */ @@ -1029,7 +1132,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; - if (mddev->bitmap && mddev->bitmap_file == NULL) + if (mddev->bitmap && mddev->bitmap_info.file == NULL) sb->state |= (1<disks[0].state = (1<mddev->dev_sectors) return 0; /* component must fit device */ - if (rdev->mddev->bitmap_offset) + if (rdev->mddev->bitmap_info.offset) return 0; /* can't move bitmap */ rdev->sb_start = calc_dev_sboffset(rdev->bdev); if (!num_sectors || num_sectors > rdev->sb_start) @@ -1286,8 +1389,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->raid_disks = le32_to_cpu(sb->raid_disks); mddev->dev_sectors = le64_to_cpu(sb->size); mddev->events = ev1; - mddev->bitmap_offset = 0; - mddev->default_bitmap_offset = 1024 >> 9; + mddev->bitmap_info.offset = 0; + mddev->bitmap_info.default_offset = 1024 >> 9; mddev->recovery_cp = le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); @@ -1295,8 +1398,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->max_disks = (4096-256)/2; if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && - mddev->bitmap_file == NULL ) - mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); + mddev->bitmap_info.file == NULL ) + mddev->bitmap_info.offset = + (__s32)le32_to_cpu(sb->bitmap_offset); if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { mddev->reshape_position = le64_to_cpu(sb->reshape_position); @@ -1390,8 +1494,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); - if (mddev->bitmap && mddev->bitmap_file == NULL) { - sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); + if (mddev->bitmap && mddev->bitmap_info.file == NULL) { + sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); } @@ -1458,7 +1562,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) max_sectors -= rdev->data_offset; if (!num_sectors || num_sectors > max_sectors) num_sectors = max_sectors; - } else if (rdev->mddev->bitmap_offset) { + } else if (rdev->mddev->bitmap_info.offset) { /* minor version 0 with bitmap we can't move */ return 0; } else { @@ -2659,6 +2763,47 @@ static void analyze_sbs(mddev_t * mddev) } } +/* Read a fixed-point number. + * Numbers in sysfs attributes should be in "standard" units where + * possible, so time should be in seconds. + * However we internally use a a much smaller unit such as + * milliseconds or jiffies. + * This function takes a decimal number with a possible fractional + * component, and produces an integer which is the result of + * multiplying that number by 10^'scale'. + * all without any floating-point arithmetic. + */ +int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) +{ + unsigned long result = 0; + long decimals = -1; + while (isdigit(*cp) || (*cp == '.' && decimals < 0)) { + if (*cp == '.') + decimals = 0; + else if (decimals < scale) { + unsigned int value; + value = *cp - '0'; + result = result * 10 + value; + if (decimals >= 0) + decimals++; + } + cp++; + } + if (*cp == '\n') + cp++; + if (*cp) + return -EINVAL; + if (decimals < 0) + decimals = 0; + while (decimals < scale) { + result *= 10; + decimals ++; + } + *res = result; + return 0; +} + + static void md_safemode_timeout(unsigned long data); static ssize_t @@ -2670,31 +2815,10 @@ safe_delay_show(mddev_t *mddev, char *page) static ssize_t safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) { - int scale=1; - int dot=0; - int i; unsigned long msec; - char buf[30]; - /* remove a period, and count digits after it */ - if (len >= sizeof(buf)) + if (strict_strtoul_scaled(cbuf, &msec, 3) < 0) return -EINVAL; - strlcpy(buf, cbuf, sizeof(buf)); - for (i=0; isafemode_delay = 0; else { @@ -3894,6 +4018,7 @@ static void mddev_delayed_delete(struct work_struct *ws) mddev->sysfs_action = NULL; mddev->private = NULL; } + sysfs_remove_group(&mddev->kobj, &md_bitmap_group); kobject_del(&mddev->kobj); kobject_put(&mddev->kobj); } @@ -3985,6 +4110,8 @@ static int md_alloc(dev_t dev, char *name) disk->disk_name); error = 0; } + if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) + printk(KERN_DEBUG "pointless warning\n"); abort: mutex_unlock(&disks_mutex); if (!error) { @@ -4310,7 +4437,7 @@ static int deny_bitmap_write_access(struct file * file) return 0; } -static void restore_bitmap_write_access(struct file *file) +void restore_bitmap_write_access(struct file *file) { struct inode *inode = file->f_mapping->host; @@ -4405,12 +4532,12 @@ out: printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); bitmap_destroy(mddev); - if (mddev->bitmap_file) { - restore_bitmap_write_access(mddev->bitmap_file); - fput(mddev->bitmap_file); - mddev->bitmap_file = NULL; + if (mddev->bitmap_info.file) { + restore_bitmap_write_access(mddev->bitmap_info.file); + fput(mddev->bitmap_info.file); + mddev->bitmap_info.file = NULL; } - mddev->bitmap_offset = 0; + mddev->bitmap_info.offset = 0; /* make sure all md_delayed_delete calls have finished */ flush_scheduled_work(); @@ -4451,6 +4578,11 @@ out: mddev->degraded = 0; mddev->barriers_work = 0; mddev->safemode = 0; + mddev->bitmap_info.offset = 0; + mddev->bitmap_info.default_offset = 0; + mddev->bitmap_info.chunksize = 0; + mddev->bitmap_info.daemon_sleep = 0; + mddev->bitmap_info.max_write_behind = 0; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); if (mddev->hold_active == UNTIL_STOP) mddev->hold_active = 0; @@ -4636,7 +4768,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.state = 0; if (mddev->in_sync) info.state = (1<bitmap && mddev->bitmap_offset) + if (mddev->bitmap && mddev->bitmap_info.offset) info.state = (1<= 0) { if (mddev->bitmap) return -EEXIST; /* cannot add when bitmap is present */ - mddev->bitmap_file = fget(fd); + mddev->bitmap_info.file = fget(fd); - if (mddev->bitmap_file == NULL) { + if (mddev->bitmap_info.file == NULL) { printk(KERN_ERR "%s: error: failed to get bitmap file\n", mdname(mddev)); return -EBADF; } - err = deny_bitmap_write_access(mddev->bitmap_file); + err = deny_bitmap_write_access(mddev->bitmap_info.file); if (err) { printk(KERN_ERR "%s: error: bitmap file is already in use\n", mdname(mddev)); - fput(mddev->bitmap_file); - mddev->bitmap_file = NULL; + fput(mddev->bitmap_info.file); + mddev->bitmap_info.file = NULL; return err; } - mddev->bitmap_offset = 0; /* file overrides offset */ + mddev->bitmap_info.offset = 0; /* file overrides offset */ } else if (mddev->bitmap == NULL) return -ENOENT; /* cannot remove what isn't there */ err = 0; @@ -5025,11 +5157,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd) mddev->pers->quiesce(mddev, 0); } if (fd < 0) { - if (mddev->bitmap_file) { - restore_bitmap_write_access(mddev->bitmap_file); - fput(mddev->bitmap_file); + if (mddev->bitmap_info.file) { + restore_bitmap_write_access(mddev->bitmap_info.file); + fput(mddev->bitmap_info.file); } - mddev->bitmap_file = NULL; + mddev->bitmap_info.file = NULL; } return err; @@ -5096,8 +5228,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->flags = 0; set_bit(MD_CHANGE_DEVS, &mddev->flags); - mddev->default_bitmap_offset = MD_SB_BYTES >> 9; - mddev->bitmap_offset = 0; + mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.offset = 0; mddev->reshape_position = MaxSector; @@ -5197,7 +5329,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) int state = 0; /* calculate expected state,ignoring low bits */ - if (mddev->bitmap && mddev->bitmap_offset) + if (mddev->bitmap && mddev->bitmap_info.offset) state |= (1 << MD_SB_BITMAP_PRESENT); if (mddev->major_version != info->major_version || @@ -5256,9 +5388,10 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) /* add the bitmap */ if (mddev->bitmap) return -EEXIST; - if (mddev->default_bitmap_offset == 0) + if (mddev->bitmap_info.default_offset == 0) return -EINVAL; - mddev->bitmap_offset = mddev->default_bitmap_offset; + mddev->bitmap_info.offset = + mddev->bitmap_info.default_offset; mddev->pers->quiesce(mddev, 1); rv = bitmap_create(mddev); if (rv) @@ -5273,7 +5406,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); mddev->pers->quiesce(mddev, 0); - mddev->bitmap_offset = 0; + mddev->bitmap_info.offset = 0; } } md_update_sb(mddev, 1); @@ -5982,14 +6115,14 @@ static int md_seq_show(struct seq_file *seq, void *v) unsigned long chunk_kb; unsigned long flags; spin_lock_irqsave(&bitmap->lock, flags); - chunk_kb = bitmap->chunksize >> 10; + chunk_kb = mddev->bitmap_info.chunksize >> 10; seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " "%lu%s chunk", bitmap->pages - bitmap->missing_pages, bitmap->pages, (bitmap->pages - bitmap->missing_pages) << (PAGE_SHIFT - 10), - chunk_kb ? chunk_kb : bitmap->chunksize, + chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize, chunk_kb ? "KB" : "B"); if (bitmap->file) { seq_printf(seq, ", file: "); @@ -6380,6 +6513,7 @@ void md_do_sync(mddev_t *mddev) desc, mdname(mddev)); mddev->curr_resync = j; } + mddev->curr_resync_completed = mddev->curr_resync; while (j < max_sectors) { sector_t sectors; @@ -6523,11 +6657,16 @@ void md_do_sync(mddev_t *mddev) set_bit(MD_CHANGE_DEVS, &mddev->flags); skip: + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + /* We completed so min/max setting can be forgotten if used. */ + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) + mddev->resync_min = 0; + mddev->resync_max = MaxSector; + } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) + mddev->resync_min = mddev->curr_resync_completed; mddev->curr_resync = 0; - mddev->curr_resync_completed = 0; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - /* We completed so max setting can be forgotten. */ - mddev->resync_max = MaxSector; + mddev->curr_resync_completed = 0; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); wake_up(&resync_wait); set_bit(MD_RECOVERY_DONE, &mddev->recovery); @@ -6625,7 +6764,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) - bitmap_daemon_work(mddev->bitmap); + bitmap_daemon_work(mddev); if (mddev->ro) return;