]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - zfs/module/zpios/pios.c
UBUNTU: SAUCE: Update zfs to e02aaf17f15ad274fa1f24c9c826f1477911ea3f
[mirror_ubuntu-zesty-kernel.git] / zfs / module / zpios / pios.c
CommitLineData
7bdf406d
TG
1/*
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
4 * against ZFS while still being flexibly controled from user space.
5 *
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
9 * LLNL-CODE-403049
10 *
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
16 *
17 * This file is part of ZFS on Linux.
18 * For details, see <http://zfsonlinux.org/>.
19 *
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
24 *
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 * for more details.
29 *
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
51d97d8f
TG
32 *
33 * Copyright (c) 2015, Intel Corporation.
7bdf406d
TG
34 */
35
36#include <sys/zfs_context.h>
37#include <sys/dmu.h>
51d97d8f 38#include <sys/spa.h>
7bdf406d
TG
39#include <sys/txg.h>
40#include <sys/dsl_destroy.h>
41#include <linux/miscdevice.h>
42#include "zpios-internal.h"
43
44
/* Ownership tag passed to dmu_objset_own()/dmu_objset_disown() pairs */
static char *zpios_tag = "zpios_tag";
46
47static int
48zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
49{
50 /*
51 * This is stack heavy but it should be OK since we are only
52 * making the upcall between tests when the stack is shallow.
53 */
54 char id[16], chunk_size[16], region_size[16], thread_count[16];
55 char region_count[16], offset[16], region_noise[16], chunk_noise[16];
56 char thread_delay[16], flags[16], result[8];
57 char *argv[16], *envp[4];
58
59 if ((path == NULL) || (strlen(path) == 0))
60 return (-ENOENT);
61
62 snprintf(id, 15, "%d", run_args->id);
63 snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
64 snprintf(region_size, 15, "%lu", (long unsigned) run_args->region_size);
65 snprintf(thread_count, 15, "%u", run_args->thread_count);
66 snprintf(region_count, 15, "%u", run_args->region_count);
67 snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
68 snprintf(region_noise, 15, "%u", run_args->region_noise);
69 snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
70 snprintf(thread_delay, 15, "%u", run_args->thread_delay);
71 snprintf(flags, 15, "0x%x", run_args->flags);
72 snprintf(result, 7, "%d", rc);
73
74 /* Passing 15 args to registered pre/post upcall */
75 argv[0] = path;
76 argv[1] = phase;
77 argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
78 argv[3] = id;
79 argv[4] = run_args->pool;
80 argv[5] = chunk_size;
81 argv[6] = region_size;
82 argv[7] = thread_count;
83 argv[8] = region_count;
84 argv[9] = offset;
85 argv[10] = region_noise;
86 argv[11] = chunk_noise;
87 argv[12] = thread_delay;
88 argv[13] = flags;
89 argv[14] = result;
90 argv[15] = NULL;
91
92 /* Passing environment for user space upcall */
93 envp[0] = "HOME=/";
94 envp[1] = "TERM=linux";
95 envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
96 envp[3] = NULL;
97
98 return (call_usermodehelper(path, argv, envp, UMH_WAIT_PROC));
99}
100
101static int
102zpios_print(struct file *file, const char *format, ...)
103{
104 zpios_info_t *info = (zpios_info_t *)file->private_data;
105 va_list adx;
106 int rc;
107
108 ASSERT(info);
109 ASSERT(info->info_buffer);
110
111 va_start(adx, format);
112 spin_lock(&info->info_lock);
113
114 /* Don't allow the kernel to start a write in the red zone */
115 if ((int)(info->info_head - info->info_buffer) >
116 (info->info_size - ZPIOS_INFO_BUFFER_REDZONE)) {
117 rc = -EOVERFLOW;
118 } else {
119 rc = vsprintf(info->info_head, format, adx);
120 if (rc >= 0)
121 info->info_head += rc;
122 }
123
124 spin_unlock(&info->info_lock);
125 va_end(adx);
126
127 return (rc);
128}
129
/*
 * Allocate a new DMU object in objset 'os' with the run's configured
 * block size.  Returns the object number on success, or 0 on failure
 * (0 doubles as the failure sentinel for callers).
 */
static uint64_t
zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
{
	struct dmu_tx *tx;
	uint64_t obj = 0ULL;
	uint64_t blksize = run_args->block_size;
	int rc;

	/*
	 * Reject block sizes the pool cannot support: below the SPA
	 * minimum, above the pool's maximum, or not a power of two.
	 */
	if (blksize < SPA_MINBLOCKSIZE ||
	    blksize > spa_maxblocksize(dmu_objset_spa(os)) ||
	    !ISP2(blksize)) {
		zpios_print(run_args->file,
		    "invalid block size for pool: %d\n", (int)blksize);
		return (obj);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
	rc = dmu_tx_assign(tx, TXG_WAIT);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_tx_assign() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (obj);
	}

	obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
	rc = dmu_object_set_blocksize(os, obj, blksize, 0, tx);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_object_set_blocksize to %d failed: %d\n",
		    (int)blksize, rc);
		/*
		 * NOTE(review): the tx is aborted but 'obj' is already
		 * nonzero here, and callers treat any nonzero return as
		 * success — confirm whether this path should return 0.
		 */
		dmu_tx_abort(tx);
		return (obj);
	}

	dmu_tx_commit(tx);

	return (obj);
}
170
171static int
172zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
173{
174 struct dmu_tx *tx;
175 int rc;
176
177 tx = dmu_tx_create(os);
178 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
179 rc = dmu_tx_assign(tx, TXG_WAIT);
180 if (rc) {
181 zpios_print(run_args->file,
182 "dmu_tx_assign() failed: %d\n", rc);
183 dmu_tx_abort(tx);
184 return (rc);
185 }
186
187 rc = dmu_object_free(os, obj, tx);
188 if (rc) {
189 zpios_print(run_args->file,
190 "dmu_object_free() failed: %d\n", rc);
191 dmu_tx_abort(tx);
192 return (rc);
193 }
194
195 dmu_tx_commit(tx);
196
197 return (0);
198}
199
/*
 * Create and own the per-run objset ("<pool>/id_<id>") and initialize
 * every region: per-region lock, backing object, and read/write/init
 * offsets.  In shared-file mode a single object backs all regions and
 * each region starts at offset*i; in file-per-process (DMU_FPP) mode
 * every region gets its own object starting at 'offset'.  Timing for
 * the create phase is recorded in stats.cr_time and the pre/post
 * create upcalls are made around the work.  On failure the objset is
 * destroyed again before returning the error.
 */
static int
zpios_dmu_setup(run_args_t *run_args)
{
	zpios_time_t *t = &(run_args->stats.cr_time);
	objset_t *os;
	char name[32];
	uint64_t obj = 0ULL;
	int i, rc = 0, rc2;

	(void) zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
	t->start = zpios_timespec_now();

	(void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
	rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
	if (rc) {
		zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
		    "failed: %d\n", name, rc);
		goto out;
	}

	rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
	if (rc) {
		zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
		    "failed: %d\n", name, rc);
		goto out_destroy;
	}

	/* Shared-file mode: one object backs every region */
	if (!(run_args->flags & DMU_FPP)) {
		obj = zpios_dmu_object_create(run_args, os);
		if (obj == 0) {
			rc = -EBADF;
			zpios_print(run_args->file, "Error zpios_dmu_"
			    "object_create() failed, %d\n", rc);
			goto out_destroy;
		}
	}

	for (i = 0; i < run_args->region_count; i++) {
		zpios_region_t *region;

		region = &run_args->regions[i];
		mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);

		if (run_args->flags & DMU_FPP) {
			/* File per process */
			region->obj.os = os;
			region->obj.obj = zpios_dmu_object_create(run_args, os);
			ASSERT(region->obj.obj > 0); /* XXX - Handle this */
			region->wr_offset = run_args->offset;
			region->rd_offset = run_args->offset;
			region->init_offset = run_args->offset;
			region->max_offset = run_args->offset +
			    run_args->region_size;
		} else {
			/* Single shared file */
			region->obj.os = os;
			region->obj.obj = obj;
			region->wr_offset = run_args->offset * i;
			region->rd_offset = run_args->offset * i;
			region->init_offset = run_args->offset * i;
			region->max_offset = run_args->offset *
			    i + run_args->region_size;
		}
	}

	run_args->os = os;
out_destroy:
	/* Undo the objset create if anything after it failed */
	if (rc) {
		rc2 = dsl_destroy_head(name);
		if (rc2)
			zpios_print(run_args->file, "Error dsl_destroy_head"
			    "(%s, ...) failed: %d\n", name, rc2);
	}
out:
	t->stop = zpios_timespec_now();
	t->delta = zpios_timespec_sub(t->stop, t->start);
	(void) zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);

	return (rc);
}
280
/*
 * Allocate and populate a run_args_t (plus its trailing region array)
 * from the user-supplied command, then perform the DMU setup.  On
 * success *run_args points at the new run; on failure everything is
 * torn down and *run_args is NULL.  The allocation is zeroed, so the
 * strncpy(..., SIZE - 1) copies below are always NUL-terminated.
 */
static int
zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
{
	run_args_t *ra;
	int rc, size;

	/* regions[] is a trailing variable-length array on run_args_t */
	size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t);

	ra = vmem_zalloc(size, KM_SLEEP);

	*run_args = ra;
	strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
	strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
	strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
	strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
	ra->id = kcmd->cmd_id;
	ra->chunk_size = kcmd->cmd_chunk_size;
	ra->thread_count = kcmd->cmd_thread_count;
	ra->region_count = kcmd->cmd_region_count;
	ra->region_size = kcmd->cmd_region_size;
	ra->offset = kcmd->cmd_offset;
	ra->region_noise = kcmd->cmd_region_noise;
	ra->chunk_noise = kcmd->cmd_chunk_noise;
	ra->thread_delay = kcmd->cmd_thread_delay;
	ra->flags = kcmd->cmd_flags;
	ra->block_size = kcmd->cmd_block_size;
	ra->stats.wr_data = 0;
	ra->stats.wr_chunks = 0;
	ra->stats.rd_data = 0;
	ra->stats.rd_chunks = 0;
	ra->region_next = 0;
	ra->file = file;
	mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);

	(void) zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);

	rc = zpios_dmu_setup(ra);
	if (rc) {
		/* Setup failed: release everything allocated above */
		mutex_destroy(&ra->lock_ctl);
		mutex_destroy(&ra->lock_work);
		vmem_free(ra, size);
		*run_args = NULL;
	}

	return (rc);
}
328
/*
 * Hand out the next chunk of work for a worker thread.  Under the
 * global work lock, scan the regions (starting from region_next) for
 * one that still has room for a chunk in the requested direction
 * (DMU_WRITE or DMU_READ).  On success fills in *obj, *offset and
 * *region, advances the region's offset, and returns 1; returns 0
 * when every region is exhausted.  region_noise randomizes which
 * region the next caller starts from.
 */
static int
zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
    __u32 *chunk_size, zpios_region_t **region, __u32 flags)
{
	int i, j, count = 0;
	unsigned int random_int;

	get_random_bytes(&random_int, sizeof (unsigned int));

	mutex_enter(&run_args->lock_work);
	i = run_args->region_next;

	/*
	 * XXX: I don't much care for this chunk selection mechanism;
	 * there's the potential to burn a lot of time here doing nothing
	 * useful while holding the global lock. This could give some
	 * misleading performance results. I'll fix it later.
	 */
	while (count < run_args->region_count) {
		__u64 *rw_offset;
		zpios_time_t *rw_time;

		j = i % run_args->region_count;
		*region = &(run_args->regions[j]);

		/* Select the offset/timer pair for the requested phase */
		if (flags & DMU_WRITE) {
			rw_offset = &((*region)->wr_offset);
			rw_time = &((*region)->stats.wr_time);
		} else {
			rw_offset = &((*region)->rd_offset);
			rw_time = &((*region)->stats.rd_time);
		}

		/* test if region is fully written */
		if (*rw_offset + *chunk_size > (*region)->max_offset) {
			i++;
			count++;

			/* Stamp the stop time once, on first exhaustion */
			if (unlikely(rw_time->stop.ts_sec == 0) &&
			    unlikely(rw_time->stop.ts_nsec == 0))
				rw_time->stop = zpios_timespec_now();

			continue;
		}

		*offset = *rw_offset;
		*obj = (*region)->obj;
		*rw_offset += *chunk_size;

		/* update ctl structure */
		if (run_args->region_noise) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			run_args->region_next +=
			    random_int % run_args->region_noise;
		} else {
			run_args->region_next++;
		}

		mutex_exit(&run_args->lock_work);
		return (1);
	}

	/* nothing left to do */
	mutex_exit(&run_args->lock_work);

	return (0);
}
396
397static void
398zpios_remove_objset(run_args_t *run_args)
399{
400 zpios_time_t *t = &(run_args->stats.rm_time);
401 zpios_region_t *region;
402 char name[32];
403 int rc = 0, i;
404
405 (void) zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
406 t->start = zpios_timespec_now();
407
408 (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
409
410 if (run_args->flags & DMU_REMOVE) {
411 if (run_args->flags & DMU_FPP) {
412 for (i = 0; i < run_args->region_count; i++) {
413 region = &run_args->regions[i];
414 rc = zpios_dmu_object_free(run_args,
415 region->obj.os, region->obj.obj);
416 if (rc)
417 zpios_print(run_args->file,
418 "Error removing object %d, %d\n",
419 (int)region->obj.obj, rc);
420 }
421 } else {
422 region = &run_args->regions[0];
423 rc = zpios_dmu_object_free(run_args,
424 region->obj.os, region->obj.obj);
425 if (rc)
426 zpios_print(run_args->file,
427 "Error removing object %d, %d\n",
428 (int)region->obj.obj, rc);
429 }
430 }
431
432 dmu_objset_disown(run_args->os, zpios_tag);
433
434 if (run_args->flags & DMU_REMOVE) {
435 rc = dsl_destroy_head(name);
436 if (rc)
437 zpios_print(run_args->file, "Error dsl_destroy_head"
438 "(%s, ...) failed: %d\n", name, rc);
439 }
440
441 t->stop = zpios_timespec_now();
442 t->delta = zpios_timespec_sub(t->stop, t->start);
443 (void) zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
444}
445
/*
 * Release all memory and locks owned by a run: per-thread data, the
 * thread pointer array, per-region locks, the control/work locks, and
 * finally the run_args_t itself (allocated with its trailing region
 * array in zpios_setup_run()).  Safe to call with NULL.
 */
static void
zpios_cleanup_run(run_args_t *run_args)
{
	int i, size = 0;

	if (run_args == NULL)
		return;

	/* threads may be NULL if zpios_threads_run() was never reached */
	if (run_args->threads != NULL) {
		for (i = 0; i < run_args->thread_count; i++) {
			if (run_args->threads[i]) {
				mutex_destroy(&run_args->threads[i]->lock);
				kmem_free(run_args->threads[i],
				    sizeof (thread_data_t));
			}
		}

		kmem_free(run_args->threads,
		    sizeof (thread_data_t *) * run_args->thread_count);
	}

	for (i = 0; i < run_args->region_count; i++)
		mutex_destroy(&run_args->regions[i].lock);

	mutex_destroy(&run_args->lock_work);
	mutex_destroy(&run_args->lock_ctl);

	/* Must match the vmem_zalloc() size in zpios_setup_run() */
	size = run_args->region_count * sizeof (zpios_region_t);

	vmem_free(run_args, sizeof (*run_args) + size);
}
476
477static int
478zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
479 uint64_t offset, uint64_t size, const void *buf)
480{
481 struct dmu_tx *tx;
482 int rc, how = TXG_WAIT;
483// int flags = 0;
484
485 if (run_args->flags & DMU_WRITE_NOWAIT)
486 how = TXG_NOWAIT;
487
488 while (1) {
489 tx = dmu_tx_create(os);
490 dmu_tx_hold_write(tx, object, offset, size);
491 rc = dmu_tx_assign(tx, how);
492
493 if (rc) {
494 if (rc == ERESTART && how == TXG_NOWAIT) {
495 dmu_tx_wait(tx);
496 dmu_tx_abort(tx);
497 continue;
498 }
499 zpios_print(run_args->file,
500 "Error in dmu_tx_assign(), %d", rc);
501 dmu_tx_abort(tx);
502 return (rc);
503 }
504 break;
505 }
506
507// if (run_args->flags & DMU_WRITE_ZC)
508// flags |= DMU_WRITE_ZEROCOPY;
509
510 dmu_write(os, object, offset, size, buf, tx);
511 dmu_tx_commit(tx);
512
513 return (0);
514}
515
516static int
517zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
518 uint64_t offset, uint64_t size, void *buf)
519{
520 int flags = 0;
521
522// if (run_args->flags & DMU_READ_ZC)
523// flags |= DMU_READ_ZEROCOPY;
524
525 if (run_args->flags & DMU_READ_NOPF)
526 flags |= DMU_READ_NO_PREFETCH;
527
528 return (dmu_read(os, object, offset, size, buf, flags));
529}
530
/*
 * Worker thread body.  Each thread runs the write phase to completion,
 * parks itself (threads_done++ / wake_up / schedule) until the control
 * thread restarts it, then runs the read phase.  Per-thread and
 * per-region byte/chunk/time statistics are accumulated under their
 * respective locks.  The control thread signals an abort by storing a
 * nonzero rc in thr->rc before re-waking the thread.
 */
static int
zpios_thread_main(void *data)
{
	thread_data_t *thr = (thread_data_t *)data;
	run_args_t *run_args = thr->run_args;
	zpios_time_t t;
	dmu_obj_t obj;
	__u64 offset;
	__u32 chunk_size;
	zpios_region_t *region;
	char *buf;
	unsigned int random_int;
	int chunk_noise = run_args->chunk_noise;
	int chunk_noise_tmp = 0;
	int thread_delay = run_args->thread_delay;
	int thread_delay_tmp = 0;
	int i, rc = 0;

	/* Jitter the chunk size by +/- chunk_noise bytes */
	if (chunk_noise) {
		get_random_bytes(&random_int, sizeof (unsigned int));
		chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
	}

	/*
	 * It's OK to vmem_alloc() this memory because it will be copied
	 * in to the slab and pointers to the slab copy will be setup in
	 * the bio when the IO is submitted. This of course is not ideal
	 * since we want a zero-copy IO path if possible. It would be nice
	 * to have direct access to those slab entries.
	 */
	chunk_size = run_args->chunk_size + chunk_noise_tmp;
	buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
	ASSERT(buf);

	/* Trivial data verification pattern for now. */
	if (run_args->flags & DMU_VERIFY)
		memset(buf, 'z', chunk_size);

	/* Write phase */
	mutex_enter(&thr->lock);
	thr->stats.wr_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_WRITE)) {
		/* Optional random per-chunk delay to desynchronize threads */
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		t.start = zpios_timespec_now();
		rc = zpios_dmu_write(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_write(): %d\n", rc);
			break;
		}

		mutex_enter(&thr->lock);
		thr->stats.wr_data += chunk_size;
		thr->stats.wr_chunks++;
		thr->stats.wr_time.delta = zpios_timespec_add(
		    thr->stats.wr_time.delta, t.delta);
		mutex_exit(&thr->lock);

		mutex_enter(&region->lock);
		region->stats.wr_data += chunk_size;
		region->stats.wr_chunks++;
		region->stats.wr_time.delta = zpios_timespec_add(
		    region->stats.wr_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.wr_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Report write-phase completion and park until re-woken */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.wr_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule();

	/* Check if we should exit (control thread stores abort rc here) */
	mutex_enter(&thr->lock);
	rc = thr->rc;
	mutex_exit(&thr->lock);
	if (rc)
		goto out;

	/* Read phase */
	mutex_enter(&thr->lock);
	thr->stats.rd_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_READ)) {
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		/* Clear the buffer so stale data can't pass verification */
		if (run_args->flags & DMU_VERIFY)
			memset(buf, 0, chunk_size);

		t.start = zpios_timespec_now();
		rc = zpios_dmu_read(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_read(): %d\n", rc);
			break;
		}

		/* Trivial data verification, expensive! */
		if (run_args->flags & DMU_VERIFY) {
			for (i = 0; i < chunk_size; i++) {
				if (buf[i] != 'z') {
					zpios_print(run_args->file,
					    "IO verify error: %d/%d/%d\n",
					    (int)obj.obj, (int)offset,
					    (int)chunk_size);
					break;
				}
			}
		}

		mutex_enter(&thr->lock);
		thr->stats.rd_data += chunk_size;
		thr->stats.rd_chunks++;
		thr->stats.rd_time.delta = zpios_timespec_add(
		    thr->stats.rd_time.delta, t.delta);
		mutex_exit(&thr->lock);

		mutex_enter(&region->lock);
		region->stats.rd_data += chunk_size;
		region->stats.rd_chunks++;
		region->stats.rd_time.delta = zpios_timespec_add(
		    region->stats.rd_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.rd_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Report read-phase completion */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.rd_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

out:
	vmem_free(buf, chunk_size);
	do_exit(0);

	return (rc); /* Unreachable, due to do_exit() */
}
713
/*
 * wait_event() predicate: true once every worker thread has reported
 * completion of the current phase.
 */
static int
zpios_thread_done(run_args_t *run_args)
{
	ASSERT(run_args->threads_done <= run_args->thread_count);
	return (run_args->threads_done == run_args->thread_count);
}
720
721static int
722zpios_threads_run(run_args_t *run_args)
723{
724 struct task_struct *tsk, **tsks;
725 thread_data_t *thr = NULL;
726 zpios_time_t *tt = &(run_args->stats.total_time);
727 zpios_time_t *tw = &(run_args->stats.wr_time);
728 zpios_time_t *tr = &(run_args->stats.rd_time);
729 int i, rc = 0, tc = run_args->thread_count;
730
731 tsks = kmem_zalloc(sizeof (struct task_struct *) * tc, KM_SLEEP);
7bdf406d
TG
732
733 run_args->threads = kmem_zalloc(sizeof (thread_data_t *)*tc, KM_SLEEP);
7bdf406d
TG
734
735 init_waitqueue_head(&run_args->waitq);
736 run_args->threads_done = 0;
737
738 /* Create all the needed threads which will sleep until awoken */
739 for (i = 0; i < tc; i++) {
740 thr = kmem_zalloc(sizeof (thread_data_t), KM_SLEEP);
7bdf406d
TG
741
742 thr->thread_no = i;
743 thr->run_args = run_args;
744 thr->rc = 0;
745 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
746 run_args->threads[i] = thr;
747
748 tsk = kthread_create(zpios_thread_main, (void *)thr,
749 "%s/%d", "zpios_io", i);
750 if (IS_ERR(tsk)) {
751 rc = -EINVAL;
752 goto taskerr;
753 }
754
755 tsks[i] = tsk;
756 }
757
758 tt->start = zpios_timespec_now();
759
760 /* Wake up all threads for write phase */
761 (void) zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
762 for (i = 0; i < tc; i++)
763 wake_up_process(tsks[i]);
764
765 /* Wait for write phase to complete */
766 tw->start = zpios_timespec_now();
767 wait_event(run_args->waitq, zpios_thread_done(run_args));
768 tw->stop = zpios_timespec_now();
769 (void) zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
770
771 for (i = 0; i < tc; i++) {
772 thr = run_args->threads[i];
773
774 mutex_enter(&thr->lock);
775
776 if (!rc && thr->rc)
777 rc = thr->rc;
778
779 run_args->stats.wr_data += thr->stats.wr_data;
780 run_args->stats.wr_chunks += thr->stats.wr_chunks;
781 mutex_exit(&thr->lock);
782 }
783
784 if (rc) {
785 /* Wake up all threads and tell them to exit */
786 for (i = 0; i < tc; i++) {
787 mutex_enter(&thr->lock);
788 thr->rc = rc;
789 mutex_exit(&thr->lock);
790
791 wake_up_process(tsks[i]);
792 }
793 goto out;
794 }
795
796 mutex_enter(&run_args->lock_ctl);
797 ASSERT(run_args->threads_done == run_args->thread_count);
798 run_args->threads_done = 0;
799 mutex_exit(&run_args->lock_ctl);
800
801 /* Wake up all threads for read phase */
802 (void) zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
803 for (i = 0; i < tc; i++)
804 wake_up_process(tsks[i]);
805
806 /* Wait for read phase to complete */
807 tr->start = zpios_timespec_now();
808 wait_event(run_args->waitq, zpios_thread_done(run_args));
809 tr->stop = zpios_timespec_now();
810 (void) zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
811
812 for (i = 0; i < tc; i++) {
813 thr = run_args->threads[i];
814
815 mutex_enter(&thr->lock);
816
817 if (!rc && thr->rc)
818 rc = thr->rc;
819
820 run_args->stats.rd_data += thr->stats.rd_data;
821 run_args->stats.rd_chunks += thr->stats.rd_chunks;
822 mutex_exit(&thr->lock);
823 }
824out:
825 tt->stop = zpios_timespec_now();
826 tt->delta = zpios_timespec_sub(tt->stop, tt->start);
827 tw->delta = zpios_timespec_sub(tw->stop, tw->start);
828 tr->delta = zpios_timespec_sub(tr->stop, tr->start);
829
830cleanup:
831 kmem_free(tsks, sizeof (struct task_struct *) * tc);
7bdf406d
TG
832 return (rc);
833
834taskerr:
835 /* Destroy all threads that were created successfully */
836 for (i = 0; i < tc; i++)
837 if (tsks[i] != NULL)
838 (void) kthread_stop(tsks[i]);
839
840 goto cleanup;
841}
842
843static int
844zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
845 int data_size, void *data)
846{
847 run_args_t *run_args = { 0 };
848 zpios_stats_t *stats = (zpios_stats_t *)data;
849 int i, n, m, size, rc;
850
851 if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
852 (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
853 zpios_print(file, "Invalid chunk_size, region_size, "
854 "thread_count, or region_count, %d\n", -EINVAL);
855 return (-EINVAL);
856 }
857
858 if (!(kcmd->cmd_flags & DMU_WRITE) ||
859 !(kcmd->cmd_flags & DMU_READ)) {
860 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
861 "and DMU_READ must be set, %d\n", -EINVAL);
862 return (-EINVAL);
863 }
864
865 if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
866 (kcmd->cmd_flags & DMU_VERIFY)) {
867 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
868 "with DMU_VERIFY, used for performance analysis "
869 "only, %d\n", -EINVAL);
870 return (-EINVAL);
871 }
872
873 /*
874 * Opaque data on return contains structs of the following form:
875 *
876 * zpios_stat_t stats[];
877 * stats[0] = run_args->stats;
878 * stats[1-N] = threads[N]->stats;
879 * stats[N+1-M] = regions[M]->stats;
880 *
881 * Where N is the number of threads, and M is the number of regions.
882 */
883 size = (sizeof (zpios_stats_t) +
884 (kcmd->cmd_thread_count * sizeof (zpios_stats_t)) +
885 (kcmd->cmd_region_count * sizeof (zpios_stats_t)));
886 if (data_size < size) {
887 zpios_print(file, "Invalid size, command data buffer "
888 "size too small, (%d < %d)\n", data_size, size);
889 return (-ENOSPC);
890 }
891
892 rc = zpios_setup_run(&run_args, kcmd, file);
893 if (rc)
894 return (rc);
895
896 rc = zpios_threads_run(run_args);
897 zpios_remove_objset(run_args);
898 if (rc)
899 goto cleanup;
900
901 if (stats) {
902 n = 1;
903 m = 1 + kcmd->cmd_thread_count;
904 stats[0] = run_args->stats;
905
906 for (i = 0; i < kcmd->cmd_thread_count; i++)
907 stats[n+i] = run_args->threads[i]->stats;
908
909 for (i = 0; i < kcmd->cmd_region_count; i++)
910 stats[m+i] = run_args->regions[i].stats;
911 }
912
913cleanup:
914 zpios_cleanup_run(run_args);
915
916 (void) zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
917
918 return (rc);
919}
920
921static int
922zpios_open(struct inode *inode, struct file *file)
923{
924 zpios_info_t *info;
925
926 info = (zpios_info_t *)kmem_alloc(sizeof (*info), KM_SLEEP);
7bdf406d
TG
927
928 spin_lock_init(&info->info_lock);
929 info->info_size = ZPIOS_INFO_BUFFER_SIZE;
930 info->info_buffer =
931 (char *) vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
7bdf406d
TG
932
933 info->info_head = info->info_buffer;
934 file->private_data = (void *)info;
935
936 return (0);
937}
938
939static int
940zpios_release(struct inode *inode, struct file *file)
941{
942 zpios_info_t *info = (zpios_info_t *)file->private_data;
943
944 ASSERT(info);
945 ASSERT(info->info_buffer);
946
947 vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
948 kmem_free(info, sizeof (*info));
949
950 return (0);
951}
952
953static int
954zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
955{
956 zpios_info_t *info = (zpios_info_t *)file->private_data;
957
958 ASSERT(info);
959 ASSERT(info->info_buffer);
960
961 spin_lock(&info->info_lock);
962 memset(info->info_buffer, 0, info->info_size);
963 info->info_head = info->info_buffer;
964 spin_unlock(&info->info_lock);
965
966 return (0);
967}
968
969static int
970zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
971{
972 zpios_info_t *info = (zpios_info_t *)file->private_data;
973 char *buf;
974 int min, size, rc = 0;
975
976 ASSERT(info);
977 ASSERT(info->info_buffer);
978
979 spin_lock(&info->info_lock);
980 if (kcfg->cfg_arg1 > 0) {
981
982 size = kcfg->cfg_arg1;
983 buf = (char *)vmem_alloc(size, KM_SLEEP);
7bdf406d
TG
984
985 /* Zero fill and truncate contents when coping buffer */
986 min = ((size < info->info_size) ? size : info->info_size);
987 memset(buf, 0, size);
988 memcpy(buf, info->info_buffer, min);
989 vmem_free(info->info_buffer, info->info_size);
990 info->info_size = size;
991 info->info_buffer = buf;
992 info->info_head = info->info_buffer;
993 }
994
995 kcfg->cfg_rc1 = info->info_size;
996
997 if (copy_to_user((struct zpios_cfg_t __user *)arg,
998 kcfg, sizeof (*kcfg)))
999 rc = -EFAULT;
51d97d8f 1000
7bdf406d
TG
1001 spin_unlock(&info->info_lock);
1002
1003 return (rc);
1004}
1005
1006static int
1007zpios_ioctl_cfg(struct file *file, unsigned long arg)
1008{
1009 zpios_cfg_t kcfg;
1010 int rc = 0;
1011
1012 if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof (kcfg)))
1013 return (-EFAULT);
1014
1015 if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1016 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1017 kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1018 return (-EINVAL);
1019 }
1020
1021 switch (kcfg.cfg_cmd) {
1022 case ZPIOS_CFG_BUFFER_CLEAR:
1023 /*
1024 * cfg_arg1 - Unused
1025 * cfg_rc1 - Unused
1026 */
1027 rc = zpios_buffer_clear(file, &kcfg, arg);
1028 break;
1029 case ZPIOS_CFG_BUFFER_SIZE:
1030 /*
1031 * cfg_arg1 - 0 - query size; >0 resize
1032 * cfg_rc1 - Set to current buffer size
1033 */
1034 rc = zpios_buffer_size(file, &kcfg, arg);
1035 break;
1036 default:
1037 zpios_print(file, "Bad config command %d\n",
1038 kcfg.cfg_cmd);
1039 rc = -EINVAL;
1040 break;
1041 }
1042
1043 return (rc);
1044}
1045
1046static int
1047zpios_ioctl_cmd(struct file *file, unsigned long arg)
1048{
1049 zpios_cmd_t *kcmd;
1050 void *data = NULL;
1051 int rc = -EINVAL;
1052
1053 kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP);
7bdf406d
TG
1054
1055 rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t));
1056 if (rc) {
1057 zpios_print(file, "Unable to copy command structure "
1058 "from user to kernel memory, %d\n", rc);
1059 goto out_cmd;
1060 }
1061
1062 if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1063 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
1064 kcmd->cmd_magic, ZPIOS_CFG_MAGIC);
1065 rc = (-EINVAL);
1066 goto out_cmd;
1067 }
1068
1069 /* Allocate memory for any opaque data the caller needed to pass on */
1070 if (kcmd->cmd_data_size > 0) {
1071 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
7bdf406d
TG
1072
1073 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1074 cmd_data_str)), kcmd->cmd_data_size);
1075 if (rc) {
1076 zpios_print(file, "Unable to copy data buffer "
1077 "from user to kernel memory, %d\n", rc);
1078 goto out_data;
1079 }
1080 }
1081
1082 rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1083
1084 if (data != NULL) {
1085 /* If the test failed do not print out the stats */
1086 if (rc)
1087 goto out_data;
1088
1089 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1090 cmd_data_str)), data, kcmd->cmd_data_size);
1091 if (rc) {
1092 zpios_print(file, "Unable to copy data buffer "
1093 "from kernel to user memory, %d\n", rc);
1094 rc = -EFAULT;
1095 }
1096
1097out_data:
1098 vmem_free(data, kcmd->cmd_data_size);
1099 }
1100out_cmd:
1101 kmem_free(kcmd, sizeof (zpios_cmd_t));
1102
1103 return (rc);
1104}
1105
1106static long
1107zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1108{
1109 int rc = 0;
1110
1111 /* Ignore tty ioctls */
1112 if ((cmd & 0xffffff00) == ((int)'T') << 8)
1113 return (-ENOTTY);
1114
1115 switch (cmd) {
1116 case ZPIOS_CFG:
1117 rc = zpios_ioctl_cfg(file, arg);
1118 break;
1119 case ZPIOS_CMD:
1120 rc = zpios_ioctl_cmd(file, arg);
1121 break;
1122 default:
1123 zpios_print(file, "Bad ioctl command %d\n", cmd);
1124 rc = -EINVAL;
1125 break;
1126 }
1127
1128 return (rc);
1129}
1130
#ifdef CONFIG_COMPAT
/*
 * Compatibility handler for ioctls from 32-bit ELF binaries.
 *
 * NOTE(review): this forwards the raw arg straight to the 64-bit
 * handler, which presumably assumes the zpios command structures have
 * identical layout in 32- and 64-bit user space — verify against
 * zpios-internal.h before relying on compat callers.
 */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
1139
1140/*
1141 * I'm not sure why you would want to write in to this buffer from
1142 * user space since its principle use is to pass test status info
1143 * back to the user space, but I don't see any reason to prevent it.
1144 */
1145static ssize_t
1146zpios_write(struct file *file, const char __user *buf,
1147 size_t count, loff_t *ppos)
1148{
1149 zpios_info_t *info = (zpios_info_t *)file->private_data;
1150 int rc = 0;
1151
1152 ASSERT(info);
1153 ASSERT(info->info_buffer);
1154
1155 spin_lock(&info->info_lock);
1156
1157 /* Write beyond EOF */
1158 if (*ppos >= info->info_size) {
1159 rc = -EFBIG;
1160 goto out;
1161 }
1162
1163 /* Resize count if beyond EOF */
1164 if (*ppos + count > info->info_size)
1165 count = info->info_size - *ppos;
1166
1167 if (copy_from_user(info->info_buffer, buf, count)) {
1168 rc = -EFAULT;
1169 goto out;
1170 }
1171
1172 *ppos += count;
1173 rc = count;
1174out:
1175 spin_unlock(&info->info_lock);
1176 return (rc);
1177}
1178
1179static ssize_t
1180zpios_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
1181{
1182 zpios_info_t *info = (zpios_info_t *)file->private_data;
1183 int rc = 0;
1184
1185 ASSERT(info);
1186 ASSERT(info->info_buffer);
1187
1188 spin_lock(&info->info_lock);
1189
1190 /* Read beyond EOF */
1191 if (*ppos >= info->info_size)
1192 goto out;
1193
1194 /* Resize count if beyond EOF */
1195 if (*ppos + count > info->info_size)
1196 count = info->info_size - *ppos;
1197
1198 if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
1199 rc = -EFAULT;
1200 goto out;
1201 }
1202
1203 *ppos += count;
1204 rc = count;
1205out:
1206 spin_unlock(&info->info_lock);
1207 return (rc);
1208}
1209
1210static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1211{
1212 zpios_info_t *info = (zpios_info_t *)file->private_data;
1213 int rc = -EINVAL;
1214
1215 ASSERT(info);
1216 ASSERT(info->info_buffer);
1217
1218 spin_lock(&info->info_lock);
1219
1220 switch (origin) {
1221 case 0: /* SEEK_SET - No-op just do it */
1222 break;
1223 case 1: /* SEEK_CUR - Seek from current */
1224 offset = file->f_pos + offset;
1225 break;
1226 case 2: /* SEEK_END - Seek from end */
1227 offset = info->info_size + offset;
1228 break;
1229 }
1230
1231 if (offset >= 0) {
1232 file->f_pos = offset;
1233 file->f_version = 0;
1234 rc = offset;
1235 }
1236
1237 spin_unlock(&info->info_lock);
1238
1239 return (rc);
1240}
1241
1242static struct file_operations zpios_fops = {
1243 .owner = THIS_MODULE,
1244 .open = zpios_open,
1245 .release = zpios_release,
1246 .unlocked_ioctl = zpios_unlocked_ioctl,
1247#ifdef CONFIG_COMPAT
1248 .compat_ioctl = zpios_compat_ioctl,
1249#endif
1250 .read = zpios_read,
1251 .write = zpios_write,
1252 .llseek = zpios_seek,
1253};
1254
/*
 * Misc (character) device description: registered in zpios_init() to
 * create /dev/<ZPIOS_NAME> with a dynamically assigned minor number.
 */
static struct miscdevice zpios_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= ZPIOS_NAME,
	.fops		= &zpios_fops,
};
1260
1261#ifdef DEBUG
1262#define ZFS_DEBUG_STR " (DEBUG mode)"
1263#else
1264#define ZFS_DEBUG_STR ""
1265#endif
1266
1267static int __init
1268zpios_init(void)
1269{
1270 int error;
1271
1272 error = misc_register(&zpios_misc);
1273 if (error) {
1274 printk(KERN_INFO "ZPIOS: misc_register() failed %d\n", error);
1275 } else {
1276 printk(KERN_INFO "ZPIOS: Loaded module v%s-%s%s\n",
1277 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
1278 }
1279
1280 return (error);
1281}
1282
/*
 * Module unload: unregister the misc device and log the unload.
 */
static void __exit
zpios_fini(void)
{
	misc_deregister(&zpios_misc);

	printk(KERN_INFO "ZPIOS: Unloaded module v%s-%s%s\n",
	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
}
1291
1292module_init(zpios_init);
1293module_exit(zpios_fini);
1294
1295MODULE_AUTHOR("LLNL / Sun");
1296MODULE_DESCRIPTION("Kernel PIOS implementation");
1297MODULE_LICENSE("GPL");
1298MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);