/*
 * ZPIOS is a heavily modified version of the original PIOS test code.
 * It is designed to have the test code running in the Linux kernel
 * against ZFS while still being flexibly controlled from user space.
 *
 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *
 * Original PIOS Test Code
 * Copyright (C) 2004 Cluster File Systems, Inc.
 * Written by Peter Braam <braam@clusterfs.com>
 *            Atul Vidwansa <atul@clusterfs.com>
 *            Milind Dumbare <milind@clusterfs.com>
 *
 * This file is part of ZFS on Linux.
 * For details, see <http://zfsonlinux.org/>.
 *
 * ZPIOS is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <sys/zfs_context.h>
#include <sys/dsl_destroy.h>
#include <linux/miscdevice.h>
#include "zpios-internal.h"
/* Ownership tag passed to dmu_objset_own()/dmu_objset_disown() */
static char *zpios_tag = "zpios_tag";
45 zpios_upcall(char *path
, char *phase
, run_args_t
*run_args
, int rc
)
48 * This is stack heavy but it should be OK since we are only
49 * making the upcall between tests when the stack is shallow.
51 char id
[16], chunk_size
[16], region_size
[16], thread_count
[16];
52 char region_count
[16], offset
[16], region_noise
[16], chunk_noise
[16];
53 char thread_delay
[16], flags
[16], result
[8];
54 char *argv
[16], *envp
[4];
56 if ((path
== NULL
) || (strlen(path
) == 0))
59 snprintf(id
, 15, "%d", run_args
->id
);
60 snprintf(chunk_size
, 15, "%lu", (long unsigned)run_args
->chunk_size
);
61 snprintf(region_size
, 15, "%lu", (long unsigned) run_args
->region_size
);
62 snprintf(thread_count
, 15, "%u", run_args
->thread_count
);
63 snprintf(region_count
, 15, "%u", run_args
->region_count
);
64 snprintf(offset
, 15, "%lu", (long unsigned)run_args
->offset
);
65 snprintf(region_noise
, 15, "%u", run_args
->region_noise
);
66 snprintf(chunk_noise
, 15, "%u", run_args
->chunk_noise
);
67 snprintf(thread_delay
, 15, "%u", run_args
->thread_delay
);
68 snprintf(flags
, 15, "0x%x", run_args
->flags
);
69 snprintf(result
, 7, "%d", rc
);
71 /* Passing 15 args to registered pre/post upcall */
74 argv
[2] = strlen(run_args
->log
) ? run_args
->log
: "<none>";
76 argv
[4] = run_args
->pool
;
78 argv
[6] = region_size
;
79 argv
[7] = thread_count
;
80 argv
[8] = region_count
;
82 argv
[10] = region_noise
;
83 argv
[11] = chunk_noise
;
84 argv
[12] = thread_delay
;
89 /* Passing environment for user space upcall */
91 envp
[1] = "TERM=linux";
92 envp
[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
95 return (call_usermodehelper(path
, argv
, envp
, UMH_WAIT_PROC
));
99 zpios_print(struct file
*file
, const char *format
, ...)
101 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
106 ASSERT(info
->info_buffer
);
108 va_start(adx
, format
);
109 spin_lock(&info
->info_lock
);
111 /* Don't allow the kernel to start a write in the red zone */
112 if ((int)(info
->info_head
- info
->info_buffer
) >
113 (info
->info_size
- ZPIOS_INFO_BUFFER_REDZONE
)) {
116 rc
= vsprintf(info
->info_head
, format
, adx
);
118 info
->info_head
+= rc
;
121 spin_unlock(&info
->info_lock
);
128 zpios_dmu_object_create(run_args_t
*run_args
, objset_t
*os
)
134 tx
= dmu_tx_create(os
);
135 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0, OBJ_SIZE
);
136 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
138 zpios_print(run_args
->file
,
139 "dmu_tx_assign() failed: %d\n", rc
);
144 obj
= dmu_object_alloc(os
, DMU_OT_UINT64_OTHER
, 0, DMU_OT_NONE
, 0, tx
);
145 rc
= dmu_object_set_blocksize(os
, obj
, 128ULL << 10, 0, tx
);
147 zpios_print(run_args
->file
,
148 "dmu_object_set_blocksize() failed: %d\n", rc
);
159 zpios_dmu_object_free(run_args_t
*run_args
, objset_t
*os
, uint64_t obj
)
164 tx
= dmu_tx_create(os
);
165 dmu_tx_hold_free(tx
, obj
, 0, DMU_OBJECT_END
);
166 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
168 zpios_print(run_args
->file
,
169 "dmu_tx_assign() failed: %d\n", rc
);
174 rc
= dmu_object_free(os
, obj
, tx
);
176 zpios_print(run_args
->file
,
177 "dmu_object_free() failed: %d\n", rc
);
188 zpios_dmu_setup(run_args_t
*run_args
)
190 zpios_time_t
*t
= &(run_args
->stats
.cr_time
);
196 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_CREATE
, run_args
, 0);
197 t
->start
= zpios_timespec_now();
199 (void) snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
200 rc
= dmu_objset_create(name
, DMU_OST_OTHER
, 0, NULL
, NULL
);
202 zpios_print(run_args
->file
, "Error dmu_objset_create(%s, ...) "
203 "failed: %d\n", name
, rc
);
207 rc
= dmu_objset_own(name
, DMU_OST_OTHER
, 0, zpios_tag
, &os
);
209 zpios_print(run_args
->file
, "Error dmu_objset_own(%s, ...) "
210 "failed: %d\n", name
, rc
);
214 if (!(run_args
->flags
& DMU_FPP
)) {
215 obj
= zpios_dmu_object_create(run_args
, os
);
218 zpios_print(run_args
->file
, "Error zpios_dmu_"
219 "object_create() failed, %d\n", rc
);
224 for (i
= 0; i
< run_args
->region_count
; i
++) {
225 zpios_region_t
*region
;
227 region
= &run_args
->regions
[i
];
228 mutex_init(®ion
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
230 if (run_args
->flags
& DMU_FPP
) {
231 /* File per process */
233 region
->obj
.obj
= zpios_dmu_object_create(run_args
, os
);
234 ASSERT(region
->obj
.obj
> 0); /* XXX - Handle this */
235 region
->wr_offset
= run_args
->offset
;
236 region
->rd_offset
= run_args
->offset
;
237 region
->init_offset
= run_args
->offset
;
238 region
->max_offset
= run_args
->offset
+
239 run_args
->region_size
;
241 /* Single shared file */
243 region
->obj
.obj
= obj
;
244 region
->wr_offset
= run_args
->offset
* i
;
245 region
->rd_offset
= run_args
->offset
* i
;
246 region
->init_offset
= run_args
->offset
* i
;
247 region
->max_offset
= run_args
->offset
*
248 i
+ run_args
->region_size
;
255 rc2
= dsl_destroy_head(name
);
257 zpios_print(run_args
->file
, "Error dsl_destroy_head"
258 "(%s, ...) failed: %d\n", name
, rc2
);
261 t
->stop
= zpios_timespec_now();
262 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
263 (void) zpios_upcall(run_args
->post
, PHASE_POST_CREATE
, run_args
, rc
);
269 zpios_setup_run(run_args_t
**run_args
, zpios_cmd_t
*kcmd
, struct file
*file
)
274 size
= sizeof (*ra
) + kcmd
->cmd_region_count
* sizeof (zpios_region_t
);
276 ra
= vmem_zalloc(size
, KM_SLEEP
);
278 zpios_print(file
, "Unable to vmem_zalloc() %d bytes "
279 "for regions\n", size
);
284 strncpy(ra
->pool
, kcmd
->cmd_pool
, ZPIOS_NAME_SIZE
- 1);
285 strncpy(ra
->pre
, kcmd
->cmd_pre
, ZPIOS_PATH_SIZE
- 1);
286 strncpy(ra
->post
, kcmd
->cmd_post
, ZPIOS_PATH_SIZE
- 1);
287 strncpy(ra
->log
, kcmd
->cmd_log
, ZPIOS_PATH_SIZE
- 1);
288 ra
->id
= kcmd
->cmd_id
;
289 ra
->chunk_size
= kcmd
->cmd_chunk_size
;
290 ra
->thread_count
= kcmd
->cmd_thread_count
;
291 ra
->region_count
= kcmd
->cmd_region_count
;
292 ra
->region_size
= kcmd
->cmd_region_size
;
293 ra
->offset
= kcmd
->cmd_offset
;
294 ra
->region_noise
= kcmd
->cmd_region_noise
;
295 ra
->chunk_noise
= kcmd
->cmd_chunk_noise
;
296 ra
->thread_delay
= kcmd
->cmd_thread_delay
;
297 ra
->flags
= kcmd
->cmd_flags
;
298 ra
->stats
.wr_data
= 0;
299 ra
->stats
.wr_chunks
= 0;
300 ra
->stats
.rd_data
= 0;
301 ra
->stats
.rd_chunks
= 0;
304 mutex_init(&ra
->lock_work
, NULL
, MUTEX_DEFAULT
, NULL
);
305 mutex_init(&ra
->lock_ctl
, NULL
, MUTEX_DEFAULT
, NULL
);
307 (void) zpios_upcall(ra
->pre
, PHASE_PRE_RUN
, ra
, 0);
309 rc
= zpios_dmu_setup(ra
);
311 mutex_destroy(&ra
->lock_ctl
);
312 mutex_destroy(&ra
->lock_work
);
321 zpios_get_work_item(run_args_t
*run_args
, dmu_obj_t
*obj
, __u64
*offset
,
322 __u32
*chunk_size
, zpios_region_t
**region
, __u32 flags
)
325 unsigned int random_int
;
327 get_random_bytes(&random_int
, sizeof (unsigned int));
329 mutex_enter(&run_args
->lock_work
);
330 i
= run_args
->region_next
;
333 * XXX: I don't much care for this chunk selection mechansim
334 * there's the potential to burn a lot of time here doing nothing
335 * useful while holding the global lock. This could give some
336 * misleading performance results. I'll fix it latter.
338 while (count
< run_args
->region_count
) {
340 zpios_time_t
*rw_time
;
342 j
= i
% run_args
->region_count
;
343 *region
= &(run_args
->regions
[j
]);
345 if (flags
& DMU_WRITE
) {
346 rw_offset
= &((*region
)->wr_offset
);
347 rw_time
= &((*region
)->stats
.wr_time
);
349 rw_offset
= &((*region
)->rd_offset
);
350 rw_time
= &((*region
)->stats
.rd_time
);
353 /* test if region is fully written */
354 if (*rw_offset
+ *chunk_size
> (*region
)->max_offset
) {
358 if (unlikely(rw_time
->stop
.ts_sec
== 0) &&
359 unlikely(rw_time
->stop
.ts_nsec
== 0))
360 rw_time
->stop
= zpios_timespec_now();
365 *offset
= *rw_offset
;
366 *obj
= (*region
)->obj
;
367 *rw_offset
+= *chunk_size
;
369 /* update ctl structure */
370 if (run_args
->region_noise
) {
371 get_random_bytes(&random_int
, sizeof (unsigned int));
372 run_args
->region_next
+=
373 random_int
% run_args
->region_noise
;
375 run_args
->region_next
++;
378 mutex_exit(&run_args
->lock_work
);
382 /* nothing left to do */
383 mutex_exit(&run_args
->lock_work
);
389 zpios_remove_objset(run_args_t
*run_args
)
391 zpios_time_t
*t
= &(run_args
->stats
.rm_time
);
392 zpios_region_t
*region
;
396 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_REMOVE
, run_args
, 0);
397 t
->start
= zpios_timespec_now();
399 (void) snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
401 if (run_args
->flags
& DMU_REMOVE
) {
402 if (run_args
->flags
& DMU_FPP
) {
403 for (i
= 0; i
< run_args
->region_count
; i
++) {
404 region
= &run_args
->regions
[i
];
405 rc
= zpios_dmu_object_free(run_args
,
406 region
->obj
.os
, region
->obj
.obj
);
408 zpios_print(run_args
->file
,
409 "Error removing object %d, %d\n",
410 (int)region
->obj
.obj
, rc
);
413 region
= &run_args
->regions
[0];
414 rc
= zpios_dmu_object_free(run_args
,
415 region
->obj
.os
, region
->obj
.obj
);
417 zpios_print(run_args
->file
,
418 "Error removing object %d, %d\n",
419 (int)region
->obj
.obj
, rc
);
423 dmu_objset_disown(run_args
->os
, zpios_tag
);
425 if (run_args
->flags
& DMU_REMOVE
) {
426 rc
= dsl_destroy_head(name
);
428 zpios_print(run_args
->file
, "Error dsl_destroy_head"
429 "(%s, ...) failed: %d\n", name
, rc
);
432 t
->stop
= zpios_timespec_now();
433 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
434 (void) zpios_upcall(run_args
->post
, PHASE_POST_REMOVE
, run_args
, rc
);
438 zpios_cleanup_run(run_args_t
*run_args
)
442 if (run_args
== NULL
)
445 if (run_args
->threads
!= NULL
) {
446 for (i
= 0; i
< run_args
->thread_count
; i
++) {
447 if (run_args
->threads
[i
]) {
448 mutex_destroy(&run_args
->threads
[i
]->lock
);
449 kmem_free(run_args
->threads
[i
],
450 sizeof (thread_data_t
));
454 kmem_free(run_args
->threads
,
455 sizeof (thread_data_t
*) * run_args
->thread_count
);
458 for (i
= 0; i
< run_args
->region_count
; i
++)
459 mutex_destroy(&run_args
->regions
[i
].lock
);
461 mutex_destroy(&run_args
->lock_work
);
462 mutex_destroy(&run_args
->lock_ctl
);
463 size
= run_args
->region_count
* sizeof (zpios_region_t
);
465 vmem_free(run_args
, sizeof (*run_args
) + size
);
469 zpios_dmu_write(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
470 uint64_t offset
, uint64_t size
, const void *buf
)
473 int rc
, how
= TXG_WAIT
;
476 if (run_args
->flags
& DMU_WRITE_NOWAIT
)
480 tx
= dmu_tx_create(os
);
481 dmu_tx_hold_write(tx
, object
, offset
, size
);
482 rc
= dmu_tx_assign(tx
, how
);
485 if (rc
== ERESTART
&& how
== TXG_NOWAIT
) {
490 zpios_print(run_args
->file
,
491 "Error in dmu_tx_assign(), %d", rc
);
498 // if (run_args->flags & DMU_WRITE_ZC)
499 // flags |= DMU_WRITE_ZEROCOPY;
501 dmu_write(os
, object
, offset
, size
, buf
, tx
);
508 zpios_dmu_read(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
509 uint64_t offset
, uint64_t size
, void *buf
)
513 // if (run_args->flags & DMU_READ_ZC)
514 // flags |= DMU_READ_ZEROCOPY;
516 if (run_args
->flags
& DMU_READ_NOPF
)
517 flags
|= DMU_READ_NO_PREFETCH
;
519 return (dmu_read(os
, object
, offset
, size
, buf
, flags
));
523 zpios_thread_main(void *data
)
525 thread_data_t
*thr
= (thread_data_t
*)data
;
526 run_args_t
*run_args
= thr
->run_args
;
531 zpios_region_t
*region
;
533 unsigned int random_int
;
534 int chunk_noise
= run_args
->chunk_noise
;
535 int chunk_noise_tmp
= 0;
536 int thread_delay
= run_args
->thread_delay
;
537 int thread_delay_tmp
= 0;
541 get_random_bytes(&random_int
, sizeof (unsigned int));
542 chunk_noise_tmp
= (random_int
% (chunk_noise
* 2))-chunk_noise
;
546 * It's OK to vmem_alloc() this memory because it will be copied
547 * in to the slab and pointers to the slab copy will be setup in
548 * the bio when the IO is submitted. This of course is not ideal
549 * since we want a zero-copy IO path if possible. It would be nice
550 * to have direct access to those slab entries.
552 chunk_size
= run_args
->chunk_size
+ chunk_noise_tmp
;
553 buf
= (char *)vmem_alloc(chunk_size
, KM_SLEEP
);
556 /* Trivial data verification pattern for now. */
557 if (run_args
->flags
& DMU_VERIFY
)
558 memset(buf
, 'z', chunk_size
);
561 mutex_enter(&thr
->lock
);
562 thr
->stats
.wr_time
.start
= zpios_timespec_now();
563 mutex_exit(&thr
->lock
);
565 while (zpios_get_work_item(run_args
, &obj
, &offset
,
566 &chunk_size
, ®ion
, DMU_WRITE
)) {
568 get_random_bytes(&random_int
, sizeof (unsigned int));
569 thread_delay_tmp
= random_int
% thread_delay
;
570 set_current_state(TASK_UNINTERRUPTIBLE
);
571 schedule_timeout(thread_delay_tmp
); /* In jiffies */
574 t
.start
= zpios_timespec_now();
575 rc
= zpios_dmu_write(run_args
, obj
.os
, obj
.obj
,
576 offset
, chunk_size
, buf
);
577 t
.stop
= zpios_timespec_now();
578 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
581 zpios_print(run_args
->file
, "IO error while doing "
582 "dmu_write(): %d\n", rc
);
586 mutex_enter(&thr
->lock
);
587 thr
->stats
.wr_data
+= chunk_size
;
588 thr
->stats
.wr_chunks
++;
589 thr
->stats
.wr_time
.delta
= zpios_timespec_add(
590 thr
->stats
.wr_time
.delta
, t
.delta
);
591 mutex_exit(&thr
->lock
);
593 mutex_enter(®ion
->lock
);
594 region
->stats
.wr_data
+= chunk_size
;
595 region
->stats
.wr_chunks
++;
596 region
->stats
.wr_time
.delta
= zpios_timespec_add(
597 region
->stats
.wr_time
.delta
, t
.delta
);
599 /* First time region was accessed */
600 if (region
->init_offset
== offset
)
601 region
->stats
.wr_time
.start
= t
.start
;
603 mutex_exit(®ion
->lock
);
606 mutex_enter(&run_args
->lock_ctl
);
607 run_args
->threads_done
++;
608 mutex_exit(&run_args
->lock_ctl
);
610 mutex_enter(&thr
->lock
);
612 thr
->stats
.wr_time
.stop
= zpios_timespec_now();
613 mutex_exit(&thr
->lock
);
614 wake_up(&run_args
->waitq
);
616 set_current_state(TASK_UNINTERRUPTIBLE
);
619 /* Check if we should exit */
620 mutex_enter(&thr
->lock
);
622 mutex_exit(&thr
->lock
);
627 mutex_enter(&thr
->lock
);
628 thr
->stats
.rd_time
.start
= zpios_timespec_now();
629 mutex_exit(&thr
->lock
);
631 while (zpios_get_work_item(run_args
, &obj
, &offset
,
632 &chunk_size
, ®ion
, DMU_READ
)) {
634 get_random_bytes(&random_int
, sizeof (unsigned int));
635 thread_delay_tmp
= random_int
% thread_delay
;
636 set_current_state(TASK_UNINTERRUPTIBLE
);
637 schedule_timeout(thread_delay_tmp
); /* In jiffies */
640 if (run_args
->flags
& DMU_VERIFY
)
641 memset(buf
, 0, chunk_size
);
643 t
.start
= zpios_timespec_now();
644 rc
= zpios_dmu_read(run_args
, obj
.os
, obj
.obj
,
645 offset
, chunk_size
, buf
);
646 t
.stop
= zpios_timespec_now();
647 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
650 zpios_print(run_args
->file
, "IO error while doing "
651 "dmu_read(): %d\n", rc
);
655 /* Trivial data verification, expensive! */
656 if (run_args
->flags
& DMU_VERIFY
) {
657 for (i
= 0; i
< chunk_size
; i
++) {
659 zpios_print(run_args
->file
,
660 "IO verify error: %d/%d/%d\n",
661 (int)obj
.obj
, (int)offset
,
668 mutex_enter(&thr
->lock
);
669 thr
->stats
.rd_data
+= chunk_size
;
670 thr
->stats
.rd_chunks
++;
671 thr
->stats
.rd_time
.delta
= zpios_timespec_add(
672 thr
->stats
.rd_time
.delta
, t
.delta
);
673 mutex_exit(&thr
->lock
);
675 mutex_enter(®ion
->lock
);
676 region
->stats
.rd_data
+= chunk_size
;
677 region
->stats
.rd_chunks
++;
678 region
->stats
.rd_time
.delta
= zpios_timespec_add(
679 region
->stats
.rd_time
.delta
, t
.delta
);
681 /* First time region was accessed */
682 if (region
->init_offset
== offset
)
683 region
->stats
.rd_time
.start
= t
.start
;
685 mutex_exit(®ion
->lock
);
688 mutex_enter(&run_args
->lock_ctl
);
689 run_args
->threads_done
++;
690 mutex_exit(&run_args
->lock_ctl
);
692 mutex_enter(&thr
->lock
);
694 thr
->stats
.rd_time
.stop
= zpios_timespec_now();
695 mutex_exit(&thr
->lock
);
696 wake_up(&run_args
->waitq
);
699 vmem_free(buf
, chunk_size
);
702 return (rc
); /* Unreachable, due to do_exit() */
706 zpios_thread_done(run_args_t
*run_args
)
708 ASSERT(run_args
->threads_done
<= run_args
->thread_count
);
709 return (run_args
->threads_done
== run_args
->thread_count
);
713 zpios_threads_run(run_args_t
*run_args
)
715 struct task_struct
*tsk
, **tsks
;
716 thread_data_t
*thr
= NULL
;
717 zpios_time_t
*tt
= &(run_args
->stats
.total_time
);
718 zpios_time_t
*tw
= &(run_args
->stats
.wr_time
);
719 zpios_time_t
*tr
= &(run_args
->stats
.rd_time
);
720 int i
, rc
= 0, tc
= run_args
->thread_count
;
722 tsks
= kmem_zalloc(sizeof (struct task_struct
*) * tc
, KM_SLEEP
);
728 run_args
->threads
= kmem_zalloc(sizeof (thread_data_t
*)*tc
, KM_SLEEP
);
729 if (run_args
->threads
== NULL
) {
734 init_waitqueue_head(&run_args
->waitq
);
735 run_args
->threads_done
= 0;
737 /* Create all the needed threads which will sleep until awoken */
738 for (i
= 0; i
< tc
; i
++) {
739 thr
= kmem_zalloc(sizeof (thread_data_t
), KM_SLEEP
);
746 thr
->run_args
= run_args
;
748 mutex_init(&thr
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
749 run_args
->threads
[i
] = thr
;
751 tsk
= kthread_create(zpios_thread_main
, (void *)thr
,
752 "%s/%d", "zpios_io", i
);
761 tt
->start
= zpios_timespec_now();
763 /* Wake up all threads for write phase */
764 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_WRITE
, run_args
, 0);
765 for (i
= 0; i
< tc
; i
++)
766 wake_up_process(tsks
[i
]);
768 /* Wait for write phase to complete */
769 tw
->start
= zpios_timespec_now();
770 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
771 tw
->stop
= zpios_timespec_now();
772 (void) zpios_upcall(run_args
->post
, PHASE_POST_WRITE
, run_args
, rc
);
774 for (i
= 0; i
< tc
; i
++) {
775 thr
= run_args
->threads
[i
];
777 mutex_enter(&thr
->lock
);
782 run_args
->stats
.wr_data
+= thr
->stats
.wr_data
;
783 run_args
->stats
.wr_chunks
+= thr
->stats
.wr_chunks
;
784 mutex_exit(&thr
->lock
);
788 /* Wake up all threads and tell them to exit */
789 for (i
= 0; i
< tc
; i
++) {
790 mutex_enter(&thr
->lock
);
792 mutex_exit(&thr
->lock
);
794 wake_up_process(tsks
[i
]);
799 mutex_enter(&run_args
->lock_ctl
);
800 ASSERT(run_args
->threads_done
== run_args
->thread_count
);
801 run_args
->threads_done
= 0;
802 mutex_exit(&run_args
->lock_ctl
);
804 /* Wake up all threads for read phase */
805 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_READ
, run_args
, 0);
806 for (i
= 0; i
< tc
; i
++)
807 wake_up_process(tsks
[i
]);
809 /* Wait for read phase to complete */
810 tr
->start
= zpios_timespec_now();
811 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
812 tr
->stop
= zpios_timespec_now();
813 (void) zpios_upcall(run_args
->post
, PHASE_POST_READ
, run_args
, rc
);
815 for (i
= 0; i
< tc
; i
++) {
816 thr
= run_args
->threads
[i
];
818 mutex_enter(&thr
->lock
);
823 run_args
->stats
.rd_data
+= thr
->stats
.rd_data
;
824 run_args
->stats
.rd_chunks
+= thr
->stats
.rd_chunks
;
825 mutex_exit(&thr
->lock
);
828 tt
->stop
= zpios_timespec_now();
829 tt
->delta
= zpios_timespec_sub(tt
->stop
, tt
->start
);
830 tw
->delta
= zpios_timespec_sub(tw
->stop
, tw
->start
);
831 tr
->delta
= zpios_timespec_sub(tr
->stop
, tr
->start
);
834 kmem_free(tsks
, sizeof (struct task_struct
*) * tc
);
836 /* Returns first encountered thread error (if any) */
840 /* Destroy all threads that were created successfully */
841 for (i
= 0; i
< tc
; i
++)
843 (void) kthread_stop(tsks
[i
]);
849 zpios_do_one_run(struct file
*file
, zpios_cmd_t
*kcmd
,
850 int data_size
, void *data
)
852 run_args_t
*run_args
= { 0 };
853 zpios_stats_t
*stats
= (zpios_stats_t
*)data
;
854 int i
, n
, m
, size
, rc
;
856 if ((!kcmd
->cmd_chunk_size
) || (!kcmd
->cmd_region_size
) ||
857 (!kcmd
->cmd_thread_count
) || (!kcmd
->cmd_region_count
)) {
858 zpios_print(file
, "Invalid chunk_size, region_size, "
859 "thread_count, or region_count, %d\n", -EINVAL
);
863 if (!(kcmd
->cmd_flags
& DMU_WRITE
) ||
864 !(kcmd
->cmd_flags
& DMU_READ
)) {
865 zpios_print(file
, "Invalid flags, minimally DMU_WRITE "
866 "and DMU_READ must be set, %d\n", -EINVAL
);
870 if ((kcmd
->cmd_flags
& (DMU_WRITE_ZC
| DMU_READ_ZC
)) &&
871 (kcmd
->cmd_flags
& DMU_VERIFY
)) {
872 zpios_print(file
, "Invalid flags, DMU_*_ZC incompatible "
873 "with DMU_VERIFY, used for performance analysis "
874 "only, %d\n", -EINVAL
);
879 * Opaque data on return contains structs of the following form:
881 * zpios_stat_t stats[];
882 * stats[0] = run_args->stats;
883 * stats[1-N] = threads[N]->stats;
884 * stats[N+1-M] = regions[M]->stats;
886 * Where N is the number of threads, and M is the number of regions.
888 size
= (sizeof (zpios_stats_t
) +
889 (kcmd
->cmd_thread_count
* sizeof (zpios_stats_t
)) +
890 (kcmd
->cmd_region_count
* sizeof (zpios_stats_t
)));
891 if (data_size
< size
) {
892 zpios_print(file
, "Invalid size, command data buffer "
893 "size too small, (%d < %d)\n", data_size
, size
);
897 rc
= zpios_setup_run(&run_args
, kcmd
, file
);
901 rc
= zpios_threads_run(run_args
);
902 zpios_remove_objset(run_args
);
908 m
= 1 + kcmd
->cmd_thread_count
;
909 stats
[0] = run_args
->stats
;
911 for (i
= 0; i
< kcmd
->cmd_thread_count
; i
++)
912 stats
[n
+i
] = run_args
->threads
[i
]->stats
;
914 for (i
= 0; i
< kcmd
->cmd_region_count
; i
++)
915 stats
[m
+i
] = run_args
->regions
[i
].stats
;
919 zpios_cleanup_run(run_args
);
921 (void) zpios_upcall(kcmd
->cmd_post
, PHASE_POST_RUN
, run_args
, 0);
927 zpios_open(struct inode
*inode
, struct file
*file
)
931 info
= (zpios_info_t
*)kmem_alloc(sizeof (*info
), KM_SLEEP
);
935 spin_lock_init(&info
->info_lock
);
936 info
->info_size
= ZPIOS_INFO_BUFFER_SIZE
;
938 (char *) vmem_alloc(ZPIOS_INFO_BUFFER_SIZE
, KM_SLEEP
);
939 if (info
->info_buffer
== NULL
) {
940 kmem_free(info
, sizeof (*info
));
944 info
->info_head
= info
->info_buffer
;
945 file
->private_data
= (void *)info
;
951 zpios_release(struct inode
*inode
, struct file
*file
)
953 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
956 ASSERT(info
->info_buffer
);
958 vmem_free(info
->info_buffer
, ZPIOS_INFO_BUFFER_SIZE
);
959 kmem_free(info
, sizeof (*info
));
965 zpios_buffer_clear(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
967 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
970 ASSERT(info
->info_buffer
);
972 spin_lock(&info
->info_lock
);
973 memset(info
->info_buffer
, 0, info
->info_size
);
974 info
->info_head
= info
->info_buffer
;
975 spin_unlock(&info
->info_lock
);
981 zpios_buffer_size(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
983 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
985 int min
, size
, rc
= 0;
988 ASSERT(info
->info_buffer
);
990 spin_lock(&info
->info_lock
);
991 if (kcfg
->cfg_arg1
> 0) {
993 size
= kcfg
->cfg_arg1
;
994 buf
= (char *)vmem_alloc(size
, KM_SLEEP
);
1000 /* Zero fill and truncate contents when coping buffer */
1001 min
= ((size
< info
->info_size
) ? size
: info
->info_size
);
1002 memset(buf
, 0, size
);
1003 memcpy(buf
, info
->info_buffer
, min
);
1004 vmem_free(info
->info_buffer
, info
->info_size
);
1005 info
->info_size
= size
;
1006 info
->info_buffer
= buf
;
1007 info
->info_head
= info
->info_buffer
;
1010 kcfg
->cfg_rc1
= info
->info_size
;
1012 if (copy_to_user((struct zpios_cfg_t __user
*)arg
,
1013 kcfg
, sizeof (*kcfg
)))
1016 spin_unlock(&info
->info_lock
);
1022 zpios_ioctl_cfg(struct file
*file
, unsigned long arg
)
1027 if (copy_from_user(&kcfg
, (zpios_cfg_t
*)arg
, sizeof (kcfg
)))
1030 if (kcfg
.cfg_magic
!= ZPIOS_CFG_MAGIC
) {
1031 zpios_print(file
, "Bad config magic 0x%x != 0x%x\n",
1032 kcfg
.cfg_magic
, ZPIOS_CFG_MAGIC
);
1036 switch (kcfg
.cfg_cmd
) {
1037 case ZPIOS_CFG_BUFFER_CLEAR
:
1042 rc
= zpios_buffer_clear(file
, &kcfg
, arg
);
1044 case ZPIOS_CFG_BUFFER_SIZE
:
1046 * cfg_arg1 - 0 - query size; >0 resize
1047 * cfg_rc1 - Set to current buffer size
1049 rc
= zpios_buffer_size(file
, &kcfg
, arg
);
1052 zpios_print(file
, "Bad config command %d\n",
1062 zpios_ioctl_cmd(struct file
*file
, unsigned long arg
)
1068 kcmd
= kmem_alloc(sizeof (zpios_cmd_t
), KM_SLEEP
);
1070 zpios_print(file
, "Unable to kmem_alloc() %ld byte for "
1071 "zpios_cmd_t\n", (long int)sizeof (zpios_cmd_t
));
1075 rc
= copy_from_user(kcmd
, (zpios_cfg_t
*)arg
, sizeof (zpios_cmd_t
));
1077 zpios_print(file
, "Unable to copy command structure "
1078 "from user to kernel memory, %d\n", rc
);
1082 if (kcmd
->cmd_magic
!= ZPIOS_CMD_MAGIC
) {
1083 zpios_print(file
, "Bad command magic 0x%x != 0x%x\n",
1084 kcmd
->cmd_magic
, ZPIOS_CFG_MAGIC
);
1089 /* Allocate memory for any opaque data the caller needed to pass on */
1090 if (kcmd
->cmd_data_size
> 0) {
1091 data
= (void *)vmem_alloc(kcmd
->cmd_data_size
, KM_SLEEP
);
1093 zpios_print(file
, "Unable to vmem_alloc() %ld "
1094 "bytes for data buffer\n",
1095 (long)kcmd
->cmd_data_size
);
1100 rc
= copy_from_user(data
, (void *)(arg
+ offsetof(zpios_cmd_t
,
1101 cmd_data_str
)), kcmd
->cmd_data_size
);
1103 zpios_print(file
, "Unable to copy data buffer "
1104 "from user to kernel memory, %d\n", rc
);
1109 rc
= zpios_do_one_run(file
, kcmd
, kcmd
->cmd_data_size
, data
);
1112 /* If the test failed do not print out the stats */
1116 rc
= copy_to_user((void *)(arg
+ offsetof(zpios_cmd_t
,
1117 cmd_data_str
)), data
, kcmd
->cmd_data_size
);
1119 zpios_print(file
, "Unable to copy data buffer "
1120 "from kernel to user memory, %d\n", rc
);
1125 vmem_free(data
, kcmd
->cmd_data_size
);
1128 kmem_free(kcmd
, sizeof (zpios_cmd_t
));
1134 zpios_unlocked_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
1138 /* Ignore tty ioctls */
1139 if ((cmd
& 0xffffff00) == ((int)'T') << 8)
1144 rc
= zpios_ioctl_cfg(file
, arg
);
1147 rc
= zpios_ioctl_cmd(file
, arg
);
1150 zpios_print(file
, "Bad ioctl command %d\n", cmd
);
#ifdef CONFIG_COMPAT
/* Compatibility handler for ioctls from 32-bit ELF binaries */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
/*
 * I'm not sure why you would want to write in to this buffer from
 * user space since its principle use is to pass test status info
 * back to the user space, but I don't see any reason to prevent it.
 */
1173 zpios_write(struct file
*file
, const char __user
*buf
,
1174 size_t count
, loff_t
*ppos
)
1176 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1180 ASSERT(info
->info_buffer
);
1182 spin_lock(&info
->info_lock
);
1184 /* Write beyond EOF */
1185 if (*ppos
>= info
->info_size
) {
1190 /* Resize count if beyond EOF */
1191 if (*ppos
+ count
> info
->info_size
)
1192 count
= info
->info_size
- *ppos
;
1194 if (copy_from_user(info
->info_buffer
, buf
, count
)) {
1202 spin_unlock(&info
->info_lock
);
1207 zpios_read(struct file
*file
, char __user
*buf
, size_t count
, loff_t
*ppos
)
1209 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1213 ASSERT(info
->info_buffer
);
1215 spin_lock(&info
->info_lock
);
1217 /* Read beyond EOF */
1218 if (*ppos
>= info
->info_size
)
1221 /* Resize count if beyond EOF */
1222 if (*ppos
+ count
> info
->info_size
)
1223 count
= info
->info_size
- *ppos
;
1225 if (copy_to_user(buf
, info
->info_buffer
+ *ppos
, count
)) {
1233 spin_unlock(&info
->info_lock
);
1237 static loff_t
zpios_seek(struct file
*file
, loff_t offset
, int origin
)
1239 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1243 ASSERT(info
->info_buffer
);
1245 spin_lock(&info
->info_lock
);
1248 case 0: /* SEEK_SET - No-op just do it */
1250 case 1: /* SEEK_CUR - Seek from current */
1251 offset
= file
->f_pos
+ offset
;
1253 case 2: /* SEEK_END - Seek from end */
1254 offset
= info
->info_size
+ offset
;
1259 file
->f_pos
= offset
;
1260 file
->f_version
= 0;
1264 spin_unlock(&info
->info_lock
);
1269 static struct file_operations zpios_fops
= {
1270 .owner
= THIS_MODULE
,
1272 .release
= zpios_release
,
1273 .unlocked_ioctl
= zpios_unlocked_ioctl
,
1274 #ifdef CONFIG_COMPAT
1275 .compat_ioctl
= zpios_compat_ioctl
,
1278 .write
= zpios_write
,
1279 .llseek
= zpios_seek
,
1282 static struct miscdevice zpios_misc
= {
1283 .minor
= MISC_DYNAMIC_MINOR
,
1285 .fops
= &zpios_fops
,
/* Suffix appended to the load/unload banner in DEBUG builds */
#ifdef DEBUG
#define	ZFS_DEBUG_STR	" (DEBUG mode)"
#else
#define	ZFS_DEBUG_STR	""
#endif
1299 error
= misc_register(&zpios_misc
);
1301 printk(KERN_INFO
"ZPIOS: misc_register() failed %d\n", error
);
1303 printk(KERN_INFO
"ZPIOS: Loaded module v%s-%s%s\n",
1304 ZFS_META_VERSION
, ZFS_META_RELEASE
, ZFS_DEBUG_STR
);
1315 error
= misc_deregister(&zpios_misc
);
1317 printk(KERN_INFO
"ZPIOS: misc_deregister() failed %d\n", error
);
1319 printk(KERN_INFO
"ZPIOS: Unloaded module v%s-%s%s\n",
1320 ZFS_META_VERSION
, ZFS_META_RELEASE
, ZFS_DEBUG_STR
);
1323 module_init(zpios_init
);
1324 module_exit(zpios_fini
);
1326 MODULE_AUTHOR("LLNL / Sun");
1327 MODULE_DESCRIPTION("Kernel PIOS implementation");
1328 MODULE_LICENSE("GPL");
1329 MODULE_VERSION(ZFS_META_VERSION
"-" ZFS_META_RELEASE
);