2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
4 * against ZFS while still being flexibly controlled from user space.
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
17 * This file is part of ZFS on Linux.
18 * For details, see <http://zfsonlinux.org/>.
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
33 * Copyright (c) 2015, Intel Corporation.
#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/dsl_destroy.h>
#include <linux/miscdevice.h>
#include "zpios-internal.h"
/* Tag used to identify this module's dmu_objset_own()/disown() pairs. */
static char *zpios_tag = "zpios_tag";
48 zpios_upcall(char *path
, char *phase
, run_args_t
*run_args
, int rc
)
51 * This is stack heavy but it should be OK since we are only
52 * making the upcall between tests when the stack is shallow.
54 char id
[16], chunk_size
[16], region_size
[16], thread_count
[16];
55 char region_count
[16], offset
[16], region_noise
[16], chunk_noise
[16];
56 char thread_delay
[16], flags
[16], result
[8];
57 char *argv
[16], *envp
[4];
59 if ((path
== NULL
) || (strlen(path
) == 0))
62 snprintf(id
, 15, "%d", run_args
->id
);
63 snprintf(chunk_size
, 15, "%lu", (long unsigned)run_args
->chunk_size
);
64 snprintf(region_size
, 15, "%lu", (long unsigned) run_args
->region_size
);
65 snprintf(thread_count
, 15, "%u", run_args
->thread_count
);
66 snprintf(region_count
, 15, "%u", run_args
->region_count
);
67 snprintf(offset
, 15, "%lu", (long unsigned)run_args
->offset
);
68 snprintf(region_noise
, 15, "%u", run_args
->region_noise
);
69 snprintf(chunk_noise
, 15, "%u", run_args
->chunk_noise
);
70 snprintf(thread_delay
, 15, "%u", run_args
->thread_delay
);
71 snprintf(flags
, 15, "0x%x", run_args
->flags
);
72 snprintf(result
, 7, "%d", rc
);
74 /* Passing 15 args to registered pre/post upcall */
77 argv
[2] = strlen(run_args
->log
) ? run_args
->log
: "<none>";
79 argv
[4] = run_args
->pool
;
81 argv
[6] = region_size
;
82 argv
[7] = thread_count
;
83 argv
[8] = region_count
;
85 argv
[10] = region_noise
;
86 argv
[11] = chunk_noise
;
87 argv
[12] = thread_delay
;
92 /* Passing environment for user space upcall */
94 envp
[1] = "TERM=linux";
95 envp
[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
98 return (call_usermodehelper(path
, argv
, envp
, UMH_WAIT_PROC
));
102 zpios_print(struct file
*file
, const char *format
, ...)
104 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
109 ASSERT(info
->info_buffer
);
111 va_start(adx
, format
);
112 spin_lock(&info
->info_lock
);
114 /* Don't allow the kernel to start a write in the red zone */
115 if ((int)(info
->info_head
- info
->info_buffer
) >
116 (info
->info_size
- ZPIOS_INFO_BUFFER_REDZONE
)) {
119 rc
= vsprintf(info
->info_head
, format
, adx
);
121 info
->info_head
+= rc
;
124 spin_unlock(&info
->info_lock
);
131 zpios_dmu_object_create(run_args_t
*run_args
, objset_t
*os
)
135 uint64_t blksize
= run_args
->block_size
;
138 if (blksize
< SPA_MINBLOCKSIZE
||
139 blksize
> spa_maxblocksize(dmu_objset_spa(os
)) ||
141 zpios_print(run_args
->file
,
142 "invalid block size for pool: %d\n", (int)blksize
);
146 tx
= dmu_tx_create(os
);
147 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0, OBJ_SIZE
);
148 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
150 zpios_print(run_args
->file
,
151 "dmu_tx_assign() failed: %d\n", rc
);
156 obj
= dmu_object_alloc(os
, DMU_OT_UINT64_OTHER
, 0, DMU_OT_NONE
, 0, tx
);
157 rc
= dmu_object_set_blocksize(os
, obj
, blksize
, 0, tx
);
159 zpios_print(run_args
->file
,
160 "dmu_object_set_blocksize to %d failed: %d\n",
172 zpios_dmu_object_free(run_args_t
*run_args
, objset_t
*os
, uint64_t obj
)
177 tx
= dmu_tx_create(os
);
178 dmu_tx_hold_free(tx
, obj
, 0, DMU_OBJECT_END
);
179 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
181 zpios_print(run_args
->file
,
182 "dmu_tx_assign() failed: %d\n", rc
);
187 rc
= dmu_object_free(os
, obj
, tx
);
189 zpios_print(run_args
->file
,
190 "dmu_object_free() failed: %d\n", rc
);
201 zpios_dmu_setup(run_args_t
*run_args
)
203 zpios_time_t
*t
= &(run_args
->stats
.cr_time
);
209 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_CREATE
, run_args
, 0);
210 t
->start
= zpios_timespec_now();
212 (void) snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
213 rc
= dmu_objset_create(name
, DMU_OST_OTHER
, 0, NULL
, NULL
);
215 zpios_print(run_args
->file
, "Error dmu_objset_create(%s, ...) "
216 "failed: %d\n", name
, rc
);
220 rc
= dmu_objset_own(name
, DMU_OST_OTHER
, 0, zpios_tag
, &os
);
222 zpios_print(run_args
->file
, "Error dmu_objset_own(%s, ...) "
223 "failed: %d\n", name
, rc
);
227 if (!(run_args
->flags
& DMU_FPP
)) {
228 obj
= zpios_dmu_object_create(run_args
, os
);
231 zpios_print(run_args
->file
, "Error zpios_dmu_"
232 "object_create() failed, %d\n", rc
);
237 for (i
= 0; i
< run_args
->region_count
; i
++) {
238 zpios_region_t
*region
;
240 region
= &run_args
->regions
[i
];
241 mutex_init(®ion
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
243 if (run_args
->flags
& DMU_FPP
) {
244 /* File per process */
246 region
->obj
.obj
= zpios_dmu_object_create(run_args
, os
);
247 ASSERT(region
->obj
.obj
> 0); /* XXX - Handle this */
248 region
->wr_offset
= run_args
->offset
;
249 region
->rd_offset
= run_args
->offset
;
250 region
->init_offset
= run_args
->offset
;
251 region
->max_offset
= run_args
->offset
+
252 run_args
->region_size
;
254 /* Single shared file */
256 region
->obj
.obj
= obj
;
257 region
->wr_offset
= run_args
->offset
* i
;
258 region
->rd_offset
= run_args
->offset
* i
;
259 region
->init_offset
= run_args
->offset
* i
;
260 region
->max_offset
= run_args
->offset
*
261 i
+ run_args
->region_size
;
268 rc2
= dsl_destroy_head(name
);
270 zpios_print(run_args
->file
, "Error dsl_destroy_head"
271 "(%s, ...) failed: %d\n", name
, rc2
);
274 t
->stop
= zpios_timespec_now();
275 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
276 (void) zpios_upcall(run_args
->post
, PHASE_POST_CREATE
, run_args
, rc
);
282 zpios_setup_run(run_args_t
**run_args
, zpios_cmd_t
*kcmd
, struct file
*file
)
287 size
= sizeof (*ra
) + kcmd
->cmd_region_count
* sizeof (zpios_region_t
);
289 ra
= vmem_zalloc(size
, KM_SLEEP
);
292 snprintf(ra
->pool
, sizeof (ra
->pool
), "%s", kcmd
->cmd_pool
);
293 snprintf(ra
->pre
, sizeof (ra
->pre
), "%s", kcmd
->cmd_pre
);
294 snprintf(ra
->post
, sizeof (ra
->post
), "%s", kcmd
->cmd_post
);
295 snprintf(ra
->log
, sizeof (ra
->log
), "%s", kcmd
->cmd_log
);
297 ra
->id
= kcmd
->cmd_id
;
298 ra
->chunk_size
= kcmd
->cmd_chunk_size
;
299 ra
->thread_count
= kcmd
->cmd_thread_count
;
300 ra
->region_count
= kcmd
->cmd_region_count
;
301 ra
->region_size
= kcmd
->cmd_region_size
;
302 ra
->offset
= kcmd
->cmd_offset
;
303 ra
->region_noise
= kcmd
->cmd_region_noise
;
304 ra
->chunk_noise
= kcmd
->cmd_chunk_noise
;
305 ra
->thread_delay
= kcmd
->cmd_thread_delay
;
306 ra
->flags
= kcmd
->cmd_flags
;
307 ra
->block_size
= kcmd
->cmd_block_size
;
308 ra
->stats
.wr_data
= 0;
309 ra
->stats
.wr_chunks
= 0;
310 ra
->stats
.rd_data
= 0;
311 ra
->stats
.rd_chunks
= 0;
314 mutex_init(&ra
->lock_work
, NULL
, MUTEX_DEFAULT
, NULL
);
315 mutex_init(&ra
->lock_ctl
, NULL
, MUTEX_DEFAULT
, NULL
);
317 (void) zpios_upcall(ra
->pre
, PHASE_PRE_RUN
, ra
, 0);
319 rc
= zpios_dmu_setup(ra
);
321 mutex_destroy(&ra
->lock_ctl
);
322 mutex_destroy(&ra
->lock_work
);
331 zpios_get_work_item(run_args_t
*run_args
, dmu_obj_t
*obj
, __u64
*offset
,
332 __u32
*chunk_size
, zpios_region_t
**region
, __u32 flags
)
335 unsigned int random_int
;
337 get_random_bytes(&random_int
, sizeof (unsigned int));
339 mutex_enter(&run_args
->lock_work
);
340 i
= run_args
->region_next
;
343 * XXX: I don't much care for this chunk selection mechansim
344 * there's the potential to burn a lot of time here doing nothing
345 * useful while holding the global lock. This could give some
346 * misleading performance results. I'll fix it latter.
348 while (count
< run_args
->region_count
) {
350 zpios_time_t
*rw_time
;
352 j
= i
% run_args
->region_count
;
353 *region
= &(run_args
->regions
[j
]);
355 if (flags
& DMU_WRITE
) {
356 rw_offset
= &((*region
)->wr_offset
);
357 rw_time
= &((*region
)->stats
.wr_time
);
359 rw_offset
= &((*region
)->rd_offset
);
360 rw_time
= &((*region
)->stats
.rd_time
);
363 /* test if region is fully written */
364 if (*rw_offset
+ *chunk_size
> (*region
)->max_offset
) {
368 if (unlikely(rw_time
->stop
.ts_sec
== 0) &&
369 unlikely(rw_time
->stop
.ts_nsec
== 0))
370 rw_time
->stop
= zpios_timespec_now();
375 *offset
= *rw_offset
;
376 *obj
= (*region
)->obj
;
377 *rw_offset
+= *chunk_size
;
379 /* update ctl structure */
380 if (run_args
->region_noise
) {
381 get_random_bytes(&random_int
, sizeof (unsigned int));
382 run_args
->region_next
+=
383 random_int
% run_args
->region_noise
;
385 run_args
->region_next
++;
388 mutex_exit(&run_args
->lock_work
);
392 /* nothing left to do */
393 mutex_exit(&run_args
->lock_work
);
399 zpios_remove_objset(run_args_t
*run_args
)
401 zpios_time_t
*t
= &(run_args
->stats
.rm_time
);
402 zpios_region_t
*region
;
406 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_REMOVE
, run_args
, 0);
407 t
->start
= zpios_timespec_now();
409 (void) snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
411 if (run_args
->flags
& DMU_REMOVE
) {
412 if (run_args
->flags
& DMU_FPP
) {
413 for (i
= 0; i
< run_args
->region_count
; i
++) {
414 region
= &run_args
->regions
[i
];
415 rc
= zpios_dmu_object_free(run_args
,
416 region
->obj
.os
, region
->obj
.obj
);
418 zpios_print(run_args
->file
,
419 "Error removing object %d, %d\n",
420 (int)region
->obj
.obj
, rc
);
423 region
= &run_args
->regions
[0];
424 rc
= zpios_dmu_object_free(run_args
,
425 region
->obj
.os
, region
->obj
.obj
);
427 zpios_print(run_args
->file
,
428 "Error removing object %d, %d\n",
429 (int)region
->obj
.obj
, rc
);
433 dmu_objset_disown(run_args
->os
, zpios_tag
);
435 if (run_args
->flags
& DMU_REMOVE
) {
436 rc
= dsl_destroy_head(name
);
438 zpios_print(run_args
->file
, "Error dsl_destroy_head"
439 "(%s, ...) failed: %d\n", name
, rc
);
442 t
->stop
= zpios_timespec_now();
443 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
444 (void) zpios_upcall(run_args
->post
, PHASE_POST_REMOVE
, run_args
, rc
);
448 zpios_cleanup_run(run_args_t
*run_args
)
452 if (run_args
== NULL
)
455 if (run_args
->threads
!= NULL
) {
456 for (i
= 0; i
< run_args
->thread_count
; i
++) {
457 if (run_args
->threads
[i
]) {
458 mutex_destroy(&run_args
->threads
[i
]->lock
);
459 kmem_free(run_args
->threads
[i
],
460 sizeof (thread_data_t
));
464 kmem_free(run_args
->threads
,
465 sizeof (thread_data_t
*) * run_args
->thread_count
);
468 for (i
= 0; i
< run_args
->region_count
; i
++)
469 mutex_destroy(&run_args
->regions
[i
].lock
);
471 mutex_destroy(&run_args
->lock_work
);
472 mutex_destroy(&run_args
->lock_ctl
);
473 size
= run_args
->region_count
* sizeof (zpios_region_t
);
475 vmem_free(run_args
, sizeof (*run_args
) + size
);
479 zpios_dmu_write(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
480 uint64_t offset
, uint64_t size
, const void *buf
)
483 int rc
, how
= TXG_WAIT
;
486 if (run_args
->flags
& DMU_WRITE_NOWAIT
)
490 tx
= dmu_tx_create(os
);
491 dmu_tx_hold_write(tx
, object
, offset
, size
);
492 rc
= dmu_tx_assign(tx
, how
);
495 if (rc
== ERESTART
&& how
== TXG_NOWAIT
) {
500 zpios_print(run_args
->file
,
501 "Error in dmu_tx_assign(), %d", rc
);
508 // if (run_args->flags & DMU_WRITE_ZC)
509 // flags |= DMU_WRITE_ZEROCOPY;
511 dmu_write(os
, object
, offset
, size
, buf
, tx
);
518 zpios_dmu_read(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
519 uint64_t offset
, uint64_t size
, void *buf
)
523 // if (run_args->flags & DMU_READ_ZC)
524 // flags |= DMU_READ_ZEROCOPY;
526 if (run_args
->flags
& DMU_READ_NOPF
)
527 flags
|= DMU_READ_NO_PREFETCH
;
529 return (dmu_read(os
, object
, offset
, size
, buf
, flags
));
533 zpios_thread_main(void *data
)
535 thread_data_t
*thr
= (thread_data_t
*)data
;
536 run_args_t
*run_args
= thr
->run_args
;
541 zpios_region_t
*region
;
543 unsigned int random_int
;
544 int chunk_noise
= run_args
->chunk_noise
;
545 int chunk_noise_tmp
= 0;
546 int thread_delay
= run_args
->thread_delay
;
547 int thread_delay_tmp
= 0;
551 get_random_bytes(&random_int
, sizeof (unsigned int));
552 chunk_noise_tmp
= (random_int
% (chunk_noise
* 2))-chunk_noise
;
556 * It's OK to vmem_alloc() this memory because it will be copied
557 * in to the slab and pointers to the slab copy will be setup in
558 * the bio when the IO is submitted. This of course is not ideal
559 * since we want a zero-copy IO path if possible. It would be nice
560 * to have direct access to those slab entries.
562 chunk_size
= run_args
->chunk_size
+ chunk_noise_tmp
;
563 buf
= (char *)vmem_alloc(chunk_size
, KM_SLEEP
);
566 /* Trivial data verification pattern for now. */
567 if (run_args
->flags
& DMU_VERIFY
)
568 memset(buf
, 'z', chunk_size
);
571 mutex_enter(&thr
->lock
);
572 thr
->stats
.wr_time
.start
= zpios_timespec_now();
573 mutex_exit(&thr
->lock
);
575 while (zpios_get_work_item(run_args
, &obj
, &offset
,
576 &chunk_size
, ®ion
, DMU_WRITE
)) {
578 get_random_bytes(&random_int
, sizeof (unsigned int));
579 thread_delay_tmp
= random_int
% thread_delay
;
580 set_current_state(TASK_UNINTERRUPTIBLE
);
581 schedule_timeout(thread_delay_tmp
); /* In jiffies */
584 t
.start
= zpios_timespec_now();
585 rc
= zpios_dmu_write(run_args
, obj
.os
, obj
.obj
,
586 offset
, chunk_size
, buf
);
587 t
.stop
= zpios_timespec_now();
588 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
591 zpios_print(run_args
->file
, "IO error while doing "
592 "dmu_write(): %d\n", rc
);
596 mutex_enter(&thr
->lock
);
597 thr
->stats
.wr_data
+= chunk_size
;
598 thr
->stats
.wr_chunks
++;
599 thr
->stats
.wr_time
.delta
= zpios_timespec_add(
600 thr
->stats
.wr_time
.delta
, t
.delta
);
601 mutex_exit(&thr
->lock
);
603 mutex_enter(®ion
->lock
);
604 region
->stats
.wr_data
+= chunk_size
;
605 region
->stats
.wr_chunks
++;
606 region
->stats
.wr_time
.delta
= zpios_timespec_add(
607 region
->stats
.wr_time
.delta
, t
.delta
);
609 /* First time region was accessed */
610 if (region
->init_offset
== offset
)
611 region
->stats
.wr_time
.start
= t
.start
;
613 mutex_exit(®ion
->lock
);
616 mutex_enter(&run_args
->lock_ctl
);
617 run_args
->threads_done
++;
618 mutex_exit(&run_args
->lock_ctl
);
620 mutex_enter(&thr
->lock
);
622 thr
->stats
.wr_time
.stop
= zpios_timespec_now();
623 mutex_exit(&thr
->lock
);
624 wake_up(&run_args
->waitq
);
626 set_current_state(TASK_UNINTERRUPTIBLE
);
629 /* Check if we should exit */
630 mutex_enter(&thr
->lock
);
632 mutex_exit(&thr
->lock
);
637 mutex_enter(&thr
->lock
);
638 thr
->stats
.rd_time
.start
= zpios_timespec_now();
639 mutex_exit(&thr
->lock
);
641 while (zpios_get_work_item(run_args
, &obj
, &offset
,
642 &chunk_size
, ®ion
, DMU_READ
)) {
644 get_random_bytes(&random_int
, sizeof (unsigned int));
645 thread_delay_tmp
= random_int
% thread_delay
;
646 set_current_state(TASK_UNINTERRUPTIBLE
);
647 schedule_timeout(thread_delay_tmp
); /* In jiffies */
650 if (run_args
->flags
& DMU_VERIFY
)
651 memset(buf
, 0, chunk_size
);
653 t
.start
= zpios_timespec_now();
654 rc
= zpios_dmu_read(run_args
, obj
.os
, obj
.obj
,
655 offset
, chunk_size
, buf
);
656 t
.stop
= zpios_timespec_now();
657 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
660 zpios_print(run_args
->file
, "IO error while doing "
661 "dmu_read(): %d\n", rc
);
665 /* Trivial data verification, expensive! */
666 if (run_args
->flags
& DMU_VERIFY
) {
667 for (i
= 0; i
< chunk_size
; i
++) {
669 zpios_print(run_args
->file
,
670 "IO verify error: %d/%d/%d\n",
671 (int)obj
.obj
, (int)offset
,
678 mutex_enter(&thr
->lock
);
679 thr
->stats
.rd_data
+= chunk_size
;
680 thr
->stats
.rd_chunks
++;
681 thr
->stats
.rd_time
.delta
= zpios_timespec_add(
682 thr
->stats
.rd_time
.delta
, t
.delta
);
683 mutex_exit(&thr
->lock
);
685 mutex_enter(®ion
->lock
);
686 region
->stats
.rd_data
+= chunk_size
;
687 region
->stats
.rd_chunks
++;
688 region
->stats
.rd_time
.delta
= zpios_timespec_add(
689 region
->stats
.rd_time
.delta
, t
.delta
);
691 /* First time region was accessed */
692 if (region
->init_offset
== offset
)
693 region
->stats
.rd_time
.start
= t
.start
;
695 mutex_exit(®ion
->lock
);
698 mutex_enter(&run_args
->lock_ctl
);
699 run_args
->threads_done
++;
700 mutex_exit(&run_args
->lock_ctl
);
702 mutex_enter(&thr
->lock
);
704 thr
->stats
.rd_time
.stop
= zpios_timespec_now();
705 mutex_exit(&thr
->lock
);
706 wake_up(&run_args
->waitq
);
709 vmem_free(buf
, chunk_size
);
712 return (rc
); /* Unreachable, due to do_exit() */
716 zpios_thread_done(run_args_t
*run_args
)
718 ASSERT(run_args
->threads_done
<= run_args
->thread_count
);
719 return (run_args
->threads_done
== run_args
->thread_count
);
723 zpios_threads_run(run_args_t
*run_args
)
725 struct task_struct
*tsk
, **tsks
;
726 thread_data_t
*thr
= NULL
;
727 zpios_time_t
*tt
= &(run_args
->stats
.total_time
);
728 zpios_time_t
*tw
= &(run_args
->stats
.wr_time
);
729 zpios_time_t
*tr
= &(run_args
->stats
.rd_time
);
730 int i
, rc
= 0, tc
= run_args
->thread_count
;
732 tsks
= kmem_zalloc(sizeof (struct task_struct
*) * tc
, KM_SLEEP
);
734 run_args
->threads
= kmem_zalloc(sizeof (thread_data_t
*)*tc
, KM_SLEEP
);
736 init_waitqueue_head(&run_args
->waitq
);
737 run_args
->threads_done
= 0;
739 /* Create all the needed threads which will sleep until awoken */
740 for (i
= 0; i
< tc
; i
++) {
741 thr
= kmem_zalloc(sizeof (thread_data_t
), KM_SLEEP
);
744 thr
->run_args
= run_args
;
746 mutex_init(&thr
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
747 run_args
->threads
[i
] = thr
;
749 tsk
= kthread_create(zpios_thread_main
, (void *)thr
,
750 "%s/%d", "zpios_io", i
);
759 tt
->start
= zpios_timespec_now();
761 /* Wake up all threads for write phase */
762 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_WRITE
, run_args
, 0);
763 for (i
= 0; i
< tc
; i
++)
764 wake_up_process(tsks
[i
]);
766 /* Wait for write phase to complete */
767 tw
->start
= zpios_timespec_now();
768 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
769 tw
->stop
= zpios_timespec_now();
770 (void) zpios_upcall(run_args
->post
, PHASE_POST_WRITE
, run_args
, rc
);
772 for (i
= 0; i
< tc
; i
++) {
773 thr
= run_args
->threads
[i
];
775 mutex_enter(&thr
->lock
);
780 run_args
->stats
.wr_data
+= thr
->stats
.wr_data
;
781 run_args
->stats
.wr_chunks
+= thr
->stats
.wr_chunks
;
782 mutex_exit(&thr
->lock
);
786 /* Wake up all threads and tell them to exit */
787 for (i
= 0; i
< tc
; i
++) {
788 mutex_enter(&thr
->lock
);
790 mutex_exit(&thr
->lock
);
792 wake_up_process(tsks
[i
]);
797 mutex_enter(&run_args
->lock_ctl
);
798 ASSERT(run_args
->threads_done
== run_args
->thread_count
);
799 run_args
->threads_done
= 0;
800 mutex_exit(&run_args
->lock_ctl
);
802 /* Wake up all threads for read phase */
803 (void) zpios_upcall(run_args
->pre
, PHASE_PRE_READ
, run_args
, 0);
804 for (i
= 0; i
< tc
; i
++)
805 wake_up_process(tsks
[i
]);
807 /* Wait for read phase to complete */
808 tr
->start
= zpios_timespec_now();
809 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
810 tr
->stop
= zpios_timespec_now();
811 (void) zpios_upcall(run_args
->post
, PHASE_POST_READ
, run_args
, rc
);
813 for (i
= 0; i
< tc
; i
++) {
814 thr
= run_args
->threads
[i
];
816 mutex_enter(&thr
->lock
);
821 run_args
->stats
.rd_data
+= thr
->stats
.rd_data
;
822 run_args
->stats
.rd_chunks
+= thr
->stats
.rd_chunks
;
823 mutex_exit(&thr
->lock
);
826 tt
->stop
= zpios_timespec_now();
827 tt
->delta
= zpios_timespec_sub(tt
->stop
, tt
->start
);
828 tw
->delta
= zpios_timespec_sub(tw
->stop
, tw
->start
);
829 tr
->delta
= zpios_timespec_sub(tr
->stop
, tr
->start
);
832 kmem_free(tsks
, sizeof (struct task_struct
*) * tc
);
836 /* Destroy all threads that were created successfully */
837 for (i
= 0; i
< tc
; i
++)
839 (void) kthread_stop(tsks
[i
]);
845 zpios_do_one_run(struct file
*file
, zpios_cmd_t
*kcmd
,
846 int data_size
, void *data
)
848 run_args_t
*run_args
= { 0 };
849 zpios_stats_t
*stats
= (zpios_stats_t
*)data
;
850 int i
, n
, m
, size
, rc
;
852 if ((!kcmd
->cmd_chunk_size
) || (!kcmd
->cmd_region_size
) ||
853 (!kcmd
->cmd_thread_count
) || (!kcmd
->cmd_region_count
)) {
854 zpios_print(file
, "Invalid chunk_size, region_size, "
855 "thread_count, or region_count, %d\n", -EINVAL
);
859 if (!(kcmd
->cmd_flags
& DMU_WRITE
) ||
860 !(kcmd
->cmd_flags
& DMU_READ
)) {
861 zpios_print(file
, "Invalid flags, minimally DMU_WRITE "
862 "and DMU_READ must be set, %d\n", -EINVAL
);
866 if ((kcmd
->cmd_flags
& (DMU_WRITE_ZC
| DMU_READ_ZC
)) &&
867 (kcmd
->cmd_flags
& DMU_VERIFY
)) {
868 zpios_print(file
, "Invalid flags, DMU_*_ZC incompatible "
869 "with DMU_VERIFY, used for performance analysis "
870 "only, %d\n", -EINVAL
);
875 * Opaque data on return contains structs of the following form:
877 * zpios_stat_t stats[];
878 * stats[0] = run_args->stats;
879 * stats[1-N] = threads[N]->stats;
880 * stats[N+1-M] = regions[M]->stats;
882 * Where N is the number of threads, and M is the number of regions.
884 size
= (sizeof (zpios_stats_t
) +
885 (kcmd
->cmd_thread_count
* sizeof (zpios_stats_t
)) +
886 (kcmd
->cmd_region_count
* sizeof (zpios_stats_t
)));
887 if (data_size
< size
) {
888 zpios_print(file
, "Invalid size, command data buffer "
889 "size too small, (%d < %d)\n", data_size
, size
);
893 rc
= zpios_setup_run(&run_args
, kcmd
, file
);
897 rc
= zpios_threads_run(run_args
);
898 zpios_remove_objset(run_args
);
904 m
= 1 + kcmd
->cmd_thread_count
;
905 stats
[0] = run_args
->stats
;
907 for (i
= 0; i
< kcmd
->cmd_thread_count
; i
++)
908 stats
[n
+i
] = run_args
->threads
[i
]->stats
;
910 for (i
= 0; i
< kcmd
->cmd_region_count
; i
++)
911 stats
[m
+i
] = run_args
->regions
[i
].stats
;
915 zpios_cleanup_run(run_args
);
917 (void) zpios_upcall(kcmd
->cmd_post
, PHASE_POST_RUN
, run_args
, 0);
923 zpios_open(struct inode
*inode
, struct file
*file
)
927 info
= (zpios_info_t
*)kmem_alloc(sizeof (*info
), KM_SLEEP
);
929 spin_lock_init(&info
->info_lock
);
930 info
->info_size
= ZPIOS_INFO_BUFFER_SIZE
;
932 (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE
, KM_SLEEP
);
934 info
->info_head
= info
->info_buffer
;
935 file
->private_data
= (void *)info
;
941 zpios_release(struct inode
*inode
, struct file
*file
)
943 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
946 ASSERT(info
->info_buffer
);
948 vmem_free(info
->info_buffer
, ZPIOS_INFO_BUFFER_SIZE
);
949 kmem_free(info
, sizeof (*info
));
955 zpios_buffer_clear(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
957 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
960 ASSERT(info
->info_buffer
);
962 spin_lock(&info
->info_lock
);
963 memset(info
->info_buffer
, 0, info
->info_size
);
964 info
->info_head
= info
->info_buffer
;
965 spin_unlock(&info
->info_lock
);
971 zpios_buffer_size(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
973 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
975 int min
, size
, rc
= 0;
978 ASSERT(info
->info_buffer
);
980 spin_lock(&info
->info_lock
);
981 if (kcfg
->cfg_arg1
> 0) {
983 size
= kcfg
->cfg_arg1
;
984 buf
= (char *)vmem_alloc(size
, KM_SLEEP
);
986 /* Zero fill and truncate contents when coping buffer */
987 min
= ((size
< info
->info_size
) ? size
: info
->info_size
);
988 memset(buf
, 0, size
);
989 memcpy(buf
, info
->info_buffer
, min
);
990 vmem_free(info
->info_buffer
, info
->info_size
);
991 info
->info_size
= size
;
992 info
->info_buffer
= buf
;
993 info
->info_head
= info
->info_buffer
;
996 kcfg
->cfg_rc1
= info
->info_size
;
998 if (copy_to_user((struct zpios_cfg_t __user
*)arg
,
999 kcfg
, sizeof (*kcfg
)))
1002 spin_unlock(&info
->info_lock
);
1008 zpios_ioctl_cfg(struct file
*file
, unsigned long arg
)
1013 if (copy_from_user(&kcfg
, (zpios_cfg_t
*)arg
, sizeof (kcfg
)))
1016 if (kcfg
.cfg_magic
!= ZPIOS_CFG_MAGIC
) {
1017 zpios_print(file
, "Bad config magic 0x%x != 0x%x\n",
1018 kcfg
.cfg_magic
, ZPIOS_CFG_MAGIC
);
1022 switch (kcfg
.cfg_cmd
) {
1023 case ZPIOS_CFG_BUFFER_CLEAR
:
1028 rc
= zpios_buffer_clear(file
, &kcfg
, arg
);
1030 case ZPIOS_CFG_BUFFER_SIZE
:
1032 * cfg_arg1 - 0 - query size; >0 resize
1033 * cfg_rc1 - Set to current buffer size
1035 rc
= zpios_buffer_size(file
, &kcfg
, arg
);
1038 zpios_print(file
, "Bad config command %d\n",
1048 zpios_ioctl_cmd(struct file
*file
, unsigned long arg
)
1054 kcmd
= kmem_alloc(sizeof (zpios_cmd_t
), KM_SLEEP
);
1056 rc
= copy_from_user(kcmd
, (zpios_cfg_t
*)arg
, sizeof (zpios_cmd_t
));
1058 zpios_print(file
, "Unable to copy command structure "
1059 "from user to kernel memory, %d\n", rc
);
1063 if (kcmd
->cmd_magic
!= ZPIOS_CMD_MAGIC
) {
1064 zpios_print(file
, "Bad command magic 0x%x != 0x%x\n",
1065 kcmd
->cmd_magic
, ZPIOS_CFG_MAGIC
);
1070 /* Allocate memory for any opaque data the caller needed to pass on */
1071 if (kcmd
->cmd_data_size
> 0) {
1072 data
= (void *)vmem_alloc(kcmd
->cmd_data_size
, KM_SLEEP
);
1074 rc
= copy_from_user(data
, (void *)(arg
+ offsetof(zpios_cmd_t
,
1075 cmd_data_str
)), kcmd
->cmd_data_size
);
1077 zpios_print(file
, "Unable to copy data buffer "
1078 "from user to kernel memory, %d\n", rc
);
1083 rc
= zpios_do_one_run(file
, kcmd
, kcmd
->cmd_data_size
, data
);
1086 /* If the test failed do not print out the stats */
1090 rc
= copy_to_user((void *)(arg
+ offsetof(zpios_cmd_t
,
1091 cmd_data_str
)), data
, kcmd
->cmd_data_size
);
1093 zpios_print(file
, "Unable to copy data buffer "
1094 "from kernel to user memory, %d\n", rc
);
1099 vmem_free(data
, kcmd
->cmd_data_size
);
1102 kmem_free(kcmd
, sizeof (zpios_cmd_t
));
1108 zpios_unlocked_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
1112 /* Ignore tty ioctls */
1113 if ((cmd
& 0xffffff00) == ((int)'T') << 8)
1118 rc
= zpios_ioctl_cfg(file
, arg
);
1121 rc
= zpios_ioctl_cmd(file
, arg
);
1124 zpios_print(file
, "Bad ioctl command %d\n", cmd
);
#ifdef CONFIG_COMPAT
/* Compatibility handler for ioctls from 32-bit ELF binaries */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
1142 * I'm not sure why you would want to write in to this buffer from
1143 * user space since its principle use is to pass test status info
1144 * back to the user space, but I don't see any reason to prevent it.
1147 zpios_write(struct file
*file
, const char __user
*buf
,
1148 size_t count
, loff_t
*ppos
)
1150 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1154 ASSERT(info
->info_buffer
);
1156 spin_lock(&info
->info_lock
);
1158 /* Write beyond EOF */
1159 if (*ppos
>= info
->info_size
) {
1164 /* Resize count if beyond EOF */
1165 if (*ppos
+ count
> info
->info_size
)
1166 count
= info
->info_size
- *ppos
;
1168 if (copy_from_user(info
->info_buffer
, buf
, count
)) {
1176 spin_unlock(&info
->info_lock
);
1181 zpios_read(struct file
*file
, char __user
*buf
, size_t count
, loff_t
*ppos
)
1183 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1187 ASSERT(info
->info_buffer
);
1189 spin_lock(&info
->info_lock
);
1191 /* Read beyond EOF */
1192 if (*ppos
>= info
->info_size
)
1195 /* Resize count if beyond EOF */
1196 if (*ppos
+ count
> info
->info_size
)
1197 count
= info
->info_size
- *ppos
;
1199 if (copy_to_user(buf
, info
->info_buffer
+ *ppos
, count
)) {
1207 spin_unlock(&info
->info_lock
);
1211 static loff_t
zpios_seek(struct file
*file
, loff_t offset
, int origin
)
1213 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1217 ASSERT(info
->info_buffer
);
1219 spin_lock(&info
->info_lock
);
1222 case 0: /* SEEK_SET - No-op just do it */
1224 case 1: /* SEEK_CUR - Seek from current */
1225 offset
= file
->f_pos
+ offset
;
1227 case 2: /* SEEK_END - Seek from end */
1228 offset
= info
->info_size
+ offset
;
1233 file
->f_pos
= offset
;
1234 file
->f_version
= 0;
1238 spin_unlock(&info
->info_lock
);
1243 static struct file_operations zpios_fops
= {
1244 .owner
= THIS_MODULE
,
1246 .release
= zpios_release
,
1247 .unlocked_ioctl
= zpios_unlocked_ioctl
,
1248 #ifdef CONFIG_COMPAT
1249 .compat_ioctl
= zpios_compat_ioctl
,
1252 .write
= zpios_write
,
1253 .llseek
= zpios_seek
,
1256 static struct miscdevice zpios_misc
= {
1257 .minor
= MISC_DYNAMIC_MINOR
,
1259 .fops
= &zpios_fops
,
1263 #define ZFS_DEBUG_STR " (DEBUG mode)"
1265 #define ZFS_DEBUG_STR ""
1273 error
= misc_register(&zpios_misc
);
1275 printk(KERN_INFO
"ZPIOS: misc_register() failed %d\n", error
);
1277 printk(KERN_INFO
"ZPIOS: Loaded module v%s-%s%s\n",
1278 ZFS_META_VERSION
, ZFS_META_RELEASE
, ZFS_DEBUG_STR
);
1287 misc_deregister(&zpios_misc
);
1289 printk(KERN_INFO
"ZPIOS: Unloaded module v%s-%s%s\n",
1290 ZFS_META_VERSION
, ZFS_META_RELEASE
, ZFS_DEBUG_STR
);
1293 module_init(zpios_init
);
1294 module_exit(zpios_fini
);
1296 MODULE_AUTHOR("LLNL / Sun");
1297 MODULE_DESCRIPTION("Kernel PIOS implementation");
1298 MODULE_LICENSE("GPL");
1299 MODULE_VERSION(ZFS_META_VERSION
"-" ZFS_META_RELEASE
);