1 /*****************************************************************************\
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
 * against ZFS while still being flexibly controlled from user space.
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
17 * This file is part of ZFS on Linux.
18 * For details, see <http://zfsonlinux.org/>.
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
32 \*****************************************************************************/
34 #include <sys/zfs_context.h>
37 #include <linux/cdev.h>
38 #include "zpios-internal.h"
41 static spl_class
*zpios_class
;
42 static spl_device
*zpios_device
;
43 static char *zpios_tag
= "zpios_tag";
46 int zpios_upcall(char *path
, char *phase
, run_args_t
*run_args
, int rc
)
48 /* This is stack heavy but it should be OK since we are only
49 * making the upcall between tests when the stack is shallow.
51 char id
[16], chunk_size
[16], region_size
[16], thread_count
[16];
52 char region_count
[16], offset
[16], region_noise
[16], chunk_noise
[16];
53 char thread_delay
[16], flags
[16], result
[8];
54 char *argv
[16], *envp
[4];
56 if ((path
== NULL
) || (strlen(path
) == 0))
59 snprintf(id
, 15, "%d", run_args
->id
);
60 snprintf(chunk_size
, 15, "%lu", (long unsigned)run_args
->chunk_size
);
61 snprintf(region_size
, 15, "%lu",(long unsigned) run_args
->region_size
);
62 snprintf(thread_count
, 15, "%u", run_args
->thread_count
);
63 snprintf(region_count
, 15, "%u", run_args
->region_count
);
64 snprintf(offset
, 15, "%lu", (long unsigned)run_args
->offset
);
65 snprintf(region_noise
, 15, "%u", run_args
->region_noise
);
66 snprintf(chunk_noise
, 15, "%u", run_args
->chunk_noise
);
67 snprintf(thread_delay
, 15, "%u", run_args
->thread_delay
);
68 snprintf(flags
, 15, "0x%x", run_args
->flags
);
69 snprintf(result
, 7, "%d", rc
);
71 /* Passing 15 args to registered pre/post upcall */
74 argv
[2] = strlen(run_args
->log
) ? run_args
->log
: "<none>";
76 argv
[4] = run_args
->pool
;
78 argv
[6] = region_size
;
79 argv
[7] = thread_count
;
80 argv
[8] = region_count
;
82 argv
[10] = region_noise
;
83 argv
[11] = chunk_noise
;
84 argv
[12] = thread_delay
;
89 /* Passing environment for user space upcall */
91 envp
[1] = "TERM=linux";
92 envp
[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
95 return call_usermodehelper(path
, argv
, envp
, UMH_WAIT_PROC
);
99 zpios_dmu_object_create(run_args_t
*run_args
, objset_t
*os
)
105 tx
= dmu_tx_create(os
);
106 dmu_tx_hold_write(tx
, DMU_NEW_OBJECT
, 0, OBJ_SIZE
);
107 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
109 zpios_print(run_args
->file
,
110 "dmu_tx_assign() failed: %d\n", rc
);
115 obj
= dmu_object_alloc(os
, DMU_OT_UINT64_OTHER
, 0,
117 rc
= dmu_object_set_blocksize(os
, obj
, 128ULL << 10, 0, tx
);
119 zpios_print(run_args
->file
,
120 "dmu_object_set_blocksize() failed: %d\n", rc
);
131 zpios_dmu_object_free(run_args_t
*run_args
, objset_t
*os
, uint64_t obj
)
136 tx
= dmu_tx_create(os
);
137 dmu_tx_hold_free(tx
, obj
, 0, DMU_OBJECT_END
);
138 rc
= dmu_tx_assign(tx
, TXG_WAIT
);
140 zpios_print(run_args
->file
,
141 "dmu_tx_assign() failed: %d\n", rc
);
146 rc
= dmu_object_free(os
, obj
, tx
);
148 zpios_print(run_args
->file
,
149 "dmu_object_free() failed: %d\n", rc
);
160 zpios_dmu_setup(run_args_t
*run_args
)
162 zpios_time_t
*t
= &(run_args
->stats
.cr_time
);
168 (void)zpios_upcall(run_args
->pre
, PHASE_PRE_CREATE
, run_args
, 0);
169 t
->start
= zpios_timespec_now();
171 (void)snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
172 rc
= dmu_objset_create(name
, DMU_OST_OTHER
, 0, NULL
, NULL
);
174 zpios_print(run_args
->file
, "Error dmu_objset_create(%s, ...) "
175 "failed: %d\n", name
, rc
);
179 rc
= dmu_objset_own(name
, DMU_OST_OTHER
, 0, zpios_tag
, &os
);
181 zpios_print(run_args
->file
, "Error dmu_objset_own(%s, ...) "
182 "failed: %d\n", name
, rc
);
186 if (!(run_args
->flags
& DMU_FPP
)) {
187 obj
= zpios_dmu_object_create(run_args
, os
);
190 zpios_print(run_args
->file
, "Error zpios_dmu_"
191 "object_create() failed, %d\n", rc
);
196 for (i
= 0; i
< run_args
->region_count
; i
++) {
197 zpios_region_t
*region
;
199 region
= &run_args
->regions
[i
];
200 mutex_init(®ion
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
202 if (run_args
->flags
& DMU_FPP
) {
203 /* File per process */
205 region
->obj
.obj
= zpios_dmu_object_create(run_args
, os
);
206 ASSERT(region
->obj
.obj
> 0); /* XXX - Handle this */
207 region
->wr_offset
= run_args
->offset
;
208 region
->rd_offset
= run_args
->offset
;
209 region
->init_offset
= run_args
->offset
;
210 region
->max_offset
= run_args
->offset
+
211 run_args
->region_size
;
213 /* Single shared file */
215 region
->obj
.obj
= obj
;
216 region
->wr_offset
= run_args
->offset
* i
;
217 region
->rd_offset
= run_args
->offset
* i
;
218 region
->init_offset
= run_args
->offset
* i
;
219 region
->max_offset
= run_args
->offset
*
220 i
+ run_args
->region_size
;
227 rc2
= dmu_objset_destroy(name
, B_FALSE
);
229 zpios_print(run_args
->file
, "Error dmu_objset_destroy"
230 "(%s, ...) failed: %d\n", name
, rc2
);
233 t
->stop
= zpios_timespec_now();
234 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
235 (void)zpios_upcall(run_args
->post
, PHASE_POST_CREATE
, run_args
, rc
);
241 zpios_setup_run(run_args_t
**run_args
, zpios_cmd_t
*kcmd
, struct file
*file
)
246 size
= sizeof(*ra
) + kcmd
->cmd_region_count
* sizeof(zpios_region_t
);
248 ra
= vmem_zalloc(size
, KM_SLEEP
);
250 zpios_print(file
, "Unable to vmem_zalloc() %d bytes "
251 "for regions\n", size
);
256 strncpy(ra
->pool
, kcmd
->cmd_pool
, ZPIOS_NAME_SIZE
- 1);
257 strncpy(ra
->pre
, kcmd
->cmd_pre
, ZPIOS_PATH_SIZE
- 1);
258 strncpy(ra
->post
, kcmd
->cmd_post
, ZPIOS_PATH_SIZE
- 1);
259 strncpy(ra
->log
, kcmd
->cmd_log
, ZPIOS_PATH_SIZE
- 1);
260 ra
->id
= kcmd
->cmd_id
;
261 ra
->chunk_size
= kcmd
->cmd_chunk_size
;
262 ra
->thread_count
= kcmd
->cmd_thread_count
;
263 ra
->region_count
= kcmd
->cmd_region_count
;
264 ra
->region_size
= kcmd
->cmd_region_size
;
265 ra
->offset
= kcmd
->cmd_offset
;
266 ra
->region_noise
= kcmd
->cmd_region_noise
;
267 ra
->chunk_noise
= kcmd
->cmd_chunk_noise
;
268 ra
->thread_delay
= kcmd
->cmd_thread_delay
;
269 ra
->flags
= kcmd
->cmd_flags
;
270 ra
->stats
.wr_data
= 0;
271 ra
->stats
.wr_chunks
= 0;
272 ra
->stats
.rd_data
= 0;
273 ra
->stats
.rd_chunks
= 0;
276 mutex_init(&ra
->lock_work
, NULL
, MUTEX_DEFAULT
, NULL
);
277 mutex_init(&ra
->lock_ctl
, NULL
, MUTEX_DEFAULT
, NULL
);
279 (void)zpios_upcall(ra
->pre
, PHASE_PRE_RUN
, ra
, 0);
281 rc
= zpios_dmu_setup(ra
);
283 mutex_destroy(&ra
->lock_ctl
);
284 mutex_destroy(&ra
->lock_work
);
293 zpios_get_work_item(run_args_t
*run_args
, dmu_obj_t
*obj
, __u64
*offset
,
294 __u32
*chunk_size
, zpios_region_t
**region
, __u32 flags
)
297 unsigned int random_int
;
299 get_random_bytes(&random_int
, sizeof(unsigned int));
301 mutex_enter(&run_args
->lock_work
);
302 i
= run_args
->region_next
;
304 /* XXX: I don't much care for this chunk selection mechansim
305 * there's the potential to burn a lot of time here doing nothing
306 * useful while holding the global lock. This could give some
307 * misleading performance results. I'll fix it latter.
309 while (count
< run_args
->region_count
) {
311 zpios_time_t
*rw_time
;
313 j
= i
% run_args
->region_count
;
314 *region
= &(run_args
->regions
[j
]);
316 if (flags
& DMU_WRITE
) {
317 rw_offset
= &((*region
)->wr_offset
);
318 rw_time
= &((*region
)->stats
.wr_time
);
320 rw_offset
= &((*region
)->rd_offset
);
321 rw_time
= &((*region
)->stats
.rd_time
);
324 /* test if region is fully written */
325 if (*rw_offset
+ *chunk_size
> (*region
)->max_offset
) {
329 if (unlikely(rw_time
->stop
.ts_sec
== 0) &&
330 unlikely(rw_time
->stop
.ts_nsec
== 0))
331 rw_time
->stop
= zpios_timespec_now();
336 *offset
= *rw_offset
;
337 *obj
= (*region
)->obj
;
338 *rw_offset
+= *chunk_size
;
340 /* update ctl structure */
341 if (run_args
->region_noise
) {
342 get_random_bytes(&random_int
, sizeof(unsigned int));
343 run_args
->region_next
+= random_int
% run_args
->region_noise
;
345 run_args
->region_next
++;
348 mutex_exit(&run_args
->lock_work
);
352 /* nothing left to do */
353 mutex_exit(&run_args
->lock_work
);
359 zpios_remove_objset(run_args_t
*run_args
)
361 zpios_time_t
*t
= &(run_args
->stats
.rm_time
);
362 zpios_region_t
*region
;
366 (void)zpios_upcall(run_args
->pre
, PHASE_PRE_REMOVE
, run_args
, 0);
367 t
->start
= zpios_timespec_now();
369 (void)snprintf(name
, 32, "%s/id_%d", run_args
->pool
, run_args
->id
);
371 if (run_args
->flags
& DMU_REMOVE
) {
372 if (run_args
->flags
& DMU_FPP
) {
373 for (i
= 0; i
< run_args
->region_count
; i
++) {
374 region
= &run_args
->regions
[i
];
375 rc
= zpios_dmu_object_free(run_args
,
379 zpios_print(run_args
->file
, "Error "
380 "removing object %d, %d\n",
381 (int)region
->obj
.obj
, rc
);
384 region
= &run_args
->regions
[0];
385 rc
= zpios_dmu_object_free(run_args
,
389 zpios_print(run_args
->file
, "Error "
390 "removing object %d, %d\n",
391 (int)region
->obj
.obj
, rc
);
395 dmu_objset_disown(run_args
->os
, zpios_tag
);
397 if (run_args
->flags
& DMU_REMOVE
) {
398 rc
= dmu_objset_destroy(name
, B_FALSE
);
400 zpios_print(run_args
->file
, "Error dmu_objset_destroy"
401 "(%s, ...) failed: %d\n", name
, rc
);
404 t
->stop
= zpios_timespec_now();
405 t
->delta
= zpios_timespec_sub(t
->stop
, t
->start
);
406 (void)zpios_upcall(run_args
->post
, PHASE_POST_REMOVE
, run_args
, rc
);
410 zpios_cleanup_run(run_args_t
*run_args
)
414 if (run_args
== NULL
)
417 if (run_args
->threads
!= NULL
) {
418 for (i
= 0; i
< run_args
->thread_count
; i
++) {
419 if (run_args
->threads
[i
]) {
420 mutex_destroy(&run_args
->threads
[i
]->lock
);
421 kmem_free(run_args
->threads
[i
],
422 sizeof(thread_data_t
));
426 kmem_free(run_args
->threads
,
427 sizeof(thread_data_t
*) * run_args
->thread_count
);
430 for (i
= 0; i
< run_args
->region_count
; i
++)
431 mutex_destroy(&run_args
->regions
[i
].lock
);
433 mutex_destroy(&run_args
->lock_work
);
434 mutex_destroy(&run_args
->lock_ctl
);
435 size
= run_args
->region_count
* sizeof(zpios_region_t
);
437 vmem_free(run_args
, sizeof(*run_args
) + size
);
441 zpios_dmu_write(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
442 uint64_t offset
, uint64_t size
, const void *buf
)
445 int rc
, how
= TXG_WAIT
;
448 if (run_args
->flags
& DMU_WRITE_NOWAIT
)
452 tx
= dmu_tx_create(os
);
453 dmu_tx_hold_write(tx
, object
, offset
, size
);
454 rc
= dmu_tx_assign(tx
, how
);
457 if (rc
== ERESTART
&& how
== TXG_NOWAIT
) {
462 zpios_print(run_args
->file
,
463 "Error in dmu_tx_assign(), %d", rc
);
470 // if (run_args->flags & DMU_WRITE_ZC)
471 // flags |= DMU_WRITE_ZEROCOPY;
473 dmu_write(os
, object
, offset
, size
, buf
, tx
);
480 zpios_dmu_read(run_args_t
*run_args
, objset_t
*os
, uint64_t object
,
481 uint64_t offset
, uint64_t size
, void *buf
)
485 // if (run_args->flags & DMU_READ_ZC)
486 // flags |= DMU_READ_ZEROCOPY;
488 if (run_args
->flags
& DMU_READ_NOPF
)
489 flags
|= DMU_READ_NO_PREFETCH
;
491 return dmu_read(os
, object
, offset
, size
, buf
, flags
);
495 zpios_thread_main(void *data
)
497 thread_data_t
*thr
= (thread_data_t
*)data
;
498 run_args_t
*run_args
= thr
->run_args
;
503 zpios_region_t
*region
;
505 unsigned int random_int
;
506 int chunk_noise
= run_args
->chunk_noise
;
507 int chunk_noise_tmp
= 0;
508 int thread_delay
= run_args
->thread_delay
;
509 int thread_delay_tmp
= 0;
513 get_random_bytes(&random_int
, sizeof(unsigned int));
514 chunk_noise_tmp
= (random_int
% (chunk_noise
* 2))-chunk_noise
;
517 /* It's OK to vmem_alloc() this memory because it will be copied
518 * in to the slab and pointers to the slab copy will be setup in
519 * the bio when the IO is submitted. This of course is not ideal
520 * since we want a zero-copy IO path if possible. It would be nice
521 * to have direct access to those slab entries.
523 chunk_size
= run_args
->chunk_size
+ chunk_noise_tmp
;
524 buf
= (char *)vmem_alloc(chunk_size
, KM_SLEEP
);
527 /* Trivial data verification pattern for now. */
528 if (run_args
->flags
& DMU_VERIFY
)
529 memset(buf
, 'z', chunk_size
);
532 mutex_enter(&thr
->lock
);
533 thr
->stats
.wr_time
.start
= zpios_timespec_now();
534 mutex_exit(&thr
->lock
);
536 while (zpios_get_work_item(run_args
, &obj
, &offset
,
537 &chunk_size
, ®ion
, DMU_WRITE
)) {
539 get_random_bytes(&random_int
, sizeof(unsigned int));
540 thread_delay_tmp
= random_int
% thread_delay
;
541 set_current_state(TASK_UNINTERRUPTIBLE
);
542 schedule_timeout(thread_delay_tmp
); /* In jiffies */
545 t
.start
= zpios_timespec_now();
546 rc
= zpios_dmu_write(run_args
, obj
.os
, obj
.obj
,
547 offset
, chunk_size
, buf
);
548 t
.stop
= zpios_timespec_now();
549 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
552 zpios_print(run_args
->file
, "IO error while doing "
553 "dmu_write(): %d\n", rc
);
557 mutex_enter(&thr
->lock
);
558 thr
->stats
.wr_data
+= chunk_size
;
559 thr
->stats
.wr_chunks
++;
560 thr
->stats
.wr_time
.delta
= zpios_timespec_add(
561 thr
->stats
.wr_time
.delta
, t
.delta
);
562 mutex_exit(&thr
->lock
);
564 mutex_enter(®ion
->lock
);
565 region
->stats
.wr_data
+= chunk_size
;
566 region
->stats
.wr_chunks
++;
567 region
->stats
.wr_time
.delta
= zpios_timespec_add(
568 region
->stats
.wr_time
.delta
, t
.delta
);
570 /* First time region was accessed */
571 if (region
->init_offset
== offset
)
572 region
->stats
.wr_time
.start
= t
.start
;
574 mutex_exit(®ion
->lock
);
577 mutex_enter(&run_args
->lock_ctl
);
578 run_args
->threads_done
++;
579 mutex_exit(&run_args
->lock_ctl
);
581 mutex_enter(&thr
->lock
);
583 thr
->stats
.wr_time
.stop
= zpios_timespec_now();
584 mutex_exit(&thr
->lock
);
585 wake_up(&run_args
->waitq
);
587 set_current_state(TASK_UNINTERRUPTIBLE
);
590 /* Check if we should exit */
591 mutex_enter(&thr
->lock
);
593 mutex_exit(&thr
->lock
);
598 mutex_enter(&thr
->lock
);
599 thr
->stats
.rd_time
.start
= zpios_timespec_now();
600 mutex_exit(&thr
->lock
);
602 while (zpios_get_work_item(run_args
, &obj
, &offset
,
603 &chunk_size
, ®ion
, DMU_READ
)) {
605 get_random_bytes(&random_int
, sizeof(unsigned int));
606 thread_delay_tmp
= random_int
% thread_delay
;
607 set_current_state(TASK_UNINTERRUPTIBLE
);
608 schedule_timeout(thread_delay_tmp
); /* In jiffies */
611 if (run_args
->flags
& DMU_VERIFY
)
612 memset(buf
, 0, chunk_size
);
614 t
.start
= zpios_timespec_now();
615 rc
= zpios_dmu_read(run_args
, obj
.os
, obj
.obj
,
616 offset
, chunk_size
, buf
);
617 t
.stop
= zpios_timespec_now();
618 t
.delta
= zpios_timespec_sub(t
.stop
, t
.start
);
621 zpios_print(run_args
->file
, "IO error while doing "
622 "dmu_read(): %d\n", rc
);
626 /* Trivial data verification, expensive! */
627 if (run_args
->flags
& DMU_VERIFY
) {
628 for (i
= 0; i
< chunk_size
; i
++) {
630 zpios_print(run_args
->file
,
631 "IO verify error: %d/%d/%d\n",
632 (int)obj
.obj
, (int)offset
,
639 mutex_enter(&thr
->lock
);
640 thr
->stats
.rd_data
+= chunk_size
;
641 thr
->stats
.rd_chunks
++;
642 thr
->stats
.rd_time
.delta
= zpios_timespec_add(
643 thr
->stats
.rd_time
.delta
, t
.delta
);
644 mutex_exit(&thr
->lock
);
646 mutex_enter(®ion
->lock
);
647 region
->stats
.rd_data
+= chunk_size
;
648 region
->stats
.rd_chunks
++;
649 region
->stats
.rd_time
.delta
= zpios_timespec_add(
650 region
->stats
.rd_time
.delta
, t
.delta
);
652 /* First time region was accessed */
653 if (region
->init_offset
== offset
)
654 region
->stats
.rd_time
.start
= t
.start
;
656 mutex_exit(®ion
->lock
);
659 mutex_enter(&run_args
->lock_ctl
);
660 run_args
->threads_done
++;
661 mutex_exit(&run_args
->lock_ctl
);
663 mutex_enter(&thr
->lock
);
665 thr
->stats
.rd_time
.stop
= zpios_timespec_now();
666 mutex_exit(&thr
->lock
);
667 wake_up(&run_args
->waitq
);
670 vmem_free(buf
, chunk_size
);
673 return rc
; /* Unreachable, due to do_exit() */
677 zpios_thread_done(run_args_t
*run_args
)
679 ASSERT(run_args
->threads_done
<= run_args
->thread_count
);
680 return (run_args
->threads_done
== run_args
->thread_count
);
684 zpios_threads_run(run_args_t
*run_args
)
686 struct task_struct
*tsk
, **tsks
;
687 thread_data_t
*thr
= NULL
;
688 zpios_time_t
*tt
= &(run_args
->stats
.total_time
);
689 zpios_time_t
*tw
= &(run_args
->stats
.wr_time
);
690 zpios_time_t
*tr
= &(run_args
->stats
.rd_time
);
691 int i
, rc
= 0, tc
= run_args
->thread_count
;
693 tsks
= kmem_zalloc(sizeof(struct task_struct
*) * tc
, KM_SLEEP
);
699 run_args
->threads
= kmem_zalloc(sizeof(thread_data_t
*) * tc
, KM_SLEEP
);
700 if (run_args
->threads
== NULL
) {
705 init_waitqueue_head(&run_args
->waitq
);
706 run_args
->threads_done
= 0;
708 /* Create all the needed threads which will sleep until awoken */
709 for (i
= 0; i
< tc
; i
++) {
710 thr
= kmem_zalloc(sizeof(thread_data_t
), KM_SLEEP
);
717 thr
->run_args
= run_args
;
719 mutex_init(&thr
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
720 run_args
->threads
[i
] = thr
;
722 tsk
= kthread_create(zpios_thread_main
, (void *)thr
,
723 "%s/%d", "zpios_io", i
);
732 tt
->start
= zpios_timespec_now();
734 /* Wake up all threads for write phase */
735 (void)zpios_upcall(run_args
->pre
, PHASE_PRE_WRITE
, run_args
, 0);
736 for (i
= 0; i
< tc
; i
++)
737 wake_up_process(tsks
[i
]);
739 /* Wait for write phase to complete */
740 tw
->start
= zpios_timespec_now();
741 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
742 tw
->stop
= zpios_timespec_now();
743 (void)zpios_upcall(run_args
->post
, PHASE_POST_WRITE
, run_args
, rc
);
745 for (i
= 0; i
< tc
; i
++) {
746 thr
= run_args
->threads
[i
];
748 mutex_enter(&thr
->lock
);
753 run_args
->stats
.wr_data
+= thr
->stats
.wr_data
;
754 run_args
->stats
.wr_chunks
+= thr
->stats
.wr_chunks
;
755 mutex_exit(&thr
->lock
);
759 /* Wake up all threads and tell them to exit */
760 for (i
= 0; i
< tc
; i
++) {
761 mutex_enter(&thr
->lock
);
763 mutex_exit(&thr
->lock
);
765 wake_up_process(tsks
[i
]);
770 mutex_enter(&run_args
->lock_ctl
);
771 ASSERT(run_args
->threads_done
== run_args
->thread_count
);
772 run_args
->threads_done
= 0;
773 mutex_exit(&run_args
->lock_ctl
);
775 /* Wake up all threads for read phase */
776 (void)zpios_upcall(run_args
->pre
, PHASE_PRE_READ
, run_args
, 0);
777 for (i
= 0; i
< tc
; i
++)
778 wake_up_process(tsks
[i
]);
780 /* Wait for read phase to complete */
781 tr
->start
= zpios_timespec_now();
782 wait_event(run_args
->waitq
, zpios_thread_done(run_args
));
783 tr
->stop
= zpios_timespec_now();
784 (void)zpios_upcall(run_args
->post
, PHASE_POST_READ
, run_args
, rc
);
786 for (i
= 0; i
< tc
; i
++) {
787 thr
= run_args
->threads
[i
];
789 mutex_enter(&thr
->lock
);
794 run_args
->stats
.rd_data
+= thr
->stats
.rd_data
;
795 run_args
->stats
.rd_chunks
+= thr
->stats
.rd_chunks
;
796 mutex_exit(&thr
->lock
);
799 tt
->stop
= zpios_timespec_now();
800 tt
->delta
= zpios_timespec_sub(tt
->stop
, tt
->start
);
801 tw
->delta
= zpios_timespec_sub(tw
->stop
, tw
->start
);
802 tr
->delta
= zpios_timespec_sub(tr
->stop
, tr
->start
);
805 kmem_free(tsks
, sizeof(struct task_struct
*) * tc
);
807 /* Returns first encountered thread error (if any) */
811 /* Destroy all threads that were created successfully */
812 for (i
= 0; i
< tc
; i
++)
814 (void) kthread_stop(tsks
[i
]);
820 zpios_do_one_run(struct file
*file
, zpios_cmd_t
*kcmd
,
821 int data_size
, void *data
)
823 run_args_t
*run_args
= { 0 };
824 zpios_stats_t
*stats
= (zpios_stats_t
*)data
;
825 int i
, n
, m
, size
, rc
;
827 if ((!kcmd
->cmd_chunk_size
) || (!kcmd
->cmd_region_size
) ||
828 (!kcmd
->cmd_thread_count
) || (!kcmd
->cmd_region_count
)) {
829 zpios_print(file
, "Invalid chunk_size, region_size, "
830 "thread_count, or region_count, %d\n", -EINVAL
);
834 if (!(kcmd
->cmd_flags
& DMU_WRITE
) ||
835 !(kcmd
->cmd_flags
& DMU_READ
)) {
836 zpios_print(file
, "Invalid flags, minimally DMU_WRITE "
837 "and DMU_READ must be set, %d\n", -EINVAL
);
841 if ((kcmd
->cmd_flags
& (DMU_WRITE_ZC
| DMU_READ_ZC
)) &&
842 (kcmd
->cmd_flags
& DMU_VERIFY
)) {
843 zpios_print(file
, "Invalid flags, DMU_*_ZC incompatible "
844 "with DMU_VERIFY, used for performance analysis "
845 "only, %d\n", -EINVAL
);
849 /* Opaque data on return contains structs of the following form:
851 * zpios_stat_t stats[];
852 * stats[0] = run_args->stats;
853 * stats[1-N] = threads[N]->stats;
854 * stats[N+1-M] = regions[M]->stats;
856 * Where N is the number of threads, and M is the number of regions.
858 size
= (sizeof(zpios_stats_t
) +
859 (kcmd
->cmd_thread_count
* sizeof(zpios_stats_t
)) +
860 (kcmd
->cmd_region_count
* sizeof(zpios_stats_t
)));
861 if (data_size
< size
) {
862 zpios_print(file
, "Invalid size, command data buffer "
863 "size too small, (%d < %d)\n", data_size
, size
);
867 rc
= zpios_setup_run(&run_args
, kcmd
, file
);
871 rc
= zpios_threads_run(run_args
);
872 zpios_remove_objset(run_args
);
878 m
= 1 + kcmd
->cmd_thread_count
;
879 stats
[0] = run_args
->stats
;
881 for (i
= 0; i
< kcmd
->cmd_thread_count
; i
++)
882 stats
[n
+i
] = run_args
->threads
[i
]->stats
;
884 for (i
= 0; i
< kcmd
->cmd_region_count
; i
++)
885 stats
[m
+i
] = run_args
->regions
[i
].stats
;
889 zpios_cleanup_run(run_args
);
891 (void)zpios_upcall(kcmd
->cmd_post
, PHASE_POST_RUN
, run_args
, 0);
897 zpios_open(struct inode
*inode
, struct file
*file
)
899 unsigned int minor
= iminor(inode
);
902 if (minor
>= ZPIOS_MINORS
)
905 info
= (zpios_info_t
*)kmem_alloc(sizeof(*info
), KM_SLEEP
);
909 spin_lock_init(&info
->info_lock
);
910 info
->info_size
= ZPIOS_INFO_BUFFER_SIZE
;
911 info
->info_buffer
= (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE
,KM_SLEEP
);
912 if (info
->info_buffer
== NULL
) {
913 kmem_free(info
, sizeof(*info
));
917 info
->info_head
= info
->info_buffer
;
918 file
->private_data
= (void *)info
;
924 zpios_release(struct inode
*inode
, struct file
*file
)
926 unsigned int minor
= iminor(inode
);
927 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
929 if (minor
>= ZPIOS_MINORS
)
933 ASSERT(info
->info_buffer
);
935 vmem_free(info
->info_buffer
, ZPIOS_INFO_BUFFER_SIZE
);
936 kmem_free(info
, sizeof(*info
));
942 zpios_buffer_clear(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
944 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
947 ASSERT(info
->info_buffer
);
949 spin_lock(&info
->info_lock
);
950 memset(info
->info_buffer
, 0, info
->info_size
);
951 info
->info_head
= info
->info_buffer
;
952 spin_unlock(&info
->info_lock
);
958 zpios_buffer_size(struct file
*file
, zpios_cfg_t
*kcfg
, unsigned long arg
)
960 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
962 int min
, size
, rc
= 0;
965 ASSERT(info
->info_buffer
);
967 spin_lock(&info
->info_lock
);
968 if (kcfg
->cfg_arg1
> 0) {
970 size
= kcfg
->cfg_arg1
;
971 buf
= (char *)vmem_alloc(size
, KM_SLEEP
);
977 /* Zero fill and truncate contents when coping buffer */
978 min
= ((size
< info
->info_size
) ? size
: info
->info_size
);
979 memset(buf
, 0, size
);
980 memcpy(buf
, info
->info_buffer
, min
);
981 vmem_free(info
->info_buffer
, info
->info_size
);
982 info
->info_size
= size
;
983 info
->info_buffer
= buf
;
984 info
->info_head
= info
->info_buffer
;
987 kcfg
->cfg_rc1
= info
->info_size
;
989 if (copy_to_user((struct zpios_cfg_t __user
*)arg
, kcfg
, sizeof(*kcfg
)))
992 spin_unlock(&info
->info_lock
);
998 zpios_ioctl_cfg(struct file
*file
, unsigned long arg
)
1003 if (copy_from_user(&kcfg
, (zpios_cfg_t
*)arg
, sizeof(kcfg
)))
1006 if (kcfg
.cfg_magic
!= ZPIOS_CFG_MAGIC
) {
1007 zpios_print(file
, "Bad config magic 0x%x != 0x%x\n",
1008 kcfg
.cfg_magic
, ZPIOS_CFG_MAGIC
);
1012 switch (kcfg
.cfg_cmd
) {
1013 case ZPIOS_CFG_BUFFER_CLEAR
:
1014 /* cfg_arg1 - Unused
1017 rc
= zpios_buffer_clear(file
, &kcfg
, arg
);
1019 case ZPIOS_CFG_BUFFER_SIZE
:
1020 /* cfg_arg1 - 0 - query size; >0 resize
1021 * cfg_rc1 - Set to current buffer size
1023 rc
= zpios_buffer_size(file
, &kcfg
, arg
);
1026 zpios_print(file
, "Bad config command %d\n",
1036 zpios_ioctl_cmd(struct file
*file
, unsigned long arg
)
1042 kcmd
= kmem_alloc(sizeof(zpios_cmd_t
), KM_SLEEP
);
1044 zpios_print(file
, "Unable to kmem_alloc() %ld byte for "
1045 "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t
));
1049 rc
= copy_from_user(kcmd
, (zpios_cfg_t
*)arg
, sizeof(zpios_cmd_t
));
1051 zpios_print(file
, "Unable to copy command structure "
1052 "from user to kernel memory, %d\n", rc
);
1056 if (kcmd
->cmd_magic
!= ZPIOS_CMD_MAGIC
) {
1057 zpios_print(file
, "Bad command magic 0x%x != 0x%x\n",
1058 kcmd
->cmd_magic
, ZPIOS_CFG_MAGIC
);
1063 /* Allocate memory for any opaque data the caller needed to pass on */
1064 if (kcmd
->cmd_data_size
> 0) {
1065 data
= (void *)vmem_alloc(kcmd
->cmd_data_size
, KM_SLEEP
);
1067 zpios_print(file
, "Unable to vmem_alloc() %ld "
1068 "bytes for data buffer\n",
1069 (long)kcmd
->cmd_data_size
);
1074 rc
= copy_from_user(data
, (void *)(arg
+ offsetof(zpios_cmd_t
,
1075 cmd_data_str
)), kcmd
->cmd_data_size
);
1077 zpios_print(file
, "Unable to copy data buffer "
1078 "from user to kernel memory, %d\n", rc
);
1083 rc
= zpios_do_one_run(file
, kcmd
, kcmd
->cmd_data_size
, data
);
1086 /* If the test failed do not print out the stats */
1090 rc
= copy_to_user((void *)(arg
+ offsetof(zpios_cmd_t
,
1091 cmd_data_str
)), data
, kcmd
->cmd_data_size
);
1093 zpios_print(file
, "Unable to copy data buffer "
1094 "from kernel to user memory, %d\n", rc
);
1099 vmem_free(data
, kcmd
->cmd_data_size
);
1102 kmem_free(kcmd
, sizeof(zpios_cmd_t
));
1108 zpios_unlocked_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
1110 unsigned int minor
= iminor(file
->f_dentry
->d_inode
);
1113 /* Ignore tty ioctls */
1114 if ((cmd
& 0xffffff00) == ((int)'T') << 8)
1117 if (minor
>= ZPIOS_MINORS
)
1122 rc
= zpios_ioctl_cfg(file
, arg
);
1125 rc
= zpios_ioctl_cmd(file
, arg
);
1128 zpios_print(file
, "Bad ioctl command %d\n", cmd
);
#ifdef CONFIG_COMPAT
/* Compatibility handler for ioctls from 32-bit ELF binaries */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/* The zpios ioctl structures are layout-compatible; just forward. */
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
/* I'm not sure why you would want to write in to this buffer from
 * user space since its principal use is to pass test status info
 * back to user space, but I don't see any reason to prevent it.
 */
1150 zpios_write(struct file
*file
, const char __user
*buf
,
1151 size_t count
, loff_t
*ppos
)
1153 unsigned int minor
= iminor(file
->f_dentry
->d_inode
);
1154 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1157 if (minor
>= ZPIOS_MINORS
)
1161 ASSERT(info
->info_buffer
);
1163 spin_lock(&info
->info_lock
);
1165 /* Write beyond EOF */
1166 if (*ppos
>= info
->info_size
) {
1171 /* Resize count if beyond EOF */
1172 if (*ppos
+ count
> info
->info_size
)
1173 count
= info
->info_size
- *ppos
;
1175 if (copy_from_user(info
->info_buffer
, buf
, count
)) {
1183 spin_unlock(&info
->info_lock
);
1188 zpios_read(struct file
*file
, char __user
*buf
,
1189 size_t count
, loff_t
*ppos
)
1191 unsigned int minor
= iminor(file
->f_dentry
->d_inode
);
1192 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1195 if (minor
>= ZPIOS_MINORS
)
1199 ASSERT(info
->info_buffer
);
1201 spin_lock(&info
->info_lock
);
1203 /* Read beyond EOF */
1204 if (*ppos
>= info
->info_size
)
1207 /* Resize count if beyond EOF */
1208 if (*ppos
+ count
> info
->info_size
)
1209 count
= info
->info_size
- *ppos
;
1211 if (copy_to_user(buf
, info
->info_buffer
+ *ppos
, count
)) {
1219 spin_unlock(&info
->info_lock
);
1223 static loff_t
zpios_seek(struct file
*file
, loff_t offset
, int origin
)
1225 unsigned int minor
= iminor(file
->f_dentry
->d_inode
);
1226 zpios_info_t
*info
= (zpios_info_t
*)file
->private_data
;
1229 if (minor
>= ZPIOS_MINORS
)
1233 ASSERT(info
->info_buffer
);
1235 spin_lock(&info
->info_lock
);
1238 case 0: /* SEEK_SET - No-op just do it */
1240 case 1: /* SEEK_CUR - Seek from current */
1241 offset
= file
->f_pos
+ offset
;
1243 case 2: /* SEEK_END - Seek from end */
1244 offset
= info
->info_size
+ offset
;
1249 file
->f_pos
= offset
;
1250 file
->f_version
= 0;
1254 spin_unlock(&info
->info_lock
);
1259 static struct cdev zpios_cdev
;
1260 static struct file_operations zpios_fops
= {
1261 .owner
= THIS_MODULE
,
1263 .release
= zpios_release
,
1264 .unlocked_ioctl
= zpios_unlocked_ioctl
,
1265 #ifdef CONFIG_COMPAT
1266 .compat_ioctl
= zpios_compat_ioctl
,
1269 .write
= zpios_write
,
1270 .llseek
= zpios_seek
,
1279 dev
= MKDEV(ZPIOS_MAJOR
, 0);
1280 if ((rc
= register_chrdev_region(dev
, ZPIOS_MINORS
, ZPIOS_NAME
)))
1283 /* Support for registering a character driver */
1284 cdev_init(&zpios_cdev
, &zpios_fops
);
1285 zpios_cdev
.owner
= THIS_MODULE
;
1286 kobject_set_name(&zpios_cdev
.kobj
, ZPIOS_NAME
);
1287 if ((rc
= cdev_add(&zpios_cdev
, dev
, ZPIOS_MINORS
))) {
1288 printk(KERN_ERR
"ZPIOS: Error adding cdev, %d\n", rc
);
1289 kobject_put(&zpios_cdev
.kobj
);
1290 unregister_chrdev_region(dev
, ZPIOS_MINORS
);
1294 /* Support for udev make driver info available in sysfs */
1295 zpios_class
= spl_class_create(THIS_MODULE
, ZPIOS_NAME
);
1296 if (IS_ERR(zpios_class
)) {
1297 rc
= PTR_ERR(zpios_class
);
1298 printk(KERN_ERR
"ZPIOS: Error creating zpios class, %d\n", rc
);
1299 cdev_del(&zpios_cdev
);
1300 unregister_chrdev_region(dev
, ZPIOS_MINORS
);
1304 zpios_device
= spl_device_create(zpios_class
, NULL
,
1305 dev
, NULL
, ZPIOS_NAME
);
1308 printk(KERN_ERR
"ZPIOS: Error registering zpios device, %d\n", rc
);
1315 dev_t dev
= MKDEV(ZPIOS_MAJOR
, 0);
1317 spl_device_destroy(zpios_class
, zpios_device
, dev
);
1318 spl_class_destroy(zpios_class
);
1319 cdev_del(&zpios_cdev
);
1320 unregister_chrdev_region(dev
, ZPIOS_MINORS
);
1325 spl_module_init(zpios_init
);
1326 spl_module_exit(zpios_fini
);
1328 MODULE_AUTHOR("LLNL / Sun");
1329 MODULE_DESCRIPTION("Kernel PIOS implementation");
1330 MODULE_LICENSE("GPL");