/* module/zpios/pios.c — mirror_zfs-debian, upstream version 0.7.9 */
1 /*
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
4 * against ZFS while still being flexibly controlled from user space.
5 *
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
9 * LLNL-CODE-403049
10 *
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
16 *
17 * This file is part of ZFS on Linux.
18 * For details, see <http://zfsonlinux.org/>.
19 *
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
24 *
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 * for more details.
29 *
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
32 *
33 * Copyright (c) 2015, Intel Corporation.
34 */
35
36 #include <sys/zfs_context.h>
37 #include <sys/dmu.h>
38 #include <sys/spa.h>
39 #include <sys/txg.h>
40 #include <sys/dsl_destroy.h>
41 #include <linux/miscdevice.h>
42 #include "zpios-internal.h"
43
44
45 static char *zpios_tag = "zpios_tag";
46
47 static int
48 zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
49 {
50 /*
51 * This is stack heavy but it should be OK since we are only
52 * making the upcall between tests when the stack is shallow.
53 */
54 char id[16], chunk_size[16], region_size[16], thread_count[16];
55 char region_count[16], offset[16], region_noise[16], chunk_noise[16];
56 char thread_delay[16], flags[16], result[8];
57 char *argv[16], *envp[4];
58
59 if ((path == NULL) || (strlen(path) == 0))
60 return (-ENOENT);
61
62 snprintf(id, 15, "%d", run_args->id);
63 snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
64 snprintf(region_size, 15, "%lu", (long unsigned) run_args->region_size);
65 snprintf(thread_count, 15, "%u", run_args->thread_count);
66 snprintf(region_count, 15, "%u", run_args->region_count);
67 snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
68 snprintf(region_noise, 15, "%u", run_args->region_noise);
69 snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
70 snprintf(thread_delay, 15, "%u", run_args->thread_delay);
71 snprintf(flags, 15, "0x%x", run_args->flags);
72 snprintf(result, 7, "%d", rc);
73
74 /* Passing 15 args to registered pre/post upcall */
75 argv[0] = path;
76 argv[1] = phase;
77 argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
78 argv[3] = id;
79 argv[4] = run_args->pool;
80 argv[5] = chunk_size;
81 argv[6] = region_size;
82 argv[7] = thread_count;
83 argv[8] = region_count;
84 argv[9] = offset;
85 argv[10] = region_noise;
86 argv[11] = chunk_noise;
87 argv[12] = thread_delay;
88 argv[13] = flags;
89 argv[14] = result;
90 argv[15] = NULL;
91
92 /* Passing environment for user space upcall */
93 envp[0] = "HOME=/";
94 envp[1] = "TERM=linux";
95 envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
96 envp[3] = NULL;
97
98 return (call_usermodehelper(path, argv, envp, UMH_WAIT_PROC));
99 }
100
101 static int
102 zpios_print(struct file *file, const char *format, ...)
103 {
104 zpios_info_t *info = (zpios_info_t *)file->private_data;
105 va_list adx;
106 int rc;
107
108 ASSERT(info);
109 ASSERT(info->info_buffer);
110
111 va_start(adx, format);
112 spin_lock(&info->info_lock);
113
114 /* Don't allow the kernel to start a write in the red zone */
115 if ((int)(info->info_head - info->info_buffer) >
116 (info->info_size - ZPIOS_INFO_BUFFER_REDZONE)) {
117 rc = -EOVERFLOW;
118 } else {
119 rc = vsprintf(info->info_head, format, adx);
120 if (rc >= 0)
121 info->info_head += rc;
122 }
123
124 spin_unlock(&info->info_lock);
125 va_end(adx);
126
127 return (rc);
128 }
129
130 static uint64_t
131 zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
132 {
133 struct dmu_tx *tx;
134 uint64_t obj = 0ULL;
135 uint64_t blksize = run_args->block_size;
136 int rc;
137
138 if (blksize < SPA_MINBLOCKSIZE ||
139 blksize > spa_maxblocksize(dmu_objset_spa(os)) ||
140 !ISP2(blksize)) {
141 zpios_print(run_args->file,
142 "invalid block size for pool: %d\n", (int)blksize);
143 return (obj);
144 }
145
146 tx = dmu_tx_create(os);
147 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
148 rc = dmu_tx_assign(tx, TXG_WAIT);
149 if (rc) {
150 zpios_print(run_args->file,
151 "dmu_tx_assign() failed: %d\n", rc);
152 dmu_tx_abort(tx);
153 return (obj);
154 }
155
156 obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
157 rc = dmu_object_set_blocksize(os, obj, blksize, 0, tx);
158 if (rc) {
159 zpios_print(run_args->file,
160 "dmu_object_set_blocksize to %d failed: %d\n",
161 (int)blksize, rc);
162 dmu_tx_abort(tx);
163 return (obj);
164 }
165
166 dmu_tx_commit(tx);
167
168 return (obj);
169 }
170
171 static int
172 zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
173 {
174 struct dmu_tx *tx;
175 int rc;
176
177 tx = dmu_tx_create(os);
178 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
179 rc = dmu_tx_assign(tx, TXG_WAIT);
180 if (rc) {
181 zpios_print(run_args->file,
182 "dmu_tx_assign() failed: %d\n", rc);
183 dmu_tx_abort(tx);
184 return (rc);
185 }
186
187 rc = dmu_object_free(os, obj, tx);
188 if (rc) {
189 zpios_print(run_args->file,
190 "dmu_object_free() failed: %d\n", rc);
191 dmu_tx_abort(tx);
192 return (rc);
193 }
194
195 dmu_tx_commit(tx);
196
197 return (0);
198 }
199
200 static int
201 zpios_dmu_setup(run_args_t *run_args)
202 {
203 zpios_time_t *t = &(run_args->stats.cr_time);
204 objset_t *os;
205 char name[32];
206 uint64_t obj = 0ULL;
207 int i, rc = 0, rc2;
208
209 (void) zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
210 t->start = zpios_timespec_now();
211
212 (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
213 rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
214 if (rc) {
215 zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
216 "failed: %d\n", name, rc);
217 goto out;
218 }
219
220 rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
221 if (rc) {
222 zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
223 "failed: %d\n", name, rc);
224 goto out_destroy;
225 }
226
227 if (!(run_args->flags & DMU_FPP)) {
228 obj = zpios_dmu_object_create(run_args, os);
229 if (obj == 0) {
230 rc = -EBADF;
231 zpios_print(run_args->file, "Error zpios_dmu_"
232 "object_create() failed, %d\n", rc);
233 goto out_destroy;
234 }
235 }
236
237 for (i = 0; i < run_args->region_count; i++) {
238 zpios_region_t *region;
239
240 region = &run_args->regions[i];
241 mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);
242
243 if (run_args->flags & DMU_FPP) {
244 /* File per process */
245 region->obj.os = os;
246 region->obj.obj = zpios_dmu_object_create(run_args, os);
247 ASSERT(region->obj.obj > 0); /* XXX - Handle this */
248 region->wr_offset = run_args->offset;
249 region->rd_offset = run_args->offset;
250 region->init_offset = run_args->offset;
251 region->max_offset = run_args->offset +
252 run_args->region_size;
253 } else {
254 /* Single shared file */
255 region->obj.os = os;
256 region->obj.obj = obj;
257 region->wr_offset = run_args->offset * i;
258 region->rd_offset = run_args->offset * i;
259 region->init_offset = run_args->offset * i;
260 region->max_offset = run_args->offset *
261 i + run_args->region_size;
262 }
263 }
264
265 run_args->os = os;
266 out_destroy:
267 if (rc) {
268 rc2 = dsl_destroy_head(name);
269 if (rc2)
270 zpios_print(run_args->file, "Error dsl_destroy_head"
271 "(%s, ...) failed: %d\n", name, rc2);
272 }
273 out:
274 t->stop = zpios_timespec_now();
275 t->delta = zpios_timespec_sub(t->stop, t->start);
276 (void) zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
277
278 return (rc);
279 }
280
281 static int
282 zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
283 {
284 run_args_t *ra;
285 int rc, size;
286
287 size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t);
288
289 ra = vmem_zalloc(size, KM_SLEEP);
290
291 *run_args = ra;
292 snprintf(ra->pool, sizeof (ra->pool), "%s", kcmd->cmd_pool);
293 snprintf(ra->pre, sizeof (ra->pre), "%s", kcmd->cmd_pre);
294 snprintf(ra->post, sizeof (ra->post), "%s", kcmd->cmd_post);
295 snprintf(ra->log, sizeof (ra->log), "%s", kcmd->cmd_log);
296
297 ra->id = kcmd->cmd_id;
298 ra->chunk_size = kcmd->cmd_chunk_size;
299 ra->thread_count = kcmd->cmd_thread_count;
300 ra->region_count = kcmd->cmd_region_count;
301 ra->region_size = kcmd->cmd_region_size;
302 ra->offset = kcmd->cmd_offset;
303 ra->region_noise = kcmd->cmd_region_noise;
304 ra->chunk_noise = kcmd->cmd_chunk_noise;
305 ra->thread_delay = kcmd->cmd_thread_delay;
306 ra->flags = kcmd->cmd_flags;
307 ra->block_size = kcmd->cmd_block_size;
308 ra->stats.wr_data = 0;
309 ra->stats.wr_chunks = 0;
310 ra->stats.rd_data = 0;
311 ra->stats.rd_chunks = 0;
312 ra->region_next = 0;
313 ra->file = file;
314 mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
315 mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
316
317 (void) zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);
318
319 rc = zpios_dmu_setup(ra);
320 if (rc) {
321 mutex_destroy(&ra->lock_ctl);
322 mutex_destroy(&ra->lock_work);
323 vmem_free(ra, size);
324 *run_args = NULL;
325 }
326
327 return (rc);
328 }
329
330 static int
331 zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
332 __u32 *chunk_size, zpios_region_t **region, __u32 flags)
333 {
334 int i, j, count = 0;
335 unsigned int random_int;
336
337 get_random_bytes(&random_int, sizeof (unsigned int));
338
339 mutex_enter(&run_args->lock_work);
340 i = run_args->region_next;
341
342 /*
343 * XXX: I don't much care for this chunk selection mechansim
344 * there's the potential to burn a lot of time here doing nothing
345 * useful while holding the global lock. This could give some
346 * misleading performance results. I'll fix it latter.
347 */
348 while (count < run_args->region_count) {
349 __u64 *rw_offset;
350 zpios_time_t *rw_time;
351
352 j = i % run_args->region_count;
353 *region = &(run_args->regions[j]);
354
355 if (flags & DMU_WRITE) {
356 rw_offset = &((*region)->wr_offset);
357 rw_time = &((*region)->stats.wr_time);
358 } else {
359 rw_offset = &((*region)->rd_offset);
360 rw_time = &((*region)->stats.rd_time);
361 }
362
363 /* test if region is fully written */
364 if (*rw_offset + *chunk_size > (*region)->max_offset) {
365 i++;
366 count++;
367
368 if (unlikely(rw_time->stop.ts_sec == 0) &&
369 unlikely(rw_time->stop.ts_nsec == 0))
370 rw_time->stop = zpios_timespec_now();
371
372 continue;
373 }
374
375 *offset = *rw_offset;
376 *obj = (*region)->obj;
377 *rw_offset += *chunk_size;
378
379 /* update ctl structure */
380 if (run_args->region_noise) {
381 get_random_bytes(&random_int, sizeof (unsigned int));
382 run_args->region_next +=
383 random_int % run_args->region_noise;
384 } else {
385 run_args->region_next++;
386 }
387
388 mutex_exit(&run_args->lock_work);
389 return (1);
390 }
391
392 /* nothing left to do */
393 mutex_exit(&run_args->lock_work);
394
395 return (0);
396 }
397
398 static void
399 zpios_remove_objset(run_args_t *run_args)
400 {
401 zpios_time_t *t = &(run_args->stats.rm_time);
402 zpios_region_t *region;
403 char name[32];
404 int rc = 0, i;
405
406 (void) zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
407 t->start = zpios_timespec_now();
408
409 (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
410
411 if (run_args->flags & DMU_REMOVE) {
412 if (run_args->flags & DMU_FPP) {
413 for (i = 0; i < run_args->region_count; i++) {
414 region = &run_args->regions[i];
415 rc = zpios_dmu_object_free(run_args,
416 region->obj.os, region->obj.obj);
417 if (rc)
418 zpios_print(run_args->file,
419 "Error removing object %d, %d\n",
420 (int)region->obj.obj, rc);
421 }
422 } else {
423 region = &run_args->regions[0];
424 rc = zpios_dmu_object_free(run_args,
425 region->obj.os, region->obj.obj);
426 if (rc)
427 zpios_print(run_args->file,
428 "Error removing object %d, %d\n",
429 (int)region->obj.obj, rc);
430 }
431 }
432
433 dmu_objset_disown(run_args->os, zpios_tag);
434
435 if (run_args->flags & DMU_REMOVE) {
436 rc = dsl_destroy_head(name);
437 if (rc)
438 zpios_print(run_args->file, "Error dsl_destroy_head"
439 "(%s, ...) failed: %d\n", name, rc);
440 }
441
442 t->stop = zpios_timespec_now();
443 t->delta = zpios_timespec_sub(t->stop, t->start);
444 (void) zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
445 }
446
447 static void
448 zpios_cleanup_run(run_args_t *run_args)
449 {
450 int i, size = 0;
451
452 if (run_args == NULL)
453 return;
454
455 if (run_args->threads != NULL) {
456 for (i = 0; i < run_args->thread_count; i++) {
457 if (run_args->threads[i]) {
458 mutex_destroy(&run_args->threads[i]->lock);
459 kmem_free(run_args->threads[i],
460 sizeof (thread_data_t));
461 }
462 }
463
464 kmem_free(run_args->threads,
465 sizeof (thread_data_t *) * run_args->thread_count);
466 }
467
468 for (i = 0; i < run_args->region_count; i++)
469 mutex_destroy(&run_args->regions[i].lock);
470
471 mutex_destroy(&run_args->lock_work);
472 mutex_destroy(&run_args->lock_ctl);
473 size = run_args->region_count * sizeof (zpios_region_t);
474
475 vmem_free(run_args, sizeof (*run_args) + size);
476 }
477
478 static int
479 zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
480 uint64_t offset, uint64_t size, const void *buf)
481 {
482 struct dmu_tx *tx;
483 int rc, how = TXG_WAIT;
484 // int flags = 0;
485
486 if (run_args->flags & DMU_WRITE_NOWAIT)
487 how = TXG_NOWAIT;
488
489 while (1) {
490 tx = dmu_tx_create(os);
491 dmu_tx_hold_write(tx, object, offset, size);
492 rc = dmu_tx_assign(tx, how);
493
494 if (rc) {
495 if (rc == ERESTART && how == TXG_NOWAIT) {
496 dmu_tx_wait(tx);
497 dmu_tx_abort(tx);
498 continue;
499 }
500 zpios_print(run_args->file,
501 "Error in dmu_tx_assign(), %d", rc);
502 dmu_tx_abort(tx);
503 return (rc);
504 }
505 break;
506 }
507
508 // if (run_args->flags & DMU_WRITE_ZC)
509 // flags |= DMU_WRITE_ZEROCOPY;
510
511 dmu_write(os, object, offset, size, buf, tx);
512 dmu_tx_commit(tx);
513
514 return (0);
515 }
516
517 static int
518 zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
519 uint64_t offset, uint64_t size, void *buf)
520 {
521 int flags = 0;
522
523 // if (run_args->flags & DMU_READ_ZC)
524 // flags |= DMU_READ_ZEROCOPY;
525
526 if (run_args->flags & DMU_READ_NOPF)
527 flags |= DMU_READ_NO_PREFETCH;
528
529 return (dmu_read(os, object, offset, size, buf, flags));
530 }
531
/*
 * Worker thread body.  Each thread runs a write phase over the shared
 * work queue, parks itself, and is later woken by zpios_threads_run()
 * for the read phase (or told to exit via thr->rc).  Per-thread and
 * per-region statistics are accumulated under their respective locks.
 * The thread terminates itself with do_exit().
 */
static int
zpios_thread_main(void *data)
{
	thread_data_t *thr = (thread_data_t *)data;
	run_args_t *run_args = thr->run_args;
	zpios_time_t t;
	dmu_obj_t obj;
	__u64 offset;
	__u32 chunk_size;
	zpios_region_t *region;
	char *buf;
	unsigned int random_int;
	int chunk_noise = run_args->chunk_noise;
	int chunk_noise_tmp = 0;
	int thread_delay = run_args->thread_delay;
	int thread_delay_tmp = 0;
	int i, rc = 0;

	/* Randomly perturb this thread's chunk size by +/- chunk_noise */
	if (chunk_noise) {
		get_random_bytes(&random_int, sizeof (unsigned int));
		chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
	}

	/*
	 * It's OK to vmem_alloc() this memory because it will be copied
	 * in to the slab and pointers to the slab copy will be setup in
	 * the bio when the IO is submitted. This of course is not ideal
	 * since we want a zero-copy IO path if possible. It would be nice
	 * to have direct access to those slab entries.
	 */
	chunk_size = run_args->chunk_size + chunk_noise_tmp;
	buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
	ASSERT(buf);

	/* Trivial data verification pattern for now. */
	if (run_args->flags & DMU_VERIFY)
		memset(buf, 'z', chunk_size);

	/* Write phase */
	mutex_enter(&thr->lock);
	thr->stats.wr_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_WRITE)) {
		/* Optional random inter-chunk delay */
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		/* Time only the dmu_write() itself */
		t.start = zpios_timespec_now();
		rc = zpios_dmu_write(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_write(): %d\n", rc);
			break;
		}

		/* Per-thread write accounting */
		mutex_enter(&thr->lock);
		thr->stats.wr_data += chunk_size;
		thr->stats.wr_chunks++;
		thr->stats.wr_time.delta = zpios_timespec_add(
		    thr->stats.wr_time.delta, t.delta);
		mutex_exit(&thr->lock);

		/* Per-region write accounting */
		mutex_enter(&region->lock);
		region->stats.wr_data += chunk_size;
		region->stats.wr_chunks++;
		region->stats.wr_time.delta = zpios_timespec_add(
		    region->stats.wr_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.wr_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Report write-phase completion and wake the coordinator */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.wr_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

	/* Park until zpios_threads_run() wakes us for the read phase */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule();

	/* Check if we should exit */
	mutex_enter(&thr->lock);
	rc = thr->rc;
	mutex_exit(&thr->lock);
	if (rc)
		goto out;

	/* Read phase */
	mutex_enter(&thr->lock);
	thr->stats.rd_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_READ)) {
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		/* Clear the buffer so stale data cannot pass verification */
		if (run_args->flags & DMU_VERIFY)
			memset(buf, 0, chunk_size);

		t.start = zpios_timespec_now();
		rc = zpios_dmu_read(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_read(): %d\n", rc);
			break;
		}

		/* Trivial data verification, expensive! */
		if (run_args->flags & DMU_VERIFY) {
			for (i = 0; i < chunk_size; i++) {
				if (buf[i] != 'z') {
					zpios_print(run_args->file,
					    "IO verify error: %d/%d/%d\n",
					    (int)obj.obj, (int)offset,
					    (int)chunk_size);
					break;
				}
			}
		}

		/* Per-thread read accounting */
		mutex_enter(&thr->lock);
		thr->stats.rd_data += chunk_size;
		thr->stats.rd_chunks++;
		thr->stats.rd_time.delta = zpios_timespec_add(
		    thr->stats.rd_time.delta, t.delta);
		mutex_exit(&thr->lock);

		/* Per-region read accounting */
		mutex_enter(&region->lock);
		region->stats.rd_data += chunk_size;
		region->stats.rd_chunks++;
		region->stats.rd_time.delta = zpios_timespec_add(
		    region->stats.rd_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.rd_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Report read-phase completion and wake the coordinator */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.rd_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

out:
	vmem_free(buf, chunk_size);
	do_exit(0);

	return (rc); /* Unreachable, due to do_exit() */
}
714
/*
 * wait_event() predicate: non-zero once every worker thread has
 * checked in for the current phase.  threads_done is read here
 * without taking lock_ctl; NOTE(review): this appears intentional
 * for a wait-queue condition, but confirm the unsynchronized read
 * is benign on all supported architectures.
 */
static int
zpios_thread_done(run_args_t *run_args)
{
	ASSERT(run_args->threads_done <= run_args->thread_count);
	return (run_args->threads_done == run_args->thread_count);
}
721
722 static int
723 zpios_threads_run(run_args_t *run_args)
724 {
725 struct task_struct *tsk, **tsks;
726 thread_data_t *thr = NULL;
727 zpios_time_t *tt = &(run_args->stats.total_time);
728 zpios_time_t *tw = &(run_args->stats.wr_time);
729 zpios_time_t *tr = &(run_args->stats.rd_time);
730 int i, rc = 0, tc = run_args->thread_count;
731
732 tsks = kmem_zalloc(sizeof (struct task_struct *) * tc, KM_SLEEP);
733
734 run_args->threads = kmem_zalloc(sizeof (thread_data_t *)*tc, KM_SLEEP);
735
736 init_waitqueue_head(&run_args->waitq);
737 run_args->threads_done = 0;
738
739 /* Create all the needed threads which will sleep until awoken */
740 for (i = 0; i < tc; i++) {
741 thr = kmem_zalloc(sizeof (thread_data_t), KM_SLEEP);
742
743 thr->thread_no = i;
744 thr->run_args = run_args;
745 thr->rc = 0;
746 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
747 run_args->threads[i] = thr;
748
749 tsk = kthread_create(zpios_thread_main, (void *)thr,
750 "%s/%d", "zpios_io", i);
751 if (IS_ERR(tsk)) {
752 rc = -EINVAL;
753 goto taskerr;
754 }
755
756 tsks[i] = tsk;
757 }
758
759 tt->start = zpios_timespec_now();
760
761 /* Wake up all threads for write phase */
762 (void) zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
763 for (i = 0; i < tc; i++)
764 wake_up_process(tsks[i]);
765
766 /* Wait for write phase to complete */
767 tw->start = zpios_timespec_now();
768 wait_event(run_args->waitq, zpios_thread_done(run_args));
769 tw->stop = zpios_timespec_now();
770 (void) zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
771
772 for (i = 0; i < tc; i++) {
773 thr = run_args->threads[i];
774
775 mutex_enter(&thr->lock);
776
777 if (!rc && thr->rc)
778 rc = thr->rc;
779
780 run_args->stats.wr_data += thr->stats.wr_data;
781 run_args->stats.wr_chunks += thr->stats.wr_chunks;
782 mutex_exit(&thr->lock);
783 }
784
785 if (rc) {
786 /* Wake up all threads and tell them to exit */
787 for (i = 0; i < tc; i++) {
788 mutex_enter(&thr->lock);
789 thr->rc = rc;
790 mutex_exit(&thr->lock);
791
792 wake_up_process(tsks[i]);
793 }
794 goto out;
795 }
796
797 mutex_enter(&run_args->lock_ctl);
798 ASSERT(run_args->threads_done == run_args->thread_count);
799 run_args->threads_done = 0;
800 mutex_exit(&run_args->lock_ctl);
801
802 /* Wake up all threads for read phase */
803 (void) zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
804 for (i = 0; i < tc; i++)
805 wake_up_process(tsks[i]);
806
807 /* Wait for read phase to complete */
808 tr->start = zpios_timespec_now();
809 wait_event(run_args->waitq, zpios_thread_done(run_args));
810 tr->stop = zpios_timespec_now();
811 (void) zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
812
813 for (i = 0; i < tc; i++) {
814 thr = run_args->threads[i];
815
816 mutex_enter(&thr->lock);
817
818 if (!rc && thr->rc)
819 rc = thr->rc;
820
821 run_args->stats.rd_data += thr->stats.rd_data;
822 run_args->stats.rd_chunks += thr->stats.rd_chunks;
823 mutex_exit(&thr->lock);
824 }
825 out:
826 tt->stop = zpios_timespec_now();
827 tt->delta = zpios_timespec_sub(tt->stop, tt->start);
828 tw->delta = zpios_timespec_sub(tw->stop, tw->start);
829 tr->delta = zpios_timespec_sub(tr->stop, tr->start);
830
831 cleanup:
832 kmem_free(tsks, sizeof (struct task_struct *) * tc);
833 return (rc);
834
835 taskerr:
836 /* Destroy all threads that were created successfully */
837 for (i = 0; i < tc; i++)
838 if (tsks[i] != NULL)
839 (void) kthread_stop(tsks[i]);
840
841 goto cleanup;
842 }
843
844 static int
845 zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
846 int data_size, void *data)
847 {
848 run_args_t *run_args = { 0 };
849 zpios_stats_t *stats = (zpios_stats_t *)data;
850 int i, n, m, size, rc;
851
852 if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
853 (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
854 zpios_print(file, "Invalid chunk_size, region_size, "
855 "thread_count, or region_count, %d\n", -EINVAL);
856 return (-EINVAL);
857 }
858
859 if (!(kcmd->cmd_flags & DMU_WRITE) ||
860 !(kcmd->cmd_flags & DMU_READ)) {
861 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
862 "and DMU_READ must be set, %d\n", -EINVAL);
863 return (-EINVAL);
864 }
865
866 if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
867 (kcmd->cmd_flags & DMU_VERIFY)) {
868 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
869 "with DMU_VERIFY, used for performance analysis "
870 "only, %d\n", -EINVAL);
871 return (-EINVAL);
872 }
873
874 /*
875 * Opaque data on return contains structs of the following form:
876 *
877 * zpios_stat_t stats[];
878 * stats[0] = run_args->stats;
879 * stats[1-N] = threads[N]->stats;
880 * stats[N+1-M] = regions[M]->stats;
881 *
882 * Where N is the number of threads, and M is the number of regions.
883 */
884 size = (sizeof (zpios_stats_t) +
885 (kcmd->cmd_thread_count * sizeof (zpios_stats_t)) +
886 (kcmd->cmd_region_count * sizeof (zpios_stats_t)));
887 if (data_size < size) {
888 zpios_print(file, "Invalid size, command data buffer "
889 "size too small, (%d < %d)\n", data_size, size);
890 return (-ENOSPC);
891 }
892
893 rc = zpios_setup_run(&run_args, kcmd, file);
894 if (rc)
895 return (rc);
896
897 rc = zpios_threads_run(run_args);
898 zpios_remove_objset(run_args);
899 if (rc)
900 goto cleanup;
901
902 if (stats) {
903 n = 1;
904 m = 1 + kcmd->cmd_thread_count;
905 stats[0] = run_args->stats;
906
907 for (i = 0; i < kcmd->cmd_thread_count; i++)
908 stats[n+i] = run_args->threads[i]->stats;
909
910 for (i = 0; i < kcmd->cmd_region_count; i++)
911 stats[m+i] = run_args->regions[i].stats;
912 }
913
914 cleanup:
915 zpios_cleanup_run(run_args);
916
917 (void) zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
918
919 return (rc);
920 }
921
922 static int
923 zpios_open(struct inode *inode, struct file *file)
924 {
925 zpios_info_t *info;
926
927 info = (zpios_info_t *)kmem_alloc(sizeof (*info), KM_SLEEP);
928
929 spin_lock_init(&info->info_lock);
930 info->info_size = ZPIOS_INFO_BUFFER_SIZE;
931 info->info_buffer =
932 (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
933
934 info->info_head = info->info_buffer;
935 file->private_data = (void *)info;
936
937 return (0);
938 }
939
940 static int
941 zpios_release(struct inode *inode, struct file *file)
942 {
943 zpios_info_t *info = (zpios_info_t *)file->private_data;
944
945 ASSERT(info);
946 ASSERT(info->info_buffer);
947
948 vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
949 kmem_free(info, sizeof (*info));
950
951 return (0);
952 }
953
954 static int
955 zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
956 {
957 zpios_info_t *info = (zpios_info_t *)file->private_data;
958
959 ASSERT(info);
960 ASSERT(info->info_buffer);
961
962 spin_lock(&info->info_lock);
963 memset(info->info_buffer, 0, info->info_size);
964 info->info_head = info->info_buffer;
965 spin_unlock(&info->info_lock);
966
967 return (0);
968 }
969
970 static int
971 zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
972 {
973 zpios_info_t *info = (zpios_info_t *)file->private_data;
974 char *buf;
975 int min, size, rc = 0;
976
977 ASSERT(info);
978 ASSERT(info->info_buffer);
979
980 spin_lock(&info->info_lock);
981 if (kcfg->cfg_arg1 > 0) {
982
983 size = kcfg->cfg_arg1;
984 buf = (char *)vmem_alloc(size, KM_SLEEP);
985
986 /* Zero fill and truncate contents when coping buffer */
987 min = ((size < info->info_size) ? size : info->info_size);
988 memset(buf, 0, size);
989 memcpy(buf, info->info_buffer, min);
990 vmem_free(info->info_buffer, info->info_size);
991 info->info_size = size;
992 info->info_buffer = buf;
993 info->info_head = info->info_buffer;
994 }
995
996 kcfg->cfg_rc1 = info->info_size;
997
998 if (copy_to_user((struct zpios_cfg_t __user *)arg,
999 kcfg, sizeof (*kcfg)))
1000 rc = -EFAULT;
1001
1002 spin_unlock(&info->info_lock);
1003
1004 return (rc);
1005 }
1006
1007 static int
1008 zpios_ioctl_cfg(struct file *file, unsigned long arg)
1009 {
1010 zpios_cfg_t kcfg;
1011 int rc = 0;
1012
1013 if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof (kcfg)))
1014 return (-EFAULT);
1015
1016 if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1017 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1018 kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1019 return (-EINVAL);
1020 }
1021
1022 switch (kcfg.cfg_cmd) {
1023 case ZPIOS_CFG_BUFFER_CLEAR:
1024 /*
1025 * cfg_arg1 - Unused
1026 * cfg_rc1 - Unused
1027 */
1028 rc = zpios_buffer_clear(file, &kcfg, arg);
1029 break;
1030 case ZPIOS_CFG_BUFFER_SIZE:
1031 /*
1032 * cfg_arg1 - 0 - query size; >0 resize
1033 * cfg_rc1 - Set to current buffer size
1034 */
1035 rc = zpios_buffer_size(file, &kcfg, arg);
1036 break;
1037 default:
1038 zpios_print(file, "Bad config command %d\n",
1039 kcfg.cfg_cmd);
1040 rc = -EINVAL;
1041 break;
1042 }
1043
1044 return (rc);
1045 }
1046
1047 static int
1048 zpios_ioctl_cmd(struct file *file, unsigned long arg)
1049 {
1050 zpios_cmd_t *kcmd;
1051 void *data = NULL;
1052 int rc = -EINVAL;
1053
1054 kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP);
1055
1056 rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t));
1057 if (rc) {
1058 zpios_print(file, "Unable to copy command structure "
1059 "from user to kernel memory, %d\n", rc);
1060 goto out_cmd;
1061 }
1062
1063 if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1064 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
1065 kcmd->cmd_magic, ZPIOS_CFG_MAGIC);
1066 rc = (-EINVAL);
1067 goto out_cmd;
1068 }
1069
1070 /* Allocate memory for any opaque data the caller needed to pass on */
1071 if (kcmd->cmd_data_size > 0) {
1072 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
1073
1074 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1075 cmd_data_str)), kcmd->cmd_data_size);
1076 if (rc) {
1077 zpios_print(file, "Unable to copy data buffer "
1078 "from user to kernel memory, %d\n", rc);
1079 goto out_data;
1080 }
1081 }
1082
1083 rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1084
1085 if (data != NULL) {
1086 /* If the test failed do not print out the stats */
1087 if (rc)
1088 goto out_data;
1089
1090 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1091 cmd_data_str)), data, kcmd->cmd_data_size);
1092 if (rc) {
1093 zpios_print(file, "Unable to copy data buffer "
1094 "from kernel to user memory, %d\n", rc);
1095 rc = -EFAULT;
1096 }
1097
1098 out_data:
1099 vmem_free(data, kcmd->cmd_data_size);
1100 }
1101 out_cmd:
1102 kmem_free(kcmd, sizeof (zpios_cmd_t));
1103
1104 return (rc);
1105 }
1106
1107 static long
1108 zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1109 {
1110 int rc = 0;
1111
1112 /* Ignore tty ioctls */
1113 if ((cmd & 0xffffff00) == ((int)'T') << 8)
1114 return (-ENOTTY);
1115
1116 switch (cmd) {
1117 case ZPIOS_CFG:
1118 rc = zpios_ioctl_cfg(file, arg);
1119 break;
1120 case ZPIOS_CMD:
1121 rc = zpios_ioctl_cmd(file, arg);
1122 break;
1123 default:
1124 zpios_print(file, "Bad ioctl command %d\n", cmd);
1125 rc = -EINVAL;
1126 break;
1127 }
1128
1129 return (rc);
1130 }
1131
#ifdef CONFIG_COMPAT
/* Compatibility handler for ioctls from 32-bit ELF binaries */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/* Delegates directly to the native handler */
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
1140
1141 /*
1142 * I'm not sure why you would want to write in to this buffer from
1143 * user space since its principle use is to pass test status info
1144 * back to the user space, but I don't see any reason to prevent it.
1145 */
1146 static ssize_t
1147 zpios_write(struct file *file, const char __user *buf,
1148 size_t count, loff_t *ppos)
1149 {
1150 zpios_info_t *info = (zpios_info_t *)file->private_data;
1151 int rc = 0;
1152
1153 ASSERT(info);
1154 ASSERT(info->info_buffer);
1155
1156 spin_lock(&info->info_lock);
1157
1158 /* Write beyond EOF */
1159 if (*ppos >= info->info_size) {
1160 rc = -EFBIG;
1161 goto out;
1162 }
1163
1164 /* Resize count if beyond EOF */
1165 if (*ppos + count > info->info_size)
1166 count = info->info_size - *ppos;
1167
1168 if (copy_from_user(info->info_buffer, buf, count)) {
1169 rc = -EFAULT;
1170 goto out;
1171 }
1172
1173 *ppos += count;
1174 rc = count;
1175 out:
1176 spin_unlock(&info->info_lock);
1177 return (rc);
1178 }
1179
/*
 * Copy out up to 'count' bytes of the accumulated status buffer
 * starting at *ppos.  Returns the number of bytes copied, 0 at EOF,
 * or -EFAULT.  NOTE(review): copy_to_user() can fault and sleep while
 * info_lock (a spin lock) is held -- confirm whether a bounce buffer
 * is required here.
 */
static ssize_t
zpios_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	zpios_info_t *info = (zpios_info_t *)file->private_data;
	int rc = 0;

	ASSERT(info);
	ASSERT(info->info_buffer);

	spin_lock(&info->info_lock);

	/* Read beyond EOF */
	if (*ppos >= info->info_size)
		goto out;

	/* Resize count if beyond EOF */
	if (*ppos + count > info->info_size)
		count = info->info_size - *ppos;

	if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
		rc = -EFAULT;
		goto out;
	}

	*ppos += count;
	rc = count;
out:
	spin_unlock(&info->info_lock);
	return (rc);
}
1210
1211 static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1212 {
1213 zpios_info_t *info = (zpios_info_t *)file->private_data;
1214 int rc = -EINVAL;
1215
1216 ASSERT(info);
1217 ASSERT(info->info_buffer);
1218
1219 spin_lock(&info->info_lock);
1220
1221 switch (origin) {
1222 case 0: /* SEEK_SET - No-op just do it */
1223 break;
1224 case 1: /* SEEK_CUR - Seek from current */
1225 offset = file->f_pos + offset;
1226 break;
1227 case 2: /* SEEK_END - Seek from end */
1228 offset = info->info_size + offset;
1229 break;
1230 }
1231
1232 if (offset >= 0) {
1233 file->f_pos = offset;
1234 file->f_version = 0;
1235 rc = offset;
1236 }
1237
1238 spin_unlock(&info->info_lock);
1239
1240 return (rc);
1241 }
1242
/* File operations for the /dev/zpios control node */
static struct file_operations zpios_fops = {
	.owner = THIS_MODULE,
	.open = zpios_open,
	.release = zpios_release,
	.unlocked_ioctl = zpios_unlocked_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = zpios_compat_ioctl,
#endif
	.read = zpios_read,
	.write = zpios_write,
	.llseek = zpios_seek,
};
1255
/* Misc device description; the minor number is assigned dynamically */
static struct miscdevice zpios_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = ZPIOS_NAME,
	.fops = &zpios_fops,
};
1261
1262 #ifdef DEBUG
1263 #define ZFS_DEBUG_STR " (DEBUG mode)"
1264 #else
1265 #define ZFS_DEBUG_STR ""
1266 #endif
1267
1268 static int __init
1269 zpios_init(void)
1270 {
1271 int error;
1272
1273 error = misc_register(&zpios_misc);
1274 if (error) {
1275 printk(KERN_INFO "ZPIOS: misc_register() failed %d\n", error);
1276 } else {
1277 printk(KERN_INFO "ZPIOS: Loaded module v%s-%s%s\n",
1278 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
1279 }
1280
1281 return (error);
1282 }
1283
1284 static void __exit
1285 zpios_fini(void)
1286 {
1287 misc_deregister(&zpios_misc);
1288
1289 printk(KERN_INFO "ZPIOS: Unloaded module v%s-%s%s\n",
1290 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
1291 }
1292
1293 module_init(zpios_init);
1294 module_exit(zpios_fini);
1295
1296 MODULE_AUTHOR("LLNL / Sun");
1297 MODULE_DESCRIPTION("Kernel PIOS implementation");
1298 MODULE_LICENSE("GPL");
1299 MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);