1/*****************************************************************************\
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
 4 * against ZFS while still being flexibly controlled from user space.
5 *
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
9 * LLNL-CODE-403049
10 *
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
16 *
17 * This file is part of ZFS on Linux.
 18 * For details, see <http://zfsonlinux.org/>.
19 *
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
24 *
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 * for more details.
29 *
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
32\*****************************************************************************/
33
34#include <sys/zfs_context.h>
35#include <sys/dmu.h>
36#include <sys/txg.h>
 37#include <sys/dsl_destroy.h>
38#include <linux/cdev.h>
39#include "zpios-internal.h"
40
41
42static spl_class *zpios_class;
43static spl_device *zpios_device;
44static char *zpios_tag = "zpios_tag";
45
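/*
 * Run the registered user-space helper (the pre/post phase script) with
 * the current run parameters passed as argv[] and a minimal environment.
 * Returns -ENOENT when no helper path is set, otherwise the result of
 * call_usermodehelper().
 */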
46static
47int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
48{
49 /* This is stack heavy but it should be OK since we are only
50 * making the upcall between tests when the stack is shallow.
51 */
52 char id[16], chunk_size[16], region_size[16], thread_count[16];
53 char region_count[16], offset[16], region_noise[16], chunk_noise[16];
54 char thread_delay[16], flags[16], result[8];
55 char *argv[16], *envp[4];
56
57 if ((path == NULL) || (strlen(path) == 0))
58 return -ENOENT;
59
60 snprintf(id, 15, "%d", run_args->id);
61 snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
 62 snprintf(region_size, 15, "%lu", (long unsigned)run_args->region_size);
63 snprintf(thread_count, 15, "%u", run_args->thread_count);
64 snprintf(region_count, 15, "%u", run_args->region_count);
65 snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
66 snprintf(region_noise, 15, "%u", run_args->region_noise);
67 snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
68 snprintf(thread_delay, 15, "%u", run_args->thread_delay);
69 snprintf(flags, 15, "0x%x", run_args->flags);
70 snprintf(result, 7, "%d", rc);
71
72 /* Passing 15 args to registered pre/post upcall */
73 argv[0] = path;
74 argv[1] = phase;
75 argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
76 argv[3] = id;
77 argv[4] = run_args->pool;
78 argv[5] = chunk_size;
79 argv[6] = region_size;
80 argv[7] = thread_count;
81 argv[8] = region_count;
82 argv[9] = offset;
83 argv[10] = region_noise;
84 argv[11] = chunk_noise;
85 argv[12] = thread_delay;
86 argv[13] = flags;
87 argv[14] = result;
88 argv[15] = NULL;
89
90 /* Passing environment for user space upcall */
91 envp[0] = "HOME=/";
92 envp[1] = "TERM=linux";
93 envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
94 envp[3] = NULL;
95
 96 return call_usermodehelper(path, argv, envp, UMH_WAIT_PROC);
97}
98
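/*
 * Allocate a new DMU object in its own transaction and set its block
 * size to 128K.  Returns the object number, or 0 if the transaction
 * could not be assigned.
 */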
99static uint64_t
100zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
101{
102 struct dmu_tx *tx;
103 uint64_t obj = 0ULL;
104 int rc;
105
106 tx = dmu_tx_create(os);
107 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
108 rc = dmu_tx_assign(tx, TXG_WAIT);
109 if (rc) {
110 zpios_print(run_args->file,
111 "dmu_tx_assign() failed: %d\n", rc);
112 dmu_tx_abort(tx);
113 return obj;
114 }
115
116 obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
117 DMU_OT_NONE, 0, tx);
118 rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
119 if (rc) {
120 zpios_print(run_args->file,
121 "dmu_object_set_blocksize() failed: %d\n", rc);
122 dmu_tx_abort(tx);
123 return obj;
124 }
125
126 dmu_tx_commit(tx);
127
128 return obj;
129}
130
131static int
132zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
133{
134 struct dmu_tx *tx;
135 int rc;
136
137 tx = dmu_tx_create(os);
138 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
139 rc = dmu_tx_assign(tx, TXG_WAIT);
140 if (rc) {
141 zpios_print(run_args->file,
142 "dmu_tx_assign() failed: %d\n", rc);
143 dmu_tx_abort(tx);
144 return rc;
145 }
146
147 rc = dmu_object_free(os, obj, tx);
148 if (rc) {
149 zpios_print(run_args->file,
150 "dmu_object_free() failed: %d\n", rc);
151 dmu_tx_abort(tx);
152 return rc;
153 }
154
155 dmu_tx_commit(tx);
156
157 return 0;
158}
159
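/*
 * Create and take ownership of the <pool>/id_<run id> objset, then set up
 * the per-region state: one object per region when DMU_FPP (file per
 * process) is set, otherwise a single shared object with per-region
 * offsets.  On failure the newly created dataset is destroyed again.
 */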
160static int
161zpios_dmu_setup(run_args_t *run_args)
162{
163 zpios_time_t *t = &(run_args->stats.cr_time);
164 objset_t *os;
165 char name[32];
166 uint64_t obj = 0ULL;
167 int i, rc = 0, rc2;
168
169 (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
170 t->start = zpios_timespec_now();
171
172 (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
173 rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
174 if (rc) {
175 zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
176 "failed: %d\n", name, rc);
177 goto out;
178 }
179
180 rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
181 if (rc) {
182 zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
183 "failed: %d\n", name, rc);
184 goto out_destroy;
185 }
186
187 if (!(run_args->flags & DMU_FPP)) {
188 obj = zpios_dmu_object_create(run_args, os);
189 if (obj == 0) {
190 rc = -EBADF;
191 zpios_print(run_args->file, "Error zpios_dmu_"
192 "object_create() failed, %d\n", rc);
193 goto out_destroy;
194 }
195 }
196
197 for (i = 0; i < run_args->region_count; i++) {
198 zpios_region_t *region;
199
200 region = &run_args->regions[i];
201 mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);
202
203 if (run_args->flags & DMU_FPP) {
204 /* File per process */
205 region->obj.os = os;
206 region->obj.obj = zpios_dmu_object_create(run_args, os);
207 ASSERT(region->obj.obj > 0); /* XXX - Handle this */
208 region->wr_offset = run_args->offset;
209 region->rd_offset = run_args->offset;
210 region->init_offset = run_args->offset;
211 region->max_offset = run_args->offset +
212 run_args->region_size;
213 } else {
214 /* Single shared file */
215 region->obj.os = os;
216 region->obj.obj = obj;
217 region->wr_offset = run_args->offset * i;
218 region->rd_offset = run_args->offset * i;
219 region->init_offset = run_args->offset * i;
220 region->max_offset = run_args->offset *
221 i + run_args->region_size;
222 }
223 }
224
225 run_args->os = os;
226out_destroy:
227 if (rc) {
 228 rc2 = dsl_destroy_head(name);
 229 if (rc2)
 230 zpios_print(run_args->file, "Error dsl_destroy_head"
 231 "(%s, ...) failed: %d\n", name, rc2);
232 }
233out:
234 t->stop = zpios_timespec_now();
235 t->delta = zpios_timespec_sub(t->stop, t->start);
236 (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
237
238 return rc;
239}
240
241static int
242zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
243{
244 run_args_t *ra;
245 int rc, size;
246
247 size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t);
248
249 ra = vmem_zalloc(size, KM_SLEEP);
250 if (ra == NULL) {
251 zpios_print(file, "Unable to vmem_zalloc() %d bytes "
252 "for regions\n", size);
253 return -ENOMEM;
254 }
255
256 *run_args = ra;
257 strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
258 strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
259 strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
260 strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
261 ra->id = kcmd->cmd_id;
262 ra->chunk_size = kcmd->cmd_chunk_size;
263 ra->thread_count = kcmd->cmd_thread_count;
264 ra->region_count = kcmd->cmd_region_count;
265 ra->region_size = kcmd->cmd_region_size;
266 ra->offset = kcmd->cmd_offset;
267 ra->region_noise = kcmd->cmd_region_noise;
268 ra->chunk_noise = kcmd->cmd_chunk_noise;
269 ra->thread_delay = kcmd->cmd_thread_delay;
270 ra->flags = kcmd->cmd_flags;
271 ra->stats.wr_data = 0;
272 ra->stats.wr_chunks = 0;
273 ra->stats.rd_data = 0;
274 ra->stats.rd_chunks = 0;
275 ra->region_next = 0;
276 ra->file = file;
277 mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
278 mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
279
280 (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);
281
282 rc = zpios_dmu_setup(ra);
283 if (rc) {
284 mutex_destroy(&ra->lock_ctl);
285 mutex_destroy(&ra->lock_work);
286 vmem_free(ra, size);
287 *run_args = NULL;
288 }
289
290 return rc;
291}
292
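/*
 * Hand out the next (object, offset) work item to a caller under
 * lock_work.  Regions are walked round-robin starting at region_next,
 * skipping any region that has already been fully written (or read);
 * returns 1 when a work item was found and 0 when nothing is left.
 */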
293static int
294zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
295 __u32 *chunk_size, zpios_region_t **region, __u32 flags)
296{
297 int i, j, count = 0;
298 unsigned int random_int;
299
300 get_random_bytes(&random_int, sizeof(unsigned int));
301
302 mutex_enter(&run_args->lock_work);
303 i = run_args->region_next;
304
 305 /* XXX: I don't much care for this chunk selection mechanism;
 306 * there's the potential to burn a lot of time here doing nothing
 307 * useful while holding the global lock. This could give some
 308 * misleading performance results. I'll fix it later.
 309 */
310 while (count < run_args->region_count) {
311 __u64 *rw_offset;
312 zpios_time_t *rw_time;
313
314 j = i % run_args->region_count;
315 *region = &(run_args->regions[j]);
316
317 if (flags & DMU_WRITE) {
318 rw_offset = &((*region)->wr_offset);
319 rw_time = &((*region)->stats.wr_time);
320 } else {
321 rw_offset = &((*region)->rd_offset);
322 rw_time = &((*region)->stats.rd_time);
323 }
324
325 /* test if region is fully written */
326 if (*rw_offset + *chunk_size > (*region)->max_offset) {
327 i++;
328 count++;
329
330 if (unlikely(rw_time->stop.ts_sec == 0) &&
331 unlikely(rw_time->stop.ts_nsec == 0))
332 rw_time->stop = zpios_timespec_now();
333
334 continue;
335 }
336
337 *offset = *rw_offset;
338 *obj = (*region)->obj;
339 *rw_offset += *chunk_size;
340
341 /* update ctl structure */
342 if (run_args->region_noise) {
343 get_random_bytes(&random_int, sizeof(unsigned int));
344 run_args->region_next += random_int % run_args->region_noise;
345 } else {
346 run_args->region_next++;
347 }
348
349 mutex_exit(&run_args->lock_work);
350 return 1;
351 }
352
353 /* nothing left to do */
354 mutex_exit(&run_args->lock_work);
355
356 return 0;
357}
358
359static void
360zpios_remove_objset(run_args_t *run_args)
361{
362 zpios_time_t *t = &(run_args->stats.rm_time);
363 zpios_region_t *region;
364 char name[32];
365 int rc = 0, i;
366
367 (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
368 t->start = zpios_timespec_now();
369
370 (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
371
372 if (run_args->flags & DMU_REMOVE) {
373 if (run_args->flags & DMU_FPP) {
374 for (i = 0; i < run_args->region_count; i++) {
375 region = &run_args->regions[i];
376 rc = zpios_dmu_object_free(run_args,
377 region->obj.os,
378 region->obj.obj);
379 if (rc)
380 zpios_print(run_args->file, "Error "
381 "removing object %d, %d\n",
382 (int)region->obj.obj, rc);
383 }
384 } else {
385 region = &run_args->regions[0];
386 rc = zpios_dmu_object_free(run_args,
387 region->obj.os,
388 region->obj.obj);
389 if (rc)
390 zpios_print(run_args->file, "Error "
391 "removing object %d, %d\n",
392 (int)region->obj.obj, rc);
393 }
394 }
395
396 dmu_objset_disown(run_args->os, zpios_tag);
397
398 if (run_args->flags & DMU_REMOVE) {
 399 rc = dsl_destroy_head(name);
 400 if (rc)
 401 zpios_print(run_args->file, "Error dsl_destroy_head"
 402 "(%s, ...) failed: %d\n", name, rc);
403 }
404
405 t->stop = zpios_timespec_now();
406 t->delta = zpios_timespec_sub(t->stop, t->start);
407 (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
408}
409
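/*
 * Tear down a run's on-disk state: when DMU_REMOVE is set, free the
 * objects and destroy the <pool>/id_<run id> dataset; the objset is
 * disowned in either case.
 */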
410static void
411zpios_cleanup_run(run_args_t *run_args)
412{
413 int i, size = 0;
414
415 if (run_args == NULL)
416 return;
417
418 if (run_args->threads != NULL) {
419 for (i = 0; i < run_args->thread_count; i++) {
420 if (run_args->threads[i]) {
421 mutex_destroy(&run_args->threads[i]->lock);
422 kmem_free(run_args->threads[i],
423 sizeof(thread_data_t));
424 }
425 }
426
427 kmem_free(run_args->threads,
428 sizeof(thread_data_t *) * run_args->thread_count);
429 }
430
431 for (i = 0; i < run_args->region_count; i++)
432 mutex_destroy(&run_args->regions[i].lock);
433
434 mutex_destroy(&run_args->lock_work);
435 mutex_destroy(&run_args->lock_ctl);
436 size = run_args->region_count * sizeof(zpios_region_t);
437
438 vmem_free(run_args, sizeof(*run_args) + size);
439}
440
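/*
 * Write one chunk within its own transaction.  With DMU_WRITE_NOWAIT the
 * transaction is assigned with TXG_NOWAIT and, on ERESTART, retried after
 * dmu_tx_wait(); otherwise TXG_WAIT is used.
 */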
441static int
442zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
443 uint64_t offset, uint64_t size, const void *buf)
444{
445 struct dmu_tx *tx;
446 int rc, how = TXG_WAIT;
447// int flags = 0;
448
449 if (run_args->flags & DMU_WRITE_NOWAIT)
450 how = TXG_NOWAIT;
451
452 while (1) {
453 tx = dmu_tx_create(os);
454 dmu_tx_hold_write(tx, object, offset, size);
455 rc = dmu_tx_assign(tx, how);
456
457 if (rc) {
458 if (rc == ERESTART && how == TXG_NOWAIT) {
459 dmu_tx_wait(tx);
460 dmu_tx_abort(tx);
461 continue;
462 }
463 zpios_print(run_args->file,
464 "Error in dmu_tx_assign(), %d", rc);
465 dmu_tx_abort(tx);
466 return rc;
467 }
468 break;
469 }
470
471// if (run_args->flags & DMU_WRITE_ZC)
472// flags |= DMU_WRITE_ZEROCOPY;
473
474 dmu_write(os, object, offset, size, buf, tx);
475 dmu_tx_commit(tx);
476
477 return 0;
478}
479
480static int
481zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
482 uint64_t offset, uint64_t size, void *buf)
483{
484 int flags = 0;
485
486// if (run_args->flags & DMU_READ_ZC)
487// flags |= DMU_READ_ZEROCOPY;
488
489 if (run_args->flags & DMU_READ_NOPF)
490 flags |= DMU_READ_NO_PREFETCH;
491
492 return dmu_read(os, object, offset, size, buf, flags);
493}
494
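/*
 * Per-thread worker: run the write phase over all regions, publish the
 * thread's result and sleep until the controller wakes it again, then run
 * the read phase (with optional data verification).  Per-thread and
 * per-region statistics are updated under their respective locks.
 */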
495static int
496zpios_thread_main(void *data)
497{
498 thread_data_t *thr = (thread_data_t *)data;
499 run_args_t *run_args = thr->run_args;
500 zpios_time_t t;
501 dmu_obj_t obj;
502 __u64 offset;
503 __u32 chunk_size;
504 zpios_region_t *region;
505 char *buf;
506 unsigned int random_int;
507 int chunk_noise = run_args->chunk_noise;
508 int chunk_noise_tmp = 0;
509 int thread_delay = run_args->thread_delay;
510 int thread_delay_tmp = 0;
511 int i, rc = 0;
512
513 if (chunk_noise) {
514 get_random_bytes(&random_int, sizeof(unsigned int));
 515 chunk_noise_tmp = (random_int % (chunk_noise * 2)) - chunk_noise;
516 }
517
 518 /* It's OK to vmem_alloc() this memory because it will be copied
 519 * into the slab, and pointers to the slab copy will be set up in
 520 * the bio when the IO is submitted. This of course is not ideal
 521 * since we want a zero-copy IO path if possible. It would be nice
 522 * to have direct access to those slab entries.
 523 */
524 chunk_size = run_args->chunk_size + chunk_noise_tmp;
525 buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
526 ASSERT(buf);
527
528 /* Trivial data verification pattern for now. */
529 if (run_args->flags & DMU_VERIFY)
530 memset(buf, 'z', chunk_size);
531
532 /* Write phase */
533 mutex_enter(&thr->lock);
534 thr->stats.wr_time.start = zpios_timespec_now();
535 mutex_exit(&thr->lock);
536
537 while (zpios_get_work_item(run_args, &obj, &offset,
538 &chunk_size, &region, DMU_WRITE)) {
539 if (thread_delay) {
540 get_random_bytes(&random_int, sizeof(unsigned int));
541 thread_delay_tmp = random_int % thread_delay;
542 set_current_state(TASK_UNINTERRUPTIBLE);
543 schedule_timeout(thread_delay_tmp); /* In jiffies */
544 }
545
546 t.start = zpios_timespec_now();
547 rc = zpios_dmu_write(run_args, obj.os, obj.obj,
548 offset, chunk_size, buf);
549 t.stop = zpios_timespec_now();
550 t.delta = zpios_timespec_sub(t.stop, t.start);
551
552 if (rc) {
553 zpios_print(run_args->file, "IO error while doing "
554 "dmu_write(): %d\n", rc);
555 break;
556 }
557
558 mutex_enter(&thr->lock);
559 thr->stats.wr_data += chunk_size;
560 thr->stats.wr_chunks++;
561 thr->stats.wr_time.delta = zpios_timespec_add(
562 thr->stats.wr_time.delta, t.delta);
563 mutex_exit(&thr->lock);
564
565 mutex_enter(&region->lock);
566 region->stats.wr_data += chunk_size;
567 region->stats.wr_chunks++;
568 region->stats.wr_time.delta = zpios_timespec_add(
569 region->stats.wr_time.delta, t.delta);
570
571 /* First time region was accessed */
572 if (region->init_offset == offset)
573 region->stats.wr_time.start = t.start;
574
575 mutex_exit(&region->lock);
576 }
577
578 mutex_enter(&run_args->lock_ctl);
579 run_args->threads_done++;
580 mutex_exit(&run_args->lock_ctl);
581
582 mutex_enter(&thr->lock);
583 thr->rc = rc;
584 thr->stats.wr_time.stop = zpios_timespec_now();
585 mutex_exit(&thr->lock);
586 wake_up(&run_args->waitq);
587
588 set_current_state(TASK_UNINTERRUPTIBLE);
589 schedule();
590
591 /* Check if we should exit */
592 mutex_enter(&thr->lock);
593 rc = thr->rc;
594 mutex_exit(&thr->lock);
595 if (rc)
596 goto out;
597
598 /* Read phase */
599 mutex_enter(&thr->lock);
600 thr->stats.rd_time.start = zpios_timespec_now();
601 mutex_exit(&thr->lock);
602
603 while (zpios_get_work_item(run_args, &obj, &offset,
604 &chunk_size, &region, DMU_READ)) {
605 if (thread_delay) {
606 get_random_bytes(&random_int, sizeof(unsigned int));
607 thread_delay_tmp = random_int % thread_delay;
608 set_current_state(TASK_UNINTERRUPTIBLE);
609 schedule_timeout(thread_delay_tmp); /* In jiffies */
610 }
611
612 if (run_args->flags & DMU_VERIFY)
613 memset(buf, 0, chunk_size);
614
615 t.start = zpios_timespec_now();
616 rc = zpios_dmu_read(run_args, obj.os, obj.obj,
617 offset, chunk_size, buf);
618 t.stop = zpios_timespec_now();
619 t.delta = zpios_timespec_sub(t.stop, t.start);
620
621 if (rc) {
622 zpios_print(run_args->file, "IO error while doing "
623 "dmu_read(): %d\n", rc);
624 break;
625 }
626
627 /* Trivial data verification, expensive! */
628 if (run_args->flags & DMU_VERIFY) {
629 for (i = 0; i < chunk_size; i++) {
630 if (buf[i] != 'z') {
631 zpios_print(run_args->file,
632 "IO verify error: %d/%d/%d\n",
633 (int)obj.obj, (int)offset,
634 (int)chunk_size);
635 break;
636 }
637 }
638 }
639
640 mutex_enter(&thr->lock);
641 thr->stats.rd_data += chunk_size;
642 thr->stats.rd_chunks++;
643 thr->stats.rd_time.delta = zpios_timespec_add(
644 thr->stats.rd_time.delta, t.delta);
645 mutex_exit(&thr->lock);
646
647 mutex_enter(&region->lock);
648 region->stats.rd_data += chunk_size;
649 region->stats.rd_chunks++;
650 region->stats.rd_time.delta = zpios_timespec_add(
651 region->stats.rd_time.delta, t.delta);
652
653 /* First time region was accessed */
654 if (region->init_offset == offset)
655 region->stats.rd_time.start = t.start;
656
657 mutex_exit(&region->lock);
658 }
659
660 mutex_enter(&run_args->lock_ctl);
661 run_args->threads_done++;
662 mutex_exit(&run_args->lock_ctl);
663
664 mutex_enter(&thr->lock);
665 thr->rc = rc;
666 thr->stats.rd_time.stop = zpios_timespec_now();
667 mutex_exit(&thr->lock);
668 wake_up(&run_args->waitq);
669
670out:
671 vmem_free(buf, chunk_size);
672 do_exit(0);
673
674 return rc; /* Unreachable, due to do_exit() */
675}
676
677static int
678zpios_thread_done(run_args_t *run_args)
679{
680 ASSERT(run_args->threads_done <= run_args->thread_count);
681 return (run_args->threads_done == run_args->thread_count);
682}
683
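/*
 * Controller for one run: create a kthread per requested thread, wake
 * them for the write phase, wait for all of them to finish, then repeat
 * for the read phase.  The first non-zero thread error (if any) is
 * returned.
 */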
684static int
685zpios_threads_run(run_args_t *run_args)
686{
687 struct task_struct *tsk, **tsks;
688 thread_data_t *thr = NULL;
689 zpios_time_t *tt = &(run_args->stats.total_time);
690 zpios_time_t *tw = &(run_args->stats.wr_time);
691 zpios_time_t *tr = &(run_args->stats.rd_time);
692 int i, rc = 0, tc = run_args->thread_count;
693
694 tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP);
695 if (tsks == NULL) {
696 rc = -ENOMEM;
697 goto cleanup2;
698 }
699
700 run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP);
701 if (run_args->threads == NULL) {
702 rc = -ENOMEM;
703 goto cleanup;
704 }
705
706 init_waitqueue_head(&run_args->waitq);
707 run_args->threads_done = 0;
708
709 /* Create all the needed threads which will sleep until awoken */
710 for (i = 0; i < tc; i++) {
711 thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP);
712 if (thr == NULL) {
713 rc = -ENOMEM;
714 goto taskerr;
715 }
716
717 thr->thread_no = i;
718 thr->run_args = run_args;
719 thr->rc = 0;
720 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
721 run_args->threads[i] = thr;
722
723 tsk = kthread_create(zpios_thread_main, (void *)thr,
724 "%s/%d", "zpios_io", i);
725 if (IS_ERR(tsk)) {
726 rc = -EINVAL;
727 goto taskerr;
728 }
729
730 tsks[i] = tsk;
731 }
732
733 tt->start = zpios_timespec_now();
734
735 /* Wake up all threads for write phase */
736 (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
737 for (i = 0; i < tc; i++)
738 wake_up_process(tsks[i]);
739
740 /* Wait for write phase to complete */
741 tw->start = zpios_timespec_now();
742 wait_event(run_args->waitq, zpios_thread_done(run_args));
743 tw->stop = zpios_timespec_now();
744 (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
745
746 for (i = 0; i < tc; i++) {
747 thr = run_args->threads[i];
748
749 mutex_enter(&thr->lock);
750
751 if (!rc && thr->rc)
752 rc = thr->rc;
753
754 run_args->stats.wr_data += thr->stats.wr_data;
755 run_args->stats.wr_chunks += thr->stats.wr_chunks;
756 mutex_exit(&thr->lock);
757 }
758
759 if (rc) {
760 /* Wake up all threads and tell them to exit */
 761 for (i = 0; i < tc; i++) {
 762 thr = run_args->threads[i];
 763 mutex_enter(&thr->lock);
 764 thr->rc = rc;
 765 mutex_exit(&thr->lock);
 766 wake_up_process(tsks[i]);
 767 }
768 goto out;
769 }
770
771 mutex_enter(&run_args->lock_ctl);
772 ASSERT(run_args->threads_done == run_args->thread_count);
773 run_args->threads_done = 0;
774 mutex_exit(&run_args->lock_ctl);
775
776 /* Wake up all threads for read phase */
777 (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
778 for (i = 0; i < tc; i++)
779 wake_up_process(tsks[i]);
780
781 /* Wait for read phase to complete */
782 tr->start = zpios_timespec_now();
783 wait_event(run_args->waitq, zpios_thread_done(run_args));
784 tr->stop = zpios_timespec_now();
785 (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
786
787 for (i = 0; i < tc; i++) {
788 thr = run_args->threads[i];
789
790 mutex_enter(&thr->lock);
791
792 if (!rc && thr->rc)
793 rc = thr->rc;
794
795 run_args->stats.rd_data += thr->stats.rd_data;
796 run_args->stats.rd_chunks += thr->stats.rd_chunks;
797 mutex_exit(&thr->lock);
798 }
799out:
800 tt->stop = zpios_timespec_now();
801 tt->delta = zpios_timespec_sub(tt->stop, tt->start);
802 tw->delta = zpios_timespec_sub(tw->stop, tw->start);
803 tr->delta = zpios_timespec_sub(tr->stop, tr->start);
804
805cleanup:
806 kmem_free(tsks, sizeof(struct task_struct *) * tc);
807cleanup2:
808 /* Returns first encountered thread error (if any) */
809 return rc;
810
811taskerr:
812 /* Destroy all threads that were created successfully */
813 for (i = 0; i < tc; i++)
814 if (tsks[i] != NULL)
815 (void) kthread_stop(tsks[i]);
816
817 goto cleanup;
818}
819
820static int
821zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
822 int data_size, void *data)
823{
 824 run_args_t *run_args = NULL;
825 zpios_stats_t *stats = (zpios_stats_t *)data;
826 int i, n, m, size, rc;
827
828 if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
829 (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
830 zpios_print(file, "Invalid chunk_size, region_size, "
831 "thread_count, or region_count, %d\n", -EINVAL);
832 return -EINVAL;
833 }
834
835 if (!(kcmd->cmd_flags & DMU_WRITE) ||
836 !(kcmd->cmd_flags & DMU_READ)) {
837 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
838 "and DMU_READ must be set, %d\n", -EINVAL);
839 return -EINVAL;
840 }
841
842 if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
843 (kcmd->cmd_flags & DMU_VERIFY)) {
844 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
845 "with DMU_VERIFY, used for performance analysis "
846 "only, %d\n", -EINVAL);
847 return -EINVAL;
848 }
849
 850 /* Opaque data on return contains structs of the following form:
 851 *
 852 * zpios_stats_t stats[];
 853 * stats[0] = run_args->stats;
 854 * stats[1 .. N] = threads[0 .. N-1]->stats;
 855 * stats[N+1 .. N+M] = regions[0 .. M-1].stats;
 856 *
 857 * Where N is the number of threads, and M is the number of regions.
 858 */
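 /*
 * For example, a user-space consumer might index the returned buffer
 * as follows (sketch only, variable names hypothetical):
 *
 *   zpios_stats_t *stats   = (zpios_stats_t *)cmd_data;
 *   zpios_stats_t *summary = &stats[0];
 *   zpios_stats_t *threads = &stats[1];
 *   zpios_stats_t *regions = &stats[1 + thread_count];
 */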
859 size = (sizeof(zpios_stats_t) +
860 (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) +
861 (kcmd->cmd_region_count * sizeof(zpios_stats_t)));
862 if (data_size < size) {
863 zpios_print(file, "Invalid size, command data buffer "
864 "size too small, (%d < %d)\n", data_size, size);
865 return -ENOSPC;
866 }
867
868 rc = zpios_setup_run(&run_args, kcmd, file);
869 if (rc)
870 return rc;
871
872 rc = zpios_threads_run(run_args);
873 zpios_remove_objset(run_args);
874 if (rc)
875 goto cleanup;
876
877 if (stats) {
878 n = 1;
879 m = 1 + kcmd->cmd_thread_count;
880 stats[0] = run_args->stats;
881
882 for (i = 0; i < kcmd->cmd_thread_count; i++)
883 stats[n+i] = run_args->threads[i]->stats;
884
885 for (i = 0; i < kcmd->cmd_region_count; i++)
886 stats[m+i] = run_args->regions[i].stats;
887 }
888
 889cleanup:
 890 /* The post-run upcall must run before run_args is freed */
 891 (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
 892 zpios_cleanup_run(run_args);
 893
 894 return rc;
895}
896
897static int
898zpios_open(struct inode *inode, struct file *file)
899{
900 unsigned int minor = iminor(inode);
901 zpios_info_t *info;
902
903 if (minor >= ZPIOS_MINORS)
904 return -ENXIO;
905
906 info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP);
907 if (info == NULL)
908 return -ENOMEM;
909
910 spin_lock_init(&info->info_lock);
911 info->info_size = ZPIOS_INFO_BUFFER_SIZE;
 912 info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
913 if (info->info_buffer == NULL) {
914 kmem_free(info, sizeof(*info));
915 return -ENOMEM;
916 }
917
918 info->info_head = info->info_buffer;
919 file->private_data = (void *)info;
920
921 return 0;
922}
923
924static int
925zpios_release(struct inode *inode, struct file *file)
926{
927 unsigned int minor = iminor(inode);
928 zpios_info_t *info = (zpios_info_t *)file->private_data;
929
930 if (minor >= ZPIOS_MINORS)
931 return -ENXIO;
932
933 ASSERT(info);
934 ASSERT(info->info_buffer);
935
936 vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
937 kmem_free(info, sizeof(*info));
938
939 return 0;
940}
941
942static int
943zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
944{
945 zpios_info_t *info = (zpios_info_t *)file->private_data;
946
947 ASSERT(info);
948 ASSERT(info->info_buffer);
949
950 spin_lock(&info->info_lock);
951 memset(info->info_buffer, 0, info->info_size);
952 info->info_head = info->info_buffer;
953 spin_unlock(&info->info_lock);
954
955 return 0;
956}
957
958static int
959zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
960{
961 zpios_info_t *info = (zpios_info_t *)file->private_data;
962 char *buf;
963 int min, size, rc = 0;
964
965 ASSERT(info);
966 ASSERT(info->info_buffer);
967
968 spin_lock(&info->info_lock);
969 if (kcfg->cfg_arg1 > 0) {
970
971 size = kcfg->cfg_arg1;
972 buf = (char *)vmem_alloc(size, KM_SLEEP);
973 if (buf == NULL) {
974 rc = -ENOMEM;
975 goto out;
976 }
977
 978 /* Zero fill and truncate contents when copying the buffer */
979 min = ((size < info->info_size) ? size : info->info_size);
980 memset(buf, 0, size);
981 memcpy(buf, info->info_buffer, min);
982 vmem_free(info->info_buffer, info->info_size);
983 info->info_size = size;
984 info->info_buffer = buf;
985 info->info_head = info->info_buffer;
986 }
987
988 kcfg->cfg_rc1 = info->info_size;
989
990 if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg)))
991 rc = -EFAULT;
992out:
993 spin_unlock(&info->info_lock);
994
995 return rc;
996}
997
998static int
999zpios_ioctl_cfg(struct file *file, unsigned long arg)
1000{
1001 zpios_cfg_t kcfg;
1002 int rc = 0;
1003
1004 if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg)))
1005 return -EFAULT;
1006
1007 if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1008 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1009 kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1010 return -EINVAL;
1011 }
1012
1013 switch (kcfg.cfg_cmd) {
1014 case ZPIOS_CFG_BUFFER_CLEAR:
1015 /* cfg_arg1 - Unused
1016 * cfg_rc1 - Unused
1017 */
1018 rc = zpios_buffer_clear(file, &kcfg, arg);
1019 break;
1020 case ZPIOS_CFG_BUFFER_SIZE:
1021 /* cfg_arg1 - 0 - query size; >0 resize
1022 * cfg_rc1 - Set to current buffer size
1023 */
1024 rc = zpios_buffer_size(file, &kcfg, arg);
1025 break;
1026 default:
1027 zpios_print(file, "Bad config command %d\n",
1028 kcfg.cfg_cmd);
1029 rc = -EINVAL;
1030 break;
1031 }
1032
1033 return rc;
1034}
1035
1036static int
1037zpios_ioctl_cmd(struct file *file, unsigned long arg)
1038{
1039 zpios_cmd_t *kcmd;
1040 void *data = NULL;
1041 int rc = -EINVAL;
1042
1043 kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP);
1044 if (kcmd == NULL) {
1045 zpios_print(file, "Unable to kmem_alloc() %ld byte for "
1046 "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t));
1047 return -ENOMEM;
1048 }
1049
 1050 rc = copy_from_user(kcmd, (zpios_cmd_t *)arg, sizeof(zpios_cmd_t));
1051 if (rc) {
1052 zpios_print(file, "Unable to copy command structure "
1053 "from user to kernel memory, %d\n", rc);
1054 goto out_cmd;
1055 }
1056
1057 if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1058 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
 1059 kcmd->cmd_magic, ZPIOS_CMD_MAGIC);
1060 rc = -EINVAL;
1061 goto out_cmd;
1062 }
1063
1064 /* Allocate memory for any opaque data the caller needed to pass on */
1065 if (kcmd->cmd_data_size > 0) {
1066 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
1067 if (data == NULL) {
1068 zpios_print(file, "Unable to vmem_alloc() %ld "
1069 "bytes for data buffer\n",
1070 (long)kcmd->cmd_data_size);
1071 rc = -ENOMEM;
1072 goto out_cmd;
1073 }
1074
1075 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1076 cmd_data_str)), kcmd->cmd_data_size);
1077 if (rc) {
1078 zpios_print(file, "Unable to copy data buffer "
1079 "from user to kernel memory, %d\n", rc);
1080 goto out_data;
1081 }
1082 }
1083
1084 rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1085
1086 if (data != NULL) {
1087 /* If the test failed do not print out the stats */
1088 if (rc)
1089 goto out_data;
1090
1091 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1092 cmd_data_str)), data, kcmd->cmd_data_size);
1093 if (rc) {
1094 zpios_print(file, "Unable to copy data buffer "
1095 "from kernel to user memory, %d\n", rc);
1096 rc = -EFAULT;
1097 }
1098
1099out_data:
1100 vmem_free(data, kcmd->cmd_data_size);
1101 }
1102out_cmd:
1103 kmem_free(kcmd, sizeof(zpios_cmd_t));
1104
1105 return rc;
1106}
1107
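/*
 * Main ioctl entry point: dispatch ZPIOS_CFG (result buffer management)
 * and ZPIOS_CMD (run one test) requests; tty ioctls and anything else
 * are rejected.
 */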
1108static long
1109zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1110{
1111 unsigned int minor = iminor(file->f_dentry->d_inode);
1112 int rc = 0;
1113
1114 /* Ignore tty ioctls */
1115 if ((cmd & 0xffffff00) == ((int)'T') << 8)
1116 return -ENOTTY;
1117
1118 if (minor >= ZPIOS_MINORS)
1119 return -ENXIO;
1120
1121 switch (cmd) {
1122 case ZPIOS_CFG:
1123 rc = zpios_ioctl_cfg(file, arg);
1124 break;
1125 case ZPIOS_CMD:
1126 rc = zpios_ioctl_cmd(file, arg);
1127 break;
1128 default:
1129 zpios_print(file, "Bad ioctl command %d\n", cmd);
1130 rc = -EINVAL;
1131 break;
1132 }
1133
1134 return rc;
1135}
1136
1137#ifdef CONFIG_COMPAT
1138/* Compatibility handler for ioctls from 32-bit ELF binaries */
1139static long
1140zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1141{
1142 return zpios_unlocked_ioctl(file, cmd, arg);
1143}
1144#endif /* CONFIG_COMPAT */
1145
1146/* I'm not sure why you would want to write into this buffer from
1147 * user space, since its principal use is to pass test status info
1148 * back to user space, but I don't see any reason to prevent it.
1149 */
1150static ssize_t
1151zpios_write(struct file *file, const char __user *buf,
1152 size_t count, loff_t *ppos)
1153{
1154 unsigned int minor = iminor(file->f_dentry->d_inode);
1155 zpios_info_t *info = (zpios_info_t *)file->private_data;
1156 int rc = 0;
1157
1158 if (minor >= ZPIOS_MINORS)
1159 return -ENXIO;
1160
1161 ASSERT(info);
1162 ASSERT(info->info_buffer);
1163
1164 spin_lock(&info->info_lock);
1165
1166 /* Write beyond EOF */
1167 if (*ppos >= info->info_size) {
1168 rc = -EFBIG;
1169 goto out;
1170 }
1171
1172 /* Resize count if beyond EOF */
1173 if (*ppos + count > info->info_size)
1174 count = info->info_size - *ppos;
1175
1176 if (copy_from_user(info->info_buffer + *ppos, buf, count)) {
1177 rc = -EFAULT;
1178 goto out;
1179 }
1180
1181 *ppos += count;
1182 rc = count;
1183out:
1184 spin_unlock(&info->info_lock);
1185 return rc;
1186}
1187
1188static ssize_t
1189zpios_read(struct file *file, char __user *buf,
1190 size_t count, loff_t *ppos)
1191{
1192 unsigned int minor = iminor(file->f_dentry->d_inode);
1193 zpios_info_t *info = (zpios_info_t *)file->private_data;
1194 int rc = 0;
1195
1196 if (minor >= ZPIOS_MINORS)
1197 return -ENXIO;
1198
1199 ASSERT(info);
1200 ASSERT(info->info_buffer);
1201
1202 spin_lock(&info->info_lock);
1203
1204 /* Read beyond EOF */
1205 if (*ppos >= info->info_size)
1206 goto out;
1207
1208 /* Resize count if beyond EOF */
1209 if (*ppos + count > info->info_size)
1210 count = info->info_size - *ppos;
1211
1212 if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
1213 rc = -EFAULT;
1214 goto out;
1215 }
1216
1217 *ppos += count;
1218 rc = count;
1219out:
1220 spin_unlock(&info->info_lock);
1221 return rc;
1222}
1223
1224static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1225{
1226 unsigned int minor = iminor(file->f_dentry->d_inode);
1227 zpios_info_t *info = (zpios_info_t *)file->private_data;
1228 int rc = -EINVAL;
1229
1230 if (minor >= ZPIOS_MINORS)
1231 return -ENXIO;
1232
1233 ASSERT(info);
1234 ASSERT(info->info_buffer);
1235
1236 spin_lock(&info->info_lock);
1237
1238 switch (origin) {
1239 case 0: /* SEEK_SET - No-op just do it */
1240 break;
1241 case 1: /* SEEK_CUR - Seek from current */
1242 offset = file->f_pos + offset;
1243 break;
1244 case 2: /* SEEK_END - Seek from end */
1245 offset = info->info_size + offset;
1246 break;
1247 }
1248
1249 if (offset >= 0) {
1250 file->f_pos = offset;
1251 file->f_version = 0;
1252 rc = offset;
1253 }
1254
1255 spin_unlock(&info->info_lock);
1256
1257 return rc;
1258}
1259
1260static struct cdev zpios_cdev;
1261static struct file_operations zpios_fops = {
1262 .owner = THIS_MODULE,
1263 .open = zpios_open,
1264 .release = zpios_release,
1265 .unlocked_ioctl = zpios_unlocked_ioctl,
1266#ifdef CONFIG_COMPAT
1267 .compat_ioctl = zpios_compat_ioctl,
1268#endif
1269 .read = zpios_read,
1270 .write = zpios_write,
1271 .llseek = zpios_seek,
1272};
1273
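/*
 * Module load: register the zpios character device region, add the cdev,
 * and create the class/device so the driver information shows up in
 * sysfs for udev.
 */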
1274static int
1275zpios_init(void)
1276{
1277 dev_t dev;
1278 int rc;
1279
1280 dev = MKDEV(ZPIOS_MAJOR, 0);
1281 if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME)))
1282 goto error;
1283
1284 /* Support for registering a character driver */
1285 cdev_init(&zpios_cdev, &zpios_fops);
1286 zpios_cdev.owner = THIS_MODULE;
1287 kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME);
1288 if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) {
1289 printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc);
1290 kobject_put(&zpios_cdev.kobj);
1291 unregister_chrdev_region(dev, ZPIOS_MINORS);
1292 goto error;
1293 }
1294
 1295 /* Support for udev; make driver info available in sysfs */
1296 zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME);
1297 if (IS_ERR(zpios_class)) {
1298 rc = PTR_ERR(zpios_class);
1299 printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc);
1300 cdev_del(&zpios_cdev);
1301 unregister_chrdev_region(dev, ZPIOS_MINORS);
1302 goto error;
1303 }
1304
1305 zpios_device = spl_device_create(zpios_class, NULL,
1306 dev, NULL, ZPIOS_NAME);
1307 return 0;
1308error:
1309 printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc);
1310 return rc;
1311}
1312
1313static int
1314zpios_fini(void)
1315{
1316 dev_t dev = MKDEV(ZPIOS_MAJOR, 0);
1317
1318 spl_device_destroy(zpios_class, zpios_device, dev);
1319 spl_class_destroy(zpios_class);
1320 cdev_del(&zpios_cdev);
1321 unregister_chrdev_region(dev, ZPIOS_MINORS);
1322
1323 return 0;
1324}
1325
1326spl_module_init(zpios_init);
1327spl_module_exit(zpios_fini);
1328
1329MODULE_AUTHOR("LLNL / Sun");
1330MODULE_DESCRIPTION("Kernel PIOS implementation");
1331MODULE_LICENSE("GPL");