]> git.proxmox.com Git - mirror_zfs-debian.git/blob - module/zpios/pios.c
Imported Upstream version 0.6.5.3
[mirror_zfs-debian.git] / module / zpios / pios.c
1 /*
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
4 * against ZFS while still being flexibly controled from user space.
5 *
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
9 * LLNL-CODE-403049
10 *
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
16 *
17 * This file is part of ZFS on Linux.
18 * For details, see <http://zfsonlinux.org/>.
19 *
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
24 *
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 * for more details.
29 *
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
32 */
33
34 #include <sys/zfs_context.h>
35 #include <sys/dmu.h>
36 #include <sys/txg.h>
37 #include <sys/dsl_destroy.h>
38 #include <linux/miscdevice.h>
39 #include "zpios-internal.h"
40
41
42 static char *zpios_tag = "zpios_tag";
43
44 static int
45 zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
46 {
47 /*
48 * This is stack heavy but it should be OK since we are only
49 * making the upcall between tests when the stack is shallow.
50 */
51 char id[16], chunk_size[16], region_size[16], thread_count[16];
52 char region_count[16], offset[16], region_noise[16], chunk_noise[16];
53 char thread_delay[16], flags[16], result[8];
54 char *argv[16], *envp[4];
55
56 if ((path == NULL) || (strlen(path) == 0))
57 return (-ENOENT);
58
59 snprintf(id, 15, "%d", run_args->id);
60 snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
61 snprintf(region_size, 15, "%lu", (long unsigned) run_args->region_size);
62 snprintf(thread_count, 15, "%u", run_args->thread_count);
63 snprintf(region_count, 15, "%u", run_args->region_count);
64 snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
65 snprintf(region_noise, 15, "%u", run_args->region_noise);
66 snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
67 snprintf(thread_delay, 15, "%u", run_args->thread_delay);
68 snprintf(flags, 15, "0x%x", run_args->flags);
69 snprintf(result, 7, "%d", rc);
70
71 /* Passing 15 args to registered pre/post upcall */
72 argv[0] = path;
73 argv[1] = phase;
74 argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
75 argv[3] = id;
76 argv[4] = run_args->pool;
77 argv[5] = chunk_size;
78 argv[6] = region_size;
79 argv[7] = thread_count;
80 argv[8] = region_count;
81 argv[9] = offset;
82 argv[10] = region_noise;
83 argv[11] = chunk_noise;
84 argv[12] = thread_delay;
85 argv[13] = flags;
86 argv[14] = result;
87 argv[15] = NULL;
88
89 /* Passing environment for user space upcall */
90 envp[0] = "HOME=/";
91 envp[1] = "TERM=linux";
92 envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
93 envp[3] = NULL;
94
95 return (call_usermodehelper(path, argv, envp, UMH_WAIT_PROC));
96 }
97
/*
 * printf()-style append to the per-open status buffer which user space
 * later retrieves with read(2).  Returns the number of characters
 * written, or -EOVERFLOW once the head has entered the redzone.
 */
static int
zpios_print(struct file *file, const char *format, ...)
{
	zpios_info_t *info = (zpios_info_t *)file->private_data;
	va_list adx;
	int rc;

	ASSERT(info);
	ASSERT(info->info_buffer);

	va_start(adx, format);
	spin_lock(&info->info_lock);

	/* Don't allow the kernel to start a write in the red zone */
	if ((int)(info->info_head - info->info_buffer) >
	    (info->info_size - ZPIOS_INFO_BUFFER_REDZONE)) {
		rc = -EOVERFLOW;
	} else {
		/*
		 * vsprintf() is unbounded; the redzone check above is the
		 * only guard, so any single message is assumed to fit
		 * within ZPIOS_INFO_BUFFER_REDZONE bytes.
		 */
		rc = vsprintf(info->info_head, format, adx);
		if (rc >= 0)
			info->info_head += rc;
	}

	spin_unlock(&info->info_lock);
	va_end(adx);

	return (rc);
}
126
127 static uint64_t
128 zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
129 {
130 struct dmu_tx *tx;
131 uint64_t obj = 0ULL;
132 int rc;
133
134 tx = dmu_tx_create(os);
135 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
136 rc = dmu_tx_assign(tx, TXG_WAIT);
137 if (rc) {
138 zpios_print(run_args->file,
139 "dmu_tx_assign() failed: %d\n", rc);
140 dmu_tx_abort(tx);
141 return (obj);
142 }
143
144 obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
145 rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
146 if (rc) {
147 zpios_print(run_args->file,
148 "dmu_object_set_blocksize() failed: %d\n", rc);
149 dmu_tx_abort(tx);
150 return (obj);
151 }
152
153 dmu_tx_commit(tx);
154
155 return (obj);
156 }
157
158 static int
159 zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
160 {
161 struct dmu_tx *tx;
162 int rc;
163
164 tx = dmu_tx_create(os);
165 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
166 rc = dmu_tx_assign(tx, TXG_WAIT);
167 if (rc) {
168 zpios_print(run_args->file,
169 "dmu_tx_assign() failed: %d\n", rc);
170 dmu_tx_abort(tx);
171 return (rc);
172 }
173
174 rc = dmu_object_free(os, obj, tx);
175 if (rc) {
176 zpios_print(run_args->file,
177 "dmu_object_free() failed: %d\n", rc);
178 dmu_tx_abort(tx);
179 return (rc);
180 }
181
182 dmu_tx_commit(tx);
183
184 return (0);
185 }
186
/*
 * Create and own the per-run objset "<pool>/id_<id>" and initialize the
 * region descriptors.  In single-shared-file mode one object is created
 * up front and shared by all regions; in file-per-process (DMU_FPP)
 * mode each region gets its own object.  On any failure the objset (if
 * created) is destroyed again.  The whole operation is bracketed by the
 * PRE/POST_CREATE upcalls and timed in to stats.cr_time.
 */
static int
zpios_dmu_setup(run_args_t *run_args)
{
	zpios_time_t *t = &(run_args->stats.cr_time);
	objset_t *os;
	char name[32];
	uint64_t obj = 0ULL;
	int i, rc = 0, rc2;

	(void) zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
	t->start = zpios_timespec_now();

	(void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
	rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
	if (rc) {
		zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
		    "failed: %d\n", name, rc);
		goto out;
	}

	rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
	if (rc) {
		zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
		    "failed: %d\n", name, rc);
		goto out_destroy;
	}

	/* Shared-file mode: create the single object all regions use. */
	if (!(run_args->flags & DMU_FPP)) {
		obj = zpios_dmu_object_create(run_args, os);
		if (obj == 0) {
			rc = -EBADF;
			zpios_print(run_args->file, "Error zpios_dmu_"
			    "object_create() failed, %d\n", rc);
			goto out_destroy;
		}
	}

	for (i = 0; i < run_args->region_count; i++) {
		zpios_region_t *region;

		region = &run_args->regions[i];
		mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);

		if (run_args->flags & DMU_FPP) {
			/* File per process */
			region->obj.os = os;
			region->obj.obj = zpios_dmu_object_create(run_args, os);
			ASSERT(region->obj.obj > 0); /* XXX - Handle this */
			region->wr_offset = run_args->offset;
			region->rd_offset = run_args->offset;
			region->init_offset = run_args->offset;
			region->max_offset = run_args->offset +
			    run_args->region_size;
		} else {
			/* Single shared file: regions are offset * i apart. */
			region->obj.os = os;
			region->obj.obj = obj;
			region->wr_offset = run_args->offset * i;
			region->rd_offset = run_args->offset * i;
			region->init_offset = run_args->offset * i;
			region->max_offset = run_args->offset *
			    i + run_args->region_size;
		}
	}

	run_args->os = os;
out_destroy:
	/* Destroy the objset only when an error occurred above. */
	if (rc) {
		rc2 = dsl_destroy_head(name);
		if (rc2)
			zpios_print(run_args->file, "Error dsl_destroy_head"
			    "(%s, ...) failed: %d\n", name, rc2);
	}
out:
	t->stop = zpios_timespec_now();
	t->delta = zpios_timespec_sub(t->stop, t->start);
	(void) zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);

	return (rc);
}
267
/*
 * Allocate and populate a run_args_t from the user supplied command,
 * then create the backing objset and objects via zpios_dmu_setup().
 * On success *run_args owns the allocation (freed later by
 * zpios_cleanup_run()); on failure everything is torn down and
 * *run_args is reset to NULL.
 */
static int
zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
{
	run_args_t *ra;
	int rc, size;

	/* Trailing array: one zpios_region_t per configured region. */
	size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t);

	ra = vmem_zalloc(size, KM_SLEEP);
	if (ra == NULL) {
		zpios_print(file, "Unable to vmem_zalloc() %d bytes "
		    "for regions\n", size);
		return (-ENOMEM);
	}

	*run_args = ra;
	/*
	 * ra was zero filled above, so copying at most SIZE-1 bytes
	 * guarantees these strings remain NUL terminated even though
	 * strncpy() itself does not terminate on truncation.
	 */
	strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
	strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
	strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
	strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
	ra->id = kcmd->cmd_id;
	ra->chunk_size = kcmd->cmd_chunk_size;
	ra->thread_count = kcmd->cmd_thread_count;
	ra->region_count = kcmd->cmd_region_count;
	ra->region_size = kcmd->cmd_region_size;
	ra->offset = kcmd->cmd_offset;
	ra->region_noise = kcmd->cmd_region_noise;
	ra->chunk_noise = kcmd->cmd_chunk_noise;
	ra->thread_delay = kcmd->cmd_thread_delay;
	ra->flags = kcmd->cmd_flags;
	ra->stats.wr_data = 0;
	ra->stats.wr_chunks = 0;
	ra->stats.rd_data = 0;
	ra->stats.rd_chunks = 0;
	ra->region_next = 0;
	ra->file = file;
	mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);

	(void) zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);

	rc = zpios_dmu_setup(ra);
	if (rc) {
		/* Unwind in reverse order of initialization. */
		mutex_destroy(&ra->lock_ctl);
		mutex_destroy(&ra->lock_work);
		vmem_free(ra, size);
		*run_args = NULL;
	}

	return (rc);
}
319
/*
 * Hand the calling thread its next (object, offset) work unit.  Scans
 * the regions starting from region_next looking for one that still has
 * room for a *chunk_size sized IO in the direction selected by flags
 * (DMU_WRITE or DMU_READ), claims the chunk by advancing the region's
 * offset, and optionally randomizes where the next scan starts
 * (region_noise).  Returns 1 when a work item was produced and 0 once
 * every region is exhausted for this phase.  Note *chunk_size is an
 * input here; only *obj, *offset and *region are outputs.
 */
static int
zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
    __u32 *chunk_size, zpios_region_t **region, __u32 flags)
{
	int i, j, count = 0;
	unsigned int random_int;

	get_random_bytes(&random_int, sizeof (unsigned int));

	mutex_enter(&run_args->lock_work);
	i = run_args->region_next;

	/*
	 * XXX: I don't much care for this chunk selection mechansim
	 * there's the potential to burn a lot of time here doing nothing
	 * useful while holding the global lock.  This could give some
	 * misleading performance results.  I'll fix it latter.
	 */
	while (count < run_args->region_count) {
		__u64 *rw_offset;
		zpios_time_t *rw_time;

		j = i % run_args->region_count;
		*region = &(run_args->regions[j]);

		/* Select this region's offset/time for the IO direction. */
		if (flags & DMU_WRITE) {
			rw_offset = &((*region)->wr_offset);
			rw_time = &((*region)->stats.wr_time);
		} else {
			rw_offset = &((*region)->rd_offset);
			rw_time = &((*region)->stats.rd_time);
		}

		/* test if region is fully written */
		if (*rw_offset + *chunk_size > (*region)->max_offset) {
			i++;
			count++;

			/* Record the phase stop time once, on first pass. */
			if (unlikely(rw_time->stop.ts_sec == 0) &&
			    unlikely(rw_time->stop.ts_nsec == 0))
				rw_time->stop = zpios_timespec_now();

			continue;
		}

		/* Claim the chunk by advancing the region offset. */
		*offset = *rw_offset;
		*obj = (*region)->obj;
		*rw_offset += *chunk_size;

		/* update ctl structure */
		if (run_args->region_noise) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			run_args->region_next +=
			    random_int % run_args->region_noise;
		} else {
			run_args->region_next++;
		}

		mutex_exit(&run_args->lock_work);
		return (1);
	}

	/* nothing left to do */
	mutex_exit(&run_args->lock_work);

	return (0);
}
387
/*
 * Tear down the per-run objset.  When DMU_REMOVE is set the test
 * objects are freed first (each region's object in FPP mode, the single
 * shared object otherwise), the objset is then disowned, and finally
 * the dataset itself is destroyed.  Bracketed by the PRE/POST_REMOVE
 * upcalls and timed in to stats.rm_time.
 */
static void
zpios_remove_objset(run_args_t *run_args)
{
	zpios_time_t *t = &(run_args->stats.rm_time);
	zpios_region_t *region;
	char name[32];
	int rc = 0, i;

	(void) zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
	t->start = zpios_timespec_now();

	(void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);

	if (run_args->flags & DMU_REMOVE) {
		if (run_args->flags & DMU_FPP) {
			/* File per process: free every region's object. */
			for (i = 0; i < run_args->region_count; i++) {
				region = &run_args->regions[i];
				rc = zpios_dmu_object_free(run_args,
				    region->obj.os, region->obj.obj);
				if (rc)
					zpios_print(run_args->file,
					    "Error removing object %d, %d\n",
					    (int)region->obj.obj, rc);
			}
		} else {
			/* Shared file: all regions reference regions[0]'s object. */
			region = &run_args->regions[0];
			rc = zpios_dmu_object_free(run_args,
			    region->obj.os, region->obj.obj);
			if (rc)
				zpios_print(run_args->file,
				    "Error removing object %d, %d\n",
				    (int)region->obj.obj, rc);
		}
	}

	/* Disown before destroy; the objset must not be held when destroyed. */
	dmu_objset_disown(run_args->os, zpios_tag);

	if (run_args->flags & DMU_REMOVE) {
		rc = dsl_destroy_head(name);
		if (rc)
			zpios_print(run_args->file, "Error dsl_destroy_head"
			    "(%s, ...) failed: %d\n", name, rc);
	}

	t->stop = zpios_timespec_now();
	t->delta = zpios_timespec_sub(t->stop, t->start);
	(void) zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
}
436
437 static void
438 zpios_cleanup_run(run_args_t *run_args)
439 {
440 int i, size = 0;
441
442 if (run_args == NULL)
443 return;
444
445 if (run_args->threads != NULL) {
446 for (i = 0; i < run_args->thread_count; i++) {
447 if (run_args->threads[i]) {
448 mutex_destroy(&run_args->threads[i]->lock);
449 kmem_free(run_args->threads[i],
450 sizeof (thread_data_t));
451 }
452 }
453
454 kmem_free(run_args->threads,
455 sizeof (thread_data_t *) * run_args->thread_count);
456 }
457
458 for (i = 0; i < run_args->region_count; i++)
459 mutex_destroy(&run_args->regions[i].lock);
460
461 mutex_destroy(&run_args->lock_work);
462 mutex_destroy(&run_args->lock_ctl);
463 size = run_args->region_count * sizeof (zpios_region_t);
464
465 vmem_free(run_args, sizeof (*run_args) + size);
466 }
467
/*
 * Write one chunk to the object in its own transaction.  With
 * DMU_WRITE_NOWAIT the tx is assigned TXG_NOWAIT and an ERESTART
 * (open txg temporarily full) is handled by waiting for the next txg,
 * aborting the stale tx, and retrying with a fresh one.  Returns 0 on
 * success or the dmu_tx_assign() error.
 */
static int
zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
    uint64_t offset, uint64_t size, const void *buf)
{
	struct dmu_tx *tx;
	int rc, how = TXG_WAIT;
	// int flags = 0;

	if (run_args->flags & DMU_WRITE_NOWAIT)
		how = TXG_NOWAIT;

	while (1) {
		tx = dmu_tx_create(os);
		dmu_tx_hold_write(tx, object, offset, size);
		rc = dmu_tx_assign(tx, how);

		if (rc) {
			/*
			 * ERESTART under TXG_NOWAIT: wait for the txg to
			 * roll, discard the failed tx, and retry.  Note
			 * abort here is legal because the assign failed.
			 */
			if (rc == ERESTART && how == TXG_NOWAIT) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				continue;
			}
			zpios_print(run_args->file,
			    "Error in dmu_tx_assign(), %d", rc);
			dmu_tx_abort(tx);
			return (rc);
		}
		break;
	}

	// if (run_args->flags & DMU_WRITE_ZC)
	// 	flags |= DMU_WRITE_ZEROCOPY;

	dmu_write(os, object, offset, size, buf, tx);
	dmu_tx_commit(tx);

	return (0);
}
506
507 static int
508 zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
509 uint64_t offset, uint64_t size, void *buf)
510 {
511 int flags = 0;
512
513 // if (run_args->flags & DMU_READ_ZC)
514 // flags |= DMU_READ_ZEROCOPY;
515
516 if (run_args->flags & DMU_READ_NOPF)
517 flags |= DMU_READ_NO_PREFETCH;
518
519 return (dmu_read(os, object, offset, size, buf, flags));
520 }
521
/*
 * Body of each IO worker thread.  The thread allocates a private chunk
 * buffer, runs the write phase, parks itself until woken by the
 * controller, then (unless told to exit via thr->rc) runs the read
 * phase.  Per-thread statistics are updated under thr->lock and
 * per-region statistics under region->lock.  The thread terminates
 * itself with do_exit(0).
 */
static int
zpios_thread_main(void *data)
{
	thread_data_t *thr = (thread_data_t *)data;
	run_args_t *run_args = thr->run_args;
	zpios_time_t t;
	dmu_obj_t obj;
	__u64 offset;
	__u32 chunk_size;
	zpios_region_t *region;
	char *buf;
	unsigned int random_int;
	int chunk_noise = run_args->chunk_noise;
	int chunk_noise_tmp = 0;
	int thread_delay = run_args->thread_delay;
	int thread_delay_tmp = 0;
	int i, rc = 0;

	/* Optionally perturb this thread's chunk size by +/- chunk_noise. */
	if (chunk_noise) {
		get_random_bytes(&random_int, sizeof (unsigned int));
		chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
	}

	/*
	 * It's OK to vmem_alloc() this memory because it will be copied
	 * in to the slab and pointers to the slab copy will be setup in
	 * the bio when the IO is submitted.  This of course is not ideal
	 * since we want a zero-copy IO path if possible.  It would be nice
	 * to have direct access to those slab entries.
	 */
	chunk_size = run_args->chunk_size + chunk_noise_tmp;
	buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
	ASSERT(buf);

	/* Trivial data verification pattern for now. */
	if (run_args->flags & DMU_VERIFY)
		memset(buf, 'z', chunk_size);

	/* Write phase */
	mutex_enter(&thr->lock);
	thr->stats.wr_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_WRITE)) {
		/* Optional randomized delay before each IO to add jitter. */
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		t.start = zpios_timespec_now();
		rc = zpios_dmu_write(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_write(): %d\n", rc);
			break;
		}

		/* Per-thread accounting. */
		mutex_enter(&thr->lock);
		thr->stats.wr_data += chunk_size;
		thr->stats.wr_chunks++;
		thr->stats.wr_time.delta = zpios_timespec_add(
		    thr->stats.wr_time.delta, t.delta);
		mutex_exit(&thr->lock);

		/* Per-region accounting. */
		mutex_enter(&region->lock);
		region->stats.wr_data += chunk_size;
		region->stats.wr_chunks++;
		region->stats.wr_time.delta = zpios_timespec_add(
		    region->stats.wr_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.wr_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Tell the controller this thread finished the write phase. */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.wr_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

	/* Park until the controller wakes us for the read phase. */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule();

	/* Check if we should exit */
	mutex_enter(&thr->lock);
	rc = thr->rc;
	mutex_exit(&thr->lock);
	if (rc)
		goto out;

	/* Read phase */
	mutex_enter(&thr->lock);
	thr->stats.rd_time.start = zpios_timespec_now();
	mutex_exit(&thr->lock);

	while (zpios_get_work_item(run_args, &obj, &offset,
	    &chunk_size, &region, DMU_READ)) {
		if (thread_delay) {
			get_random_bytes(&random_int, sizeof (unsigned int));
			thread_delay_tmp = random_int % thread_delay;
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(thread_delay_tmp); /* In jiffies */
		}

		/* Clear the buffer so stale data can't pass verification. */
		if (run_args->flags & DMU_VERIFY)
			memset(buf, 0, chunk_size);

		t.start = zpios_timespec_now();
		rc = zpios_dmu_read(run_args, obj.os, obj.obj,
		    offset, chunk_size, buf);
		t.stop = zpios_timespec_now();
		t.delta = zpios_timespec_sub(t.stop, t.start);

		if (rc) {
			zpios_print(run_args->file, "IO error while doing "
			    "dmu_read(): %d\n", rc);
			break;
		}

		/* Trivial data verification, expensive! */
		if (run_args->flags & DMU_VERIFY) {
			for (i = 0; i < chunk_size; i++) {
				if (buf[i] != 'z') {
					zpios_print(run_args->file,
					    "IO verify error: %d/%d/%d\n",
					    (int)obj.obj, (int)offset,
					    (int)chunk_size);
					break;
				}
			}
		}

		/* Per-thread accounting. */
		mutex_enter(&thr->lock);
		thr->stats.rd_data += chunk_size;
		thr->stats.rd_chunks++;
		thr->stats.rd_time.delta = zpios_timespec_add(
		    thr->stats.rd_time.delta, t.delta);
		mutex_exit(&thr->lock);

		/* Per-region accounting. */
		mutex_enter(&region->lock);
		region->stats.rd_data += chunk_size;
		region->stats.rd_chunks++;
		region->stats.rd_time.delta = zpios_timespec_add(
		    region->stats.rd_time.delta, t.delta);

		/* First time region was accessed */
		if (region->init_offset == offset)
			region->stats.rd_time.start = t.start;

		mutex_exit(&region->lock);
	}

	/* Tell the controller this thread finished the read phase. */
	mutex_enter(&run_args->lock_ctl);
	run_args->threads_done++;
	mutex_exit(&run_args->lock_ctl);

	mutex_enter(&thr->lock);
	thr->rc = rc;
	thr->stats.rd_time.stop = zpios_timespec_now();
	mutex_exit(&thr->lock);
	wake_up(&run_args->waitq);

out:
	vmem_free(buf, chunk_size);
	do_exit(0);

	return (rc); /* Unreachable, due to do_exit() */
}
704
/*
 * Completion predicate used with wait_event(): true once every IO
 * thread has bumped threads_done for the current phase.  threads_done
 * is intentionally read without lock_ctl; wait_event() re-evaluates
 * the condition after each wake_up(), so a stale read only costs an
 * extra check.
 */
static int
zpios_thread_done(run_args_t *run_args)
{
	ASSERT(run_args->threads_done <= run_args->thread_count);
	return (run_args->threads_done == run_args->thread_count);
}
711
712 static int
713 zpios_threads_run(run_args_t *run_args)
714 {
715 struct task_struct *tsk, **tsks;
716 thread_data_t *thr = NULL;
717 zpios_time_t *tt = &(run_args->stats.total_time);
718 zpios_time_t *tw = &(run_args->stats.wr_time);
719 zpios_time_t *tr = &(run_args->stats.rd_time);
720 int i, rc = 0, tc = run_args->thread_count;
721
722 tsks = kmem_zalloc(sizeof (struct task_struct *) * tc, KM_SLEEP);
723 if (tsks == NULL) {
724 rc = -ENOMEM;
725 goto cleanup2;
726 }
727
728 run_args->threads = kmem_zalloc(sizeof (thread_data_t *)*tc, KM_SLEEP);
729 if (run_args->threads == NULL) {
730 rc = -ENOMEM;
731 goto cleanup;
732 }
733
734 init_waitqueue_head(&run_args->waitq);
735 run_args->threads_done = 0;
736
737 /* Create all the needed threads which will sleep until awoken */
738 for (i = 0; i < tc; i++) {
739 thr = kmem_zalloc(sizeof (thread_data_t), KM_SLEEP);
740 if (thr == NULL) {
741 rc = -ENOMEM;
742 goto taskerr;
743 }
744
745 thr->thread_no = i;
746 thr->run_args = run_args;
747 thr->rc = 0;
748 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
749 run_args->threads[i] = thr;
750
751 tsk = kthread_create(zpios_thread_main, (void *)thr,
752 "%s/%d", "zpios_io", i);
753 if (IS_ERR(tsk)) {
754 rc = -EINVAL;
755 goto taskerr;
756 }
757
758 tsks[i] = tsk;
759 }
760
761 tt->start = zpios_timespec_now();
762
763 /* Wake up all threads for write phase */
764 (void) zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
765 for (i = 0; i < tc; i++)
766 wake_up_process(tsks[i]);
767
768 /* Wait for write phase to complete */
769 tw->start = zpios_timespec_now();
770 wait_event(run_args->waitq, zpios_thread_done(run_args));
771 tw->stop = zpios_timespec_now();
772 (void) zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
773
774 for (i = 0; i < tc; i++) {
775 thr = run_args->threads[i];
776
777 mutex_enter(&thr->lock);
778
779 if (!rc && thr->rc)
780 rc = thr->rc;
781
782 run_args->stats.wr_data += thr->stats.wr_data;
783 run_args->stats.wr_chunks += thr->stats.wr_chunks;
784 mutex_exit(&thr->lock);
785 }
786
787 if (rc) {
788 /* Wake up all threads and tell them to exit */
789 for (i = 0; i < tc; i++) {
790 mutex_enter(&thr->lock);
791 thr->rc = rc;
792 mutex_exit(&thr->lock);
793
794 wake_up_process(tsks[i]);
795 }
796 goto out;
797 }
798
799 mutex_enter(&run_args->lock_ctl);
800 ASSERT(run_args->threads_done == run_args->thread_count);
801 run_args->threads_done = 0;
802 mutex_exit(&run_args->lock_ctl);
803
804 /* Wake up all threads for read phase */
805 (void) zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
806 for (i = 0; i < tc; i++)
807 wake_up_process(tsks[i]);
808
809 /* Wait for read phase to complete */
810 tr->start = zpios_timespec_now();
811 wait_event(run_args->waitq, zpios_thread_done(run_args));
812 tr->stop = zpios_timespec_now();
813 (void) zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
814
815 for (i = 0; i < tc; i++) {
816 thr = run_args->threads[i];
817
818 mutex_enter(&thr->lock);
819
820 if (!rc && thr->rc)
821 rc = thr->rc;
822
823 run_args->stats.rd_data += thr->stats.rd_data;
824 run_args->stats.rd_chunks += thr->stats.rd_chunks;
825 mutex_exit(&thr->lock);
826 }
827 out:
828 tt->stop = zpios_timespec_now();
829 tt->delta = zpios_timespec_sub(tt->stop, tt->start);
830 tw->delta = zpios_timespec_sub(tw->stop, tw->start);
831 tr->delta = zpios_timespec_sub(tr->stop, tr->start);
832
833 cleanup:
834 kmem_free(tsks, sizeof (struct task_struct *) * tc);
835 cleanup2:
836 /* Returns first encountered thread error (if any) */
837 return (rc);
838
839 taskerr:
840 /* Destroy all threads that were created successfully */
841 for (i = 0; i < tc; i++)
842 if (tsks[i] != NULL)
843 (void) kthread_stop(tsks[i]);
844
845 goto cleanup;
846 }
847
848 static int
849 zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
850 int data_size, void *data)
851 {
852 run_args_t *run_args = { 0 };
853 zpios_stats_t *stats = (zpios_stats_t *)data;
854 int i, n, m, size, rc;
855
856 if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
857 (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
858 zpios_print(file, "Invalid chunk_size, region_size, "
859 "thread_count, or region_count, %d\n", -EINVAL);
860 return (-EINVAL);
861 }
862
863 if (!(kcmd->cmd_flags & DMU_WRITE) ||
864 !(kcmd->cmd_flags & DMU_READ)) {
865 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
866 "and DMU_READ must be set, %d\n", -EINVAL);
867 return (-EINVAL);
868 }
869
870 if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
871 (kcmd->cmd_flags & DMU_VERIFY)) {
872 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
873 "with DMU_VERIFY, used for performance analysis "
874 "only, %d\n", -EINVAL);
875 return (-EINVAL);
876 }
877
878 /*
879 * Opaque data on return contains structs of the following form:
880 *
881 * zpios_stat_t stats[];
882 * stats[0] = run_args->stats;
883 * stats[1-N] = threads[N]->stats;
884 * stats[N+1-M] = regions[M]->stats;
885 *
886 * Where N is the number of threads, and M is the number of regions.
887 */
888 size = (sizeof (zpios_stats_t) +
889 (kcmd->cmd_thread_count * sizeof (zpios_stats_t)) +
890 (kcmd->cmd_region_count * sizeof (zpios_stats_t)));
891 if (data_size < size) {
892 zpios_print(file, "Invalid size, command data buffer "
893 "size too small, (%d < %d)\n", data_size, size);
894 return (-ENOSPC);
895 }
896
897 rc = zpios_setup_run(&run_args, kcmd, file);
898 if (rc)
899 return (rc);
900
901 rc = zpios_threads_run(run_args);
902 zpios_remove_objset(run_args);
903 if (rc)
904 goto cleanup;
905
906 if (stats) {
907 n = 1;
908 m = 1 + kcmd->cmd_thread_count;
909 stats[0] = run_args->stats;
910
911 for (i = 0; i < kcmd->cmd_thread_count; i++)
912 stats[n+i] = run_args->threads[i]->stats;
913
914 for (i = 0; i < kcmd->cmd_region_count; i++)
915 stats[m+i] = run_args->regions[i].stats;
916 }
917
918 cleanup:
919 zpios_cleanup_run(run_args);
920
921 (void) zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
922
923 return (rc);
924 }
925
926 static int
927 zpios_open(struct inode *inode, struct file *file)
928 {
929 zpios_info_t *info;
930
931 info = (zpios_info_t *)kmem_alloc(sizeof (*info), KM_SLEEP);
932 if (info == NULL)
933 return (-ENOMEM);
934
935 spin_lock_init(&info->info_lock);
936 info->info_size = ZPIOS_INFO_BUFFER_SIZE;
937 info->info_buffer =
938 (char *) vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
939 if (info->info_buffer == NULL) {
940 kmem_free(info, sizeof (*info));
941 return (-ENOMEM);
942 }
943
944 info->info_head = info->info_buffer;
945 file->private_data = (void *)info;
946
947 return (0);
948 }
949
950 static int
951 zpios_release(struct inode *inode, struct file *file)
952 {
953 zpios_info_t *info = (zpios_info_t *)file->private_data;
954
955 ASSERT(info);
956 ASSERT(info->info_buffer);
957
958 vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
959 kmem_free(info, sizeof (*info));
960
961 return (0);
962 }
963
964 static int
965 zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
966 {
967 zpios_info_t *info = (zpios_info_t *)file->private_data;
968
969 ASSERT(info);
970 ASSERT(info->info_buffer);
971
972 spin_lock(&info->info_lock);
973 memset(info->info_buffer, 0, info->info_size);
974 info->info_head = info->info_buffer;
975 spin_unlock(&info->info_lock);
976
977 return (0);
978 }
979
980 static int
981 zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
982 {
983 zpios_info_t *info = (zpios_info_t *)file->private_data;
984 char *buf;
985 int min, size, rc = 0;
986
987 ASSERT(info);
988 ASSERT(info->info_buffer);
989
990 spin_lock(&info->info_lock);
991 if (kcfg->cfg_arg1 > 0) {
992
993 size = kcfg->cfg_arg1;
994 buf = (char *)vmem_alloc(size, KM_SLEEP);
995 if (buf == NULL) {
996 rc = -ENOMEM;
997 goto out;
998 }
999
1000 /* Zero fill and truncate contents when coping buffer */
1001 min = ((size < info->info_size) ? size : info->info_size);
1002 memset(buf, 0, size);
1003 memcpy(buf, info->info_buffer, min);
1004 vmem_free(info->info_buffer, info->info_size);
1005 info->info_size = size;
1006 info->info_buffer = buf;
1007 info->info_head = info->info_buffer;
1008 }
1009
1010 kcfg->cfg_rc1 = info->info_size;
1011
1012 if (copy_to_user((struct zpios_cfg_t __user *)arg,
1013 kcfg, sizeof (*kcfg)))
1014 rc = -EFAULT;
1015 out:
1016 spin_unlock(&info->info_lock);
1017
1018 return (rc);
1019 }
1020
1021 static int
1022 zpios_ioctl_cfg(struct file *file, unsigned long arg)
1023 {
1024 zpios_cfg_t kcfg;
1025 int rc = 0;
1026
1027 if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof (kcfg)))
1028 return (-EFAULT);
1029
1030 if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1031 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1032 kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1033 return (-EINVAL);
1034 }
1035
1036 switch (kcfg.cfg_cmd) {
1037 case ZPIOS_CFG_BUFFER_CLEAR:
1038 /*
1039 * cfg_arg1 - Unused
1040 * cfg_rc1 - Unused
1041 */
1042 rc = zpios_buffer_clear(file, &kcfg, arg);
1043 break;
1044 case ZPIOS_CFG_BUFFER_SIZE:
1045 /*
1046 * cfg_arg1 - 0 - query size; >0 resize
1047 * cfg_rc1 - Set to current buffer size
1048 */
1049 rc = zpios_buffer_size(file, &kcfg, arg);
1050 break;
1051 default:
1052 zpios_print(file, "Bad config command %d\n",
1053 kcfg.cfg_cmd);
1054 rc = -EINVAL;
1055 break;
1056 }
1057
1058 return (rc);
1059 }
1060
1061 static int
1062 zpios_ioctl_cmd(struct file *file, unsigned long arg)
1063 {
1064 zpios_cmd_t *kcmd;
1065 void *data = NULL;
1066 int rc = -EINVAL;
1067
1068 kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP);
1069 if (kcmd == NULL) {
1070 zpios_print(file, "Unable to kmem_alloc() %ld byte for "
1071 "zpios_cmd_t\n", (long int)sizeof (zpios_cmd_t));
1072 return (-ENOMEM);
1073 }
1074
1075 rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t));
1076 if (rc) {
1077 zpios_print(file, "Unable to copy command structure "
1078 "from user to kernel memory, %d\n", rc);
1079 goto out_cmd;
1080 }
1081
1082 if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1083 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
1084 kcmd->cmd_magic, ZPIOS_CFG_MAGIC);
1085 rc = (-EINVAL);
1086 goto out_cmd;
1087 }
1088
1089 /* Allocate memory for any opaque data the caller needed to pass on */
1090 if (kcmd->cmd_data_size > 0) {
1091 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
1092 if (data == NULL) {
1093 zpios_print(file, "Unable to vmem_alloc() %ld "
1094 "bytes for data buffer\n",
1095 (long)kcmd->cmd_data_size);
1096 rc = -ENOMEM;
1097 goto out_cmd;
1098 }
1099
1100 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1101 cmd_data_str)), kcmd->cmd_data_size);
1102 if (rc) {
1103 zpios_print(file, "Unable to copy data buffer "
1104 "from user to kernel memory, %d\n", rc);
1105 goto out_data;
1106 }
1107 }
1108
1109 rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1110
1111 if (data != NULL) {
1112 /* If the test failed do not print out the stats */
1113 if (rc)
1114 goto out_data;
1115
1116 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1117 cmd_data_str)), data, kcmd->cmd_data_size);
1118 if (rc) {
1119 zpios_print(file, "Unable to copy data buffer "
1120 "from kernel to user memory, %d\n", rc);
1121 rc = -EFAULT;
1122 }
1123
1124 out_data:
1125 vmem_free(data, kcmd->cmd_data_size);
1126 }
1127 out_cmd:
1128 kmem_free(kcmd, sizeof (zpios_cmd_t));
1129
1130 return (rc);
1131 }
1132
1133 static long
1134 zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1135 {
1136 int rc = 0;
1137
1138 /* Ignore tty ioctls */
1139 if ((cmd & 0xffffff00) == ((int)'T') << 8)
1140 return (-ENOTTY);
1141
1142 switch (cmd) {
1143 case ZPIOS_CFG:
1144 rc = zpios_ioctl_cfg(file, arg);
1145 break;
1146 case ZPIOS_CMD:
1147 rc = zpios_ioctl_cmd(file, arg);
1148 break;
1149 default:
1150 zpios_print(file, "Bad ioctl command %d\n", cmd);
1151 rc = -EINVAL;
1152 break;
1153 }
1154
1155 return (rc);
1156 }
1157
#ifdef CONFIG_COMPAT
/*
 * Compatibility handler for ioctls from 32-bit ELF binaries.  The
 * argument is forwarded unchanged to the native handler, which assumes
 * the zpios command structures have the same layout on 32- and 64-bit
 * ABIs.  NOTE(review): confirm zpios_cmd_t/zpios_cfg_t contain no
 * pointer- or long-sized fields whose size differs across ABIs.
 */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return (zpios_unlocked_ioctl(file, cmd, arg));
}
#endif /* CONFIG_COMPAT */
1166
1167 /*
1168 * I'm not sure why you would want to write in to this buffer from
1169 * user space since its principle use is to pass test status info
1170 * back to the user space, but I don't see any reason to prevent it.
1171 */
1172 static ssize_t
1173 zpios_write(struct file *file, const char __user *buf,
1174 size_t count, loff_t *ppos)
1175 {
1176 zpios_info_t *info = (zpios_info_t *)file->private_data;
1177 int rc = 0;
1178
1179 ASSERT(info);
1180 ASSERT(info->info_buffer);
1181
1182 spin_lock(&info->info_lock);
1183
1184 /* Write beyond EOF */
1185 if (*ppos >= info->info_size) {
1186 rc = -EFBIG;
1187 goto out;
1188 }
1189
1190 /* Resize count if beyond EOF */
1191 if (*ppos + count > info->info_size)
1192 count = info->info_size - *ppos;
1193
1194 if (copy_from_user(info->info_buffer, buf, count)) {
1195 rc = -EFAULT;
1196 goto out;
1197 }
1198
1199 *ppos += count;
1200 rc = count;
1201 out:
1202 spin_unlock(&info->info_lock);
1203 return (rc);
1204 }
1205
1206 static ssize_t
1207 zpios_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
1208 {
1209 zpios_info_t *info = (zpios_info_t *)file->private_data;
1210 int rc = 0;
1211
1212 ASSERT(info);
1213 ASSERT(info->info_buffer);
1214
1215 spin_lock(&info->info_lock);
1216
1217 /* Read beyond EOF */
1218 if (*ppos >= info->info_size)
1219 goto out;
1220
1221 /* Resize count if beyond EOF */
1222 if (*ppos + count > info->info_size)
1223 count = info->info_size - *ppos;
1224
1225 if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
1226 rc = -EFAULT;
1227 goto out;
1228 }
1229
1230 *ppos += count;
1231 rc = count;
1232 out:
1233 spin_unlock(&info->info_lock);
1234 return (rc);
1235 }
1236
1237 static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1238 {
1239 zpios_info_t *info = (zpios_info_t *)file->private_data;
1240 int rc = -EINVAL;
1241
1242 ASSERT(info);
1243 ASSERT(info->info_buffer);
1244
1245 spin_lock(&info->info_lock);
1246
1247 switch (origin) {
1248 case 0: /* SEEK_SET - No-op just do it */
1249 break;
1250 case 1: /* SEEK_CUR - Seek from current */
1251 offset = file->f_pos + offset;
1252 break;
1253 case 2: /* SEEK_END - Seek from end */
1254 offset = info->info_size + offset;
1255 break;
1256 }
1257
1258 if (offset >= 0) {
1259 file->f_pos = offset;
1260 file->f_version = 0;
1261 rc = offset;
1262 }
1263
1264 spin_unlock(&info->info_lock);
1265
1266 return (rc);
1267 }
1268
1269 static struct file_operations zpios_fops = {
1270 .owner = THIS_MODULE,
1271 .open = zpios_open,
1272 .release = zpios_release,
1273 .unlocked_ioctl = zpios_unlocked_ioctl,
1274 #ifdef CONFIG_COMPAT
1275 .compat_ioctl = zpios_compat_ioctl,
1276 #endif
1277 .read = zpios_read,
1278 .write = zpios_write,
1279 .llseek = zpios_seek,
1280 };
1281
/*
 * Misc character device registered at module load; the minor number
 * is assigned dynamically by the misc subsystem.
 */
static struct miscdevice zpios_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = ZPIOS_NAME,
	.fops = &zpios_fops,
};
1287
/* Suffix appended to the load/unload messages when built with DEBUG */
#ifdef DEBUG
#define ZFS_DEBUG_STR " (DEBUG mode)"
#else
#define ZFS_DEBUG_STR ""
#endif
1293
1294 static int __init
1295 zpios_init(void)
1296 {
1297 int error;
1298
1299 error = misc_register(&zpios_misc);
1300 if (error) {
1301 printk(KERN_INFO "ZPIOS: misc_register() failed %d\n", error);
1302 } else {
1303 printk(KERN_INFO "ZPIOS: Loaded module v%s-%s%s\n",
1304 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
1305 }
1306
1307 return (error);
1308 }
1309
/*
 * Module unload: unregister the misc device and log the unload.
 */
static void __exit
zpios_fini(void)
{
	misc_deregister(&zpios_misc);

	printk(KERN_INFO "ZPIOS: Unloaded module v%s-%s%s\n",
	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
}
1318
1319 module_init(zpios_init);
1320 module_exit(zpios_fini);
1321
1322 MODULE_AUTHOR("LLNL / Sun");
1323 MODULE_DESCRIPTION("Kernel PIOS implementation");
1324 MODULE_LICENSE("GPL");
1325 MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);