]>
Commit | Line | Data |
---|---|---|
d1d7e268 | 1 | /* |
302ef151 BB |
2 | * ZPIOS is a heavily modified version of the original PIOS test code. |
3 | * It is designed to have the test code running in the Linux kernel | |
4 | * against ZFS while still being flexibly controlled from user space. | |
5 | * | |
6 | * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. | |
7 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
8 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
9 | * LLNL-CODE-403049 | |
10 | * | |
11 | * Original PIOS Test Code | |
12 | * Copyright (C) 2004 Cluster File Systems, Inc. | |
13 | * Written by Peter Braam <braam@clusterfs.com> | |
14 | * Atul Vidwansa <atul@clusterfs.com> | |
15 | * Milind Dumbare <milind@clusterfs.com> | |
16 | * | |
17 | * This file is part of ZFS on Linux. | |
92db59ca | 18 | * For details, see <http://zfsonlinux.org/>. |
302ef151 BB |
19 | * |
20 | * ZPIOS is free software; you can redistribute it and/or modify it | |
21 | * under the terms of the GNU General Public License as published by the | |
22 | * Free Software Foundation; either version 2 of the License, or (at your | |
23 | * option) any later version. | |
24 | * | |
25 | * ZPIOS is distributed in the hope that it will be useful, but WITHOUT | |
26 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
27 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
28 | * for more details. | |
29 | * | |
30 | * You should have received a copy of the GNU General Public License along | |
31 | * with ZPIOS. If not, see <http://www.gnu.org/licenses/>. | |
d1d7e268 | 32 | */ |
302ef151 BB |
33 | |
34 | #include <sys/zfs_context.h> | |
35 | #include <sys/dmu.h> | |
36 | #include <sys/txg.h> | |
13fe0198 | 37 | #include <sys/dsl_destroy.h> |
302ef151 BB |
38 | #include <linux/cdev.h> |
39 | #include "zpios-internal.h" | |
40 | ||
41 | ||
42 | static spl_class *zpios_class; | |
43 | static spl_device *zpios_device; | |
44 | static char *zpios_tag = "zpios_tag"; | |
45 | ||
d1d7e268 MK |
46 | static int |
47 | zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc) | |
302ef151 | 48 | { |
d1d7e268 MK |
49 | /* |
50 | * This is stack heavy but it should be OK since we are only | |
302ef151 BB |
51 | * making the upcall between tests when the stack is shallow. |
52 | */ | |
d1d7e268 | 53 | char id[16], chunk_size[16], region_size[16], thread_count[16]; |
302ef151 | 54 | char region_count[16], offset[16], region_noise[16], chunk_noise[16]; |
d1d7e268 MK |
55 | char thread_delay[16], flags[16], result[8]; |
56 | char *argv[16], *envp[4]; | |
302ef151 BB |
57 | |
58 | if ((path == NULL) || (strlen(path) == 0)) | |
d1d7e268 | 59 | return (-ENOENT); |
302ef151 BB |
60 | |
61 | snprintf(id, 15, "%d", run_args->id); | |
62 | snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size); | |
d1d7e268 | 63 | snprintf(region_size, 15, "%lu", (long unsigned) run_args->region_size); |
302ef151 BB |
64 | snprintf(thread_count, 15, "%u", run_args->thread_count); |
65 | snprintf(region_count, 15, "%u", run_args->region_count); | |
66 | snprintf(offset, 15, "%lu", (long unsigned)run_args->offset); | |
67 | snprintf(region_noise, 15, "%u", run_args->region_noise); | |
68 | snprintf(chunk_noise, 15, "%u", run_args->chunk_noise); | |
69 | snprintf(thread_delay, 15, "%u", run_args->thread_delay); | |
70 | snprintf(flags, 15, "0x%x", run_args->flags); | |
71 | snprintf(result, 7, "%d", rc); | |
72 | ||
73 | /* Passing 15 args to registered pre/post upcall */ | |
d1d7e268 | 74 | argv[0] = path; |
302ef151 BB |
75 | argv[1] = phase; |
76 | argv[2] = strlen(run_args->log) ? run_args->log : "<none>"; | |
77 | argv[3] = id; | |
78 | argv[4] = run_args->pool; | |
79 | argv[5] = chunk_size; | |
80 | argv[6] = region_size; | |
81 | argv[7] = thread_count; | |
82 | argv[8] = region_count; | |
83 | argv[9] = offset; | |
84 | argv[10] = region_noise; | |
85 | argv[11] = chunk_noise; | |
86 | argv[12] = thread_delay; | |
87 | argv[13] = flags; | |
88 | argv[14] = result; | |
89 | argv[15] = NULL; | |
90 | ||
91 | /* Passing environment for user space upcall */ | |
d1d7e268 MK |
92 | envp[0] = "HOME=/"; |
93 | envp[1] = "TERM=linux"; | |
94 | envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin"; | |
95 | envp[3] = NULL; | |
302ef151 | 96 | |
d1d7e268 MK |
97 | return (call_usermodehelper(path, argv, envp, UMH_WAIT_PROC)); |
98 | } | |
99 | ||
100 | static int | |
101 | zpios_print(struct file *file, const char *format, ...) | |
102 | { | |
103 | zpios_info_t *info = (zpios_info_t *)file->private_data; | |
104 | va_list adx; | |
105 | int rc; | |
106 | ||
107 | ASSERT(info); | |
108 | ASSERT(info->info_buffer); | |
109 | ||
110 | va_start(adx, format); | |
111 | spin_lock(&info->info_lock); | |
112 | ||
113 | /* Don't allow the kernel to start a write in the red zone */ | |
114 | if ((int)(info->info_head - info->info_buffer) > | |
115 | (info->info_size - ZPIOS_INFO_BUFFER_REDZONE)) { | |
116 | rc = -EOVERFLOW; | |
117 | } else { | |
118 | rc = vsprintf(info->info_head, format, adx); | |
119 | if (rc >= 0) | |
120 | info->info_head += rc; | |
121 | } | |
122 | ||
123 | spin_unlock(&info->info_lock); | |
124 | va_end(adx); | |
125 | ||
126 | return (rc); | |
302ef151 BB |
127 | } |
128 | ||
129 | static uint64_t | |
130 | zpios_dmu_object_create(run_args_t *run_args, objset_t *os) | |
131 | { | |
132 | struct dmu_tx *tx; | |
d1d7e268 | 133 | uint64_t obj = 0ULL; |
302ef151 BB |
134 | int rc; |
135 | ||
136 | tx = dmu_tx_create(os); | |
137 | dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); | |
138 | rc = dmu_tx_assign(tx, TXG_WAIT); | |
139 | if (rc) { | |
140 | zpios_print(run_args->file, | |
d1d7e268 | 141 | "dmu_tx_assign() failed: %d\n", rc); |
302ef151 | 142 | dmu_tx_abort(tx); |
d1d7e268 | 143 | return (obj); |
302ef151 BB |
144 | } |
145 | ||
d1d7e268 | 146 | obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx); |
302ef151 BB |
147 | rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx); |
148 | if (rc) { | |
149 | zpios_print(run_args->file, | |
150 | "dmu_object_set_blocksize() failed: %d\n", rc); | |
d1d7e268 MK |
151 | dmu_tx_abort(tx); |
152 | return (obj); | |
302ef151 BB |
153 | } |
154 | ||
155 | dmu_tx_commit(tx); | |
156 | ||
d1d7e268 | 157 | return (obj); |
302ef151 BB |
158 | } |
159 | ||
160 | static int | |
161 | zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj) | |
162 | { | |
163 | struct dmu_tx *tx; | |
164 | int rc; | |
165 | ||
166 | tx = dmu_tx_create(os); | |
d1d7e268 | 167 | dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); |
302ef151 BB |
168 | rc = dmu_tx_assign(tx, TXG_WAIT); |
169 | if (rc) { | |
170 | zpios_print(run_args->file, | |
171 | "dmu_tx_assign() failed: %d\n", rc); | |
172 | dmu_tx_abort(tx); | |
d1d7e268 | 173 | return (rc); |
302ef151 BB |
174 | } |
175 | ||
176 | rc = dmu_object_free(os, obj, tx); | |
177 | if (rc) { | |
178 | zpios_print(run_args->file, | |
179 | "dmu_object_free() failed: %d\n", rc); | |
d1d7e268 MK |
180 | dmu_tx_abort(tx); |
181 | return (rc); | |
302ef151 BB |
182 | } |
183 | ||
184 | dmu_tx_commit(tx); | |
185 | ||
d1d7e268 | 186 | return (0); |
302ef151 BB |
187 | } |
188 | ||
189 | static int | |
190 | zpios_dmu_setup(run_args_t *run_args) | |
191 | { | |
192 | zpios_time_t *t = &(run_args->stats.cr_time); | |
193 | objset_t *os; | |
194 | char name[32]; | |
195 | uint64_t obj = 0ULL; | |
196 | int i, rc = 0, rc2; | |
197 | ||
d1d7e268 | 198 | (void) zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0); |
302ef151 BB |
199 | t->start = zpios_timespec_now(); |
200 | ||
d1d7e268 | 201 | (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); |
302ef151 BB |
202 | rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); |
203 | if (rc) { | |
204 | zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) " | |
205 | "failed: %d\n", name, rc); | |
206 | goto out; | |
207 | } | |
208 | ||
d1d7e268 MK |
209 | rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os); |
210 | if (rc) { | |
302ef151 BB |
211 | zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) " |
212 | "failed: %d\n", name, rc); | |
213 | goto out_destroy; | |
d1d7e268 | 214 | } |
302ef151 BB |
215 | |
216 | if (!(run_args->flags & DMU_FPP)) { | |
217 | obj = zpios_dmu_object_create(run_args, os); | |
218 | if (obj == 0) { | |
219 | rc = -EBADF; | |
220 | zpios_print(run_args->file, "Error zpios_dmu_" | |
221 | "object_create() failed, %d\n", rc); | |
222 | goto out_destroy; | |
223 | } | |
224 | } | |
225 | ||
226 | for (i = 0; i < run_args->region_count; i++) { | |
227 | zpios_region_t *region; | |
228 | ||
229 | region = &run_args->regions[i]; | |
d1d7e268 | 230 | mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL); |
302ef151 BB |
231 | |
232 | if (run_args->flags & DMU_FPP) { | |
233 | /* File per process */ | |
234 | region->obj.os = os; | |
235 | region->obj.obj = zpios_dmu_object_create(run_args, os); | |
236 | ASSERT(region->obj.obj > 0); /* XXX - Handle this */ | |
237 | region->wr_offset = run_args->offset; | |
238 | region->rd_offset = run_args->offset; | |
239 | region->init_offset = run_args->offset; | |
240 | region->max_offset = run_args->offset + | |
d1d7e268 | 241 | run_args->region_size; |
302ef151 BB |
242 | } else { |
243 | /* Single shared file */ | |
244 | region->obj.os = os; | |
245 | region->obj.obj = obj; | |
246 | region->wr_offset = run_args->offset * i; | |
247 | region->rd_offset = run_args->offset * i; | |
248 | region->init_offset = run_args->offset * i; | |
249 | region->max_offset = run_args->offset * | |
d1d7e268 | 250 | i + run_args->region_size; |
302ef151 BB |
251 | } |
252 | } | |
253 | ||
254 | run_args->os = os; | |
255 | out_destroy: | |
256 | if (rc) { | |
13fe0198 | 257 | rc2 = dsl_destroy_head(name); |
302ef151 | 258 | if (rc2) |
13fe0198 | 259 | zpios_print(run_args->file, "Error dsl_destroy_head" |
302ef151 BB |
260 | "(%s, ...) failed: %d\n", name, rc2); |
261 | } | |
262 | out: | |
263 | t->stop = zpios_timespec_now(); | |
264 | t->delta = zpios_timespec_sub(t->stop, t->start); | |
d1d7e268 | 265 | (void) zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc); |
302ef151 | 266 | |
d1d7e268 | 267 | return (rc); |
302ef151 BB |
268 | } |
269 | ||
270 | static int | |
271 | zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file) | |
272 | { | |
273 | run_args_t *ra; | |
274 | int rc, size; | |
275 | ||
d1d7e268 | 276 | size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t); |
302ef151 BB |
277 | |
278 | ra = vmem_zalloc(size, KM_SLEEP); | |
279 | if (ra == NULL) { | |
280 | zpios_print(file, "Unable to vmem_zalloc() %d bytes " | |
281 | "for regions\n", size); | |
d1d7e268 | 282 | return (-ENOMEM); |
302ef151 BB |
283 | } |
284 | ||
285 | *run_args = ra; | |
286 | strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1); | |
287 | strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1); | |
288 | strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1); | |
289 | strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1); | |
d1d7e268 MK |
290 | ra->id = kcmd->cmd_id; |
291 | ra->chunk_size = kcmd->cmd_chunk_size; | |
292 | ra->thread_count = kcmd->cmd_thread_count; | |
293 | ra->region_count = kcmd->cmd_region_count; | |
294 | ra->region_size = kcmd->cmd_region_size; | |
295 | ra->offset = kcmd->cmd_offset; | |
296 | ra->region_noise = kcmd->cmd_region_noise; | |
297 | ra->chunk_noise = kcmd->cmd_chunk_noise; | |
298 | ra->thread_delay = kcmd->cmd_thread_delay; | |
299 | ra->flags = kcmd->cmd_flags; | |
300 | ra->stats.wr_data = 0; | |
301 | ra->stats.wr_chunks = 0; | |
302 | ra->stats.rd_data = 0; | |
303 | ra->stats.rd_chunks = 0; | |
304 | ra->region_next = 0; | |
305 | ra->file = file; | |
306 | mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL); | |
307 | mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL); | |
308 | ||
309 | (void) zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0); | |
302ef151 BB |
310 | |
311 | rc = zpios_dmu_setup(ra); | |
312 | if (rc) { | |
d1d7e268 MK |
313 | mutex_destroy(&ra->lock_ctl); |
314 | mutex_destroy(&ra->lock_work); | |
302ef151 BB |
315 | vmem_free(ra, size); |
316 | *run_args = NULL; | |
317 | } | |
318 | ||
d1d7e268 | 319 | return (rc); |
302ef151 BB |
320 | } |
321 | ||
322 | static int | |
323 | zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset, | |
324 | __u32 *chunk_size, zpios_region_t **region, __u32 flags) | |
325 | { | |
326 | int i, j, count = 0; | |
327 | unsigned int random_int; | |
328 | ||
d1d7e268 | 329 | get_random_bytes(&random_int, sizeof (unsigned int)); |
302ef151 BB |
330 | |
331 | mutex_enter(&run_args->lock_work); | |
332 | i = run_args->region_next; | |
333 | ||
d1d7e268 MK |
334 | /* |
335 | * XXX: I don't much care for this chunk selection mechansim | |
302ef151 BB |
336 | * there's the potential to burn a lot of time here doing nothing |
337 | * useful while holding the global lock. This could give some | |
338 | * misleading performance results. I'll fix it latter. | |
339 | */ | |
340 | while (count < run_args->region_count) { | |
341 | __u64 *rw_offset; | |
342 | zpios_time_t *rw_time; | |
343 | ||
344 | j = i % run_args->region_count; | |
345 | *region = &(run_args->regions[j]); | |
346 | ||
347 | if (flags & DMU_WRITE) { | |
348 | rw_offset = &((*region)->wr_offset); | |
349 | rw_time = &((*region)->stats.wr_time); | |
350 | } else { | |
351 | rw_offset = &((*region)->rd_offset); | |
352 | rw_time = &((*region)->stats.rd_time); | |
353 | } | |
354 | ||
355 | /* test if region is fully written */ | |
356 | if (*rw_offset + *chunk_size > (*region)->max_offset) { | |
357 | i++; | |
358 | count++; | |
359 | ||
360 | if (unlikely(rw_time->stop.ts_sec == 0) && | |
361 | unlikely(rw_time->stop.ts_nsec == 0)) | |
362 | rw_time->stop = zpios_timespec_now(); | |
363 | ||
364 | continue; | |
365 | } | |
366 | ||
367 | *offset = *rw_offset; | |
368 | *obj = (*region)->obj; | |
369 | *rw_offset += *chunk_size; | |
370 | ||
371 | /* update ctl structure */ | |
372 | if (run_args->region_noise) { | |
d1d7e268 MK |
373 | get_random_bytes(&random_int, sizeof (unsigned int)); |
374 | run_args->region_next += | |
375 | random_int % run_args->region_noise; | |
302ef151 BB |
376 | } else { |
377 | run_args->region_next++; | |
378 | } | |
379 | ||
380 | mutex_exit(&run_args->lock_work); | |
d1d7e268 | 381 | return (1); |
302ef151 BB |
382 | } |
383 | ||
384 | /* nothing left to do */ | |
385 | mutex_exit(&run_args->lock_work); | |
386 | ||
d1d7e268 | 387 | return (0); |
302ef151 BB |
388 | } |
389 | ||
390 | static void | |
391 | zpios_remove_objset(run_args_t *run_args) | |
392 | { | |
393 | zpios_time_t *t = &(run_args->stats.rm_time); | |
394 | zpios_region_t *region; | |
395 | char name[32]; | |
396 | int rc = 0, i; | |
397 | ||
d1d7e268 | 398 | (void) zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0); |
302ef151 BB |
399 | t->start = zpios_timespec_now(); |
400 | ||
d1d7e268 | 401 | (void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); |
302ef151 BB |
402 | |
403 | if (run_args->flags & DMU_REMOVE) { | |
404 | if (run_args->flags & DMU_FPP) { | |
405 | for (i = 0; i < run_args->region_count; i++) { | |
406 | region = &run_args->regions[i]; | |
407 | rc = zpios_dmu_object_free(run_args, | |
d1d7e268 | 408 | region->obj.os, region->obj.obj); |
302ef151 | 409 | if (rc) |
d1d7e268 MK |
410 | zpios_print(run_args->file, |
411 | "Error removing object %d, %d\n", | |
412 | (int)region->obj.obj, rc); | |
302ef151 BB |
413 | } |
414 | } else { | |
415 | region = &run_args->regions[0]; | |
416 | rc = zpios_dmu_object_free(run_args, | |
d1d7e268 | 417 | region->obj.os, region->obj.obj); |
302ef151 | 418 | if (rc) |
d1d7e268 MK |
419 | zpios_print(run_args->file, |
420 | "Error removing object %d, %d\n", | |
421 | (int)region->obj.obj, rc); | |
302ef151 BB |
422 | } |
423 | } | |
424 | ||
425 | dmu_objset_disown(run_args->os, zpios_tag); | |
426 | ||
427 | if (run_args->flags & DMU_REMOVE) { | |
13fe0198 | 428 | rc = dsl_destroy_head(name); |
302ef151 | 429 | if (rc) |
13fe0198 | 430 | zpios_print(run_args->file, "Error dsl_destroy_head" |
d1d7e268 | 431 | "(%s, ...) failed: %d\n", name, rc); |
302ef151 BB |
432 | } |
433 | ||
434 | t->stop = zpios_timespec_now(); | |
435 | t->delta = zpios_timespec_sub(t->stop, t->start); | |
d1d7e268 | 436 | (void) zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc); |
302ef151 BB |
437 | } |
438 | ||
439 | static void | |
440 | zpios_cleanup_run(run_args_t *run_args) | |
441 | { | |
442 | int i, size = 0; | |
443 | ||
444 | if (run_args == NULL) | |
445 | return; | |
446 | ||
447 | if (run_args->threads != NULL) { | |
448 | for (i = 0; i < run_args->thread_count; i++) { | |
449 | if (run_args->threads[i]) { | |
450 | mutex_destroy(&run_args->threads[i]->lock); | |
451 | kmem_free(run_args->threads[i], | |
d1d7e268 | 452 | sizeof (thread_data_t)); |
302ef151 BB |
453 | } |
454 | } | |
455 | ||
456 | kmem_free(run_args->threads, | |
d1d7e268 | 457 | sizeof (thread_data_t *) * run_args->thread_count); |
302ef151 BB |
458 | } |
459 | ||
460 | for (i = 0; i < run_args->region_count; i++) | |
461 | mutex_destroy(&run_args->regions[i].lock); | |
462 | ||
463 | mutex_destroy(&run_args->lock_work); | |
464 | mutex_destroy(&run_args->lock_ctl); | |
d1d7e268 | 465 | size = run_args->region_count * sizeof (zpios_region_t); |
302ef151 | 466 | |
d1d7e268 | 467 | vmem_free(run_args, sizeof (*run_args) + size); |
302ef151 BB |
468 | } |
469 | ||
470 | static int | |
471 | zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, | |
472 | uint64_t offset, uint64_t size, const void *buf) | |
473 | { | |
474 | struct dmu_tx *tx; | |
475 | int rc, how = TXG_WAIT; | |
476 | // int flags = 0; | |
477 | ||
478 | if (run_args->flags & DMU_WRITE_NOWAIT) | |
479 | how = TXG_NOWAIT; | |
480 | ||
481 | while (1) { | |
482 | tx = dmu_tx_create(os); | |
483 | dmu_tx_hold_write(tx, object, offset, size); | |
484 | rc = dmu_tx_assign(tx, how); | |
485 | ||
486 | if (rc) { | |
487 | if (rc == ERESTART && how == TXG_NOWAIT) { | |
488 | dmu_tx_wait(tx); | |
489 | dmu_tx_abort(tx); | |
490 | continue; | |
491 | } | |
492 | zpios_print(run_args->file, | |
493 | "Error in dmu_tx_assign(), %d", rc); | |
494 | dmu_tx_abort(tx); | |
d1d7e268 | 495 | return (rc); |
302ef151 BB |
496 | } |
497 | break; | |
498 | } | |
499 | ||
500 | // if (run_args->flags & DMU_WRITE_ZC) | |
501 | // flags |= DMU_WRITE_ZEROCOPY; | |
502 | ||
503 | dmu_write(os, object, offset, size, buf, tx); | |
504 | dmu_tx_commit(tx); | |
505 | ||
d1d7e268 | 506 | return (0); |
302ef151 BB |
507 | } |
508 | ||
509 | static int | |
510 | zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object, | |
d1d7e268 | 511 | uint64_t offset, uint64_t size, void *buf) |
302ef151 BB |
512 | { |
513 | int flags = 0; | |
514 | ||
515 | // if (run_args->flags & DMU_READ_ZC) | |
516 | // flags |= DMU_READ_ZEROCOPY; | |
517 | ||
518 | if (run_args->flags & DMU_READ_NOPF) | |
519 | flags |= DMU_READ_NO_PREFETCH; | |
520 | ||
d1d7e268 | 521 | return (dmu_read(os, object, offset, size, buf, flags)); |
302ef151 BB |
522 | } |
523 | ||
524 | static int | |
525 | zpios_thread_main(void *data) | |
526 | { | |
527 | thread_data_t *thr = (thread_data_t *)data; | |
528 | run_args_t *run_args = thr->run_args; | |
529 | zpios_time_t t; | |
530 | dmu_obj_t obj; | |
531 | __u64 offset; | |
532 | __u32 chunk_size; | |
533 | zpios_region_t *region; | |
534 | char *buf; | |
535 | unsigned int random_int; | |
536 | int chunk_noise = run_args->chunk_noise; | |
537 | int chunk_noise_tmp = 0; | |
538 | int thread_delay = run_args->thread_delay; | |
539 | int thread_delay_tmp = 0; | |
540 | int i, rc = 0; | |
541 | ||
542 | if (chunk_noise) { | |
d1d7e268 | 543 | get_random_bytes(&random_int, sizeof (unsigned int)); |
302ef151 BB |
544 | chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise; |
545 | } | |
546 | ||
d1d7e268 MK |
547 | /* |
548 | * It's OK to vmem_alloc() this memory because it will be copied | |
302ef151 BB |
549 | * in to the slab and pointers to the slab copy will be setup in |
550 | * the bio when the IO is submitted. This of course is not ideal | |
551 | * since we want a zero-copy IO path if possible. It would be nice | |
552 | * to have direct access to those slab entries. | |
553 | */ | |
554 | chunk_size = run_args->chunk_size + chunk_noise_tmp; | |
555 | buf = (char *)vmem_alloc(chunk_size, KM_SLEEP); | |
556 | ASSERT(buf); | |
557 | ||
558 | /* Trivial data verification pattern for now. */ | |
559 | if (run_args->flags & DMU_VERIFY) | |
560 | memset(buf, 'z', chunk_size); | |
561 | ||
562 | /* Write phase */ | |
563 | mutex_enter(&thr->lock); | |
564 | thr->stats.wr_time.start = zpios_timespec_now(); | |
565 | mutex_exit(&thr->lock); | |
566 | ||
567 | while (zpios_get_work_item(run_args, &obj, &offset, | |
d1d7e268 | 568 | &chunk_size, ®ion, DMU_WRITE)) { |
302ef151 | 569 | if (thread_delay) { |
d1d7e268 | 570 | get_random_bytes(&random_int, sizeof (unsigned int)); |
302ef151 BB |
571 | thread_delay_tmp = random_int % thread_delay; |
572 | set_current_state(TASK_UNINTERRUPTIBLE); | |
573 | schedule_timeout(thread_delay_tmp); /* In jiffies */ | |
574 | } | |
575 | ||
576 | t.start = zpios_timespec_now(); | |
577 | rc = zpios_dmu_write(run_args, obj.os, obj.obj, | |
d1d7e268 | 578 | offset, chunk_size, buf); |
302ef151 BB |
579 | t.stop = zpios_timespec_now(); |
580 | t.delta = zpios_timespec_sub(t.stop, t.start); | |
581 | ||
582 | if (rc) { | |
583 | zpios_print(run_args->file, "IO error while doing " | |
584 | "dmu_write(): %d\n", rc); | |
585 | break; | |
586 | } | |
587 | ||
588 | mutex_enter(&thr->lock); | |
589 | thr->stats.wr_data += chunk_size; | |
590 | thr->stats.wr_chunks++; | |
591 | thr->stats.wr_time.delta = zpios_timespec_add( | |
d1d7e268 | 592 | thr->stats.wr_time.delta, t.delta); |
302ef151 BB |
593 | mutex_exit(&thr->lock); |
594 | ||
595 | mutex_enter(®ion->lock); | |
596 | region->stats.wr_data += chunk_size; | |
597 | region->stats.wr_chunks++; | |
598 | region->stats.wr_time.delta = zpios_timespec_add( | |
d1d7e268 | 599 | region->stats.wr_time.delta, t.delta); |
302ef151 BB |
600 | |
601 | /* First time region was accessed */ | |
602 | if (region->init_offset == offset) | |
603 | region->stats.wr_time.start = t.start; | |
604 | ||
605 | mutex_exit(®ion->lock); | |
606 | } | |
607 | ||
608 | mutex_enter(&run_args->lock_ctl); | |
609 | run_args->threads_done++; | |
610 | mutex_exit(&run_args->lock_ctl); | |
611 | ||
612 | mutex_enter(&thr->lock); | |
613 | thr->rc = rc; | |
614 | thr->stats.wr_time.stop = zpios_timespec_now(); | |
615 | mutex_exit(&thr->lock); | |
616 | wake_up(&run_args->waitq); | |
617 | ||
618 | set_current_state(TASK_UNINTERRUPTIBLE); | |
619 | schedule(); | |
620 | ||
621 | /* Check if we should exit */ | |
622 | mutex_enter(&thr->lock); | |
623 | rc = thr->rc; | |
624 | mutex_exit(&thr->lock); | |
625 | if (rc) | |
626 | goto out; | |
627 | ||
628 | /* Read phase */ | |
629 | mutex_enter(&thr->lock); | |
630 | thr->stats.rd_time.start = zpios_timespec_now(); | |
631 | mutex_exit(&thr->lock); | |
632 | ||
633 | while (zpios_get_work_item(run_args, &obj, &offset, | |
d1d7e268 | 634 | &chunk_size, ®ion, DMU_READ)) { |
302ef151 | 635 | if (thread_delay) { |
d1d7e268 | 636 | get_random_bytes(&random_int, sizeof (unsigned int)); |
302ef151 BB |
637 | thread_delay_tmp = random_int % thread_delay; |
638 | set_current_state(TASK_UNINTERRUPTIBLE); | |
639 | schedule_timeout(thread_delay_tmp); /* In jiffies */ | |
640 | } | |
641 | ||
642 | if (run_args->flags & DMU_VERIFY) | |
643 | memset(buf, 0, chunk_size); | |
644 | ||
645 | t.start = zpios_timespec_now(); | |
646 | rc = zpios_dmu_read(run_args, obj.os, obj.obj, | |
647 | offset, chunk_size, buf); | |
648 | t.stop = zpios_timespec_now(); | |
649 | t.delta = zpios_timespec_sub(t.stop, t.start); | |
650 | ||
651 | if (rc) { | |
652 | zpios_print(run_args->file, "IO error while doing " | |
653 | "dmu_read(): %d\n", rc); | |
654 | break; | |
655 | } | |
656 | ||
657 | /* Trivial data verification, expensive! */ | |
658 | if (run_args->flags & DMU_VERIFY) { | |
659 | for (i = 0; i < chunk_size; i++) { | |
660 | if (buf[i] != 'z') { | |
661 | zpios_print(run_args->file, | |
d1d7e268 MK |
662 | "IO verify error: %d/%d/%d\n", |
663 | (int)obj.obj, (int)offset, | |
664 | (int)chunk_size); | |
302ef151 BB |
665 | break; |
666 | } | |
667 | } | |
668 | } | |
669 | ||
670 | mutex_enter(&thr->lock); | |
671 | thr->stats.rd_data += chunk_size; | |
672 | thr->stats.rd_chunks++; | |
673 | thr->stats.rd_time.delta = zpios_timespec_add( | |
d1d7e268 | 674 | thr->stats.rd_time.delta, t.delta); |
302ef151 BB |
675 | mutex_exit(&thr->lock); |
676 | ||
677 | mutex_enter(®ion->lock); | |
678 | region->stats.rd_data += chunk_size; | |
679 | region->stats.rd_chunks++; | |
680 | region->stats.rd_time.delta = zpios_timespec_add( | |
d1d7e268 | 681 | region->stats.rd_time.delta, t.delta); |
302ef151 BB |
682 | |
683 | /* First time region was accessed */ | |
684 | if (region->init_offset == offset) | |
685 | region->stats.rd_time.start = t.start; | |
686 | ||
687 | mutex_exit(®ion->lock); | |
688 | } | |
689 | ||
690 | mutex_enter(&run_args->lock_ctl); | |
691 | run_args->threads_done++; | |
692 | mutex_exit(&run_args->lock_ctl); | |
693 | ||
694 | mutex_enter(&thr->lock); | |
695 | thr->rc = rc; | |
696 | thr->stats.rd_time.stop = zpios_timespec_now(); | |
697 | mutex_exit(&thr->lock); | |
698 | wake_up(&run_args->waitq); | |
699 | ||
700 | out: | |
701 | vmem_free(buf, chunk_size); | |
702 | do_exit(0); | |
703 | ||
d1d7e268 | 704 | return (rc); /* Unreachable, due to do_exit() */ |
302ef151 BB |
705 | } |
706 | ||
707 | static int | |
708 | zpios_thread_done(run_args_t *run_args) | |
709 | { | |
710 | ASSERT(run_args->threads_done <= run_args->thread_count); | |
711 | return (run_args->threads_done == run_args->thread_count); | |
712 | } | |
713 | ||
714 | static int | |
715 | zpios_threads_run(run_args_t *run_args) | |
716 | { | |
717 | struct task_struct *tsk, **tsks; | |
718 | thread_data_t *thr = NULL; | |
719 | zpios_time_t *tt = &(run_args->stats.total_time); | |
720 | zpios_time_t *tw = &(run_args->stats.wr_time); | |
721 | zpios_time_t *tr = &(run_args->stats.rd_time); | |
722 | int i, rc = 0, tc = run_args->thread_count; | |
723 | ||
d1d7e268 | 724 | tsks = kmem_zalloc(sizeof (struct task_struct *) * tc, KM_SLEEP); |
302ef151 BB |
725 | if (tsks == NULL) { |
726 | rc = -ENOMEM; | |
727 | goto cleanup2; | |
728 | } | |
729 | ||
d1d7e268 | 730 | run_args->threads = kmem_zalloc(sizeof (thread_data_t *)*tc, KM_SLEEP); |
302ef151 BB |
731 | if (run_args->threads == NULL) { |
732 | rc = -ENOMEM; | |
733 | goto cleanup; | |
734 | } | |
735 | ||
736 | init_waitqueue_head(&run_args->waitq); | |
737 | run_args->threads_done = 0; | |
738 | ||
739 | /* Create all the needed threads which will sleep until awoken */ | |
740 | for (i = 0; i < tc; i++) { | |
d1d7e268 | 741 | thr = kmem_zalloc(sizeof (thread_data_t), KM_SLEEP); |
302ef151 BB |
742 | if (thr == NULL) { |
743 | rc = -ENOMEM; | |
744 | goto taskerr; | |
745 | } | |
746 | ||
747 | thr->thread_no = i; | |
748 | thr->run_args = run_args; | |
749 | thr->rc = 0; | |
750 | mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL); | |
751 | run_args->threads[i] = thr; | |
752 | ||
753 | tsk = kthread_create(zpios_thread_main, (void *)thr, | |
d1d7e268 | 754 | "%s/%d", "zpios_io", i); |
302ef151 BB |
755 | if (IS_ERR(tsk)) { |
756 | rc = -EINVAL; | |
757 | goto taskerr; | |
758 | } | |
759 | ||
760 | tsks[i] = tsk; | |
761 | } | |
762 | ||
763 | tt->start = zpios_timespec_now(); | |
764 | ||
765 | /* Wake up all threads for write phase */ | |
d1d7e268 | 766 | (void) zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0); |
302ef151 BB |
767 | for (i = 0; i < tc; i++) |
768 | wake_up_process(tsks[i]); | |
769 | ||
770 | /* Wait for write phase to complete */ | |
771 | tw->start = zpios_timespec_now(); | |
772 | wait_event(run_args->waitq, zpios_thread_done(run_args)); | |
773 | tw->stop = zpios_timespec_now(); | |
d1d7e268 | 774 | (void) zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc); |
302ef151 BB |
775 | |
776 | for (i = 0; i < tc; i++) { | |
777 | thr = run_args->threads[i]; | |
778 | ||
779 | mutex_enter(&thr->lock); | |
780 | ||
781 | if (!rc && thr->rc) | |
782 | rc = thr->rc; | |
783 | ||
784 | run_args->stats.wr_data += thr->stats.wr_data; | |
785 | run_args->stats.wr_chunks += thr->stats.wr_chunks; | |
786 | mutex_exit(&thr->lock); | |
787 | } | |
788 | ||
789 | if (rc) { | |
790 | /* Wake up all threads and tell them to exit */ | |
791 | for (i = 0; i < tc; i++) { | |
792 | mutex_enter(&thr->lock); | |
793 | thr->rc = rc; | |
794 | mutex_exit(&thr->lock); | |
795 | ||
796 | wake_up_process(tsks[i]); | |
797 | } | |
798 | goto out; | |
799 | } | |
800 | ||
801 | mutex_enter(&run_args->lock_ctl); | |
802 | ASSERT(run_args->threads_done == run_args->thread_count); | |
803 | run_args->threads_done = 0; | |
804 | mutex_exit(&run_args->lock_ctl); | |
805 | ||
806 | /* Wake up all threads for read phase */ | |
d1d7e268 MK |
807 | (void) zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0); |
808 | for (i = 0; i < tc; i++) | |
302ef151 BB |
809 | wake_up_process(tsks[i]); |
810 | ||
811 | /* Wait for read phase to complete */ | |
812 | tr->start = zpios_timespec_now(); | |
813 | wait_event(run_args->waitq, zpios_thread_done(run_args)); | |
814 | tr->stop = zpios_timespec_now(); | |
d1d7e268 | 815 | (void) zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc); |
302ef151 BB |
816 | |
817 | for (i = 0; i < tc; i++) { | |
818 | thr = run_args->threads[i]; | |
819 | ||
820 | mutex_enter(&thr->lock); | |
821 | ||
822 | if (!rc && thr->rc) | |
823 | rc = thr->rc; | |
824 | ||
825 | run_args->stats.rd_data += thr->stats.rd_data; | |
826 | run_args->stats.rd_chunks += thr->stats.rd_chunks; | |
827 | mutex_exit(&thr->lock); | |
828 | } | |
829 | out: | |
830 | tt->stop = zpios_timespec_now(); | |
831 | tt->delta = zpios_timespec_sub(tt->stop, tt->start); | |
832 | tw->delta = zpios_timespec_sub(tw->stop, tw->start); | |
833 | tr->delta = zpios_timespec_sub(tr->stop, tr->start); | |
834 | ||
835 | cleanup: | |
d1d7e268 | 836 | kmem_free(tsks, sizeof (struct task_struct *) * tc); |
302ef151 BB |
837 | cleanup2: |
838 | /* Returns first encountered thread error (if any) */ | |
d1d7e268 | 839 | return (rc); |
302ef151 BB |
840 | |
841 | taskerr: | |
842 | /* Destroy all threads that were created successfully */ | |
843 | for (i = 0; i < tc; i++) | |
844 | if (tsks[i] != NULL) | |
845 | (void) kthread_stop(tsks[i]); | |
846 | ||
847 | goto cleanup; | |
848 | } | |
849 | ||
850 | static int | |
851 | zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd, | |
d1d7e268 | 852 | int data_size, void *data) |
302ef151 BB |
853 | { |
854 | run_args_t *run_args = { 0 }; | |
855 | zpios_stats_t *stats = (zpios_stats_t *)data; | |
856 | int i, n, m, size, rc; | |
857 | ||
858 | if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) || | |
859 | (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) { | |
860 | zpios_print(file, "Invalid chunk_size, region_size, " | |
d1d7e268 MK |
861 | "thread_count, or region_count, %d\n", -EINVAL); |
862 | return (-EINVAL); | |
302ef151 BB |
863 | } |
864 | ||
865 | if (!(kcmd->cmd_flags & DMU_WRITE) || | |
866 | !(kcmd->cmd_flags & DMU_READ)) { | |
867 | zpios_print(file, "Invalid flags, minimally DMU_WRITE " | |
d1d7e268 MK |
868 | "and DMU_READ must be set, %d\n", -EINVAL); |
869 | return (-EINVAL); | |
302ef151 BB |
870 | } |
871 | ||
872 | if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && | |
873 | (kcmd->cmd_flags & DMU_VERIFY)) { | |
874 | zpios_print(file, "Invalid flags, DMU_*_ZC incompatible " | |
d1d7e268 MK |
875 | "with DMU_VERIFY, used for performance analysis " |
876 | "only, %d\n", -EINVAL); | |
877 | return (-EINVAL); | |
302ef151 BB |
878 | } |
879 | ||
d1d7e268 MK |
880 | /* |
881 | * Opaque data on return contains structs of the following form: | |
302ef151 BB |
882 | * |
883 | * zpios_stat_t stats[]; | |
884 | * stats[0] = run_args->stats; | |
885 | * stats[1-N] = threads[N]->stats; | |
886 | * stats[N+1-M] = regions[M]->stats; | |
887 | * | |
888 | * Where N is the number of threads, and M is the number of regions. | |
889 | */ | |
d1d7e268 MK |
890 | size = (sizeof (zpios_stats_t) + |
891 | (kcmd->cmd_thread_count * sizeof (zpios_stats_t)) + | |
892 | (kcmd->cmd_region_count * sizeof (zpios_stats_t))); | |
302ef151 BB |
893 | if (data_size < size) { |
894 | zpios_print(file, "Invalid size, command data buffer " | |
d1d7e268 MK |
895 | "size too small, (%d < %d)\n", data_size, size); |
896 | return (-ENOSPC); | |
302ef151 BB |
897 | } |
898 | ||
899 | rc = zpios_setup_run(&run_args, kcmd, file); | |
900 | if (rc) | |
d1d7e268 | 901 | return (rc); |
302ef151 | 902 | |
d1d7e268 | 903 | rc = zpios_threads_run(run_args); |
302ef151 BB |
904 | zpios_remove_objset(run_args); |
905 | if (rc) | |
906 | goto cleanup; | |
907 | ||
908 | if (stats) { | |
909 | n = 1; | |
910 | m = 1 + kcmd->cmd_thread_count; | |
911 | stats[0] = run_args->stats; | |
912 | ||
913 | for (i = 0; i < kcmd->cmd_thread_count; i++) | |
914 | stats[n+i] = run_args->threads[i]->stats; | |
915 | ||
916 | for (i = 0; i < kcmd->cmd_region_count; i++) | |
917 | stats[m+i] = run_args->regions[i].stats; | |
918 | } | |
919 | ||
920 | cleanup: | |
d1d7e268 | 921 | zpios_cleanup_run(run_args); |
302ef151 | 922 | |
d1d7e268 | 923 | (void) zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0); |
302ef151 | 924 | |
d1d7e268 | 925 | return (rc); |
302ef151 BB |
926 | } |
927 | ||
928 | static int | |
929 | zpios_open(struct inode *inode, struct file *file) | |
930 | { | |
931 | unsigned int minor = iminor(inode); | |
932 | zpios_info_t *info; | |
933 | ||
934 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 935 | return (-ENXIO); |
302ef151 | 936 | |
d1d7e268 | 937 | info = (zpios_info_t *)kmem_alloc(sizeof (*info), KM_SLEEP); |
302ef151 | 938 | if (info == NULL) |
d1d7e268 | 939 | return (-ENOMEM); |
302ef151 BB |
940 | |
941 | spin_lock_init(&info->info_lock); | |
942 | info->info_size = ZPIOS_INFO_BUFFER_SIZE; | |
d1d7e268 MK |
943 | info->info_buffer = |
944 | (char *) vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP); | |
302ef151 | 945 | if (info->info_buffer == NULL) { |
d1d7e268 MK |
946 | kmem_free(info, sizeof (*info)); |
947 | return (-ENOMEM); | |
302ef151 BB |
948 | } |
949 | ||
950 | info->info_head = info->info_buffer; | |
951 | file->private_data = (void *)info; | |
952 | ||
d1d7e268 | 953 | return (0); |
302ef151 BB |
954 | } |
955 | ||
956 | static int | |
957 | zpios_release(struct inode *inode, struct file *file) | |
958 | { | |
959 | unsigned int minor = iminor(inode); | |
960 | zpios_info_t *info = (zpios_info_t *)file->private_data; | |
961 | ||
962 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 963 | return (-ENXIO); |
302ef151 BB |
964 | |
965 | ASSERT(info); | |
966 | ASSERT(info->info_buffer); | |
967 | ||
968 | vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE); | |
d1d7e268 | 969 | kmem_free(info, sizeof (*info)); |
302ef151 | 970 | |
d1d7e268 | 971 | return (0); |
302ef151 BB |
972 | } |
973 | ||
974 | static int | |
975 | zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) | |
976 | { | |
977 | zpios_info_t *info = (zpios_info_t *)file->private_data; | |
978 | ||
979 | ASSERT(info); | |
980 | ASSERT(info->info_buffer); | |
981 | ||
982 | spin_lock(&info->info_lock); | |
983 | memset(info->info_buffer, 0, info->info_size); | |
984 | info->info_head = info->info_buffer; | |
985 | spin_unlock(&info->info_lock); | |
986 | ||
d1d7e268 | 987 | return (0); |
302ef151 BB |
988 | } |
989 | ||
990 | static int | |
991 | zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) | |
992 | { | |
993 | zpios_info_t *info = (zpios_info_t *)file->private_data; | |
994 | char *buf; | |
995 | int min, size, rc = 0; | |
996 | ||
997 | ASSERT(info); | |
998 | ASSERT(info->info_buffer); | |
999 | ||
1000 | spin_lock(&info->info_lock); | |
1001 | if (kcfg->cfg_arg1 > 0) { | |
1002 | ||
1003 | size = kcfg->cfg_arg1; | |
1004 | buf = (char *)vmem_alloc(size, KM_SLEEP); | |
1005 | if (buf == NULL) { | |
1006 | rc = -ENOMEM; | |
1007 | goto out; | |
1008 | } | |
1009 | ||
1010 | /* Zero fill and truncate contents when coping buffer */ | |
1011 | min = ((size < info->info_size) ? size : info->info_size); | |
1012 | memset(buf, 0, size); | |
1013 | memcpy(buf, info->info_buffer, min); | |
1014 | vmem_free(info->info_buffer, info->info_size); | |
1015 | info->info_size = size; | |
1016 | info->info_buffer = buf; | |
1017 | info->info_head = info->info_buffer; | |
1018 | } | |
1019 | ||
1020 | kcfg->cfg_rc1 = info->info_size; | |
1021 | ||
d1d7e268 MK |
1022 | if (copy_to_user((struct zpios_cfg_t __user *)arg, |
1023 | kcfg, sizeof (*kcfg))) | |
302ef151 BB |
1024 | rc = -EFAULT; |
1025 | out: | |
1026 | spin_unlock(&info->info_lock); | |
1027 | ||
d1d7e268 | 1028 | return (rc); |
302ef151 BB |
1029 | } |
1030 | ||
1031 | static int | |
1032 | zpios_ioctl_cfg(struct file *file, unsigned long arg) | |
1033 | { | |
1034 | zpios_cfg_t kcfg; | |
1035 | int rc = 0; | |
1036 | ||
d1d7e268 MK |
1037 | if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof (kcfg))) |
1038 | return (-EFAULT); | |
302ef151 BB |
1039 | |
1040 | if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) { | |
1041 | zpios_print(file, "Bad config magic 0x%x != 0x%x\n", | |
d1d7e268 MK |
1042 | kcfg.cfg_magic, ZPIOS_CFG_MAGIC); |
1043 | return (-EINVAL); | |
302ef151 BB |
1044 | } |
1045 | ||
1046 | switch (kcfg.cfg_cmd) { | |
1047 | case ZPIOS_CFG_BUFFER_CLEAR: | |
d1d7e268 MK |
1048 | /* |
1049 | * cfg_arg1 - Unused | |
302ef151 BB |
1050 | * cfg_rc1 - Unused |
1051 | */ | |
1052 | rc = zpios_buffer_clear(file, &kcfg, arg); | |
1053 | break; | |
1054 | case ZPIOS_CFG_BUFFER_SIZE: | |
d1d7e268 MK |
1055 | /* |
1056 | * cfg_arg1 - 0 - query size; >0 resize | |
302ef151 BB |
1057 | * cfg_rc1 - Set to current buffer size |
1058 | */ | |
1059 | rc = zpios_buffer_size(file, &kcfg, arg); | |
1060 | break; | |
1061 | default: | |
1062 | zpios_print(file, "Bad config command %d\n", | |
1063 | kcfg.cfg_cmd); | |
1064 | rc = -EINVAL; | |
1065 | break; | |
1066 | } | |
1067 | ||
d1d7e268 | 1068 | return (rc); |
302ef151 BB |
1069 | } |
1070 | ||
1071 | static int | |
1072 | zpios_ioctl_cmd(struct file *file, unsigned long arg) | |
1073 | { | |
1074 | zpios_cmd_t *kcmd; | |
1075 | void *data = NULL; | |
1076 | int rc = -EINVAL; | |
1077 | ||
d1d7e268 | 1078 | kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP); |
302ef151 BB |
1079 | if (kcmd == NULL) { |
1080 | zpios_print(file, "Unable to kmem_alloc() %ld byte for " | |
d1d7e268 MK |
1081 | "zpios_cmd_t\n", (long int)sizeof (zpios_cmd_t)); |
1082 | return (-ENOMEM); | |
302ef151 BB |
1083 | } |
1084 | ||
d1d7e268 | 1085 | rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t)); |
302ef151 BB |
1086 | if (rc) { |
1087 | zpios_print(file, "Unable to copy command structure " | |
1088 | "from user to kernel memory, %d\n", rc); | |
1089 | goto out_cmd; | |
1090 | } | |
1091 | ||
1092 | if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) { | |
1093 | zpios_print(file, "Bad command magic 0x%x != 0x%x\n", | |
d1d7e268 MK |
1094 | kcmd->cmd_magic, ZPIOS_CFG_MAGIC); |
1095 | rc = (-EINVAL); | |
302ef151 BB |
1096 | goto out_cmd; |
1097 | } | |
1098 | ||
1099 | /* Allocate memory for any opaque data the caller needed to pass on */ | |
1100 | if (kcmd->cmd_data_size > 0) { | |
1101 | data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP); | |
1102 | if (data == NULL) { | |
1103 | zpios_print(file, "Unable to vmem_alloc() %ld " | |
1104 | "bytes for data buffer\n", | |
1105 | (long)kcmd->cmd_data_size); | |
1106 | rc = -ENOMEM; | |
1107 | goto out_cmd; | |
1108 | } | |
1109 | ||
1110 | rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t, | |
d1d7e268 | 1111 | cmd_data_str)), kcmd->cmd_data_size); |
302ef151 BB |
1112 | if (rc) { |
1113 | zpios_print(file, "Unable to copy data buffer " | |
1114 | "from user to kernel memory, %d\n", rc); | |
1115 | goto out_data; | |
1116 | } | |
1117 | } | |
1118 | ||
1119 | rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data); | |
1120 | ||
1121 | if (data != NULL) { | |
1122 | /* If the test failed do not print out the stats */ | |
1123 | if (rc) | |
1124 | goto out_data; | |
1125 | ||
1126 | rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t, | |
d1d7e268 | 1127 | cmd_data_str)), data, kcmd->cmd_data_size); |
302ef151 BB |
1128 | if (rc) { |
1129 | zpios_print(file, "Unable to copy data buffer " | |
1130 | "from kernel to user memory, %d\n", rc); | |
1131 | rc = -EFAULT; | |
1132 | } | |
1133 | ||
1134 | out_data: | |
1135 | vmem_free(data, kcmd->cmd_data_size); | |
1136 | } | |
1137 | out_cmd: | |
d1d7e268 | 1138 | kmem_free(kcmd, sizeof (zpios_cmd_t)); |
302ef151 | 1139 | |
d1d7e268 | 1140 | return (rc); |
302ef151 BB |
1141 | } |
1142 | ||
1f30b9d4 BB |
1143 | static long |
1144 | zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |
302ef151 | 1145 | { |
d1d7e268 | 1146 | unsigned int minor = iminor(file->f_dentry->d_inode); |
302ef151 BB |
1147 | int rc = 0; |
1148 | ||
1149 | /* Ignore tty ioctls */ | |
1150 | if ((cmd & 0xffffff00) == ((int)'T') << 8) | |
d1d7e268 | 1151 | return (-ENOTTY); |
302ef151 BB |
1152 | |
1153 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 1154 | return (-ENXIO); |
302ef151 BB |
1155 | |
1156 | switch (cmd) { | |
1157 | case ZPIOS_CFG: | |
1158 | rc = zpios_ioctl_cfg(file, arg); | |
1159 | break; | |
1160 | case ZPIOS_CMD: | |
1161 | rc = zpios_ioctl_cmd(file, arg); | |
1162 | break; | |
1163 | default: | |
1164 | zpios_print(file, "Bad ioctl command %d\n", cmd); | |
1165 | rc = -EINVAL; | |
1166 | break; | |
1167 | } | |
1168 | ||
d1d7e268 | 1169 | return (rc); |
302ef151 BB |
1170 | } |
1171 | ||
#ifdef CONFIG_COMPAT
/*
 * Compatibility handler for ioctls from 32-bit ELF binaries.  The
 * request is passed to the native handler unmodified.
 */
static long
zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long rc;

	rc = zpios_unlocked_ioctl(file, cmd, arg);

	return (rc);
}
#endif /* CONFIG_COMPAT */
1180 | ||
d1d7e268 MK |
1181 | /* |
1182 | * I'm not sure why you would want to write in to this buffer from | |
302ef151 BB |
1183 | * user space since its principle use is to pass test status info |
1184 | * back to the user space, but I don't see any reason to prevent it. | |
1185 | */ | |
1186 | static ssize_t | |
1187 | zpios_write(struct file *file, const char __user *buf, | |
d1d7e268 | 1188 | size_t count, loff_t *ppos) |
302ef151 | 1189 | { |
d1d7e268 | 1190 | unsigned int minor = iminor(file->f_dentry->d_inode); |
302ef151 BB |
1191 | zpios_info_t *info = (zpios_info_t *)file->private_data; |
1192 | int rc = 0; | |
1193 | ||
1194 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 1195 | return (-ENXIO); |
302ef151 BB |
1196 | |
1197 | ASSERT(info); | |
1198 | ASSERT(info->info_buffer); | |
1199 | ||
1200 | spin_lock(&info->info_lock); | |
1201 | ||
1202 | /* Write beyond EOF */ | |
1203 | if (*ppos >= info->info_size) { | |
1204 | rc = -EFBIG; | |
1205 | goto out; | |
1206 | } | |
1207 | ||
1208 | /* Resize count if beyond EOF */ | |
1209 | if (*ppos + count > info->info_size) | |
1210 | count = info->info_size - *ppos; | |
1211 | ||
1212 | if (copy_from_user(info->info_buffer, buf, count)) { | |
1213 | rc = -EFAULT; | |
1214 | goto out; | |
1215 | } | |
1216 | ||
1217 | *ppos += count; | |
1218 | rc = count; | |
1219 | out: | |
1220 | spin_unlock(&info->info_lock); | |
d1d7e268 | 1221 | return (rc); |
302ef151 BB |
1222 | } |
1223 | ||
1224 | static ssize_t | |
d1d7e268 | 1225 | zpios_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
302ef151 | 1226 | { |
d1d7e268 | 1227 | unsigned int minor = iminor(file->f_dentry->d_inode); |
302ef151 BB |
1228 | zpios_info_t *info = (zpios_info_t *)file->private_data; |
1229 | int rc = 0; | |
1230 | ||
1231 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 1232 | return (-ENXIO); |
302ef151 BB |
1233 | |
1234 | ASSERT(info); | |
1235 | ASSERT(info->info_buffer); | |
1236 | ||
1237 | spin_lock(&info->info_lock); | |
1238 | ||
1239 | /* Read beyond EOF */ | |
1240 | if (*ppos >= info->info_size) | |
1241 | goto out; | |
1242 | ||
1243 | /* Resize count if beyond EOF */ | |
1244 | if (*ppos + count > info->info_size) | |
1245 | count = info->info_size - *ppos; | |
1246 | ||
1247 | if (copy_to_user(buf, info->info_buffer + *ppos, count)) { | |
1248 | rc = -EFAULT; | |
1249 | goto out; | |
1250 | } | |
1251 | ||
1252 | *ppos += count; | |
1253 | rc = count; | |
1254 | out: | |
1255 | spin_unlock(&info->info_lock); | |
d1d7e268 | 1256 | return (rc); |
302ef151 BB |
1257 | } |
1258 | ||
1259 | static loff_t zpios_seek(struct file *file, loff_t offset, int origin) | |
1260 | { | |
d1d7e268 | 1261 | unsigned int minor = iminor(file->f_dentry->d_inode); |
302ef151 BB |
1262 | zpios_info_t *info = (zpios_info_t *)file->private_data; |
1263 | int rc = -EINVAL; | |
1264 | ||
1265 | if (minor >= ZPIOS_MINORS) | |
d1d7e268 | 1266 | return (-ENXIO); |
302ef151 BB |
1267 | |
1268 | ASSERT(info); | |
1269 | ASSERT(info->info_buffer); | |
1270 | ||
1271 | spin_lock(&info->info_lock); | |
1272 | ||
1273 | switch (origin) { | |
1274 | case 0: /* SEEK_SET - No-op just do it */ | |
1275 | break; | |
1276 | case 1: /* SEEK_CUR - Seek from current */ | |
1277 | offset = file->f_pos + offset; | |
1278 | break; | |
1279 | case 2: /* SEEK_END - Seek from end */ | |
1280 | offset = info->info_size + offset; | |
1281 | break; | |
1282 | } | |
1283 | ||
1284 | if (offset >= 0) { | |
1285 | file->f_pos = offset; | |
1286 | file->f_version = 0; | |
1287 | rc = offset; | |
1288 | } | |
1289 | ||
1290 | spin_unlock(&info->info_lock); | |
1291 | ||
d1d7e268 | 1292 | return (rc); |
302ef151 BB |
1293 | } |
1294 | ||
1295 | static struct cdev zpios_cdev; | |
1296 | static struct file_operations zpios_fops = { | |
1297 | .owner = THIS_MODULE, | |
1298 | .open = zpios_open, | |
1299 | .release = zpios_release, | |
1f30b9d4 | 1300 | .unlocked_ioctl = zpios_unlocked_ioctl, |
302ef151 BB |
1301 | #ifdef CONFIG_COMPAT |
1302 | .compat_ioctl = zpios_compat_ioctl, | |
1303 | #endif | |
1304 | .read = zpios_read, | |
1305 | .write = zpios_write, | |
1306 | .llseek = zpios_seek, | |
1307 | }; | |
1308 | ||
1309 | static int | |
1310 | zpios_init(void) | |
1311 | { | |
1312 | dev_t dev; | |
1313 | int rc; | |
1314 | ||
1315 | dev = MKDEV(ZPIOS_MAJOR, 0); | |
1316 | if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME))) | |
1317 | goto error; | |
1318 | ||
1319 | /* Support for registering a character driver */ | |
1320 | cdev_init(&zpios_cdev, &zpios_fops); | |
1321 | zpios_cdev.owner = THIS_MODULE; | |
1322 | kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME); | |
1323 | if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) { | |
1324 | printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc); | |
1325 | kobject_put(&zpios_cdev.kobj); | |
1326 | unregister_chrdev_region(dev, ZPIOS_MINORS); | |
1327 | goto error; | |
1328 | } | |
1329 | ||
1330 | /* Support for udev make driver info available in sysfs */ | |
1331 | zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME); | |
1332 | if (IS_ERR(zpios_class)) { | |
1333 | rc = PTR_ERR(zpios_class); | |
1334 | printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc); | |
1335 | cdev_del(&zpios_cdev); | |
1336 | unregister_chrdev_region(dev, ZPIOS_MINORS); | |
1337 | goto error; | |
1338 | } | |
1339 | ||
1340 | zpios_device = spl_device_create(zpios_class, NULL, | |
d1d7e268 MK |
1341 | dev, NULL, ZPIOS_NAME); |
1342 | ||
1343 | return (0); | |
302ef151 BB |
1344 | error: |
1345 | printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc); | |
d1d7e268 | 1346 | return (rc); |
302ef151 BB |
1347 | } |
1348 | ||
1349 | static int | |
1350 | zpios_fini(void) | |
1351 | { | |
1352 | dev_t dev = MKDEV(ZPIOS_MAJOR, 0); | |
1353 | ||
1354 | spl_device_destroy(zpios_class, zpios_device, dev); | |
1355 | spl_class_destroy(zpios_class); | |
1356 | cdev_del(&zpios_cdev); | |
1357 | unregister_chrdev_region(dev, ZPIOS_MINORS); | |
1358 | ||
d1d7e268 | 1359 | return (0); |
302ef151 BB |
1360 | } |
1361 | ||
1362 | spl_module_init(zpios_init); | |
1363 | spl_module_exit(zpios_fini); | |
1364 | ||
1365 | MODULE_AUTHOR("LLNL / Sun"); | |
1366 | MODULE_DESCRIPTION("Kernel PIOS implementation"); | |
1367 | MODULE_LICENSE("GPL"); | |
99e349db | 1368 | MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); |