]>
Commit | Line | Data |
---|---|---|
dd027328 RG |
1 | /* |
2 | * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. | |
3 | * | |
4 | * This program is free software: you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
d3c68d0b | 6 | * the Free Software Foundation, either version 2 of the License, or |
dd027328 RG |
7 | * (at your option) any later version. |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | /* | |
19 | * Oracle Data Analytics Accelerator (DAX) | |
20 | * | |
21 | * DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8 | |
22 | * (DAX2) processor chips, and has direct access to the CPU's L3 | |
23 | * caches as well as physical memory. It can perform several | |
24 | * operations on data streams with various input and output formats. | |
25 | * The driver provides a transport mechanism only and has limited | |
26 | * knowledge of the various opcodes and data formats. A user space | |
27 | * library provides high level services and translates these into low | |
28 | * level commands which are then passed into the driver and | |
29 | * subsequently the hypervisor and the coprocessor. The library is | |
30 | * the recommended way for applications to use the coprocessor, and | |
31 | * the driver interface is not intended for general use. | |
32 | * | |
44348e8a | 33 | * See Documentation/sparc/oradax/oracle-dax.txt for more details. |
dd027328 RG |
34 | */ |
35 | ||
36 | #include <linux/uaccess.h> | |
37 | #include <linux/module.h> | |
38 | #include <linux/delay.h> | |
39 | #include <linux/cdev.h> | |
40 | #include <linux/slab.h> | |
41 | #include <linux/mm.h> | |
42 | ||
43 | #include <asm/hypervisor.h> | |
44 | #include <asm/mdesc.h> | |
45 | #include <asm/oradax.h> | |
46 | ||
47 | MODULE_LICENSE("GPL"); | |
48 | MODULE_DESCRIPTION("Driver for Oracle Data Analytics Accelerator"); | |
49 | ||
50 | #define DAX_DBG_FLG_BASIC 0x01 | |
51 | #define DAX_DBG_FLG_STAT 0x02 | |
52 | #define DAX_DBG_FLG_INFO 0x04 | |
53 | #define DAX_DBG_FLG_ALL 0xff | |
54 | ||
55 | #define dax_err(fmt, ...) pr_err("%s: " fmt "\n", __func__, ##__VA_ARGS__) | |
56 | #define dax_info(fmt, ...) pr_info("%s: " fmt "\n", __func__, ##__VA_ARGS__) | |
57 | ||
58 | #define dax_dbg(fmt, ...) do { \ | |
59 | if (dax_debug & DAX_DBG_FLG_BASIC)\ | |
60 | dax_info(fmt, ##__VA_ARGS__); \ | |
61 | } while (0) | |
62 | #define dax_stat_dbg(fmt, ...) do { \ | |
63 | if (dax_debug & DAX_DBG_FLG_STAT) \ | |
64 | dax_info(fmt, ##__VA_ARGS__); \ | |
65 | } while (0) | |
66 | #define dax_info_dbg(fmt, ...) do { \ | |
67 | if (dax_debug & DAX_DBG_FLG_INFO) \ | |
68 | dax_info(fmt, ##__VA_ARGS__); \ | |
69 | } while (0) | |
70 | ||
/* Hypervisor API versions requested for each coprocessor generation. */
#define DAX1_MAJOR	1
#define DAX1_MINOR	1
#define DAX2_MAJOR	2
#define DAX2_MINOR	0

/* "compatible" property strings matched against the machine description. */
#define DAX1_STR	"ORCL,sun4v-dax"
#define DAX2_STR	"ORCL,sun4v-dax2"

/* Number of completion areas that fit into the mmap-able buffer. */
#define DAX_CA_ELEMS	(DAX_MMAP_LEN / sizeof(struct dax_cca))

/* Polling parameters: delay per attempt (usec) and maximum attempts. */
#define DAX_CCB_USEC	100
#define DAX_CCB_RETRIES	10000
/*
 * Stream types.  The values index the second dimension of
 * dax_ctx.pages[][]; NUM_STREAM_TYPES is the number of real entries.
 */
enum {
	OUT = 0,
	PRI,
	SEC,
	TBL,
	NUM_STREAM_TYPES
};
92 | ||
/* Completion status values (struct dax_cca.status) */
#define CCA_STAT_NOT_COMPLETED	0
#define CCA_STAT_COMPLETED	1
#define CCA_STAT_FAILED		2
#define CCA_STAT_KILLED		3
#define CCA_STAT_NOT_RUN	4
#define CCA_STAT_PIPE_OUT	5
#define CCA_STAT_PIPE_SRC	6
#define CCA_STAT_PIPE_DST	7

/* Completion error codes (struct dax_cca.err) */
#define CCA_ERR_SUCCESS		0x0	/* no error */
#define CCA_ERR_OVERFLOW	0x1	/* buffer overflow */
#define CCA_ERR_DECODE		0x2	/* CCB decode error */
#define CCA_ERR_PAGE_OVERFLOW	0x3	/* page overflow */
#define CCA_ERR_KILLED		0x7	/* command was killed */
#define CCA_ERR_TIMEOUT		0x8	/* timeout */
#define CCA_ERR_ADI		0x9	/* ADI error */
#define CCA_ERR_DATA_FMT	0xA	/* data format error */
#define CCA_ERR_OTHER_NO_RETRY	0xE	/* other error, do not retry */
#define CCA_ERR_OTHER_RETRY	0xF	/* other error, retry */
#define CCA_ERR_PARTIAL_SYMBOL	0x80	/* QP partial symbol warning */

/* Address types carried in the CCB header */
#define DAX_ADDR_TYPE_NONE	0
#define DAX_ADDR_TYPE_VA_ALT	1	/* secondary context */
#define DAX_ADDR_TYPE_RA	2	/* real address */
#define DAX_ADDR_TYPE_VA	3	/* virtual address */

/* Opcodes carried in struct dax_header.opcode */
#define DAX_OP_SYNC_NOP		0x0
#define DAX_OP_EXTRACT		0x1
#define DAX_OP_SCAN_VALUE	0x2
#define DAX_OP_SCAN_RANGE	0x3
#define DAX_OP_TRANSLATE	0x4
#define DAX_OP_SELECT		0x5
#define DAX_OP_INVERT		0x10	/* OR with translate, scan opcodes */
130 | ||
131 | struct dax_header { | |
132 | u32 ccb_version:4; /* 31:28 CCB Version */ | |
133 | /* 27:24 Sync Flags */ | |
134 | u32 pipe:1; /* Pipeline */ | |
135 | u32 longccb:1; /* Longccb. Set for scan with lu2, lu3, lu4. */ | |
136 | u32 cond:1; /* Conditional */ | |
137 | u32 serial:1; /* Serial */ | |
138 | u32 opcode:8; /* 23:16 Opcode */ | |
139 | /* 15:0 Address Type. */ | |
140 | u32 reserved:3; /* 15:13 reserved */ | |
141 | u32 table_addr_type:2; /* 12:11 Huffman Table Address Type */ | |
142 | u32 out_addr_type:3; /* 10:8 Destination Address Type */ | |
143 | u32 sec_addr_type:3; /* 7:5 Secondary Source Address Type */ | |
144 | u32 pri_addr_type:3; /* 4:2 Primary Source Address Type */ | |
145 | u32 cca_addr_type:2; /* 1:0 Completion Address Type */ | |
146 | }; | |
147 | ||
148 | struct dax_control { | |
149 | u32 pri_fmt:4; /* 31:28 Primary Input Format */ | |
150 | u32 pri_elem_size:5; /* 27:23 Primary Input Element Size(less1) */ | |
151 | u32 pri_offset:3; /* 22:20 Primary Input Starting Offset */ | |
152 | u32 sec_encoding:1; /* 19 Secondary Input Encoding */ | |
153 | /* (must be 0 for Select) */ | |
154 | u32 sec_offset:3; /* 18:16 Secondary Input Starting Offset */ | |
155 | u32 sec_elem_size:2; /* 15:14 Secondary Input Element Size */ | |
156 | /* (must be 0 for Select) */ | |
157 | u32 out_fmt:2; /* 13:12 Output Format */ | |
158 | u32 out_elem_size:2; /* 11:10 Output Element Size */ | |
159 | u32 misc:10; /* 9:0 Opcode specific info */ | |
160 | }; | |
161 | ||
162 | struct dax_data_access { | |
163 | u64 flow_ctrl:2; /* 63:62 Flow Control Type */ | |
164 | u64 pipe_target:2; /* 61:60 Pipeline Target */ | |
165 | u64 out_buf_size:20; /* 59:40 Output Buffer Size */ | |
166 | /* (cachelines less 1) */ | |
167 | u64 unused1:8; /* 39:32 Reserved, Set to 0 */ | |
168 | u64 out_alloc:5; /* 31:27 Output Allocation */ | |
169 | u64 unused2:1; /* 26 Reserved */ | |
170 | u64 pri_len_fmt:2; /* 25:24 Input Length Format */ | |
171 | u64 pri_len:24; /* 23:0 Input Element/Byte/Bit Count */ | |
172 | /* (less 1) */ | |
173 | }; | |
174 | ||
175 | struct dax_ccb { | |
176 | struct dax_header hdr; /* CCB Header */ | |
177 | struct dax_control ctrl;/* Control Word */ | |
178 | void *ca; /* Completion Address */ | |
179 | void *pri; /* Primary Input Address */ | |
180 | struct dax_data_access dac; /* Data Access Control */ | |
181 | void *sec; /* Secondary Input Address */ | |
182 | u64 dword5; /* depends on opcode */ | |
183 | void *out; /* Output Address */ | |
184 | void *tbl; /* Table Address or bitmap */ | |
185 | }; | |
186 | ||
187 | struct dax_cca { | |
188 | u8 status; /* user may mwait on this address */ | |
189 | u8 err; /* user visible error notification */ | |
190 | u8 rsvd[2]; /* reserved */ | |
191 | u32 n_remaining; /* for QP partial symbol warning */ | |
192 | u32 output_sz; /* output in bytes */ | |
193 | u32 rsvd2; /* reserved */ | |
194 | u64 run_cycles; /* run time in OCND2 cycles */ | |
195 | u64 run_stats; /* nothing reported in version 1.0 */ | |
196 | u32 n_processed; /* number input elements */ | |
197 | u32 rsvd3[5]; /* reserved */ | |
198 | u64 retval; /* command return value */ | |
199 | u64 rsvd4[8]; /* reserved */ | |
200 | }; | |
201 | ||
202 | /* per thread CCB context */ | |
203 | struct dax_ctx { | |
204 | struct dax_ccb *ccb_buf; | |
205 | u64 ccb_buf_ra; /* cached RA of ccb_buf */ | |
206 | struct dax_cca *ca_buf; | |
207 | u64 ca_buf_ra; /* cached RA of ca_buf */ | |
208 | struct page *pages[DAX_CA_ELEMS][NUM_STREAM_TYPES]; | |
209 | /* array of locked pages */ | |
210 | struct task_struct *owner; /* thread that owns ctx */ | |
211 | struct task_struct *client; /* requesting thread */ | |
212 | union ccb_result result; | |
213 | u32 ccb_count; | |
214 | u32 fail_count; | |
215 | }; | |
216 | ||
217 | /* driver public entry points */ | |
218 | static int dax_open(struct inode *inode, struct file *file); | |
219 | static ssize_t dax_read(struct file *filp, char __user *buf, | |
220 | size_t count, loff_t *ppos); | |
221 | static ssize_t dax_write(struct file *filp, const char __user *buf, | |
222 | size_t count, loff_t *ppos); | |
223 | static int dax_devmap(struct file *f, struct vm_area_struct *vma); | |
224 | static int dax_close(struct inode *i, struct file *f); | |
225 | ||
226 | static const struct file_operations dax_fops = { | |
227 | .owner = THIS_MODULE, | |
228 | .open = dax_open, | |
229 | .read = dax_read, | |
230 | .write = dax_write, | |
231 | .mmap = dax_devmap, | |
232 | .release = dax_close, | |
233 | }; | |
234 | ||
235 | static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf, | |
236 | size_t count, loff_t *ppos); | |
237 | static int dax_ccb_info(u64 ca, struct ccb_info_result *info); | |
238 | static int dax_ccb_kill(u64 ca, u16 *kill_res); | |
239 | ||
240 | static struct cdev c_dev; | |
241 | static struct class *cl; | |
242 | static dev_t first; | |
243 | ||
244 | static int max_ccb_version; | |
245 | static int dax_debug; | |
246 | module_param(dax_debug, int, 0644); | |
247 | MODULE_PARM_DESC(dax_debug, "Debug flags"); | |
248 | ||
249 | static int __init dax_attach(void) | |
250 | { | |
251 | unsigned long dummy, hv_rv, major, minor, minor_requested, max_ccbs; | |
252 | struct mdesc_handle *hp = mdesc_grab(); | |
253 | char *prop, *dax_name; | |
254 | bool found = false; | |
255 | int len, ret = 0; | |
256 | u64 pn; | |
257 | ||
258 | if (hp == NULL) { | |
259 | dax_err("Unable to grab mdesc"); | |
260 | return -ENODEV; | |
261 | } | |
262 | ||
263 | mdesc_for_each_node_by_name(hp, pn, "virtual-device") { | |
264 | prop = (char *)mdesc_get_property(hp, pn, "name", &len); | |
265 | if (prop == NULL) | |
266 | continue; | |
267 | if (strncmp(prop, "dax", strlen("dax"))) | |
268 | continue; | |
269 | dax_dbg("Found node 0x%llx = %s", pn, prop); | |
270 | ||
271 | prop = (char *)mdesc_get_property(hp, pn, "compatible", &len); | |
272 | if (prop == NULL) | |
273 | continue; | |
274 | dax_dbg("Found node 0x%llx = %s", pn, prop); | |
275 | found = true; | |
276 | break; | |
277 | } | |
278 | ||
279 | if (!found) { | |
280 | dax_err("No DAX device found"); | |
281 | ret = -ENODEV; | |
282 | goto done; | |
283 | } | |
284 | ||
285 | if (strncmp(prop, DAX2_STR, strlen(DAX2_STR)) == 0) { | |
286 | dax_name = DAX_NAME "2"; | |
287 | major = DAX2_MAJOR; | |
288 | minor_requested = DAX2_MINOR; | |
289 | max_ccb_version = 1; | |
290 | dax_dbg("MD indicates DAX2 coprocessor"); | |
291 | } else if (strncmp(prop, DAX1_STR, strlen(DAX1_STR)) == 0) { | |
292 | dax_name = DAX_NAME "1"; | |
293 | major = DAX1_MAJOR; | |
294 | minor_requested = DAX1_MINOR; | |
295 | max_ccb_version = 0; | |
296 | dax_dbg("MD indicates DAX1 coprocessor"); | |
297 | } else { | |
298 | dax_err("Unknown dax type: %s", prop); | |
299 | ret = -ENODEV; | |
300 | goto done; | |
301 | } | |
302 | ||
303 | minor = minor_requested; | |
304 | dax_dbg("Registering DAX HV api with major %ld minor %ld", major, | |
305 | minor); | |
306 | if (sun4v_hvapi_register(HV_GRP_DAX, major, &minor)) { | |
307 | dax_err("hvapi_register failed"); | |
308 | ret = -ENODEV; | |
309 | goto done; | |
310 | } else { | |
311 | dax_dbg("Max minor supported by HV = %ld (major %ld)", minor, | |
312 | major); | |
313 | minor = min(minor, minor_requested); | |
314 | dax_dbg("registered DAX major %ld minor %ld", major, minor); | |
315 | } | |
316 | ||
317 | /* submit a zero length ccb array to query coprocessor queue size */ | |
318 | hv_rv = sun4v_ccb_submit(0, 0, HV_CCB_QUERY_CMD, 0, &max_ccbs, &dummy); | |
319 | if (hv_rv != 0) { | |
320 | dax_err("get_hwqueue_size failed with status=%ld and max_ccbs=%ld", | |
321 | hv_rv, max_ccbs); | |
322 | ret = -ENODEV; | |
323 | goto done; | |
324 | } | |
325 | ||
326 | if (max_ccbs != DAX_MAX_CCBS) { | |
327 | dax_err("HV reports unsupported max_ccbs=%ld", max_ccbs); | |
328 | ret = -ENODEV; | |
329 | goto done; | |
330 | } | |
331 | ||
332 | if (alloc_chrdev_region(&first, 0, 1, DAX_NAME) < 0) { | |
333 | dax_err("alloc_chrdev_region failed"); | |
334 | ret = -ENXIO; | |
335 | goto done; | |
336 | } | |
337 | ||
338 | cl = class_create(THIS_MODULE, DAX_NAME); | |
2d85ec8a | 339 | if (IS_ERR(cl)) { |
dd027328 | 340 | dax_err("class_create failed"); |
2d85ec8a | 341 | ret = PTR_ERR(cl); |
dd027328 RG |
342 | goto class_error; |
343 | } | |
344 | ||
345 | if (device_create(cl, NULL, first, NULL, dax_name) == NULL) { | |
346 | dax_err("device_create failed"); | |
347 | ret = -ENXIO; | |
348 | goto device_error; | |
349 | } | |
350 | ||
351 | cdev_init(&c_dev, &dax_fops); | |
352 | if (cdev_add(&c_dev, first, 1) == -1) { | |
353 | dax_err("cdev_add failed"); | |
354 | ret = -ENXIO; | |
355 | goto cdev_error; | |
356 | } | |
357 | ||
358 | pr_info("Attached DAX module\n"); | |
359 | goto done; | |
360 | ||
361 | cdev_error: | |
362 | device_destroy(cl, first); | |
363 | device_error: | |
364 | class_destroy(cl); | |
365 | class_error: | |
366 | unregister_chrdev_region(first, 1); | |
367 | done: | |
368 | mdesc_release(hp); | |
369 | return ret; | |
370 | } | |
371 | module_init(dax_attach); | |
372 | ||
373 | static void __exit dax_detach(void) | |
374 | { | |
375 | pr_info("Cleaning up DAX module\n"); | |
376 | cdev_del(&c_dev); | |
377 | device_destroy(cl, first); | |
378 | class_destroy(cl); | |
379 | unregister_chrdev_region(first, 1); | |
380 | } | |
381 | module_exit(dax_detach); | |
382 | ||
383 | /* map completion area */ | |
384 | static int dax_devmap(struct file *f, struct vm_area_struct *vma) | |
385 | { | |
386 | struct dax_ctx *ctx = (struct dax_ctx *)f->private_data; | |
387 | size_t len = vma->vm_end - vma->vm_start; | |
388 | ||
389 | dax_dbg("len=0x%lx, flags=0x%lx", len, vma->vm_flags); | |
390 | ||
391 | if (ctx->owner != current) { | |
392 | dax_dbg("devmap called from wrong thread"); | |
393 | return -EINVAL; | |
394 | } | |
395 | ||
396 | if (len != DAX_MMAP_LEN) { | |
397 | dax_dbg("len(%lu) != DAX_MMAP_LEN(%d)", len, DAX_MMAP_LEN); | |
398 | return -EINVAL; | |
399 | } | |
400 | ||
401 | /* completion area is mapped read-only for user */ | |
402 | if (vma->vm_flags & VM_WRITE) | |
403 | return -EPERM; | |
404 | vma->vm_flags &= ~VM_MAYWRITE; | |
405 | ||
406 | if (remap_pfn_range(vma, vma->vm_start, ctx->ca_buf_ra >> PAGE_SHIFT, | |
407 | len, vma->vm_page_prot)) | |
408 | return -EAGAIN; | |
409 | ||
410 | dax_dbg("mmapped completion area at uva 0x%lx", vma->vm_start); | |
411 | return 0; | |
412 | } | |
413 | ||
414 | /* Unlock user pages. Called during dequeue or device close */ | |
415 | static void dax_unlock_pages(struct dax_ctx *ctx, int ccb_index, int nelem) | |
416 | { | |
417 | int i, j; | |
418 | ||
419 | for (i = ccb_index; i < ccb_index + nelem; i++) { | |
420 | for (j = 0; j < NUM_STREAM_TYPES; j++) { | |
421 | struct page *p = ctx->pages[i][j]; | |
422 | ||
423 | if (p) { | |
424 | dax_dbg("freeing page %p", p); | |
425 | if (j == OUT) | |
426 | set_page_dirty(p); | |
427 | put_page(p); | |
428 | ctx->pages[i][j] = NULL; | |
429 | } | |
430 | } | |
431 | } | |
432 | } | |
433 | ||
/*
 * Take a reference on the single user page that contains @va and store
 * it in *@p.
 *
 * NOTE(review): the third argument (1) to get_user_pages_fast() is
 * presumably the "write" flag of this kernel's API - confirm against the
 * target tree.
 *
 * Returns 0 when exactly one page was obtained, -1 otherwise.
 */
static int dax_lock_page(void *va, struct page **p)
{
	int npinned;

	dax_dbg("uva %p", va);

	npinned = get_user_pages_fast((unsigned long)va, 1, 1, p);
	if (npinned != 1) {
		dax_dbg("get_user_pages failed, va=%p, ret=%d", va, npinned);
		return -1;
	}

	dax_dbg("locked page %p, for VA %p", *p, va);
	return 0;
}
449 | ||
450 | static int dax_lock_pages(struct dax_ctx *ctx, int idx, | |
451 | int nelem, u64 *err_va) | |
452 | { | |
453 | int i; | |
454 | ||
455 | for (i = 0; i < nelem; i++) { | |
456 | struct dax_ccb *ccbp = &ctx->ccb_buf[i]; | |
457 | ||
458 | /* | |
459 | * For each address in the CCB whose type is virtual, | |
460 | * lock the page and change the type to virtual alternate | |
461 | * context. On error, return the offending address in | |
462 | * err_va. | |
463 | */ | |
464 | if (ccbp->hdr.out_addr_type == DAX_ADDR_TYPE_VA) { | |
465 | dax_dbg("output"); | |
466 | if (dax_lock_page(ccbp->out, | |
467 | &ctx->pages[i + idx][OUT]) != 0) { | |
468 | *err_va = (u64)ccbp->out; | |
469 | goto error; | |
470 | } | |
471 | ccbp->hdr.out_addr_type = DAX_ADDR_TYPE_VA_ALT; | |
472 | } | |
473 | ||
474 | if (ccbp->hdr.pri_addr_type == DAX_ADDR_TYPE_VA) { | |
475 | dax_dbg("input"); | |
476 | if (dax_lock_page(ccbp->pri, | |
477 | &ctx->pages[i + idx][PRI]) != 0) { | |
478 | *err_va = (u64)ccbp->pri; | |
479 | goto error; | |
480 | } | |
481 | ccbp->hdr.pri_addr_type = DAX_ADDR_TYPE_VA_ALT; | |
482 | } | |
483 | ||
484 | if (ccbp->hdr.sec_addr_type == DAX_ADDR_TYPE_VA) { | |
485 | dax_dbg("sec input"); | |
486 | if (dax_lock_page(ccbp->sec, | |
487 | &ctx->pages[i + idx][SEC]) != 0) { | |
488 | *err_va = (u64)ccbp->sec; | |
489 | goto error; | |
490 | } | |
491 | ccbp->hdr.sec_addr_type = DAX_ADDR_TYPE_VA_ALT; | |
492 | } | |
493 | ||
494 | if (ccbp->hdr.table_addr_type == DAX_ADDR_TYPE_VA) { | |
495 | dax_dbg("tbl"); | |
496 | if (dax_lock_page(ccbp->tbl, | |
497 | &ctx->pages[i + idx][TBL]) != 0) { | |
498 | *err_va = (u64)ccbp->tbl; | |
499 | goto error; | |
500 | } | |
501 | ccbp->hdr.table_addr_type = DAX_ADDR_TYPE_VA_ALT; | |
502 | } | |
503 | ||
504 | /* skip over 2nd 64 bytes of long CCB */ | |
505 | if (ccbp->hdr.longccb) | |
506 | i++; | |
507 | } | |
508 | return DAX_SUBMIT_OK; | |
509 | ||
510 | error: | |
511 | dax_unlock_pages(ctx, idx, nelem); | |
512 | return DAX_SUBMIT_ERR_NOACCESS; | |
513 | } | |
514 | ||
515 | static void dax_ccb_wait(struct dax_ctx *ctx, int idx) | |
516 | { | |
517 | int ret, nretries; | |
518 | u16 kill_res; | |
519 | ||
520 | dax_dbg("idx=%d", idx); | |
521 | ||
522 | for (nretries = 0; nretries < DAX_CCB_RETRIES; nretries++) { | |
523 | if (ctx->ca_buf[idx].status == CCA_STAT_NOT_COMPLETED) | |
524 | udelay(DAX_CCB_USEC); | |
525 | else | |
526 | return; | |
527 | } | |
528 | dax_dbg("ctx (%p): CCB[%d] timed out, wait usec=%d, retries=%d. Killing ccb", | |
529 | (void *)ctx, idx, DAX_CCB_USEC, DAX_CCB_RETRIES); | |
530 | ||
531 | ret = dax_ccb_kill(ctx->ca_buf_ra + idx * sizeof(struct dax_cca), | |
532 | &kill_res); | |
533 | dax_dbg("Kill CCB[%d] %s", idx, ret ? "failed" : "succeeded"); | |
534 | } | |
535 | ||
536 | static int dax_close(struct inode *ino, struct file *f) | |
537 | { | |
538 | struct dax_ctx *ctx = (struct dax_ctx *)f->private_data; | |
539 | int i; | |
540 | ||
541 | f->private_data = NULL; | |
542 | ||
543 | for (i = 0; i < DAX_CA_ELEMS; i++) { | |
544 | if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) { | |
545 | dax_dbg("CCB[%d] not completed", i); | |
546 | dax_ccb_wait(ctx, i); | |
547 | } | |
548 | dax_unlock_pages(ctx, i, 1); | |
549 | } | |
550 | ||
551 | kfree(ctx->ccb_buf); | |
552 | kfree(ctx->ca_buf); | |
553 | dax_stat_dbg("CCBs: %d good, %d bad", ctx->ccb_count, ctx->fail_count); | |
554 | kfree(ctx); | |
555 | ||
556 | return 0; | |
557 | } | |
558 | ||
559 | static ssize_t dax_read(struct file *f, char __user *buf, | |
560 | size_t count, loff_t *ppos) | |
561 | { | |
562 | struct dax_ctx *ctx = f->private_data; | |
563 | ||
564 | if (ctx->client != current) | |
565 | return -EUSERS; | |
566 | ||
567 | ctx->client = NULL; | |
568 | ||
569 | if (count != sizeof(union ccb_result)) | |
570 | return -EINVAL; | |
571 | if (copy_to_user(buf, &ctx->result, sizeof(union ccb_result))) | |
572 | return -EFAULT; | |
573 | return count; | |
574 | } | |
575 | ||
576 | static ssize_t dax_write(struct file *f, const char __user *buf, | |
577 | size_t count, loff_t *ppos) | |
578 | { | |
579 | struct dax_ctx *ctx = f->private_data; | |
580 | struct dax_command hdr; | |
581 | unsigned long ca; | |
582 | int i, idx, ret; | |
583 | ||
584 | if (ctx->client != NULL) | |
585 | return -EINVAL; | |
586 | ||
587 | if (count == 0 || count > DAX_MAX_CCBS * sizeof(struct dax_ccb)) | |
588 | return -EINVAL; | |
589 | ||
590 | if (count % sizeof(struct dax_ccb) == 0) | |
591 | return dax_ccb_exec(ctx, buf, count, ppos); /* CCB EXEC */ | |
592 | ||
593 | if (count != sizeof(struct dax_command)) | |
594 | return -EINVAL; | |
595 | ||
596 | /* immediate command */ | |
597 | if (ctx->owner != current) | |
598 | return -EUSERS; | |
599 | ||
600 | if (copy_from_user(&hdr, buf, sizeof(hdr))) | |
601 | return -EFAULT; | |
602 | ||
603 | ca = ctx->ca_buf_ra + hdr.ca_offset; | |
604 | ||
605 | switch (hdr.command) { | |
606 | case CCB_KILL: | |
607 | if (hdr.ca_offset >= DAX_MMAP_LEN) { | |
608 | dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)", | |
609 | hdr.ca_offset, DAX_MMAP_LEN); | |
610 | return -EINVAL; | |
611 | } | |
612 | ||
613 | ret = dax_ccb_kill(ca, &ctx->result.kill.action); | |
614 | if (ret != 0) { | |
615 | dax_dbg("dax_ccb_kill failed (ret=%d)", ret); | |
616 | return ret; | |
617 | } | |
618 | ||
619 | dax_info_dbg("killed (ca_offset %d)", hdr.ca_offset); | |
620 | idx = hdr.ca_offset / sizeof(struct dax_cca); | |
621 | ctx->ca_buf[idx].status = CCA_STAT_KILLED; | |
622 | ctx->ca_buf[idx].err = CCA_ERR_KILLED; | |
623 | ctx->client = current; | |
624 | return count; | |
625 | ||
626 | case CCB_INFO: | |
627 | if (hdr.ca_offset >= DAX_MMAP_LEN) { | |
628 | dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)", | |
629 | hdr.ca_offset, DAX_MMAP_LEN); | |
630 | return -EINVAL; | |
631 | } | |
632 | ||
633 | ret = dax_ccb_info(ca, &ctx->result.info); | |
634 | if (ret != 0) { | |
635 | dax_dbg("dax_ccb_info failed (ret=%d)", ret); | |
636 | return ret; | |
637 | } | |
638 | ||
639 | dax_info_dbg("info succeeded on ca_offset %d", hdr.ca_offset); | |
640 | ctx->client = current; | |
641 | return count; | |
642 | ||
643 | case CCB_DEQUEUE: | |
644 | for (i = 0; i < DAX_CA_ELEMS; i++) { | |
645 | if (ctx->ca_buf[i].status != | |
646 | CCA_STAT_NOT_COMPLETED) | |
647 | dax_unlock_pages(ctx, i, 1); | |
648 | } | |
649 | return count; | |
650 | ||
651 | default: | |
652 | return -EINVAL; | |
653 | } | |
654 | } | |
655 | ||
656 | static int dax_open(struct inode *inode, struct file *f) | |
657 | { | |
658 | struct dax_ctx *ctx = NULL; | |
659 | int i; | |
660 | ||
661 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | |
662 | if (ctx == NULL) | |
663 | goto done; | |
664 | ||
665 | ctx->ccb_buf = kcalloc(DAX_MAX_CCBS, sizeof(struct dax_ccb), | |
666 | GFP_KERNEL); | |
667 | if (ctx->ccb_buf == NULL) | |
668 | goto done; | |
669 | ||
670 | ctx->ccb_buf_ra = virt_to_phys(ctx->ccb_buf); | |
671 | dax_dbg("ctx->ccb_buf=0x%p, ccb_buf_ra=0x%llx", | |
672 | (void *)ctx->ccb_buf, ctx->ccb_buf_ra); | |
673 | ||
674 | /* allocate CCB completion area buffer */ | |
675 | ctx->ca_buf = kzalloc(DAX_MMAP_LEN, GFP_KERNEL); | |
676 | if (ctx->ca_buf == NULL) | |
677 | goto alloc_error; | |
678 | for (i = 0; i < DAX_CA_ELEMS; i++) | |
679 | ctx->ca_buf[i].status = CCA_STAT_COMPLETED; | |
680 | ||
681 | ctx->ca_buf_ra = virt_to_phys(ctx->ca_buf); | |
682 | dax_dbg("ctx=0x%p, ctx->ca_buf=0x%p, ca_buf_ra=0x%llx", | |
683 | (void *)ctx, (void *)ctx->ca_buf, ctx->ca_buf_ra); | |
684 | ||
685 | ctx->owner = current; | |
686 | f->private_data = ctx; | |
687 | return 0; | |
688 | ||
689 | alloc_error: | |
690 | kfree(ctx->ccb_buf); | |
691 | done: | |
692 | if (ctx != NULL) | |
693 | kfree(ctx); | |
694 | return -ENOMEM; | |
695 | } | |
696 | ||
697 | static char *dax_hv_errno(unsigned long hv_ret, int *ret) | |
698 | { | |
699 | switch (hv_ret) { | |
700 | case HV_EBADALIGN: | |
701 | *ret = -EFAULT; | |
702 | return "HV_EBADALIGN"; | |
703 | case HV_ENORADDR: | |
704 | *ret = -EFAULT; | |
705 | return "HV_ENORADDR"; | |
706 | case HV_EINVAL: | |
707 | *ret = -EINVAL; | |
708 | return "HV_EINVAL"; | |
709 | case HV_EWOULDBLOCK: | |
710 | *ret = -EAGAIN; | |
711 | return "HV_EWOULDBLOCK"; | |
712 | case HV_ENOACCESS: | |
713 | *ret = -EPERM; | |
714 | return "HV_ENOACCESS"; | |
715 | default: | |
716 | break; | |
717 | } | |
718 | ||
719 | *ret = -EIO; | |
720 | return "UNKNOWN"; | |
721 | } | |
722 | ||
723 | static int dax_ccb_kill(u64 ca, u16 *kill_res) | |
724 | { | |
725 | unsigned long hv_ret; | |
726 | int count, ret = 0; | |
727 | char *err_str; | |
728 | ||
729 | for (count = 0; count < DAX_CCB_RETRIES; count++) { | |
730 | dax_dbg("attempting kill on ca_ra 0x%llx", ca); | |
731 | hv_ret = sun4v_ccb_kill(ca, kill_res); | |
732 | ||
733 | if (hv_ret == HV_EOK) { | |
734 | dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca, | |
735 | *kill_res); | |
736 | } else { | |
737 | err_str = dax_hv_errno(hv_ret, &ret); | |
738 | dax_dbg("%s (ca_ra 0x%llx)", err_str, ca); | |
739 | } | |
740 | ||
741 | if (ret != -EAGAIN) | |
742 | return ret; | |
743 | dax_info_dbg("ccb_kill count = %d", count); | |
744 | udelay(DAX_CCB_USEC); | |
745 | } | |
746 | ||
747 | return -EAGAIN; | |
748 | } | |
749 | ||
750 | static int dax_ccb_info(u64 ca, struct ccb_info_result *info) | |
751 | { | |
752 | unsigned long hv_ret; | |
753 | char *err_str; | |
754 | int ret = 0; | |
755 | ||
756 | dax_dbg("attempting info on ca_ra 0x%llx", ca); | |
757 | hv_ret = sun4v_ccb_info(ca, info); | |
758 | ||
759 | if (hv_ret == HV_EOK) { | |
760 | dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca, info->state); | |
761 | if (info->state == DAX_CCB_ENQUEUED) { | |
762 | dax_info_dbg("dax_unit %d, queue_num %d, queue_pos %d", | |
763 | info->inst_num, info->q_num, info->q_pos); | |
764 | } | |
765 | } else { | |
766 | err_str = dax_hv_errno(hv_ret, &ret); | |
767 | dax_dbg("%s (ca_ra 0x%llx)", err_str, ca); | |
768 | } | |
769 | ||
770 | return ret; | |
771 | } | |
772 | ||
773 | static void dax_prt_ccbs(struct dax_ccb *ccb, int nelem) | |
774 | { | |
775 | int i, j; | |
776 | u64 *ccbp; | |
777 | ||
778 | dax_dbg("ccb buffer:"); | |
779 | for (i = 0; i < nelem; i++) { | |
780 | ccbp = (u64 *)&ccb[i]; | |
781 | dax_dbg(" %sccb[%d]", ccb[i].hdr.longccb ? "long " : "", i); | |
782 | for (j = 0; j < 8; j++) | |
783 | dax_dbg("\tccb[%d].dwords[%d]=0x%llx", | |
784 | i, j, *(ccbp + j)); | |
785 | } | |
786 | } | |
787 | ||
788 | /* | |
789 | * Validates user CCB content. Also sets completion address and address types | |
790 | * for all addresses contained in CCB. | |
791 | */ | |
792 | static int dax_preprocess_usr_ccbs(struct dax_ctx *ctx, int idx, int nelem) | |
793 | { | |
794 | int i; | |
795 | ||
796 | /* | |
797 | * The user is not allowed to specify real address types in | |
798 | * the CCB header. This must be enforced by the kernel before | |
799 | * submitting the CCBs to HV. The only allowed values for all | |
800 | * address fields are VA or IMM | |
801 | */ | |
802 | for (i = 0; i < nelem; i++) { | |
803 | struct dax_ccb *ccbp = &ctx->ccb_buf[i]; | |
804 | unsigned long ca_offset; | |
805 | ||
806 | if (ccbp->hdr.ccb_version > max_ccb_version) | |
807 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
808 | ||
809 | switch (ccbp->hdr.opcode) { | |
810 | case DAX_OP_SYNC_NOP: | |
811 | case DAX_OP_EXTRACT: | |
812 | case DAX_OP_SCAN_VALUE: | |
813 | case DAX_OP_SCAN_RANGE: | |
814 | case DAX_OP_TRANSLATE: | |
815 | case DAX_OP_SCAN_VALUE | DAX_OP_INVERT: | |
816 | case DAX_OP_SCAN_RANGE | DAX_OP_INVERT: | |
817 | case DAX_OP_TRANSLATE | DAX_OP_INVERT: | |
818 | case DAX_OP_SELECT: | |
819 | break; | |
820 | default: | |
821 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
822 | } | |
823 | ||
824 | if (ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_VA && | |
825 | ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_NONE) { | |
826 | dax_dbg("invalid out_addr_type in user CCB[%d]", i); | |
827 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
828 | } | |
829 | ||
830 | if (ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_VA && | |
831 | ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_NONE) { | |
832 | dax_dbg("invalid pri_addr_type in user CCB[%d]", i); | |
833 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
834 | } | |
835 | ||
836 | if (ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_VA && | |
837 | ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_NONE) { | |
838 | dax_dbg("invalid sec_addr_type in user CCB[%d]", i); | |
839 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
840 | } | |
841 | ||
842 | if (ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_VA && | |
843 | ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_NONE) { | |
844 | dax_dbg("invalid table_addr_type in user CCB[%d]", i); | |
845 | return DAX_SUBMIT_ERR_CCB_INVAL; | |
846 | } | |
847 | ||
848 | /* set completion (real) address and address type */ | |
849 | ccbp->hdr.cca_addr_type = DAX_ADDR_TYPE_RA; | |
850 | ca_offset = (idx + i) * sizeof(struct dax_cca); | |
851 | ccbp->ca = (void *)ctx->ca_buf_ra + ca_offset; | |
852 | memset(&ctx->ca_buf[idx + i], 0, sizeof(struct dax_cca)); | |
853 | ||
854 | dax_dbg("ccb[%d]=%p, ca_offset=0x%lx, compl RA=0x%llx", | |
855 | i, ccbp, ca_offset, ctx->ca_buf_ra + ca_offset); | |
856 | ||
857 | /* skip over 2nd 64 bytes of long CCB */ | |
858 | if (ccbp->hdr.longccb) | |
859 | i++; | |
860 | } | |
861 | ||
862 | return DAX_SUBMIT_OK; | |
863 | } | |
864 | ||
865 | static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf, | |
866 | size_t count, loff_t *ppos) | |
867 | { | |
868 | unsigned long accepted_len, hv_rv; | |
869 | int i, idx, nccbs, naccepted; | |
870 | ||
871 | ctx->client = current; | |
872 | idx = *ppos; | |
873 | nccbs = count / sizeof(struct dax_ccb); | |
874 | ||
875 | if (ctx->owner != current) { | |
876 | dax_dbg("wrong thread"); | |
877 | ctx->result.exec.status = DAX_SUBMIT_ERR_THR_INIT; | |
878 | return 0; | |
879 | } | |
880 | dax_dbg("args: ccb_buf_len=%ld, idx=%d", count, idx); | |
881 | ||
882 | /* for given index and length, verify ca_buf range exists */ | |
49d7006d | 883 | if (idx < 0 || idx > (DAX_CA_ELEMS - nccbs)) { |
dd027328 RG |
884 | ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL; |
885 | return 0; | |
886 | } | |
887 | ||
888 | /* | |
889 | * Copy CCBs into kernel buffer to prevent modification by the | |
890 | * user in between validation and submission. | |
891 | */ | |
892 | if (copy_from_user(ctx->ccb_buf, buf, count)) { | |
893 | dax_dbg("copyin of user CCB buffer failed"); | |
894 | ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_ARR_MMU_MISS; | |
895 | return 0; | |
896 | } | |
897 | ||
898 | /* check to see if ca_buf[idx] .. ca_buf[idx + nccbs] are available */ | |
899 | for (i = idx; i < idx + nccbs; i++) { | |
900 | if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) { | |
901 | dax_dbg("CA range not available, dequeue needed"); | |
902 | ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL; | |
903 | return 0; | |
904 | } | |
905 | } | |
906 | dax_unlock_pages(ctx, idx, nccbs); | |
907 | ||
908 | ctx->result.exec.status = dax_preprocess_usr_ccbs(ctx, idx, nccbs); | |
909 | if (ctx->result.exec.status != DAX_SUBMIT_OK) | |
910 | return 0; | |
911 | ||
912 | ctx->result.exec.status = dax_lock_pages(ctx, idx, nccbs, | |
913 | &ctx->result.exec.status_data); | |
914 | if (ctx->result.exec.status != DAX_SUBMIT_OK) | |
915 | return 0; | |
916 | ||
917 | if (dax_debug & DAX_DBG_FLG_BASIC) | |
918 | dax_prt_ccbs(ctx->ccb_buf, nccbs); | |
919 | ||
920 | hv_rv = sun4v_ccb_submit(ctx->ccb_buf_ra, count, | |
921 | HV_CCB_QUERY_CMD | HV_CCB_VA_SECONDARY, 0, | |
922 | &accepted_len, &ctx->result.exec.status_data); | |
923 | ||
924 | switch (hv_rv) { | |
925 | case HV_EOK: | |
926 | /* | |
927 | * Hcall succeeded with no errors but the accepted | |
928 | * length may be less than the requested length. The | |
929 | * only way the driver can resubmit the remainder is | |
930 | * to wait for completion of the submitted CCBs since | |
931 | * there is no way to guarantee the ordering semantics | |
932 | * required by the client applications. Therefore we | |
933 | * let the user library deal with resubmissions. | |
934 | */ | |
935 | ctx->result.exec.status = DAX_SUBMIT_OK; | |
936 | break; | |
937 | case HV_EWOULDBLOCK: | |
938 | /* | |
939 | * This is a transient HV API error. The user library | |
940 | * can retry. | |
941 | */ | |
942 | dax_dbg("hcall returned HV_EWOULDBLOCK"); | |
943 | ctx->result.exec.status = DAX_SUBMIT_ERR_WOULDBLOCK; | |
944 | break; | |
945 | case HV_ENOMAP: | |
946 | /* | |
947 | * HV was unable to translate a VA. The VA it could | |
948 | * not translate is returned in the status_data param. | |
949 | */ | |
950 | dax_dbg("hcall returned HV_ENOMAP"); | |
951 | ctx->result.exec.status = DAX_SUBMIT_ERR_NOMAP; | |
952 | break; | |
953 | case HV_EINVAL: | |
954 | /* | |
955 | * This is the result of an invalid user CCB as HV is | |
956 | * validating some of the user CCB fields. Pass this | |
957 | * error back to the user. There is no supporting info | |
958 | * to isolate the invalid field. | |
959 | */ | |
960 | dax_dbg("hcall returned HV_EINVAL"); | |
961 | ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_INVAL; | |
962 | break; | |
963 | case HV_ENOACCESS: | |
964 | /* | |
965 | * HV found a VA that did not have the appropriate | |
966 | * permissions (such as the w bit). The VA in question | |
967 | * is returned in status_data param. | |
968 | */ | |
969 | dax_dbg("hcall returned HV_ENOACCESS"); | |
970 | ctx->result.exec.status = DAX_SUBMIT_ERR_NOACCESS; | |
971 | break; | |
972 | case HV_EUNAVAILABLE: | |
973 | /* | |
974 | * The requested CCB operation could not be performed | |
975 | * at this time. Return the specific unavailable code | |
976 | * in the status_data field. | |
977 | */ | |
978 | dax_dbg("hcall returned HV_EUNAVAILABLE"); | |
979 | ctx->result.exec.status = DAX_SUBMIT_ERR_UNAVAIL; | |
980 | break; | |
981 | default: | |
982 | ctx->result.exec.status = DAX_SUBMIT_ERR_INTERNAL; | |
983 | dax_dbg("unknown hcall return value (%ld)", hv_rv); | |
984 | break; | |
985 | } | |
986 | ||
987 | /* unlock pages associated with the unaccepted CCBs */ | |
988 | naccepted = accepted_len / sizeof(struct dax_ccb); | |
989 | dax_unlock_pages(ctx, idx + naccepted, nccbs - naccepted); | |
990 | ||
991 | /* mark unaccepted CCBs as not completed */ | |
992 | for (i = idx + naccepted; i < idx + nccbs; i++) | |
993 | ctx->ca_buf[i].status = CCA_STAT_COMPLETED; | |
994 | ||
995 | ctx->ccb_count += naccepted; | |
996 | ctx->fail_count += nccbs - naccepted; | |
997 | ||
998 | dax_dbg("hcall rv=%ld, accepted_len=%ld, status_data=0x%llx, ret status=%d", | |
999 | hv_rv, accepted_len, ctx->result.exec.status_data, | |
1000 | ctx->result.exec.status); | |
1001 | ||
1002 | if (count == accepted_len) | |
1003 | ctx->client = NULL; /* no read needed to complete protocol */ | |
1004 | return accepted_len; | |
1005 | } |