]>
Commit | Line | Data |
---|---|---|
f87d0fbb RR |
1 | /* |
2 | * Helpers for the host side of a virtio ring. | |
3 | * | |
4 | * Since these may be in userspace, we use (inline) accessors. | |
5 | */ | |
9d1b972f | 6 | #include <linux/compiler.h> |
f558a845 | 7 | #include <linux/module.h> |
f87d0fbb RR |
8 | #include <linux/vringh.h> |
9 | #include <linux/virtio_ring.h> | |
10 | #include <linux/kernel.h> | |
11 | #include <linux/ratelimit.h> | |
12 | #include <linux/uaccess.h> | |
13 | #include <linux/slab.h> | |
14 | #include <linux/export.h> | |
b9f7ac8c | 15 | #include <uapi/linux/virtio_config.h> |
f87d0fbb RR |
16 | |
17 | static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) | |
18 | { | |
19 | static DEFINE_RATELIMIT_STATE(vringh_rs, | |
20 | DEFAULT_RATELIMIT_INTERVAL, | |
21 | DEFAULT_RATELIMIT_BURST); | |
22 | if (__ratelimit(&vringh_rs)) { | |
23 | va_list ap; | |
24 | va_start(ap, fmt); | |
25 | printk(KERN_NOTICE "vringh:"); | |
26 | vprintk(fmt, ap); | |
27 | va_end(ap); | |
28 | } | |
29 | } | |
30 | ||
31 | /* Returns vring->num if empty, -ve on error. */ | |
32 | static inline int __vringh_get_head(const struct vringh *vrh, | |
b9f7ac8c MT |
33 | int (*getu16)(const struct vringh *vrh, |
34 | u16 *val, const __virtio16 *p), | |
f87d0fbb RR |
35 | u16 *last_avail_idx) |
36 | { | |
37 | u16 avail_idx, i, head; | |
38 | int err; | |
39 | ||
b9f7ac8c | 40 | err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx); |
f87d0fbb RR |
41 | if (err) { |
42 | vringh_bad("Failed to access avail idx at %p", | |
43 | &vrh->vring.avail->idx); | |
44 | return err; | |
45 | } | |
46 | ||
47 | if (*last_avail_idx == avail_idx) | |
48 | return vrh->vring.num; | |
49 | ||
50 | /* Only get avail ring entries after they have been exposed by guest. */ | |
51 | virtio_rmb(vrh->weak_barriers); | |
52 | ||
53 | i = *last_avail_idx & (vrh->vring.num - 1); | |
54 | ||
b9f7ac8c | 55 | err = getu16(vrh, &head, &vrh->vring.avail->ring[i]); |
f87d0fbb RR |
56 | if (err) { |
57 | vringh_bad("Failed to read head: idx %d address %p", | |
58 | *last_avail_idx, &vrh->vring.avail->ring[i]); | |
59 | return err; | |
60 | } | |
61 | ||
62 | if (head >= vrh->vring.num) { | |
63 | vringh_bad("Guest says index %u > %u is available", | |
64 | head, vrh->vring.num); | |
65 | return -EINVAL; | |
66 | } | |
67 | ||
68 | (*last_avail_idx)++; | |
69 | return head; | |
70 | } | |
71 | ||
72 | /* Copy some bytes to/from the iovec. Returns num copied. */ | |
73 | static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov, | |
74 | void *ptr, size_t len, | |
75 | int (*xfer)(void *addr, void *ptr, | |
76 | size_t len)) | |
77 | { | |
78 | int err, done = 0; | |
79 | ||
80 | while (len && iov->i < iov->used) { | |
81 | size_t partlen; | |
82 | ||
83 | partlen = min(iov->iov[iov->i].iov_len, len); | |
84 | err = xfer(iov->iov[iov->i].iov_base, ptr, partlen); | |
85 | if (err) | |
86 | return err; | |
87 | done += partlen; | |
88 | len -= partlen; | |
89 | ptr += partlen; | |
90 | iov->consumed += partlen; | |
91 | iov->iov[iov->i].iov_len -= partlen; | |
92 | iov->iov[iov->i].iov_base += partlen; | |
93 | ||
94 | if (!iov->iov[iov->i].iov_len) { | |
95 | /* Fix up old iov element then increment. */ | |
96 | iov->iov[iov->i].iov_len = iov->consumed; | |
97 | iov->iov[iov->i].iov_base -= iov->consumed; | |
98 | ||
99 | iov->consumed = 0; | |
100 | iov->i++; | |
101 | } | |
102 | } | |
103 | return done; | |
104 | } | |
105 | ||
106 | /* May reduce *len if range is shorter. */ | |
107 | static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len, | |
108 | struct vringh_range *range, | |
109 | bool (*getrange)(struct vringh *, | |
110 | u64, struct vringh_range *)) | |
111 | { | |
112 | if (addr < range->start || addr > range->end_incl) { | |
113 | if (!getrange(vrh, addr, range)) | |
114 | return false; | |
115 | } | |
116 | BUG_ON(addr < range->start || addr > range->end_incl); | |
117 | ||
118 | /* To end of memory? */ | |
119 | if (unlikely(addr + *len == 0)) { | |
120 | if (range->end_incl == -1ULL) | |
121 | return true; | |
122 | goto truncate; | |
123 | } | |
124 | ||
125 | /* Otherwise, don't wrap. */ | |
126 | if (addr + *len < addr) { | |
127 | vringh_bad("Wrapping descriptor %zu@0x%llx", | |
128 | *len, (unsigned long long)addr); | |
129 | return false; | |
130 | } | |
131 | ||
132 | if (unlikely(addr + *len - 1 > range->end_incl)) | |
133 | goto truncate; | |
134 | return true; | |
135 | ||
136 | truncate: | |
137 | *len = range->end_incl + 1 - addr; | |
138 | return true; | |
139 | } | |
140 | ||
141 | static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len, | |
142 | struct vringh_range *range, | |
143 | bool (*getrange)(struct vringh *, | |
144 | u64, struct vringh_range *)) | |
145 | { | |
146 | return true; | |
147 | } | |
148 | ||
149 | /* No reason for this code to be inline. */ | |
b9f7ac8c MT |
150 | static int move_to_indirect(const struct vringh *vrh, |
151 | int *up_next, u16 *i, void *addr, | |
f87d0fbb RR |
152 | const struct vring_desc *desc, |
153 | struct vring_desc **descs, int *desc_max) | |
154 | { | |
b9f7ac8c MT |
155 | u32 len; |
156 | ||
f87d0fbb RR |
157 | /* Indirect tables can't have indirect. */ |
158 | if (*up_next != -1) { | |
159 | vringh_bad("Multilevel indirect %u->%u", *up_next, *i); | |
160 | return -EINVAL; | |
161 | } | |
162 | ||
b9f7ac8c MT |
163 | len = vringh32_to_cpu(vrh, desc->len); |
164 | if (unlikely(len % sizeof(struct vring_desc))) { | |
f87d0fbb RR |
165 | vringh_bad("Strange indirect len %u", desc->len); |
166 | return -EINVAL; | |
167 | } | |
168 | ||
169 | /* We will check this when we follow it! */ | |
b9f7ac8c MT |
170 | if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) |
171 | *up_next = vringh16_to_cpu(vrh, desc->next); | |
f87d0fbb RR |
172 | else |
173 | *up_next = -2; | |
174 | *descs = addr; | |
b9f7ac8c | 175 | *desc_max = len / sizeof(struct vring_desc); |
f87d0fbb RR |
176 | |
177 | /* Now, start at the first indirect. */ | |
178 | *i = 0; | |
179 | return 0; | |
180 | } | |
181 | ||
182 | static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp) | |
183 | { | |
184 | struct kvec *new; | |
185 | unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2; | |
186 | ||
187 | if (new_num < 8) | |
188 | new_num = 8; | |
189 | ||
190 | flag = (iov->max_num & VRINGH_IOV_ALLOCATED); | |
191 | if (flag) | |
192 | new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp); | |
193 | else { | |
194 | new = kmalloc(new_num * sizeof(struct iovec), gfp); | |
195 | if (new) { | |
196 | memcpy(new, iov->iov, | |
197 | iov->max_num * sizeof(struct iovec)); | |
198 | flag = VRINGH_IOV_ALLOCATED; | |
199 | } | |
200 | } | |
201 | if (!new) | |
202 | return -ENOMEM; | |
203 | iov->iov = new; | |
204 | iov->max_num = (new_num | flag); | |
205 | return 0; | |
206 | } | |
207 | ||
208 | static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next, | |
209 | struct vring_desc **descs, int *desc_max) | |
210 | { | |
211 | u16 i = *up_next; | |
212 | ||
213 | *up_next = -1; | |
214 | *descs = vrh->vring.desc; | |
215 | *desc_max = vrh->vring.num; | |
216 | return i; | |
217 | } | |
218 | ||
219 | static int slow_copy(struct vringh *vrh, void *dst, const void *src, | |
220 | bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, | |
221 | struct vringh_range *range, | |
222 | bool (*getrange)(struct vringh *vrh, | |
223 | u64, | |
224 | struct vringh_range *)), | |
225 | bool (*getrange)(struct vringh *vrh, | |
226 | u64 addr, | |
227 | struct vringh_range *r), | |
228 | struct vringh_range *range, | |
229 | int (*copy)(void *dst, const void *src, size_t len)) | |
230 | { | |
231 | size_t part, len = sizeof(struct vring_desc); | |
232 | ||
233 | do { | |
234 | u64 addr; | |
235 | int err; | |
236 | ||
237 | part = len; | |
238 | addr = (u64)(unsigned long)src - range->offset; | |
239 | ||
240 | if (!rcheck(vrh, addr, &part, range, getrange)) | |
241 | return -EINVAL; | |
242 | ||
243 | err = copy(dst, src, part); | |
244 | if (err) | |
245 | return err; | |
246 | ||
247 | dst += part; | |
248 | src += part; | |
249 | len -= part; | |
250 | } while (len); | |
251 | return 0; | |
252 | } | |
253 | ||
254 | static inline int | |
255 | __vringh_iov(struct vringh *vrh, u16 i, | |
256 | struct vringh_kiov *riov, | |
257 | struct vringh_kiov *wiov, | |
258 | bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, | |
259 | struct vringh_range *range, | |
260 | bool (*getrange)(struct vringh *, u64, | |
261 | struct vringh_range *)), | |
262 | bool (*getrange)(struct vringh *, u64, struct vringh_range *), | |
263 | gfp_t gfp, | |
264 | int (*copy)(void *dst, const void *src, size_t len)) | |
265 | { | |
266 | int err, count = 0, up_next, desc_max; | |
267 | struct vring_desc desc, *descs; | |
268 | struct vringh_range range = { -1ULL, 0 }, slowrange; | |
269 | bool slow = false; | |
270 | ||
271 | /* We start traversing vring's descriptor table. */ | |
272 | descs = vrh->vring.desc; | |
273 | desc_max = vrh->vring.num; | |
274 | up_next = -1; | |
275 | ||
276 | if (riov) | |
277 | riov->i = riov->used = 0; | |
278 | else if (wiov) | |
279 | wiov->i = wiov->used = 0; | |
280 | else | |
281 | /* You must want something! */ | |
282 | BUG(); | |
283 | ||
284 | for (;;) { | |
285 | void *addr; | |
286 | struct vringh_kiov *iov; | |
287 | size_t len; | |
288 | ||
289 | if (unlikely(slow)) | |
290 | err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange, | |
291 | &slowrange, copy); | |
292 | else | |
293 | err = copy(&desc, &descs[i], sizeof(desc)); | |
294 | if (unlikely(err)) | |
295 | goto fail; | |
296 | ||
b9f7ac8c MT |
297 | if (unlikely(desc.flags & |
298 | cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) { | |
299 | u64 a = vringh64_to_cpu(vrh, desc.addr); | |
300 | ||
f87d0fbb | 301 | /* Make sure it's OK, and get offset. */ |
b9f7ac8c MT |
302 | len = vringh32_to_cpu(vrh, desc.len); |
303 | if (!rcheck(vrh, a, &len, &range, getrange)) { | |
f87d0fbb RR |
304 | err = -EINVAL; |
305 | goto fail; | |
306 | } | |
307 | ||
b9f7ac8c | 308 | if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { |
f87d0fbb RR |
309 | slow = true; |
310 | /* We need to save this range to use offset */ | |
311 | slowrange = range; | |
312 | } | |
313 | ||
b9f7ac8c MT |
314 | addr = (void *)(long)(a + range.offset); |
315 | err = move_to_indirect(vrh, &up_next, &i, addr, &desc, | |
f87d0fbb RR |
316 | &descs, &desc_max); |
317 | if (err) | |
318 | goto fail; | |
319 | continue; | |
320 | } | |
321 | ||
322 | if (count++ == vrh->vring.num) { | |
323 | vringh_bad("Descriptor loop in %p", descs); | |
324 | err = -ELOOP; | |
325 | goto fail; | |
326 | } | |
327 | ||
b9f7ac8c | 328 | if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE)) |
f87d0fbb RR |
329 | iov = wiov; |
330 | else { | |
331 | iov = riov; | |
332 | if (unlikely(wiov && wiov->i)) { | |
333 | vringh_bad("Readable desc %p after writable", | |
334 | &descs[i]); | |
335 | err = -EINVAL; | |
336 | goto fail; | |
337 | } | |
338 | } | |
339 | ||
340 | if (!iov) { | |
341 | vringh_bad("Unexpected %s desc", | |
342 | !wiov ? "writable" : "readable"); | |
343 | err = -EPROTO; | |
344 | goto fail; | |
345 | } | |
346 | ||
347 | again: | |
348 | /* Make sure it's OK, and get offset. */ | |
b9f7ac8c MT |
349 | len = vringh32_to_cpu(vrh, desc.len); |
350 | if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range, | |
351 | getrange)) { | |
f87d0fbb RR |
352 | err = -EINVAL; |
353 | goto fail; | |
354 | } | |
b9f7ac8c MT |
355 | addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) + |
356 | range.offset); | |
f87d0fbb RR |
357 | |
358 | if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) { | |
359 | err = resize_iovec(iov, gfp); | |
360 | if (err) | |
361 | goto fail; | |
362 | } | |
363 | ||
364 | iov->iov[iov->used].iov_base = addr; | |
365 | iov->iov[iov->used].iov_len = len; | |
366 | iov->used++; | |
367 | ||
b9f7ac8c MT |
368 | if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { |
369 | desc.len = cpu_to_vringh32(vrh, | |
370 | vringh32_to_cpu(vrh, desc.len) - len); | |
371 | desc.addr = cpu_to_vringh64(vrh, | |
372 | vringh64_to_cpu(vrh, desc.addr) + len); | |
f87d0fbb RR |
373 | goto again; |
374 | } | |
375 | ||
b9f7ac8c MT |
376 | if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) { |
377 | i = vringh16_to_cpu(vrh, desc.next); | |
f87d0fbb RR |
378 | } else { |
379 | /* Just in case we need to finish traversing above. */ | |
380 | if (unlikely(up_next > 0)) { | |
381 | i = return_from_indirect(vrh, &up_next, | |
382 | &descs, &desc_max); | |
383 | slow = false; | |
384 | } else | |
385 | break; | |
386 | } | |
387 | ||
388 | if (i >= desc_max) { | |
389 | vringh_bad("Chained index %u > %u", i, desc_max); | |
390 | err = -EINVAL; | |
391 | goto fail; | |
392 | } | |
393 | } | |
394 | ||
395 | return 0; | |
396 | ||
397 | fail: | |
398 | return err; | |
399 | } | |
400 | ||
401 | static inline int __vringh_complete(struct vringh *vrh, | |
402 | const struct vring_used_elem *used, | |
403 | unsigned int num_used, | |
b9f7ac8c MT |
404 | int (*putu16)(const struct vringh *vrh, |
405 | __virtio16 *p, u16 val), | |
f87d0fbb RR |
406 | int (*putused)(struct vring_used_elem *dst, |
407 | const struct vring_used_elem | |
408 | *src, unsigned num)) | |
409 | { | |
410 | struct vring_used *used_ring; | |
411 | int err; | |
412 | u16 used_idx, off; | |
413 | ||
414 | used_ring = vrh->vring.used; | |
415 | used_idx = vrh->last_used_idx + vrh->completed; | |
416 | ||
417 | off = used_idx % vrh->vring.num; | |
418 | ||
419 | /* Compiler knows num_used == 1 sometimes, hence extra check */ | |
420 | if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) { | |
421 | u16 part = vrh->vring.num - off; | |
422 | err = putused(&used_ring->ring[off], used, part); | |
423 | if (!err) | |
424 | err = putused(&used_ring->ring[0], used + part, | |
425 | num_used - part); | |
426 | } else | |
427 | err = putused(&used_ring->ring[off], used, num_used); | |
428 | ||
429 | if (err) { | |
430 | vringh_bad("Failed to write %u used entries %u at %p", | |
431 | num_used, off, &used_ring->ring[off]); | |
432 | return err; | |
433 | } | |
434 | ||
435 | /* Make sure buffer is written before we update index. */ | |
436 | virtio_wmb(vrh->weak_barriers); | |
437 | ||
b9f7ac8c | 438 | err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used); |
f87d0fbb RR |
439 | if (err) { |
440 | vringh_bad("Failed to update used index at %p", | |
441 | &vrh->vring.used->idx); | |
442 | return err; | |
443 | } | |
444 | ||
445 | vrh->completed += num_used; | |
446 | return 0; | |
447 | } | |
448 | ||
449 | ||
450 | static inline int __vringh_need_notify(struct vringh *vrh, | |
b9f7ac8c MT |
451 | int (*getu16)(const struct vringh *vrh, |
452 | u16 *val, | |
453 | const __virtio16 *p)) | |
f87d0fbb RR |
454 | { |
455 | bool notify; | |
456 | u16 used_event; | |
457 | int err; | |
458 | ||
459 | /* Flush out used index update. This is paired with the | |
460 | * barrier that the Guest executes when enabling | |
461 | * interrupts. */ | |
462 | virtio_mb(vrh->weak_barriers); | |
463 | ||
464 | /* Old-style, without event indices. */ | |
465 | if (!vrh->event_indices) { | |
466 | u16 flags; | |
b9f7ac8c | 467 | err = getu16(vrh, &flags, &vrh->vring.avail->flags); |
f87d0fbb RR |
468 | if (err) { |
469 | vringh_bad("Failed to get flags at %p", | |
470 | &vrh->vring.avail->flags); | |
471 | return err; | |
472 | } | |
473 | return (!(flags & VRING_AVAIL_F_NO_INTERRUPT)); | |
474 | } | |
475 | ||
476 | /* Modern: we know when other side wants to know. */ | |
b9f7ac8c | 477 | err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring)); |
f87d0fbb RR |
478 | if (err) { |
479 | vringh_bad("Failed to get used event idx at %p", | |
480 | &vring_used_event(&vrh->vring)); | |
481 | return err; | |
482 | } | |
483 | ||
484 | /* Just in case we added so many that we wrap. */ | |
485 | if (unlikely(vrh->completed > 0xffff)) | |
486 | notify = true; | |
487 | else | |
488 | notify = vring_need_event(used_event, | |
489 | vrh->last_used_idx + vrh->completed, | |
490 | vrh->last_used_idx); | |
491 | ||
492 | vrh->last_used_idx += vrh->completed; | |
493 | vrh->completed = 0; | |
494 | return notify; | |
495 | } | |
496 | ||
497 | static inline bool __vringh_notify_enable(struct vringh *vrh, | |
b9f7ac8c MT |
498 | int (*getu16)(const struct vringh *vrh, |
499 | u16 *val, const __virtio16 *p), | |
500 | int (*putu16)(const struct vringh *vrh, | |
501 | __virtio16 *p, u16 val)) | |
f87d0fbb RR |
502 | { |
503 | u16 avail; | |
504 | ||
505 | if (!vrh->event_indices) { | |
506 | /* Old-school; update flags. */ | |
b9f7ac8c | 507 | if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) { |
f87d0fbb RR |
508 | vringh_bad("Clearing used flags %p", |
509 | &vrh->vring.used->flags); | |
510 | return true; | |
511 | } | |
512 | } else { | |
b9f7ac8c | 513 | if (putu16(vrh, &vring_avail_event(&vrh->vring), |
f87d0fbb RR |
514 | vrh->last_avail_idx) != 0) { |
515 | vringh_bad("Updating avail event index %p", | |
516 | &vring_avail_event(&vrh->vring)); | |
517 | return true; | |
518 | } | |
519 | } | |
520 | ||
521 | /* They could have slipped one in as we were doing that: make | |
522 | * sure it's written, then check again. */ | |
523 | virtio_mb(vrh->weak_barriers); | |
524 | ||
b9f7ac8c | 525 | if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) { |
f87d0fbb RR |
526 | vringh_bad("Failed to check avail idx at %p", |
527 | &vrh->vring.avail->idx); | |
528 | return true; | |
529 | } | |
530 | ||
531 | /* This is unlikely, so we just leave notifications enabled | |
532 | * (if we're using event_indices, we'll only get one | |
533 | * notification anyway). */ | |
534 | return avail == vrh->last_avail_idx; | |
535 | } | |
536 | ||
537 | static inline void __vringh_notify_disable(struct vringh *vrh, | |
b9f7ac8c MT |
538 | int (*putu16)(const struct vringh *vrh, |
539 | __virtio16 *p, u16 val)) | |
f87d0fbb RR |
540 | { |
541 | if (!vrh->event_indices) { | |
542 | /* Old-school; update flags. */ | |
b9f7ac8c MT |
543 | if (putu16(vrh, &vrh->vring.used->flags, |
544 | VRING_USED_F_NO_NOTIFY)) { | |
f87d0fbb RR |
545 | vringh_bad("Setting used flags %p", |
546 | &vrh->vring.used->flags); | |
547 | } | |
548 | } | |
549 | } | |
550 | ||
551 | /* Userspace access helpers: in this case, addresses are really userspace. */ | |
b9f7ac8c | 552 | static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p) |
f87d0fbb | 553 | { |
b9f7ac8c MT |
554 | __virtio16 v = 0; |
555 | int rc = get_user(v, (__force __virtio16 __user *)p); | |
556 | *val = vringh16_to_cpu(vrh, v); | |
557 | return rc; | |
f87d0fbb RR |
558 | } |
559 | ||
b9f7ac8c | 560 | static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val) |
f87d0fbb | 561 | { |
b9f7ac8c MT |
562 | __virtio16 v = cpu_to_vringh16(vrh, val); |
563 | return put_user(v, (__force __virtio16 __user *)p); | |
f87d0fbb RR |
564 | } |
565 | ||
566 | static inline int copydesc_user(void *dst, const void *src, size_t len) | |
567 | { | |
568 | return copy_from_user(dst, (__force void __user *)src, len) ? | |
569 | -EFAULT : 0; | |
570 | } | |
571 | ||
572 | static inline int putused_user(struct vring_used_elem *dst, | |
573 | const struct vring_used_elem *src, | |
574 | unsigned int num) | |
575 | { | |
576 | return copy_to_user((__force void __user *)dst, src, | |
577 | sizeof(*dst) * num) ? -EFAULT : 0; | |
578 | } | |
579 | ||
580 | static inline int xfer_from_user(void *src, void *dst, size_t len) | |
581 | { | |
582 | return copy_from_user(dst, (__force void __user *)src, len) ? | |
583 | -EFAULT : 0; | |
584 | } | |
585 | ||
586 | static inline int xfer_to_user(void *dst, void *src, size_t len) | |
587 | { | |
588 | return copy_to_user((__force void __user *)dst, src, len) ? | |
589 | -EFAULT : 0; | |
590 | } | |
591 | ||
592 | /** | |
593 | * vringh_init_user - initialize a vringh for a userspace vring. | |
594 | * @vrh: the vringh to initialize. | |
595 | * @features: the feature bits for this ring. | |
596 | * @num: the number of elements. | |
597 | * @weak_barriers: true if we only need memory barriers, not I/O. | |
598 | * @desc: the userpace descriptor pointer. | |
599 | * @avail: the userpace avail pointer. | |
600 | * @used: the userpace used pointer. | |
601 | * | |
602 | * Returns an error if num is invalid: you should check pointers | |
603 | * yourself! | |
604 | */ | |
b97a8a90 | 605 | int vringh_init_user(struct vringh *vrh, u64 features, |
f87d0fbb RR |
606 | unsigned int num, bool weak_barriers, |
607 | struct vring_desc __user *desc, | |
608 | struct vring_avail __user *avail, | |
609 | struct vring_used __user *used) | |
610 | { | |
611 | /* Sane power of 2 please! */ | |
612 | if (!num || num > 0xffff || (num & (num - 1))) { | |
613 | vringh_bad("Bad ring size %u", num); | |
614 | return -EINVAL; | |
615 | } | |
616 | ||
b9f7ac8c | 617 | vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); |
f87d0fbb RR |
618 | vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); |
619 | vrh->weak_barriers = weak_barriers; | |
620 | vrh->completed = 0; | |
621 | vrh->last_avail_idx = 0; | |
622 | vrh->last_used_idx = 0; | |
623 | vrh->vring.num = num; | |
624 | /* vring expects kernel addresses, but only used via accessors. */ | |
625 | vrh->vring.desc = (__force struct vring_desc *)desc; | |
626 | vrh->vring.avail = (__force struct vring_avail *)avail; | |
627 | vrh->vring.used = (__force struct vring_used *)used; | |
628 | return 0; | |
629 | } | |
630 | EXPORT_SYMBOL(vringh_init_user); | |
631 | ||
632 | /** | |
633 | * vringh_getdesc_user - get next available descriptor from userspace ring. | |
634 | * @vrh: the userspace vring. | |
635 | * @riov: where to put the readable descriptors (or NULL) | |
636 | * @wiov: where to put the writable descriptors (or NULL) | |
637 | * @getrange: function to call to check ranges. | |
638 | * @head: head index we received, for passing to vringh_complete_user(). | |
639 | * | |
640 | * Returns 0 if there was no descriptor, 1 if there was, or -errno. | |
641 | * | |
642 | * Note that on error return, you can tell the difference between an | |
643 | * invalid ring and a single invalid descriptor: in the former case, | |
644 | * *head will be vrh->vring.num. You may be able to ignore an invalid | |
645 | * descriptor, but there's not much you can do with an invalid ring. | |
646 | * | |
647 | * Note that you may need to clean up riov and wiov, even on error! | |
648 | */ | |
649 | int vringh_getdesc_user(struct vringh *vrh, | |
650 | struct vringh_iov *riov, | |
651 | struct vringh_iov *wiov, | |
652 | bool (*getrange)(struct vringh *vrh, | |
653 | u64 addr, struct vringh_range *r), | |
654 | u16 *head) | |
655 | { | |
656 | int err; | |
657 | ||
658 | *head = vrh->vring.num; | |
659 | err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx); | |
660 | if (err < 0) | |
661 | return err; | |
662 | ||
663 | /* Empty... */ | |
664 | if (err == vrh->vring.num) | |
665 | return 0; | |
666 | ||
667 | /* We need the layouts to be the identical for this to work */ | |
668 | BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov)); | |
669 | BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) != | |
670 | offsetof(struct vringh_iov, iov)); | |
671 | BUILD_BUG_ON(offsetof(struct vringh_kiov, i) != | |
672 | offsetof(struct vringh_iov, i)); | |
673 | BUILD_BUG_ON(offsetof(struct vringh_kiov, used) != | |
674 | offsetof(struct vringh_iov, used)); | |
675 | BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) != | |
676 | offsetof(struct vringh_iov, max_num)); | |
677 | BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); | |
678 | BUILD_BUG_ON(offsetof(struct iovec, iov_base) != | |
679 | offsetof(struct kvec, iov_base)); | |
680 | BUILD_BUG_ON(offsetof(struct iovec, iov_len) != | |
681 | offsetof(struct kvec, iov_len)); | |
682 | BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base) | |
683 | != sizeof(((struct kvec *)NULL)->iov_base)); | |
684 | BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len) | |
685 | != sizeof(((struct kvec *)NULL)->iov_len)); | |
686 | ||
687 | *head = err; | |
688 | err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov, | |
689 | (struct vringh_kiov *)wiov, | |
690 | range_check, getrange, GFP_KERNEL, copydesc_user); | |
691 | if (err) | |
692 | return err; | |
693 | ||
694 | return 1; | |
695 | } | |
696 | EXPORT_SYMBOL(vringh_getdesc_user); | |
697 | ||
698 | /** | |
699 | * vringh_iov_pull_user - copy bytes from vring_iov. | |
700 | * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume) | |
701 | * @dst: the place to copy. | |
702 | * @len: the maximum length to copy. | |
703 | * | |
704 | * Returns the bytes copied <= len or a negative errno. | |
705 | */ | |
706 | ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len) | |
707 | { | |
708 | return vringh_iov_xfer((struct vringh_kiov *)riov, | |
709 | dst, len, xfer_from_user); | |
710 | } | |
711 | EXPORT_SYMBOL(vringh_iov_pull_user); | |
712 | ||
713 | /** | |
714 | * vringh_iov_push_user - copy bytes into vring_iov. | |
715 | * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume) | |
716 | * @dst: the place to copy. | |
717 | * @len: the maximum length to copy. | |
718 | * | |
719 | * Returns the bytes copied <= len or a negative errno. | |
720 | */ | |
721 | ssize_t vringh_iov_push_user(struct vringh_iov *wiov, | |
722 | const void *src, size_t len) | |
723 | { | |
724 | return vringh_iov_xfer((struct vringh_kiov *)wiov, | |
725 | (void *)src, len, xfer_to_user); | |
726 | } | |
727 | EXPORT_SYMBOL(vringh_iov_push_user); | |
728 | ||
729 | /** | |
730 | * vringh_abandon_user - we've decided not to handle the descriptor(s). | |
731 | * @vrh: the vring. | |
732 | * @num: the number of descriptors to put back (ie. num | |
733 | * vringh_get_user() to undo). | |
734 | * | |
735 | * The next vringh_get_user() will return the old descriptor(s) again. | |
736 | */ | |
737 | void vringh_abandon_user(struct vringh *vrh, unsigned int num) | |
738 | { | |
739 | /* We only update vring_avail_event(vr) when we want to be notified, | |
740 | * so we haven't changed that yet. */ | |
741 | vrh->last_avail_idx -= num; | |
742 | } | |
743 | EXPORT_SYMBOL(vringh_abandon_user); | |
744 | ||
745 | /** | |
746 | * vringh_complete_user - we've finished with descriptor, publish it. | |
747 | * @vrh: the vring. | |
748 | * @head: the head as filled in by vringh_getdesc_user. | |
749 | * @len: the length of data we have written. | |
750 | * | |
751 | * You should check vringh_need_notify_user() after one or more calls | |
752 | * to this function. | |
753 | */ | |
754 | int vringh_complete_user(struct vringh *vrh, u16 head, u32 len) | |
755 | { | |
756 | struct vring_used_elem used; | |
757 | ||
b9f7ac8c MT |
758 | used.id = cpu_to_vringh32(vrh, head); |
759 | used.len = cpu_to_vringh32(vrh, len); | |
f87d0fbb RR |
760 | return __vringh_complete(vrh, &used, 1, putu16_user, putused_user); |
761 | } | |
762 | EXPORT_SYMBOL(vringh_complete_user); | |
763 | ||
764 | /** | |
765 | * vringh_complete_multi_user - we've finished with many descriptors. | |
766 | * @vrh: the vring. | |
767 | * @used: the head, length pairs. | |
768 | * @num_used: the number of used elements. | |
769 | * | |
770 | * You should check vringh_need_notify_user() after one or more calls | |
771 | * to this function. | |
772 | */ | |
773 | int vringh_complete_multi_user(struct vringh *vrh, | |
774 | const struct vring_used_elem used[], | |
775 | unsigned num_used) | |
776 | { | |
777 | return __vringh_complete(vrh, used, num_used, | |
778 | putu16_user, putused_user); | |
779 | } | |
780 | EXPORT_SYMBOL(vringh_complete_multi_user); | |
781 | ||
782 | /** | |
783 | * vringh_notify_enable_user - we want to know if something changes. | |
784 | * @vrh: the vring. | |
785 | * | |
786 | * This always enables notifications, but returns false if there are | |
787 | * now more buffers available in the vring. | |
788 | */ | |
789 | bool vringh_notify_enable_user(struct vringh *vrh) | |
790 | { | |
791 | return __vringh_notify_enable(vrh, getu16_user, putu16_user); | |
792 | } | |
793 | EXPORT_SYMBOL(vringh_notify_enable_user); | |
794 | ||
795 | /** | |
796 | * vringh_notify_disable_user - don't tell us if something changes. | |
797 | * @vrh: the vring. | |
798 | * | |
799 | * This is our normal running state: we disable and then only enable when | |
800 | * we're going to sleep. | |
801 | */ | |
802 | void vringh_notify_disable_user(struct vringh *vrh) | |
803 | { | |
804 | __vringh_notify_disable(vrh, putu16_user); | |
805 | } | |
806 | EXPORT_SYMBOL(vringh_notify_disable_user); | |
807 | ||
808 | /** | |
809 | * vringh_need_notify_user - must we tell the other side about used buffers? | |
810 | * @vrh: the vring we've called vringh_complete_user() on. | |
811 | * | |
812 | * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. | |
813 | */ | |
814 | int vringh_need_notify_user(struct vringh *vrh) | |
815 | { | |
816 | return __vringh_need_notify(vrh, getu16_user); | |
817 | } | |
818 | EXPORT_SYMBOL(vringh_need_notify_user); | |
819 | ||
820 | /* Kernelspace access helpers. */ | |
b9f7ac8c MT |
821 | static inline int getu16_kern(const struct vringh *vrh, |
822 | u16 *val, const __virtio16 *p) | |
f87d0fbb | 823 | { |
9d1b972f | 824 | *val = vringh16_to_cpu(vrh, READ_ONCE(*p)); |
f87d0fbb RR |
825 | return 0; |
826 | } | |
827 | ||
b9f7ac8c | 828 | static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val) |
f87d0fbb | 829 | { |
9d1b972f | 830 | WRITE_ONCE(*p, cpu_to_vringh16(vrh, val)); |
f87d0fbb RR |
831 | return 0; |
832 | } | |
833 | ||
/*
 * Descriptor fetch for kernel rings: a plain memcpy() of @len bytes from
 * @src to @dst.  Always returns 0.
 */
static inline int copydesc_kern(void *dst, const void *src, size_t len)
{
	(void)memcpy(dst, src, len);

	return 0;
}
839 | ||
840 | static inline int putused_kern(struct vring_used_elem *dst, | |
841 | const struct vring_used_elem *src, | |
842 | unsigned int num) | |
843 | { | |
844 | memcpy(dst, src, num * sizeof(*dst)); | |
845 | return 0; | |
846 | } | |
847 | ||
/*
 * vringh_iov_xfer() callback for kernel rings: memcpy() @len bytes from
 * @src to @dst.  Note the argument order: source first.  Always returns 0.
 */
static inline int xfer_kern(void *src, void *dst, size_t len)
{
	(void)memcpy(dst, src, len);

	return 0;
}
853 | ||
854 | /** | |
855 | * vringh_init_kern - initialize a vringh for a kernelspace vring. | |
856 | * @vrh: the vringh to initialize. | |
857 | * @features: the feature bits for this ring. | |
858 | * @num: the number of elements. | |
859 | * @weak_barriers: true if we only need memory barriers, not I/O. | |
860 | * @desc: the userpace descriptor pointer. | |
861 | * @avail: the userpace avail pointer. | |
862 | * @used: the userpace used pointer. | |
863 | * | |
864 | * Returns an error if num is invalid. | |
865 | */ | |
b97a8a90 | 866 | int vringh_init_kern(struct vringh *vrh, u64 features, |
f87d0fbb RR |
867 | unsigned int num, bool weak_barriers, |
868 | struct vring_desc *desc, | |
869 | struct vring_avail *avail, | |
870 | struct vring_used *used) | |
871 | { | |
872 | /* Sane power of 2 please! */ | |
873 | if (!num || num > 0xffff || (num & (num - 1))) { | |
874 | vringh_bad("Bad ring size %u", num); | |
875 | return -EINVAL; | |
876 | } | |
877 | ||
b9f7ac8c | 878 | vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); |
f87d0fbb RR |
879 | vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); |
880 | vrh->weak_barriers = weak_barriers; | |
881 | vrh->completed = 0; | |
882 | vrh->last_avail_idx = 0; | |
883 | vrh->last_used_idx = 0; | |
884 | vrh->vring.num = num; | |
885 | vrh->vring.desc = desc; | |
886 | vrh->vring.avail = avail; | |
887 | vrh->vring.used = used; | |
888 | return 0; | |
889 | } | |
890 | EXPORT_SYMBOL(vringh_init_kern); | |
891 | ||
892 | /** | |
893 | * vringh_getdesc_kern - get next available descriptor from kernelspace ring. | |
894 | * @vrh: the kernelspace vring. | |
895 | * @riov: where to put the readable descriptors (or NULL) | |
896 | * @wiov: where to put the writable descriptors (or NULL) | |
897 | * @head: head index we received, for passing to vringh_complete_kern(). | |
898 | * @gfp: flags for allocating larger riov/wiov. | |
899 | * | |
900 | * Returns 0 if there was no descriptor, 1 if there was, or -errno. | |
901 | * | |
902 | * Note that on error return, you can tell the difference between an | |
903 | * invalid ring and a single invalid descriptor: in the former case, | |
904 | * *head will be vrh->vring.num. You may be able to ignore an invalid | |
905 | * descriptor, but there's not much you can do with an invalid ring. | |
906 | * | |
907 | * Note that you may need to clean up riov and wiov, even on error! | |
908 | */ | |
909 | int vringh_getdesc_kern(struct vringh *vrh, | |
910 | struct vringh_kiov *riov, | |
911 | struct vringh_kiov *wiov, | |
912 | u16 *head, | |
913 | gfp_t gfp) | |
914 | { | |
915 | int err; | |
916 | ||
917 | err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx); | |
918 | if (err < 0) | |
919 | return err; | |
920 | ||
921 | /* Empty... */ | |
922 | if (err == vrh->vring.num) | |
923 | return 0; | |
924 | ||
925 | *head = err; | |
926 | err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, | |
927 | gfp, copydesc_kern); | |
928 | if (err) | |
929 | return err; | |
930 | ||
931 | return 1; | |
932 | } | |
933 | EXPORT_SYMBOL(vringh_getdesc_kern); | |
934 | ||
935 | /** | |
936 | * vringh_iov_pull_kern - copy bytes from vring_iov. | |
937 | * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume) | |
938 | * @dst: the place to copy. | |
939 | * @len: the maximum length to copy. | |
940 | * | |
941 | * Returns the bytes copied <= len or a negative errno. | |
942 | */ | |
943 | ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) | |
944 | { | |
945 | return vringh_iov_xfer(riov, dst, len, xfer_kern); | |
946 | } | |
947 | EXPORT_SYMBOL(vringh_iov_pull_kern); | |
948 | ||
/*
 * Transfer callback for pushing into a kernelspace iovec.
 *
 * vringh_iov_pull_kern() passes its destination buffer as the second
 * callback argument to xfer_kern(src, dst, len), so the first callback
 * argument is the iovec side.  For a push the iovec is the destination,
 * hence the reversed parameter order here.  Always returns 0.
 */
static inline int xfer_to_kern(void *dst, void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	/*
	 * The cast only drops const to fit the callback signature;
	 * xfer_to_kern() reads from src and never writes through it.
	 */
	return vringh_iov_xfer(wiov, (void *)src, len, xfer_to_kern);
}
EXPORT_SYMBOL(vringh_iov_push_kern);
963 | ||
964 | /** | |
965 | * vringh_abandon_kern - we've decided not to handle the descriptor(s). | |
966 | * @vrh: the vring. | |
967 | * @num: the number of descriptors to put back (ie. num | |
968 | * vringh_get_kern() to undo). | |
969 | * | |
970 | * The next vringh_get_kern() will return the old descriptor(s) again. | |
971 | */ | |
972 | void vringh_abandon_kern(struct vringh *vrh, unsigned int num) | |
973 | { | |
974 | /* We only update vring_avail_event(vr) when we want to be notified, | |
975 | * so we haven't changed that yet. */ | |
976 | vrh->last_avail_idx -= num; | |
977 | } | |
978 | EXPORT_SYMBOL(vringh_abandon_kern); | |
979 | ||
980 | /** | |
981 | * vringh_complete_kern - we've finished with descriptor, publish it. | |
982 | * @vrh: the vring. | |
983 | * @head: the head as filled in by vringh_getdesc_kern. | |
984 | * @len: the length of data we have written. | |
985 | * | |
986 | * You should check vringh_need_notify_kern() after one or more calls | |
987 | * to this function. | |
988 | */ | |
989 | int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len) | |
990 | { | |
991 | struct vring_used_elem used; | |
992 | ||
b9f7ac8c MT |
993 | used.id = cpu_to_vringh32(vrh, head); |
994 | used.len = cpu_to_vringh32(vrh, len); | |
f87d0fbb RR |
995 | |
996 | return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern); | |
997 | } | |
998 | EXPORT_SYMBOL(vringh_complete_kern); | |
999 | ||
1000 | /** | |
1001 | * vringh_notify_enable_kern - we want to know if something changes. | |
1002 | * @vrh: the vring. | |
1003 | * | |
1004 | * This always enables notifications, but returns false if there are | |
1005 | * now more buffers available in the vring. | |
1006 | */ | |
1007 | bool vringh_notify_enable_kern(struct vringh *vrh) | |
1008 | { | |
1009 | return __vringh_notify_enable(vrh, getu16_kern, putu16_kern); | |
1010 | } | |
1011 | EXPORT_SYMBOL(vringh_notify_enable_kern); | |
1012 | ||
1013 | /** | |
1014 | * vringh_notify_disable_kern - don't tell us if something changes. | |
1015 | * @vrh: the vring. | |
1016 | * | |
1017 | * This is our normal running state: we disable and then only enable when | |
1018 | * we're going to sleep. | |
1019 | */ | |
1020 | void vringh_notify_disable_kern(struct vringh *vrh) | |
1021 | { | |
1022 | __vringh_notify_disable(vrh, putu16_kern); | |
1023 | } | |
1024 | EXPORT_SYMBOL(vringh_notify_disable_kern); | |
1025 | ||
1026 | /** | |
1027 | * vringh_need_notify_kern - must we tell the other side about used buffers? | |
1028 | * @vrh: the vring we've called vringh_complete_kern() on. | |
1029 | * | |
1030 | * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. | |
1031 | */ | |
1032 | int vringh_need_notify_kern(struct vringh *vrh) | |
1033 | { | |
1034 | return __vringh_need_notify(vrh, getu16_kern); | |
1035 | } | |
1036 | EXPORT_SYMBOL(vringh_need_notify_kern); | |
f558a845 DJ |
1037 | |
1038 | MODULE_LICENSE("GPL"); |