]> git.proxmox.com Git - mirror_qemu.git/blame - dma-helpers.c
pseries: Implement IOMMU and DMA for PAPR PCI devices
[mirror_qemu.git] / dma-helpers.c
CommitLineData
244ab90e
AL
1/*
2 * DMA helper functions
3 *
4 * Copyright (c) 2009 Red Hat
5 *
6 * This work is licensed under the terms of the GNU General Public License
7 * (GNU GPL), version 2 or later.
8 */
9
10#include "dma.h"
c57c4658 11#include "trace.h"
e5332e63
DG
12#include "range.h"
13#include "qemu-thread.h"
244ab90e 14
e5332e63
DG
15/* #define DEBUG_IOMMU */
16
17static void do_dma_memory_set(dma_addr_t addr, uint8_t c, dma_addr_t len)
d86a77f8
DG
18{
19#define FILLBUF_SIZE 512
20 uint8_t fillbuf[FILLBUF_SIZE];
21 int l;
22
23 memset(fillbuf, c, FILLBUF_SIZE);
24 while (len > 0) {
25 l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
26 cpu_physical_memory_rw(addr, fillbuf, l, true);
27 len -= len;
28 addr += len;
29 }
e5332e63
DG
30}
31
32int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
33{
34 if (dma_has_iommu(dma)) {
35 return iommu_dma_memory_set(dma, addr, c, len);
36 }
37 do_dma_memory_set(addr, c, len);
38
d86a77f8
DG
39 return 0;
40}
41
c65bcef3 42void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
244ab90e 43{
7267c094 44 qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
244ab90e
AL
45 qsg->nsg = 0;
46 qsg->nalloc = alloc_hint;
47 qsg->size = 0;
c65bcef3 48 qsg->dma = dma;
244ab90e
AL
49}
50
d3231181 51void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
244ab90e
AL
52{
53 if (qsg->nsg == qsg->nalloc) {
54 qsg->nalloc = 2 * qsg->nalloc + 1;
7267c094 55 qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
244ab90e
AL
56 }
57 qsg->sg[qsg->nsg].base = base;
58 qsg->sg[qsg->nsg].len = len;
59 qsg->size += len;
60 ++qsg->nsg;
61}
62
63void qemu_sglist_destroy(QEMUSGList *qsg)
64{
7267c094 65 g_free(qsg->sg);
244ab90e
AL
66}
67
59a703eb 68typedef struct {
37b7842c 69 BlockDriverAIOCB common;
59a703eb
AL
70 BlockDriverState *bs;
71 BlockDriverAIOCB *acb;
72 QEMUSGList *sg;
73 uint64_t sector_num;
43cf8ae6 74 DMADirection dir;
c3adb5b9 75 bool in_cancel;
59a703eb 76 int sg_cur_index;
d3231181 77 dma_addr_t sg_cur_byte;
59a703eb
AL
78 QEMUIOVector iov;
79 QEMUBH *bh;
cb144ccb 80 DMAIOFunc *io_func;
37b7842c 81} DMAAIOCB;
59a703eb
AL
82
83static void dma_bdrv_cb(void *opaque, int ret);
84
85static void reschedule_dma(void *opaque)
86{
37b7842c 87 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
59a703eb
AL
88
89 qemu_bh_delete(dbs->bh);
90 dbs->bh = NULL;
c3adb5b9 91 dma_bdrv_cb(dbs, 0);
59a703eb
AL
92}
93
94static void continue_after_map_failure(void *opaque)
95{
37b7842c 96 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
59a703eb
AL
97
98 dbs->bh = qemu_bh_new(reschedule_dma, dbs);
99 qemu_bh_schedule(dbs->bh);
100}
101
7403b14e 102static void dma_bdrv_unmap(DMAAIOCB *dbs)
59a703eb 103{
59a703eb
AL
104 int i;
105
59a703eb 106 for (i = 0; i < dbs->iov.niov; ++i) {
c65bcef3
DG
107 dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
108 dbs->iov.iov[i].iov_len, dbs->dir,
109 dbs->iov.iov[i].iov_len);
59a703eb 110 }
c3adb5b9
PB
111 qemu_iovec_reset(&dbs->iov);
112}
113
114static void dma_complete(DMAAIOCB *dbs, int ret)
115{
c57c4658
KW
116 trace_dma_complete(dbs, ret, dbs->common.cb);
117
c3adb5b9
PB
118 dma_bdrv_unmap(dbs);
119 if (dbs->common.cb) {
120 dbs->common.cb(dbs->common.opaque, ret);
121 }
122 qemu_iovec_destroy(&dbs->iov);
123 if (dbs->bh) {
124 qemu_bh_delete(dbs->bh);
125 dbs->bh = NULL;
126 }
127 if (!dbs->in_cancel) {
128 /* Requests may complete while dma_aio_cancel is in progress. In
129 * this case, the AIOCB should not be released because it is still
130 * referenced by dma_aio_cancel. */
131 qemu_aio_release(dbs);
132 }
7403b14e
AL
133}
134
856ae5c3 135static void dma_bdrv_cb(void *opaque, int ret)
7403b14e
AL
136{
137 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
c65bcef3 138 dma_addr_t cur_addr, cur_len;
7403b14e
AL
139 void *mem;
140
c57c4658
KW
141 trace_dma_bdrv_cb(dbs, ret);
142
7403b14e
AL
143 dbs->acb = NULL;
144 dbs->sector_num += dbs->iov.size / 512;
145 dma_bdrv_unmap(dbs);
59a703eb
AL
146
147 if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
c3adb5b9 148 dma_complete(dbs, ret);
59a703eb
AL
149 return;
150 }
151
152 while (dbs->sg_cur_index < dbs->sg->nsg) {
153 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
154 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
c65bcef3 155 mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
59a703eb
AL
156 if (!mem)
157 break;
158 qemu_iovec_add(&dbs->iov, mem, cur_len);
159 dbs->sg_cur_byte += cur_len;
160 if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
161 dbs->sg_cur_byte = 0;
162 ++dbs->sg_cur_index;
163 }
164 }
165
166 if (dbs->iov.size == 0) {
c57c4658 167 trace_dma_map_wait(dbs);
59a703eb
AL
168 cpu_register_map_client(dbs, continue_after_map_failure);
169 return;
170 }
171
cb144ccb
CH
172 dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
173 dbs->iov.size / 512, dma_bdrv_cb, dbs);
6bee44ea 174 assert(dbs->acb);
59a703eb
AL
175}
176
c16b5a2c
CH
177static void dma_aio_cancel(BlockDriverAIOCB *acb)
178{
179 DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
180
c57c4658
KW
181 trace_dma_aio_cancel(dbs);
182
c16b5a2c 183 if (dbs->acb) {
c3adb5b9
PB
184 BlockDriverAIOCB *acb = dbs->acb;
185 dbs->acb = NULL;
186 dbs->in_cancel = true;
187 bdrv_aio_cancel(acb);
188 dbs->in_cancel = false;
c16b5a2c 189 }
c3adb5b9
PB
190 dbs->common.cb = NULL;
191 dma_complete(dbs, 0);
c16b5a2c
CH
192}
193
194static AIOPool dma_aio_pool = {
195 .aiocb_size = sizeof(DMAAIOCB),
196 .cancel = dma_aio_cancel,
197};
198
cb144ccb 199BlockDriverAIOCB *dma_bdrv_io(
59a703eb 200 BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
cb144ccb 201 DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
43cf8ae6 202 void *opaque, DMADirection dir)
59a703eb 203{
cb144ccb 204 DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
59a703eb 205
43cf8ae6 206 trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
c57c4658 207
37b7842c 208 dbs->acb = NULL;
59a703eb 209 dbs->bs = bs;
59a703eb
AL
210 dbs->sg = sg;
211 dbs->sector_num = sector_num;
212 dbs->sg_cur_index = 0;
213 dbs->sg_cur_byte = 0;
43cf8ae6 214 dbs->dir = dir;
cb144ccb 215 dbs->io_func = io_func;
59a703eb
AL
216 dbs->bh = NULL;
217 qemu_iovec_init(&dbs->iov, sg->nsg);
218 dma_bdrv_cb(dbs, 0);
37b7842c 219 return &dbs->common;
59a703eb
AL
220}
221
222
223BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
224 QEMUSGList *sg, uint64_t sector,
225 void (*cb)(void *opaque, int ret), void *opaque)
226{
43cf8ae6
DG
227 return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
228 DMA_DIRECTION_FROM_DEVICE);
59a703eb
AL
229}
230
231BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
232 QEMUSGList *sg, uint64_t sector,
233 void (*cb)(void *opaque, int ret), void *opaque)
234{
43cf8ae6
DG
235 return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
236 DMA_DIRECTION_TO_DEVICE);
59a703eb 237}
8171ee35
PB
238
239
c65bcef3
DG
240static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
241 DMADirection dir)
8171ee35
PB
242{
243 uint64_t resid;
244 int sg_cur_index;
245
246 resid = sg->size;
247 sg_cur_index = 0;
248 len = MIN(len, resid);
249 while (len > 0) {
250 ScatterGatherEntry entry = sg->sg[sg_cur_index++];
251 int32_t xfer = MIN(len, entry.len);
c65bcef3 252 dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
8171ee35
PB
253 ptr += xfer;
254 len -= xfer;
255 resid -= xfer;
256 }
257
258 return resid;
259}
260
261uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
262{
c65bcef3 263 return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);
8171ee35
PB
264}
265
266uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
267{
c65bcef3 268 return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
8171ee35 269}
84a69356
PB
270
271void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
272 QEMUSGList *sg, enum BlockAcctType type)
273{
274 bdrv_acct_start(bs, cookie, sg->size, type);
275}
e5332e63
DG
276
277bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
278 DMADirection dir)
279{
280 target_phys_addr_t paddr, plen;
281
282#ifdef DEBUG_IOMMU
283 fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
284 " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
285#endif
286
287 while (len) {
288 if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
289 return false;
290 }
291
292 /* The translation might be valid for larger regions. */
293 if (plen > len) {
294 plen = len;
295 }
296
297 len -= plen;
298 addr += plen;
299 }
300
301 return true;
302}
303
304int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
305 void *buf, dma_addr_t len, DMADirection dir)
306{
307 target_phys_addr_t paddr, plen;
308 int err;
309
310#ifdef DEBUG_IOMMU
311 fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
312 DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
313#endif
314
315 while (len) {
316 err = dma->translate(dma, addr, &paddr, &plen, dir);
317 if (err) {
318 /*
319 * In case of failure on reads from the guest, we clean the
320 * destination buffer so that a device that doesn't test
321 * for errors will not expose qemu internal memory.
322 */
323 memset(buf, 0, len);
324 return -1;
325 }
326
327 /* The translation might be valid for larger regions. */
328 if (plen > len) {
329 plen = len;
330 }
331
332 cpu_physical_memory_rw(paddr, buf, plen,
333 dir == DMA_DIRECTION_FROM_DEVICE);
334
335 len -= plen;
336 addr += plen;
337 buf += plen;
338 }
339
340 return 0;
341}
342
343int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
344 dma_addr_t len)
345{
346 target_phys_addr_t paddr, plen;
347 int err;
348
349#ifdef DEBUG_IOMMU
350 fprintf(stderr, "dma_memory_set context=%p addr=0x" DMA_ADDR_FMT
351 " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
352#endif
353
354 while (len) {
355 err = dma->translate(dma, addr, &paddr, &plen,
356 DMA_DIRECTION_FROM_DEVICE);
357 if (err) {
358 return err;
359 }
360
361 /* The translation might be valid for larger regions. */
362 if (plen > len) {
363 plen = len;
364 }
365
366 do_dma_memory_set(paddr, c, plen);
367
368 len -= plen;
369 addr += plen;
370 }
371
372 return 0;
373}
374
375void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
376 DMAMapFunc map, DMAUnmapFunc unmap)
377{
378#ifdef DEBUG_IOMMU
379 fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
380 dma, translate, map, unmap);
381#endif
382 dma->translate = translate;
383 dma->map = map;
384 dma->unmap = unmap;
385}
386
387void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
388 DMADirection dir)
389{
390 int err;
391 target_phys_addr_t paddr, plen;
392 void *buf;
393
394 if (dma->map) {
395 return dma->map(dma, addr, len, dir);
396 }
397
398 plen = *len;
399 err = dma->translate(dma, addr, &paddr, &plen, dir);
400 if (err) {
401 return NULL;
402 }
403
404 /*
405 * If this is true, the virtual region is contiguous,
406 * but the translated physical region isn't. We just
407 * clamp *len, much like cpu_physical_memory_map() does.
408 */
409 if (plen < *len) {
410 *len = plen;
411 }
412
413 buf = cpu_physical_memory_map(paddr, &plen,
414 dir == DMA_DIRECTION_FROM_DEVICE);
415 *len = plen;
416
417 return buf;
418}
419
420void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
421 DMADirection dir, dma_addr_t access_len)
422{
423 if (dma->unmap) {
424 dma->unmap(dma, buffer, len, dir, access_len);
425 return;
426 }
427
428 cpu_physical_memory_unmap(buffer, len,
429 dir == DMA_DIRECTION_FROM_DEVICE,
430 access_len);
431
432}