/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "spdk/stdinc.h"

#include "ioat_internal.h"

#include "spdk/env.h"
#include "spdk/util.h"

#include "spdk_internal/log.h"
struct ioat_driver {
	pthread_mutex_t			lock;
	TAILQ_HEAD(, spdk_ioat_chan)	attached_chans;
};

static struct ioat_driver g_ioat_driver = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.attached_chans = TAILQ_HEAD_INITIALIZER(g_ioat_driver.attached_chans),
};
static uint64_t
ioat_get_chansts(struct spdk_ioat_chan *ioat)
{
	return spdk_mmio_read_8(&ioat->regs->chansts);
}
static void
ioat_write_chancmp(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chancmp, addr);
}
static void
ioat_write_chainaddr(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chainaddr, addr);
}
static inline void
ioat_suspend(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_SUSPEND;
}
static inline void
ioat_reset(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_RESET;
}
static inline uint32_t
ioat_reset_pending(struct spdk_ioat_chan *ioat)
{
	uint8_t cmd;

	cmd = ioat->regs->chancmd;
	return (cmd & SPDK_IOAT_CHANCMD_RESET) == SPDK_IOAT_CHANCMD_RESET;
}
static int
ioat_map_pci_bar(struct spdk_ioat_chan *ioat)
{
	int regs_bar, rc;
	void *addr;
	uint64_t phys_addr, size;

	regs_bar = 0;
	rc = spdk_pci_device_map_bar(ioat->device, regs_bar, &addr, &phys_addr, &size);
	if (rc != 0 || addr == NULL) {
		SPDK_ERRLOG("pci_device_map_range failed with error code %d\n",
			    rc);
		return -1;
	}

	ioat->regs = (volatile struct spdk_ioat_registers *)addr;

	return 0;
}
static int
ioat_unmap_pci_bar(struct spdk_ioat_chan *ioat)
{
	int rc = 0;
	void *addr = (void *)ioat->regs;

	if (addr) {
		rc = spdk_pci_device_unmap_bar(ioat->device, 0, addr);
	}
	return rc;
}
static inline uint32_t
ioat_get_active(struct spdk_ioat_chan *ioat)
{
	return (ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1);
}
static inline uint32_t
ioat_get_ring_space(struct spdk_ioat_chan *ioat)
{
	return (1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1;
}
static uint32_t
ioat_get_ring_index(struct spdk_ioat_chan *ioat, uint32_t index)
{
	return index & ((1 << ioat->ring_size_order) - 1);
}
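/*
 * Added note, worked example of the power-of-two ring arithmetic above:
 * with ring_size_order = 4 the ring holds 1 << 4 = 16 descriptors. If the
 * free-running counters are head = 18 and tail = 5, ioat_get_active()
 * returns (18 - 5) & 15 = 13 descriptors outstanding and ioat_get_ring_space()
 * returns 16 - 13 - 1 = 2 free slots. One slot is deliberately left unused so
 * that head == tail can only mean "ring empty", never "ring full".
 */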
static void
ioat_get_ring_entry(struct spdk_ioat_chan *ioat, uint32_t index,
		    struct ioat_descriptor **desc,
		    union spdk_ioat_hw_desc **hw_desc)
{
	uint32_t i = ioat_get_ring_index(ioat, index);

	*desc = &ioat->ring[i];
	*hw_desc = &ioat->hw_ring[i];
}
static void
ioat_submit_single(struct spdk_ioat_chan *ioat)
{
	ioat->head++;
}
static void
ioat_flush(struct spdk_ioat_chan *ioat)
{
	ioat->regs->dmacount = (uint16_t)ioat->head;
}
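/*
 * Added note: ioat_submit_single() only advances the software head index;
 * descriptors are not visible to the hardware until ioat_flush() writes the
 * updated head into the DMACOUNT register. The submit paths below therefore
 * build every descriptor for a request first and flush once at the end.
 */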
static struct ioat_descriptor *
ioat_prep_null(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.null = 1;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = 8;
	hw_desc->dma.src_addr = 0;
	hw_desc->dma.dest_addr = 0;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}
static struct ioat_descriptor *
ioat_prep_copy(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t src, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = len;
	hw_desc->dma.src_addr = src;
	hw_desc->dma.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}
static struct ioat_descriptor *
ioat_prep_fill(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t fill_pattern, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->fill.u.control_raw = 0;
	hw_desc->fill.u.control.op = SPDK_IOAT_OP_FILL;
	hw_desc->fill.u.control.completion_update = 1;

	hw_desc->fill.size = len;
	hw_desc->fill.src_data = fill_pattern;
	hw_desc->fill.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}
static int ioat_reset_hw(struct spdk_ioat_chan *ioat)
{
	int timeout;
	uint64_t status;
	uint32_t chanerr;
	int rc;

	status = ioat_get_chansts(ioat);
	if (is_ioat_active(status) || is_ioat_idle(status)) {
		ioat_suspend(ioat);
	}

	timeout = 20; /* in milliseconds */
	while (is_ioat_active(status) || is_ioat_idle(status)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for suspend\n");
			return -1;
		}
		status = ioat_get_chansts(ioat);
	}

	/*
	 * Clear any outstanding errors.
	 * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything.
	 */
	chanerr = ioat->regs->chanerr;
	ioat->regs->chanerr = chanerr;

	if (ioat->regs->cbver < SPDK_IOAT_VER_3_3) {
		rc = spdk_pci_device_cfg_read32(ioat->device, &chanerr,
						SPDK_IOAT_PCI_CHANERR_INT_OFFSET);
		if (rc) {
			SPDK_ERRLOG("failed to read the internal channel error register\n");
			return -1;
		}

		spdk_pci_device_cfg_write32(ioat->device, chanerr,
					    SPDK_IOAT_PCI_CHANERR_INT_OFFSET);
	}

	ioat_reset(ioat);

	timeout = 20;
	while (ioat_reset_pending(ioat)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for reset\n");
			return -1;
		}
	}

	return 0;
}
static int
ioat_process_channel_events(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	uint64_t status, completed_descriptor, hw_desc_phys_addr;
	uint32_t tail;

	if (ioat->head == ioat->tail) {
		return 0;
	}

	status = *ioat->comp_update;
	completed_descriptor = status & SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;

	if (is_ioat_halted(status)) {
		SPDK_ERRLOG("Channel halted (%x)\n", ioat->regs->chanerr);
		return -1;
	}

	if (completed_descriptor == ioat->last_seen) {
		return 0;
	}

	do {
		tail = ioat_get_ring_index(ioat, ioat->tail);
		desc = &ioat->ring[tail];

		if (desc->callback_fn) {
			desc->callback_fn(desc->callback_arg);
		}

		hw_desc_phys_addr = desc->phys_addr;
		ioat->tail++;
	} while (hw_desc_phys_addr != completed_descriptor);

	ioat->last_seen = hw_desc_phys_addr;
	return 0;
}
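/*
 * Added note: completions are reaped without touching MMIO registers in the
 * fast path. The channel DMA-writes the physical address of the most recently
 * completed descriptor into *ioat->comp_update; ioat_process_channel_events()
 * walks the ring from tail, invoking each descriptor's callback, until it
 * reaches that address, and then caches it in ioat->last_seen so the same
 * completions are not processed twice.
 */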
static void
ioat_channel_destruct(struct spdk_ioat_chan *ioat)
{
	ioat_unmap_pci_bar(ioat);

	if (ioat->ring) {
		free(ioat->ring);
	}

	if (ioat->hw_ring) {
		spdk_dma_free(ioat->hw_ring);
	}

	if (ioat->comp_update) {
		spdk_dma_free((void *)ioat->comp_update);
		ioat->comp_update = NULL;
	}
}
static int
ioat_channel_start(struct spdk_ioat_chan *ioat)
{
	uint8_t xfercap, version;
	uint64_t status;
	int i, num_descriptors;
	uint64_t comp_update_bus_addr = 0;
	uint64_t phys_addr;

	if (ioat_map_pci_bar(ioat) != 0) {
		SPDK_ERRLOG("ioat_map_pci_bar() failed\n");
		return -1;
	}

	version = ioat->regs->cbver;
	if (version < SPDK_IOAT_VER_3_0) {
		SPDK_ERRLOG(" unsupported IOAT version %u.%u\n",
			    version >> 4, version & 0xF);
		return -1;
	}

	/* Always support DMA copy */
	ioat->dma_capabilities = SPDK_IOAT_ENGINE_COPY_SUPPORTED;
	if (ioat->regs->dmacapability & SPDK_IOAT_DMACAP_BFILL) {
		ioat->dma_capabilities |= SPDK_IOAT_ENGINE_FILL_SUPPORTED;
	}
	xfercap = ioat->regs->xfercap;

	/* Only bits [4:0] are valid. */
	xfercap &= 0x1f;
	if (xfercap == 0) {
		/* 0 means 4 GB max transfer size. */
		ioat->max_xfer_size = 1ULL << 32;
	} else if (xfercap < 12) {
		/* XFERCAP must be at least 12 (4 KB) according to the spec. */
		SPDK_ERRLOG("invalid XFERCAP value %u\n", xfercap);
		return -1;
	} else {
		ioat->max_xfer_size = 1U << xfercap;
	}

	ioat->comp_update = spdk_dma_zmalloc(sizeof(*ioat->comp_update), SPDK_IOAT_CHANCMP_ALIGN,
					     &comp_update_bus_addr);
	if (ioat->comp_update == NULL) {
		return -1;
	}

	ioat->ring_size_order = IOAT_DEFAULT_ORDER;

	num_descriptors = 1 << ioat->ring_size_order;

	ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor));
	if (!ioat->ring) {
		return -1;
	}

	ioat->hw_ring = spdk_dma_zmalloc(num_descriptors * sizeof(union spdk_ioat_hw_desc), 64,
					 NULL);
	if (!ioat->hw_ring) {
		return -1;
	}

	for (i = 0; i < num_descriptors; i++) {
		phys_addr = spdk_vtophys(&ioat->hw_ring[i]);
		if (phys_addr == SPDK_VTOPHYS_ERROR) {
			SPDK_ERRLOG("Failed to translate descriptor %u to physical address\n", i);
			return -1;
		}

		ioat->ring[i].phys_addr = phys_addr;
		ioat->hw_ring[ioat_get_ring_index(ioat, i - 1)].generic.next = phys_addr;
	}

	ioat->head = 0;
	ioat->tail = 0;
	ioat->last_seen = 0;

	ioat_reset_hw(ioat);

	ioat->regs->chanctrl = SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
	ioat_write_chancmp(ioat, comp_update_bus_addr);
	ioat_write_chainaddr(ioat, ioat->ring[0].phys_addr);

	ioat_prep_null(ioat);
	ioat_flush(ioat);

	i = 100;
	while (i-- > 0) {
		spdk_delay_us(100);
		status = ioat_get_chansts(ioat);
		if (is_ioat_idle(status)) {
			break;
		}
	}

	if (is_ioat_idle(status)) {
		ioat_process_channel_events(ioat);
	} else {
		SPDK_ERRLOG("could not start channel: status = %p\n error = %#x\n",
			    (void *)status, ioat->regs->chanerr);
		return -1;
	}

	return 0;
}
/* Caller must hold g_ioat_driver.lock */
static struct spdk_ioat_chan *
ioat_attach(struct spdk_pci_device *device)
{
	struct spdk_ioat_chan *ioat;
	uint32_t cmd_reg;

	ioat = calloc(1, sizeof(struct spdk_ioat_chan));
	if (ioat == NULL) {
		return NULL;
	}

	/* Enable PCI busmaster. */
	spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
	cmd_reg |= 0x4;
	spdk_pci_device_cfg_write32(device, cmd_reg, 4);

	ioat->device = device;

	if (ioat_channel_start(ioat) != 0) {
		ioat_channel_destruct(ioat);
		free(ioat);
		return NULL;
	}

	return ioat;
}
struct ioat_enum_ctx {
	spdk_ioat_probe_cb probe_cb;
	spdk_ioat_attach_cb attach_cb;
	void *cb_ctx;
};
/* This function must only be called while holding g_ioat_driver.lock */
static int
ioat_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	struct ioat_enum_ctx *enum_ctx = ctx;
	struct spdk_ioat_chan *ioat;

	/* Verify that this device is not already attached */
	TAILQ_FOREACH(ioat, &g_ioat_driver.attached_chans, tailq) {
		/*
		 * NOTE: This assumes that the PCI abstraction layer will use the same device handle
		 *  across enumerations; we could compare by BDF instead if this is not true.
		 */
		if (pci_dev == ioat->device) {
			return 0;
		}
	}

	if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
		/*
		 * Since I/OAT init is relatively quick, just perform the full init during probing.
		 *  If this turns out to be a bottleneck later, this can be changed to work like
		 *  NVMe with a list of devices to initialize in parallel.
		 */
		ioat = ioat_attach(pci_dev);
		if (ioat == NULL) {
			SPDK_ERRLOG("ioat_attach() failed\n");
			return -1;
		}

		TAILQ_INSERT_TAIL(&g_ioat_driver.attached_chans, ioat, tailq);

		enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, ioat);
	}

	return 0;
}
int
spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb)
{
	int rc;
	struct ioat_enum_ctx enum_ctx;

	pthread_mutex_lock(&g_ioat_driver.lock);

	enum_ctx.probe_cb = probe_cb;
	enum_ctx.attach_cb = attach_cb;
	enum_ctx.cb_ctx = cb_ctx;

	rc = spdk_pci_ioat_enumerate(ioat_enum_cb, &enum_ctx);

	pthread_mutex_unlock(&g_ioat_driver.lock);

	return rc;
}
void
spdk_ioat_detach(struct spdk_ioat_chan *ioat)
{
	struct ioat_driver *driver = &g_ioat_driver;

	/* ioat should be in the free list (not registered to a thread)
	 * when calling ioat_detach().
	 */
	pthread_mutex_lock(&driver->lock);
	TAILQ_REMOVE(&driver->attached_chans, ioat, tailq);
	pthread_mutex_unlock(&driver->lock);

	ioat_channel_destruct(ioat);
	free(ioat);
}
#define _2MB_PAGE(ptr)		((ptr) & ~(0x200000 - 1))
#define _2MB_OFFSET(ptr)	((ptr) & (0x200000 - 1))
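/*
 * Added note: the submit paths below translate virtual addresses one 2 MB
 * hugepage at a time using the macros above. For example, a 3 MB copy whose
 * source starts at offset 0x1F0000 within its hugepage is first clamped to
 * 0x200000 - 0x1F0000 = 64 KB (up to the source page boundary); each chunk is
 * further clamped to the destination page boundary and ioat->max_xfer_size,
 * and each resulting chunk becomes one hardware descriptor via
 * ioat_prep_copy()/ioat_prep_fill().
 */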
int
spdk_ioat_submit_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, const void *src, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc;
	uint64_t remaining, op_size;
	uint64_t vdst, vsrc;
	uint64_t vdst_page, vsrc_page;
	uint64_t pdst_page, psrc_page;
	uint32_t orig_head;

	if (!ioat) {
		return -EINVAL;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	vsrc = (uint64_t)src;
	vdst_page = vsrc_page = 0;
	pdst_page = psrc_page = SPDK_VTOPHYS_ERROR;

	remaining = nbytes;
	while (remaining) {
		if (_2MB_PAGE(vsrc) != vsrc_page) {
			vsrc_page = _2MB_PAGE(vsrc);
			psrc_page = spdk_vtophys((void *)vsrc_page);
		}

		if (_2MB_PAGE(vdst) != vdst_page) {
			vdst_page = _2MB_PAGE(vdst);
			pdst_page = spdk_vtophys((void *)vdst_page);
		}
		op_size = remaining;
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vsrc)));
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_copy(ioat,
					   pdst_page + _2MB_OFFSET(vdst),
					   psrc_page + _2MB_OFFSET(vsrc),
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vsrc += op_size;
		vdst += op_size;
	}

	/* Issue null descriptor for null transfer */
	if (nbytes == 0) {
		last_desc = ioat_prep_null(ioat);
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -ENOMEM;
	}

	ioat_flush(ioat);
	return 0;
}
int
spdk_ioat_submit_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, uint64_t fill_pattern, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc = NULL;
	uint64_t remaining, op_size;
	uint64_t vdst;
	uint32_t orig_head;

	if (!ioat) {
		return -EINVAL;
	}

	if (!(ioat->dma_capabilities & SPDK_IOAT_ENGINE_FILL_SUPPORTED)) {
		SPDK_ERRLOG("Channel does not support memory fill\n");
		return -1;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	remaining = nbytes;

	while (remaining) {
		op_size = remaining;
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_fill(ioat,
					   spdk_vtophys((void *)vdst),
					   fill_pattern,
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vdst += op_size;
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -ENOMEM;
	}

	ioat_flush(ioat);
	return 0;
}
uint32_t
spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *ioat)
{
	if (!ioat) {
		return 0;
	}
	return ioat->dma_capabilities;
}
int
spdk_ioat_process_events(struct spdk_ioat_chan *ioat)
{
	return ioat_process_channel_events(ioat);
}
SPDK_LOG_REGISTER_COMPONENT("ioat", SPDK_LOG_IOAT)
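/*
 * Added usage sketch (illustrative only, not part of this file): a minimal
 * polled-mode caller, assuming the SPDK environment has already been set up
 * (e.g. via spdk_env_init()) and that dst/src are DMA-safe buffers such as
 * those returned by spdk_dma_malloc().
 *
 *	static struct spdk_ioat_chan *g_chan;
 *
 *	static bool probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
 *	{
 *		return true;		// claim every I/OAT channel found
 *	}
 *
 *	static void attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev,
 *			      struct spdk_ioat_chan *chan)
 *	{
 *		g_chan = chan;		// remember the last attached channel
 *	}
 *
 *	static void copy_done(void *arg)
 *	{
 *		*(bool *)arg = true;
 *	}
 *
 *	...
 *	spdk_ioat_probe(NULL, probe_cb, attach_cb);
 *	bool done = false;
 *	spdk_ioat_submit_copy(g_chan, &done, copy_done, dst, src, len);
 *	while (!done) {
 *		spdk_ioat_process_events(g_chan);	// poll for the completion callback
 *	}
 *	spdk_ioat_detach(g_chan);
 */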