/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "ioat_internal.h"

#include "spdk/env.h"
#include "spdk/util.h"

#include "spdk_internal/log.h"

struct ioat_driver {
	pthread_mutex_t			lock;
	TAILQ_HEAD(, spdk_ioat_chan)	attached_chans;
};

static struct ioat_driver g_ioat_driver = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.attached_chans = TAILQ_HEAD_INITIALIZER(g_ioat_driver.attached_chans),
};

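/*
 * Every channel attached via spdk_ioat_probe() is kept on
 * g_ioat_driver.attached_chans; the list is only walked or modified while
 * holding g_ioat_driver.lock.
 */
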
static uint64_t
ioat_get_chansts(struct spdk_ioat_chan *ioat)
{
	return spdk_mmio_read_8(&ioat->regs->chansts);
}

static void
ioat_write_chancmp(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chancmp, addr);
}

static void
ioat_write_chainaddr(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chainaddr, addr);
}

static inline void
ioat_suspend(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_SUSPEND;
}

static inline void
ioat_reset(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_RESET;
}

static inline uint32_t
ioat_reset_pending(struct spdk_ioat_chan *ioat)
{
	uint8_t cmd;

	cmd = ioat->regs->chancmd;
	return (cmd & SPDK_IOAT_CHANCMD_RESET) == SPDK_IOAT_CHANCMD_RESET;
}

static int
ioat_map_pci_bar(struct spdk_ioat_chan *ioat)
{
	int regs_bar, rc;
	void *addr;
	uint64_t phys_addr, size;

	regs_bar = 0;
	rc = spdk_pci_device_map_bar(ioat->device, regs_bar, &addr, &phys_addr, &size);
	if (rc != 0 || addr == NULL) {
		SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", rc);
		return -1;
	}

	ioat->regs = (volatile struct spdk_ioat_registers *)addr;

	return 0;
}

static int
ioat_unmap_pci_bar(struct spdk_ioat_chan *ioat)
{
	int rc = 0;
	void *addr = (void *)ioat->regs;

	if (addr) {
		rc = spdk_pci_device_unmap_bar(ioat->device, 0, addr);
	}
	return rc;
}

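/*
 * Descriptor ring bookkeeping: the ring holds (1 << ring_size_order) entries,
 * and head/tail are free-running counters that are masked down to a ring
 * index. One slot is always left unused so a full ring can be distinguished
 * from an empty one. For example, with ring_size_order == 7 the ring has
 * 128 slots and at most 127 descriptors can be outstanding at once.
 */
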
static inline uint32_t
ioat_get_active(struct spdk_ioat_chan *ioat)
{
	return (ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1);
}

static inline uint32_t
ioat_get_ring_space(struct spdk_ioat_chan *ioat)
{
	return (1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1;
}

static uint32_t
ioat_get_ring_index(struct spdk_ioat_chan *ioat, uint32_t index)
{
	return index & ((1 << ioat->ring_size_order) - 1);
}

static void
ioat_get_ring_entry(struct spdk_ioat_chan *ioat, uint32_t index,
		    struct ioat_descriptor **desc,
		    union spdk_ioat_hw_desc **hw_desc)
{
	uint32_t i = ioat_get_ring_index(ioat, index);

	*desc = &ioat->ring[i];
	*hw_desc = &ioat->hw_ring[i];
}

static uint64_t
ioat_get_desc_phys_addr(struct spdk_ioat_chan *ioat, uint32_t index)
{
	return ioat->hw_ring_phys_addr +
	       ioat_get_ring_index(ioat, index) * sizeof(union spdk_ioat_hw_desc);
}

static void
ioat_submit_single(struct spdk_ioat_chan *ioat)
{
	ioat->head++;
}

static void
ioat_flush(struct spdk_ioat_chan *ioat)
{
	ioat->regs->dmacount = (uint16_t)ioat->head;
}

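/*
 * The ioat_prep_*() helpers all follow the same pattern: reserve the slot at
 * ioat->head, fill in the hardware descriptor and the software callback entry,
 * then advance head with ioat_submit_single(). The hardware does not see the
 * new descriptors until ioat_flush() writes the updated count to DMACOUNT.
 */
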
static struct ioat_descriptor *
ioat_prep_null(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.null = 1;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = 8;
	hw_desc->dma.src_addr = 0;
	hw_desc->dma.dest_addr = 0;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static struct ioat_descriptor *
ioat_prep_copy(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t src, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = len;
	hw_desc->dma.src_addr = src;
	hw_desc->dma.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static struct ioat_descriptor *
ioat_prep_fill(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t fill_pattern, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->fill.u.control_raw = 0;
	hw_desc->fill.u.control.op = SPDK_IOAT_OP_FILL;
	hw_desc->fill.u.control.completion_update = 1;

	hw_desc->fill.size = len;
	hw_desc->fill.src_data = fill_pattern;
	hw_desc->fill.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

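/*
 * Reset sequence: suspend the channel if it is active or idle, wait for the
 * suspend to take effect, clear CHANERR (write-1-to-clear), then issue the
 * reset command and poll until the RESET bit in CHANCMD clears.
 */
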
static int ioat_reset_hw(struct spdk_ioat_chan *ioat)
{
	int timeout;
	uint64_t status;
	uint32_t chanerr;

	status = ioat_get_chansts(ioat);
	if (is_ioat_active(status) || is_ioat_idle(status)) {
		ioat_suspend(ioat);
	}

	timeout = 20; /* in milliseconds */
	while (is_ioat_active(status) || is_ioat_idle(status)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for suspend\n");
			return -1;
		}
		status = ioat_get_chansts(ioat);
	}

	/*
	 * Clear any outstanding errors.
	 * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything.
	 */
	chanerr = ioat->regs->chanerr;
	ioat->regs->chanerr = chanerr;

	ioat_reset(ioat);

	timeout = 20;
	while (ioat_reset_pending(ioat)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for reset\n");
			return -1;
		}
	}

	return 0;
}

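/*
 * Completion processing: the hardware periodically writes the channel status,
 * including the physical address of the last completed descriptor, to the
 * completion update address (ioat->comp_update). Walk the ring from tail up
 * to that descriptor, invoking each descriptor's callback along the way.
 */
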
static int
ioat_process_channel_events(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	uint64_t status, completed_descriptor, hw_desc_phys_addr;
	uint32_t tail;

	if (ioat->head == ioat->tail) {
		return 0;
	}

	status = *ioat->comp_update;
	completed_descriptor = status & SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;

	if (is_ioat_halted(status)) {
		SPDK_ERRLOG("Channel halted (%x)\n", ioat->regs->chanerr);
		return -1;
	}

	if (completed_descriptor == ioat->last_seen) {
		return 0;
	}

	do {
		tail = ioat_get_ring_index(ioat, ioat->tail);
		desc = &ioat->ring[tail];

		if (desc->callback_fn) {
			desc->callback_fn(desc->callback_arg);
		}

		hw_desc_phys_addr = ioat_get_desc_phys_addr(ioat, ioat->tail);
		ioat->tail++;
	} while (hw_desc_phys_addr != completed_descriptor);

	ioat->last_seen = hw_desc_phys_addr;
	return 0;
}

static void
ioat_channel_destruct(struct spdk_ioat_chan *ioat)
{
	ioat_unmap_pci_bar(ioat);

	if (ioat->ring) {
		free(ioat->ring);
	}

	if (ioat->hw_ring) {
		spdk_free(ioat->hw_ring);
	}

	if (ioat->comp_update) {
		spdk_free((void *)ioat->comp_update);
		ioat->comp_update = NULL;
	}
}

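/*
 * Channel bring-up: map the MMIO registers, validate the hardware version and
 * XFERCAP, allocate the completion update area and the descriptor rings, reset
 * the channel, program the completion and chain addresses, and finally submit
 * a null descriptor to verify that the channel reaches the idle state.
 */
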
static int
ioat_channel_start(struct spdk_ioat_chan *ioat)
{
	uint8_t xfercap, version;
	uint64_t status;
	int i, num_descriptors;
	uint64_t comp_update_bus_addr = 0;

	if (ioat_map_pci_bar(ioat) != 0) {
		SPDK_ERRLOG("ioat_map_pci_bar() failed\n");
		return -1;
	}

	version = ioat->regs->cbver;
	if (version < SPDK_IOAT_VER_3_0) {
		SPDK_ERRLOG("unsupported IOAT version %u.%u\n",
			    version >> 4, version & 0xF);
		return -1;
	}

	/* Always support DMA copy */
	ioat->dma_capabilities = SPDK_IOAT_ENGINE_COPY_SUPPORTED;
	if (ioat->regs->dmacapability & SPDK_IOAT_DMACAP_BFILL) {
		ioat->dma_capabilities |= SPDK_IOAT_ENGINE_FILL_SUPPORTED;
	}
	xfercap = ioat->regs->xfercap;

	/* Only bits [4:0] are valid. */
	xfercap &= 0x1f;
	if (xfercap == 0) {
		/* 0 means 4 GB max transfer size. */
		ioat->max_xfer_size = 1ULL << 32;
	} else if (xfercap < 12) {
		/* XFERCAP must be at least 12 (4 KB) according to the spec. */
		SPDK_ERRLOG("invalid XFERCAP value %u\n", xfercap);
		return -1;
	} else {
		ioat->max_xfer_size = 1U << xfercap;
	}

	ioat->comp_update = spdk_zmalloc(sizeof(*ioat->comp_update), SPDK_IOAT_CHANCMP_ALIGN,
					 &comp_update_bus_addr);
	if (ioat->comp_update == NULL) {
		return -1;
	}

	ioat->ring_size_order = IOAT_DEFAULT_ORDER;

	num_descriptors = 1 << ioat->ring_size_order;

	ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor));
	if (!ioat->ring) {
		return -1;
	}

	ioat->hw_ring = spdk_zmalloc(num_descriptors * sizeof(union spdk_ioat_hw_desc), 64,
				     &ioat->hw_ring_phys_addr);
	if (!ioat->hw_ring) {
		return -1;
	}

	for (i = 0; i < num_descriptors; i++) {
		ioat->hw_ring[i].generic.next = ioat_get_desc_phys_addr(ioat, i + 1);
	}

	ioat->head = 0;
	ioat->tail = 0;
	ioat->last_seen = 0;

	ioat_reset_hw(ioat);

	ioat->regs->chanctrl = SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
	ioat_write_chancmp(ioat, comp_update_bus_addr);
	ioat_write_chainaddr(ioat, ioat->hw_ring_phys_addr);

	ioat_prep_null(ioat);
	ioat_flush(ioat);

	i = 100;
	while (i-- > 0) {
		spdk_delay_us(100);
		status = ioat_get_chansts(ioat);
		if (is_ioat_idle(status)) {
			break;
		}
	}

	if (is_ioat_idle(status)) {
		ioat_process_channel_events(ioat);
	} else {
		SPDK_ERRLOG("could not start channel: status = %p error = %#x\n",
			    (void *)status, ioat->regs->chanerr);
		return -1;
	}

	return 0;
}

/* Caller must hold g_ioat_driver.lock */
static struct spdk_ioat_chan *
ioat_attach(void *device)
{
	struct spdk_ioat_chan *ioat;
	uint32_t cmd_reg;

	ioat = calloc(1, sizeof(struct spdk_ioat_chan));
	if (ioat == NULL) {
		return NULL;
	}

	/* Enable PCI busmaster. */
	spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
	cmd_reg |= 0x4;
	spdk_pci_device_cfg_write32(device, cmd_reg, 4);

	ioat->device = device;

	if (ioat_channel_start(ioat) != 0) {
		ioat_channel_destruct(ioat);
		free(ioat);
		return NULL;
	}

	return ioat;
}

struct ioat_enum_ctx {
	spdk_ioat_probe_cb probe_cb;
	spdk_ioat_attach_cb attach_cb;
	void *cb_ctx;
};

/* This function must only be called while holding g_ioat_driver.lock */
static int
ioat_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	struct ioat_enum_ctx *enum_ctx = ctx;
	struct spdk_ioat_chan *ioat;

	/* Verify that this device is not already attached */
	TAILQ_FOREACH(ioat, &g_ioat_driver.attached_chans, tailq) {
		/*
		 * NOTE: This assumes that the PCI abstraction layer will use the same device handle
		 * across enumerations; we could compare by BDF instead if this is not true.
		 */
		if (pci_dev == ioat->device) {
			return 0;
		}
	}

	if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
		/*
		 * Since I/OAT init is relatively quick, just perform the full init during probing.
		 * If this turns out to be a bottleneck later, this can be changed to work like
		 * NVMe with a list of devices to initialize in parallel.
		 */
		ioat = ioat_attach(pci_dev);
		if (ioat == NULL) {
			SPDK_ERRLOG("ioat_attach() failed\n");
			return -1;
		}

		TAILQ_INSERT_TAIL(&g_ioat_driver.attached_chans, ioat, tailq);

		enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, ioat);
	}

	return 0;
}

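/*
 * Illustrative usage sketch (the callback names below are placeholders, not
 * part of this driver): an application supplies a probe callback that returns
 * true for each channel it wants to claim, and an attach callback that stores
 * the resulting channel handle for later submit/process_events calls.
 *
 *	static struct spdk_ioat_chan *g_chan;
 *
 *	static bool
 *	my_probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
 *	{
 *		return true;
 *	}
 *
 *	static void
 *	my_attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev,
 *		     struct spdk_ioat_chan *chan)
 *	{
 *		g_chan = chan;
 *	}
 *
 *	spdk_ioat_probe(NULL, my_probe_cb, my_attach_cb);
 */
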
int
spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb)
{
	int rc;
	struct ioat_enum_ctx enum_ctx;

	pthread_mutex_lock(&g_ioat_driver.lock);

	enum_ctx.probe_cb = probe_cb;
	enum_ctx.attach_cb = attach_cb;
	enum_ctx.cb_ctx = cb_ctx;

	rc = spdk_pci_ioat_enumerate(ioat_enum_cb, &enum_ctx);

	pthread_mutex_unlock(&g_ioat_driver.lock);

	return rc;
}

void
spdk_ioat_detach(struct spdk_ioat_chan *ioat)
{
	struct ioat_driver *driver = &g_ioat_driver;

	/*
	 * ioat should be in the free list (not registered to a thread)
	 * when calling ioat_detach().
	 */
	pthread_mutex_lock(&driver->lock);
	TAILQ_REMOVE(&driver->attached_chans, ioat, tailq);
	pthread_mutex_unlock(&driver->lock);

	ioat_channel_destruct(ioat);
	free(ioat);
}

#define _2MB_PAGE(ptr)		((ptr) & ~(0x200000 - 1))
#define _2MB_OFFSET(ptr)	((ptr) & (0x200000 - 1))

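/*
 * spdk_vtophys() translations are only guaranteed to be contiguous within a
 * single 2 MB hugepage, so a copy request is broken into pieces at every 2 MB
 * boundary of the source and destination virtual addresses (and at
 * max_xfer_size). For example, assuming max_xfer_size >= 2 MB, a 3 MB copy
 * whose source and destination both start 1 MB into a hugepage is issued as a
 * 1 MB descriptor followed by a 2 MB descriptor.
 */
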
int
spdk_ioat_submit_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, const void *src, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc;
	uint64_t remaining, op_size;
	uint64_t vdst, vsrc;
	uint64_t vdst_page, vsrc_page;
	uint64_t pdst_page, psrc_page;
	uint32_t orig_head;

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	vsrc = (uint64_t)src;
	vdst_page = vsrc_page = 0;
	pdst_page = psrc_page = SPDK_VTOPHYS_ERROR;

	remaining = nbytes;

	while (remaining) {
		if (_2MB_PAGE(vsrc) != vsrc_page) {
			vsrc_page = _2MB_PAGE(vsrc);
			psrc_page = spdk_vtophys((void *)vsrc_page);
		}

		if (_2MB_PAGE(vdst) != vdst_page) {
			vdst_page = _2MB_PAGE(vdst);
			pdst_page = spdk_vtophys((void *)vdst_page);
		}

		op_size = remaining;
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vsrc)));
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_copy(ioat,
					   pdst_page + _2MB_OFFSET(vdst),
					   psrc_page + _2MB_OFFSET(vsrc),
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vsrc += op_size;
		vdst += op_size;
	}

	/* Issue null descriptor for null transfer */
	if (nbytes == 0) {
		last_desc = ioat_prep_null(ioat);
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -ENOMEM;
	}

	ioat_flush(ioat);
	return 0;
}

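/*
 * Memory fill is an optional engine capability; callers can check for
 * SPDK_IOAT_ENGINE_FILL_SUPPORTED via spdk_ioat_get_dma_capabilities() before
 * using spdk_ioat_submit_fill(). Only the destination needs vtophys
 * translation here, so the request is split at destination 2 MB boundaries
 * and at max_xfer_size.
 */
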
int
spdk_ioat_submit_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, uint64_t fill_pattern, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc = NULL;
	uint64_t remaining, op_size;
	uint64_t vdst;
	uint32_t orig_head;

	if (!(ioat->dma_capabilities & SPDK_IOAT_ENGINE_FILL_SUPPORTED)) {
		SPDK_ERRLOG("Channel does not support memory fill\n");
		return -1;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	remaining = nbytes;

	while (remaining) {
		op_size = remaining;
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_fill(ioat,
					   spdk_vtophys((void *)vdst),
					   fill_pattern,
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vdst += op_size;
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -ENOMEM;
	}

	ioat_flush(ioat);
	return 0;
}

uint32_t
spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *ioat)
{
	if (!ioat) {
		return 0;
	}
	return ioat->dma_capabilities;
}

int
spdk_ioat_process_events(struct spdk_ioat_chan *ioat)
{
	return ioat_process_channel_events(ioat);
}

SPDK_LOG_REGISTER_TRACE_FLAG("ioat", SPDK_TRACE_IOAT)