]>
git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018, Microsoft Corporation.
12 #include <rte_tailq.h>
14 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_memory.h>
18 #include <rte_bus_vmbus.h>
/*
 * Atomically OR `mask` into the 32-bit word that `addr` points to.
 */
23 vmbus_sync_set_bit(volatile uint32_t *addr
, uint32_t mask
)
25 /* Use the GCC builtin that performs an atomic OR operation */
26 __sync_or_and_fetch(addr
, mask
);
/*
 * Set the interrupt bit that corresponds to channel `relid`.
 *
 * The target word is found at offset relid / 32 from dev->int_page and
 * the bit within that word is relid % 32; the bit is set atomically via
 * vmbus_sync_set_bit().
 */
30 vmbus_send_interrupt(const struct rte_vmbus_device
*dev
, uint32_t relid
)
35 int_addr
= dev
->int_page
+ relid
/ 32;
36 int_mask
= 1u << (relid
% 32);
38 vmbus_sync_set_bit(int_addr
, int_mask
);
/*
 * Set the pending bit for `monitor_id` in the device's monitor page.
 *
 * monitor_id / HV_MON_TRIG_LEN selects the entry of
 * dev->monitor_page->trigs[], and monitor_id % HV_MON_TRIG_LEN selects
 * the bit inside that entry's `pending` word. The bit is set atomically
 * via vmbus_sync_set_bit().
 */
42 vmbus_set_monitor(const struct rte_vmbus_device
*dev
, uint32_t monitor_id
)
44 uint32_t *monitor_addr
, monitor_mask
;
45 unsigned int trigger_index
;
47 trigger_index
= monitor_id
/ HV_MON_TRIG_LEN
;
48 monitor_mask
= 1u << (monitor_id
% HV_MON_TRIG_LEN
);
50 monitor_addr
= &dev
->monitor_page
->trigs
[trigger_index
].pending
;
51 vmbus_sync_set_bit(monitor_addr
, monitor_mask
);
/*
 * Flag an event for `chan`: set its bit in the device interrupt page
 * (keyed by chan->relid) and then its pending bit in the monitor page
 * (keyed by chan->monitor_id).
 */
55 vmbus_set_event(const struct rte_vmbus_device
*dev
,
56 const struct vmbus_channel
*chan
)
58 vmbus_send_interrupt(dev
, chan
->relid
);
59 vmbus_set_monitor(dev
, chan
->monitor_id
);
63 * Set the wait time between the hypervisor's examinations of the trigger.
/*
 * Store a latency value for the monitor trigger that belongs to `chan`.
 *
 * chan->monitor_id is split into a trigger group index
 * (monitor_id / VMBUS_MONTRIG_LEN) and an offset within the group
 * (monitor_id % VMBUS_MONTRIG_LEN). `latency` is divided by 100 before
 * being written to dev->monitor_page->lat[][].
 *
 * An error is logged when latency >= UINT16_MAX * 100 or when the
 * trigger group index is >= VMBUS_MONTRIGS_MAX.
 * NOTE(review): the statements that follow each VMBUS_LOG() call are not
 * visible in this listing; presumably the function returns without
 * writing the value -- confirm against the full source.
 */
66 rte_vmbus_set_latency(const struct rte_vmbus_device
*dev
,
67 const struct vmbus_channel
*chan
,
70 uint32_t trig_idx
= chan
->monitor_id
/ VMBUS_MONTRIG_LEN
;
71 uint32_t trig_offs
= chan
->monitor_id
% VMBUS_MONTRIG_LEN
;
73 if (latency
>= UINT16_MAX
* 100) {
74 VMBUS_LOG(ERR
, "invalid latency value %u", latency
);
78 if (trig_idx
>= VMBUS_MONTRIGS_MAX
) {
79 VMBUS_LOG(ERR
, "invalid monitor trigger %u",
84 /* Host value is expressed in 100 nanosecond units */
85 dev
->monitor_page
->lat
[trig_idx
][trig_offs
] = latency
/ 100;
89 * Notify host that there are data pending on our TX bufring.
91 * Since this is in userspace, rely on the monitor page.
92 * Can't do a hypercall from userspace.
/*
 * Signal the host on behalf of `chan` by calling vmbus_set_event() with
 * the channel's owning device.
 *
 * NOTE(review): the statements that belong to the two comments below
 * (the ordering barrier and the "host is ignoring interrupts" test on
 * the TX ring `tbr`) are not visible in this listing, so the exact
 * condition under which the event is raised cannot be stated from here.
 */
95 rte_vmbus_chan_signal_tx(const struct vmbus_channel
*chan
)
97 const struct rte_vmbus_device
*dev
= chan
->device
;
98 const struct vmbus_br
*tbr
= &chan
->txbr
;
100 /* Make sure all updates are done before signaling host */
103 /* If host is ignoring interrupts? */
107 vmbus_set_event(dev
, chan
);
/*
 * Send one in-band packet on the channel's transmit ring.
 *
 * The packet is written as three pieces: a struct vmbus_chanpkt header,
 * the caller's `data` (`dlen` bytes) and padding that rounds the total
 * up to a multiple of sizeof(uint64_t). The header's hlen and tlen
 * fields hold the header length and the padded total length, each
 * shifted right by VMBUS_CHANPKT_SIZE_SHIFT.
 *
 * `need_sig` controls signaling, as described in the comment near the
 * end of the body: either the "event needed" result of the ring write
 * is OR-ed into *need_sig, or the host is signaled here when the write
 * succeeded (error == 0) and an event is needed.
 *
 * NOTE(review): the declarations of `iov`, `pad` and `error`, the store
 * of `type`, the test on `need_sig` and the return statement are not
 * visible in this listing.
 */
111 /* Do a simple send directly using transmit ring. */
112 int rte_vmbus_chan_send(struct vmbus_channel
*chan
, uint16_t type
,
113 void *data
, uint32_t dlen
,
114 uint64_t xactid
, uint32_t flags
, bool *need_sig
)
116 struct vmbus_chanpkt pkt
;
117 unsigned int pktlen
, pad_pktlen
;
118 const uint32_t hlen
= sizeof(pkt
);
119 bool send_evt
= false;
124 pktlen
= hlen
+ dlen
;
125 pad_pktlen
= RTE_ALIGN(pktlen
, sizeof(uint64_t));
128 pkt
.hdr
.flags
= flags
;
129 pkt
.hdr
.hlen
= hlen
>> VMBUS_CHANPKT_SIZE_SHIFT
;
130 pkt
.hdr
.tlen
= pad_pktlen
>> VMBUS_CHANPKT_SIZE_SHIFT
;
131 pkt
.hdr
.xactid
= xactid
;
/* Gather list: [0] header, [1] payload, [2] padding up to pad_pktlen */
133 iov
[0].iov_base
= &pkt
;
134 iov
[0].iov_len
= hlen
;
135 iov
[1].iov_base
= data
;
136 iov
[1].iov_len
= dlen
;
137 iov
[2].iov_base
= &pad
;
138 iov
[2].iov_len
= pad_pktlen
- pktlen
;
140 error
= vmbus_txbr_write(&chan
->txbr
, iov
, 3, &send_evt
);
143 * caller sets need_sig to non-NULL if it will handle
144 * signaling if required later.
145 * if need_sig is NULL, signal now if needed.
148 *need_sig
|= send_evt
;
149 else if (error
== 0 && send_evt
)
150 rte_vmbus_chan_signal_tx(chan
);
/*
 * Send a packet whose descriptor carries a list of `sglen` vmbus_gpa
 * entries (`sg`) followed by `dlen` bytes of `data`.
 *
 * The header length is the offset of gpa[sglen] inside
 * struct vmbus_chanpkt_sglist, i.e. the fixed part plus the gpa array.
 * The packet is written as four pieces: the fixed header, the gpa array,
 * the data and padding up to a multiple of sizeof(uint64_t). The header
 * is typed VMBUS_CHANPKT_TYPE_GPA with flag VMBUS_CHANPKT_FLAG_RC, and
 * hlen/tlen are stored shifted right by VMBUS_CHANPKT_SIZE_SHIFT.
 *
 * Signaling follows the same `need_sig` convention as the visible tail
 * of the body shows: OR the "event needed" result into *need_sig, or
 * signal immediately when the write succeeded and an event is needed.
 *
 * NOTE(review): the declarations of `iov`, `pad` and `error`, the test
 * on `need_sig` and the return statement are not visible in this
 * listing.
 */
154 /* Do a scatter/gather send where the descriptor points to data. */
155 int rte_vmbus_chan_send_sglist(struct vmbus_channel
*chan
,
156 struct vmbus_gpa sg
[], uint32_t sglen
,
157 void *data
, uint32_t dlen
,
158 uint64_t xactid
, bool *need_sig
)
160 struct vmbus_chanpkt_sglist pkt
;
161 unsigned int pktlen
, pad_pktlen
, hlen
;
162 bool send_evt
= false;
167 hlen
= offsetof(struct vmbus_chanpkt_sglist
, gpa
[sglen
]);
168 pktlen
= hlen
+ dlen
;
169 pad_pktlen
= RTE_ALIGN(pktlen
, sizeof(uint64_t));
171 pkt
.hdr
.type
= VMBUS_CHANPKT_TYPE_GPA
;
172 pkt
.hdr
.flags
= VMBUS_CHANPKT_FLAG_RC
;
173 pkt
.hdr
.hlen
= hlen
>> VMBUS_CHANPKT_SIZE_SHIFT
;
174 pkt
.hdr
.tlen
= pad_pktlen
>> VMBUS_CHANPKT_SIZE_SHIFT
;
175 pkt
.hdr
.xactid
= xactid
;
/* Gather list: [0] fixed header, [1] gpa array, [2] payload, [3] padding */
179 iov
[0].iov_base
= &pkt
;
180 iov
[0].iov_len
= sizeof(pkt
);
181 iov
[1].iov_base
= sg
;
182 iov
[1].iov_len
= sizeof(struct vmbus_gpa
) * sglen
;
183 iov
[2].iov_base
= data
;
184 iov
[2].iov_len
= dlen
;
185 iov
[3].iov_base
= &pad
;
186 iov
[3].iov_len
= pad_pktlen
- pktlen
;
188 error
= vmbus_txbr_write(&chan
->txbr
, iov
, 4, &send_evt
);
190 /* if caller is batching, just propagate the status */
192 *need_sig
|= send_evt
;
193 else if (error
== 0 && send_evt
)
194 rte_vmbus_chan_signal_tx(chan
);
/*
 * Report whether the channel's receive ring is empty.
 *
 * Returns true when the ring's read index equals its write index.
 */
198 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel
*channel
)
200 const struct vmbus_br
*br
= &channel
->rxbr
;
203 return br
->vbr
->rindex
== br
->vbr
->windex
;
/*
 * Decide, after `bytes_read` bytes were consumed from the receive ring,
 * whether the host has to be notified, and notify it via
 * vmbus_set_event() if so.
 *
 * Visible inputs to the decision:
 *  - the ring's feat_pending_send_sz feature bit,
 *  - the ring's pending_send value,
 *  - the space currently available for writing (vmbus_br_availwrite()).
 *
 * NOTE(review): the statement guarded by each `if` below is not visible
 * in this listing; presumably each is an early return so that the event
 * is raised only when none of the conditions hold -- confirm against
 * the full source.
 */
206 /* Signal host after reading N bytes */
207 void rte_vmbus_chan_signal_read(struct vmbus_channel
*chan
, uint32_t bytes_read
)
209 struct vmbus_br
*rbr
= &chan
->rxbr
;
210 uint32_t write_sz
, pending_sz
;
212 /* No need for signaling on older versions */
213 if (!rbr
->vbr
->feature_bits
.feat_pending_send_sz
)
216 /* Make sure reading of pending happens after new read index */
219 pending_sz
= rbr
->vbr
->pending_send
;
224 write_sz
= vmbus_br_availwrite(rbr
, rbr
->vbr
->windex
);
226 /* If there was space before then host was not blocked */
227 if (write_sz
- bytes_read
> pending_sz
)
230 /* If pending write will not fit */
231 if (write_sz
<= pending_sz
)
234 vmbus_set_event(chan
->device
, chan
);
/*
 * Receive the payload of the next packet on the channel's receive ring.
 *
 * Steps visible in the body:
 *  1. Peek the packet header without consuming it.
 *  2. Reject headers whose hlen is below VMBUS_CHANPKT_HLEN_MIN or
 *     larger than tlen (an error is logged for each case).
 *  3. Convert hlen and tlen to bytes by shifting left by
 *     VMBUS_CHANPKT_SIZE_SHIFT; the payload length is tlen - hlen.
 *  4. Compare the payload length with the caller's buffer size, which
 *     is taken from *len on entry.
 *  5. Store the packet's xactid in *request_id.
 *  6. Read `dlen` payload bytes into `data`, passing `hlen` as the
 *     amount to skip, then call rte_vmbus_chan_signal_read() with
 *     dlen + hlen + sizeof(uint64_t).
 *
 * NOTE(review): the declaration of `error`, the error-path statements,
 * any update of *len and the return statements are not visible in this
 * listing, so return values are not documented here.
 */
237 int rte_vmbus_chan_recv(struct vmbus_channel
*chan
, void *data
, uint32_t *len
,
238 uint64_t *request_id
)
240 struct vmbus_chanpkt_hdr pkt
;
241 uint32_t dlen
, hlen
, bufferlen
= *len
;
246 error
= vmbus_rxbr_peek(&chan
->rxbr
, &pkt
, sizeof(pkt
));
250 if (unlikely(pkt
.hlen
< VMBUS_CHANPKT_HLEN_MIN
)) {
251 VMBUS_LOG(ERR
, "VMBUS recv, invalid hlen %u", pkt
.hlen
);
252 /* XXX this channel is dead actually. */
256 if (unlikely(pkt
.hlen
> pkt
.tlen
)) {
257 VMBUS_LOG(ERR
, "VMBUS recv,invalid hlen %u and tlen %u",
262 /* Lengths are in quad words; convert them to bytes */
263 hlen
= pkt
.hlen
<< VMBUS_CHANPKT_SIZE_SHIFT
;
264 dlen
= (pkt
.tlen
<< VMBUS_CHANPKT_SIZE_SHIFT
) - hlen
;
267 /* If caller buffer is not large enough */
268 if (unlikely(dlen
> bufferlen
))
272 *request_id
= pkt
.xactid
;
274 /* Read data and skip packet header */
275 error
= vmbus_rxbr_read(&chan
->rxbr
, data
, dlen
, hlen
);
279 rte_vmbus_chan_signal_read(chan
, dlen
+ hlen
+ sizeof(uint64_t));
/*
 * Receive the next packet from the channel's receive ring in raw form.
 *
 * The header is peeked and validated the same way as in
 * rte_vmbus_chan_recv() (hlen must be at least VMBUS_CHANPKT_HLEN_MIN
 * and must not exceed tlen). The length that is read is the packet's
 * full tlen converted to bytes, and the read is issued with an offset
 * of 0, so the header is copied into `data` together with the payload.
 * The caller's buffer size is taken from *len on entry.
 *
 * The visible return statement yields dlen + sizeof(uint64_t).
 * NOTE(review): the declaration of `error`, the error-path statements
 * and any update of *len are not visible in this listing.
 */
283 /* TODO: replace this with inplace ring buffer (no copy) */
284 int rte_vmbus_chan_recv_raw(struct vmbus_channel
*chan
,
285 void *data
, uint32_t *len
)
287 struct vmbus_chanpkt_hdr pkt
;
288 uint32_t dlen
, bufferlen
= *len
;
291 error
= vmbus_rxbr_peek(&chan
->rxbr
, &pkt
, sizeof(pkt
));
295 if (unlikely(pkt
.hlen
< VMBUS_CHANPKT_HLEN_MIN
)) {
296 VMBUS_LOG(ERR
, "VMBUS recv, invalid hlen %u", pkt
.hlen
);
297 /* XXX this channel is dead actually. */
301 if (unlikely(pkt
.hlen
> pkt
.tlen
)) {
302 VMBUS_LOG(ERR
, "VMBUS recv,invalid hlen %u and tlen %u",
307 /* Length is in quad words; convert it to bytes */
308 dlen
= pkt
.tlen
<< VMBUS_CHANPKT_SIZE_SHIFT
;
311 /* If caller buffer is not large enough */
312 if (unlikely(dlen
> bufferlen
))
315 /* Read the whole packet: the offset argument is 0 here, whereas rte_vmbus_chan_recv() passes hlen */
316 error
= vmbus_rxbr_read(&chan
->rxbr
, data
, dlen
, 0);
320 /* Return the number of bytes read */
321 return dlen
+ sizeof(uint64_t);
/*
 * Allocate and initialise a channel object for `device`.
 *
 * The object is zero-allocated with rte_zmalloc_socket() (tag "VMBUS",
 * RTE_CACHE_LINE_SIZE alignment) on device->device.numa_node. Its
 * subchannel list is initialised empty, and its device, subchannel_id
 * and monitor_id fields are filled from the arguments. The channel's
 * rings are then mapped with vmbus_uio_map_rings().
 *
 * NOTE(review): the allocation-failure check, the use of `relid`, the
 * handling of the vmbus_uio_map_rings() result, the store through
 * `new_chan` and the return statements are not visible in this listing.
 */
324 int vmbus_chan_create(const struct rte_vmbus_device
*device
,
325 uint16_t relid
, uint16_t subid
, uint8_t monitor_id
,
326 struct vmbus_channel
**new_chan
)
328 struct vmbus_channel
*chan
;
331 chan
= rte_zmalloc_socket("VMBUS", sizeof(*chan
), RTE_CACHE_LINE_SIZE
,
332 device
->device
.numa_node
);
336 STAILQ_INIT(&chan
->subchannel_list
);
337 chan
->device
= device
;
338 chan
->subchannel_id
= subid
;
340 chan
->monitor_id
= monitor_id
;
343 err
= vmbus_uio_map_rings(chan
);
/*
 * Open the primary channel of `device`.
 *
 * Looks up the device's UIO resource (logging "can't find uio resource"
 * on the failure path), creates a channel with the device's relid,
 * subchannel id 0 and the device's monitor_id, and records the new
 * channel as `primary` in both the device and the UIO resource.
 *
 * NOTE(review): the conditions guarding the log call and the two
 * `primary` assignments, and the return statements, are not visible in
 * this listing.
 */
352 /* Setup the primary channel */
353 int rte_vmbus_chan_open(struct rte_vmbus_device
*device
,
354 struct vmbus_channel
**new_chan
)
356 struct mapped_vmbus_resource
*uio_res
;
359 uio_res
= vmbus_uio_find_resource(device
);
361 VMBUS_LOG(ERR
, "can't find uio resource");
365 err
= vmbus_chan_create(device
, device
->relid
, 0,
366 device
->monitor_id
, new_chan
);
368 device
->primary
= *new_chan
;
369 uio_res
->primary
= *new_chan
;
/*
 * Report how many channels `device` can use.
 *
 * Returns VMBUS_MAX_CHANNELS when vmbus_uio_subchannels_supported()
 * is true for the device and its primary channel.
 * NOTE(review): the value returned otherwise is not visible in this
 * listing.
 */
375 int rte_vmbus_max_channels(const struct rte_vmbus_device
*device
)
377 if (vmbus_uio_subchannels_supported(device
, device
->primary
))
378 return VMBUS_MAX_CHANNELS
;
/*
 * Open a secondary (sub) channel of `primary`.
 *
 * Obtains a subchannel through vmbus_uio_get_subchan() and appends it
 * to the tail of primary->subchannel_list.
 *
 * NOTE(review): the check of `err`, the store through `new_chan` and the
 * return statements are not visible in this listing.
 */
383 /* Setup secondary channel */
384 int rte_vmbus_subchan_open(struct vmbus_channel
*primary
,
385 struct vmbus_channel
**new_chan
)
387 struct vmbus_channel
*chan
;
390 err
= vmbus_uio_get_subchan(primary
, &chan
);
394 STAILQ_INSERT_TAIL(&primary
->subchannel_list
, chan
, next
);
/*
 * Return the subchannel id stored in `chan`.
 */
399 uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel
*chan
)
401 return chan
->subchannel_id
;
/*
 * Close `chan`.
 *
 * When `chan` is not the primary channel of its device it is removed
 * from the primary channel's subchannel list. As the comment in the
 * body explains, the primary channel is deliberately not released.
 *
 * NOTE(review): any further statements inside the `if` body after the
 * list removal are not visible in this listing.
 */
404 void rte_vmbus_chan_close(struct vmbus_channel
*chan
)
406 const struct rte_vmbus_device
*device
= chan
->device
;
407 struct vmbus_channel
*primary
= device
->primary
;
410 * intentionally leak primary channel because
411 * secondary may still reference it
413 if (chan
!= primary
) {
414 STAILQ_REMOVE(&primary
->subchannel_list
, chan
,
415 vmbus_channel
, next
);
/*
 * Write a human-readable description of ring `br` to stream `f`,
 * labelled with `id`.
 *
 * First line: write index, read index, interrupt mask, pending-send
 * value and feature bits of the underlying buffer ring.
 * Second line: data size plus the space available for writing and for
 * reading.
 * If a packet header can be peeked from the ring (vmbus_rxbr_peek()
 * returns 0), a third line with that header's fields is printed; tlen
 * is shown shifted left by VMBUS_CHANPKT_SIZE_SHIFT.
 */
421 static void vmbus_dump_ring(FILE *f
, const char *id
, const struct vmbus_br
*br
)
423 const struct vmbus_bufring
*vbr
= br
->vbr
;
424 struct vmbus_chanpkt_hdr pkt
;
426 fprintf(f
, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
427 id
, vbr
->windex
, vbr
->rindex
, vbr
->imask
,
428 vbr
->pending_send
, vbr
->feature_bits
.value
);
429 fprintf(f
, " size=%u avail write=%u read=%u\n",
430 br
->dsize
, vmbus_br_availwrite(br
, vbr
->windex
),
431 vmbus_br_availread(br
));
433 if (vmbus_rxbr_peek(br
, &pkt
, sizeof(pkt
)) == 0)
434 fprintf(f
, " pkt type %#x len %u flags %#x xactid %#"PRIx64
"\n",
436 pkt
.tlen
<< VMBUS_CHANPKT_SIZE_SHIFT
,
437 pkt
.flags
, pkt
.xactid
);
/*
 * Write a description of channel `chan` to stream `f`: one line with
 * its subchannel id, relid and monitor id, followed by a dump of its
 * receive ring ("rxbr") and its transmit ring ("txbr").
 */
440 void rte_vmbus_chan_dump(FILE *f
, const struct vmbus_channel
*chan
)
442 fprintf(f
, "channel[%u] relid=%u monitor=%u\n",
443 chan
->subchannel_id
, chan
->relid
, chan
->monitor_id
);
444 vmbus_dump_ring(f
, "rxbr", &chan
->rxbr
);
445 vmbus_dump_ring(f
, "txbr", &chan
->txbr
);