]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / dpdk / drivers / bus / vmbus / vmbus_channel.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018, Microsoft Corporation.
3 * All Rights Reserved.
4 */
5
6 #include <unistd.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <sys/uio.h>
10
11 #include <rte_eal.h>
12 #include <rte_tailq.h>
13 #include <rte_log.h>
14 #include <rte_malloc.h>
15 #include <rte_bus.h>
16 #include <rte_atomic.h>
17 #include <rte_memory.h>
18 #include <rte_bus_vmbus.h>
19
20 #include "private.h"
21
/* Atomically OR @mask into the word at @addr (GCC atomic builtin). */
static inline void
vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
{
	(void)__sync_fetch_and_or(addr, mask);
}
28
29 static inline void
30 vmbus_send_interrupt(const struct rte_vmbus_device *dev, uint32_t relid)
31 {
32 uint32_t *int_addr;
33 uint32_t int_mask;
34
35 int_addr = dev->int_page + relid / 32;
36 int_mask = 1u << (relid % 32);
37
38 vmbus_sync_set_bit(int_addr, int_mask);
39 }
40
41 static inline void
42 vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id)
43 {
44 uint32_t *monitor_addr, monitor_mask;
45 unsigned int trigger_index;
46
47 trigger_index = monitor_id / HV_MON_TRIG_LEN;
48 monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
49
50 monitor_addr = &dev->monitor_page->trigs[trigger_index].pending;
51 vmbus_sync_set_bit(monitor_addr, monitor_mask);
52 }
53
/*
 * Notify the host about this channel: first mark the channel's relid
 * in the shared interrupt page, then trip its monitor trigger bit so
 * the hypervisor polls the interrupt page.
 */
static void
vmbus_set_event(const struct rte_vmbus_device *dev,
		const struct vmbus_channel *chan)
{
	vmbus_send_interrupt(dev, chan->relid);
	vmbus_set_monitor(dev, chan->monitor_id);
}
61
62 /*
63 * Notify host that there are data pending on our TX bufring.
64 *
65 * Since this in userspace, rely on the monitor page.
66 * Can't do a hypercall from userspace.
67 */
68 void
69 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
70 {
71 const struct rte_vmbus_device *dev = chan->device;
72 const struct vmbus_br *tbr = &chan->txbr;
73
74 /* Make sure all updates are done before signaling host */
75 rte_smp_wmb();
76
77 /* If host is ignoring interrupts? */
78 if (tbr->vbr->imask)
79 return;
80
81 vmbus_set_event(dev, chan);
82 }
83
84
85 /* Do a simple send directly using transmit ring. */
86 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
87 void *data, uint32_t dlen,
88 uint64_t xactid, uint32_t flags, bool *need_sig)
89 {
90 struct vmbus_chanpkt pkt;
91 unsigned int pktlen, pad_pktlen;
92 const uint32_t hlen = sizeof(pkt);
93 bool send_evt = false;
94 uint64_t pad = 0;
95 struct iovec iov[3];
96 int error;
97
98 pktlen = hlen + dlen;
99 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
100
101 pkt.hdr.type = type;
102 pkt.hdr.flags = flags;
103 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
104 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
105 pkt.hdr.xactid = xactid;
106
107 iov[0].iov_base = &pkt;
108 iov[0].iov_len = hlen;
109 iov[1].iov_base = data;
110 iov[1].iov_len = dlen;
111 iov[2].iov_base = &pad;
112 iov[2].iov_len = pad_pktlen - pktlen;
113
114 error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
115
116 /*
117 * caller sets need_sig to non-NULL if it will handle
118 * signaling if required later.
119 * if need_sig is NULL, signal now if needed.
120 */
121 if (need_sig)
122 *need_sig |= send_evt;
123 else if (error == 0 && send_evt)
124 rte_vmbus_chan_signal_tx(chan);
125 return error;
126 }
127
128 /* Do a scatter/gather send where the descriptor points to data. */
129 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
130 struct vmbus_gpa sg[], uint32_t sglen,
131 void *data, uint32_t dlen,
132 uint64_t xactid, bool *need_sig)
133 {
134 struct vmbus_chanpkt_sglist pkt;
135 unsigned int pktlen, pad_pktlen, hlen;
136 bool send_evt = false;
137 struct iovec iov[4];
138 uint64_t pad = 0;
139 int error;
140
141 hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
142 pktlen = hlen + dlen;
143 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
144
145 pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
146 pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
147 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
148 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
149 pkt.hdr.xactid = xactid;
150 pkt.rsvd = 0;
151 pkt.gpa_cnt = sglen;
152
153 iov[0].iov_base = &pkt;
154 iov[0].iov_len = sizeof(pkt);
155 iov[1].iov_base = sg;
156 iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
157 iov[2].iov_base = data;
158 iov[2].iov_len = dlen;
159 iov[3].iov_base = &pad;
160 iov[3].iov_len = pad_pktlen - pktlen;
161
162 error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
163
164 /* if caller is batching, just propagate the status */
165 if (need_sig)
166 *need_sig |= send_evt;
167 else if (error == 0 && send_evt)
168 rte_vmbus_chan_signal_tx(chan);
169 return error;
170 }
171
172 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
173 {
174 const struct vmbus_br *br = &channel->rxbr;
175
176 return br->vbr->rindex == br->vbr->windex;
177 }
178
/*
 * Signal host after reading N bytes from the RX ring.
 *
 * The host advertises (via pending_send) how many bytes it is waiting
 * to write; only signal when our read may have unblocked that write.
 * NOTE(review): the barrier/ordering here is protocol-sensitive —
 * pending_send must be sampled after the new read index is published.
 */
void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
{
	struct vmbus_br *rbr = &chan->rxbr;
	uint32_t write_sz, pending_sz;

	/* No need for signaling on older versions */
	if (!rbr->vbr->feature_bits.feat_pending_send_sz)
		return;

	/* Make sure reading of pending happens after new read index */
	rte_mb();

	/* Host not waiting to write anything: nothing to signal. */
	pending_sz = rbr->vbr->pending_send;
	if (!pending_sz)
		return;

	rte_smp_rmb();
	write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);

	/* If there was space before then host was not blocked */
	if (write_sz - bytes_read > pending_sz)
		return;

	/* If pending write will not fit */
	if (write_sz <= pending_sz)
		return;

	vmbus_set_event(chan->device, chan);
}
209
210 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
211 uint64_t *request_id)
212 {
213 struct vmbus_chanpkt_hdr pkt;
214 uint32_t dlen, hlen, bufferlen = *len;
215 int error;
216
217 *len = 0;
218
219 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
220 if (error)
221 return error;
222
223 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
224 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
225 /* XXX this channel is dead actually. */
226 return -EIO;
227 }
228
229 if (unlikely(pkt.hlen > pkt.tlen)) {
230 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
231 pkt.hlen, pkt.tlen);
232 return -EIO;
233 }
234
235 /* Length are in quad words */
236 hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
237 dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
238 *len = dlen;
239
240 /* If caller buffer is not large enough */
241 if (unlikely(dlen > bufferlen))
242 return -ENOBUFS;
243
244 if (request_id)
245 *request_id = pkt.xactid;
246
247 /* Read data and skip packet header */
248 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
249 if (error)
250 return error;
251
252 rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
253 return 0;
254 }
255
256 /* TODO: replace this with inplace ring buffer (no copy) */
257 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
258 void *data, uint32_t *len)
259 {
260 struct vmbus_chanpkt_hdr pkt;
261 uint32_t dlen, bufferlen = *len;
262 int error;
263
264 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
265 if (error)
266 return error;
267
268 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
269 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
270 /* XXX this channel is dead actually. */
271 return -EIO;
272 }
273
274 if (unlikely(pkt.hlen > pkt.tlen)) {
275 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
276 pkt.hlen, pkt.tlen);
277 return -EIO;
278 }
279
280 /* Length are in quad words */
281 dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
282 *len = dlen;
283
284 /* If caller buffer is not large enough */
285 if (unlikely(dlen > bufferlen))
286 return -ENOBUFS;
287
288 /* Read data and skip packet header */
289 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
290 if (error)
291 return error;
292
293 /* Return the number of bytes read */
294 return dlen + sizeof(uint64_t);
295 }
296
297 int vmbus_chan_create(const struct rte_vmbus_device *device,
298 uint16_t relid, uint16_t subid, uint8_t monitor_id,
299 struct vmbus_channel **new_chan)
300 {
301 struct vmbus_channel *chan;
302 int err;
303
304 chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
305 device->device.numa_node);
306 if (!chan)
307 return -ENOMEM;
308
309 STAILQ_INIT(&chan->subchannel_list);
310 chan->device = device;
311 chan->subchannel_id = subid;
312 chan->relid = relid;
313 chan->monitor_id = monitor_id;
314 *new_chan = chan;
315
316 err = vmbus_uio_map_rings(chan);
317 if (err) {
318 rte_free(chan);
319 return err;
320 }
321
322 return 0;
323 }
324
325 /* Setup the primary channel */
326 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
327 struct vmbus_channel **new_chan)
328 {
329 int err;
330
331 err = vmbus_chan_create(device, device->relid, 0,
332 device->monitor_id, new_chan);
333 if (!err)
334 device->primary = *new_chan;
335
336 return err;
337 }
338
339 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
340 {
341 if (vmbus_uio_subchannels_supported(device, device->primary))
342 return VMBUS_MAX_CHANNELS;
343 else
344 return 1;
345 }
346
347 /* Setup secondary channel */
348 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
349 struct vmbus_channel **new_chan)
350 {
351 struct vmbus_channel *chan;
352 int err;
353
354 err = vmbus_uio_get_subchan(primary, &chan);
355 if (err)
356 return err;
357
358 STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
359 *new_chan = chan;
360 return 0;
361 }
362
/* Return the sub-channel index (0 for the primary channel). */
uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
{
	return chan->subchannel_id;
}
367
368 void rte_vmbus_chan_close(struct vmbus_channel *chan)
369 {
370 const struct rte_vmbus_device *device = chan->device;
371 struct vmbus_channel *primary = device->primary;
372
373 if (chan != primary)
374 STAILQ_REMOVE(&primary->subchannel_list, chan,
375 vmbus_channel, next);
376
377 rte_free(chan);
378 }
379
380 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
381 {
382 const struct vmbus_bufring *vbr = br->vbr;
383 struct vmbus_chanpkt_hdr pkt;
384
385 fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
386 id, vbr->windex, vbr->rindex, vbr->imask,
387 vbr->pending_send, vbr->feature_bits.value);
388 fprintf(f, " size=%u avail write=%u read=%u\n",
389 br->dsize, vmbus_br_availwrite(br, vbr->windex),
390 vmbus_br_availread(br));
391
392 if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
393 fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
394 pkt.type,
395 pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
396 pkt.flags, pkt.xactid);
397 }
398
/* Dump channel identity plus both of its ring buffers to stream @f. */
void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
{
	fprintf(f, "channel[%u] relid=%u monitor=%u\n",
		chan->subchannel_id, chan->relid, chan->monitor_id);
	vmbus_dump_ring(f, "rxbr", &chan->rxbr);
	vmbus_dump_ring(f, "txbr", &chan->txbr);
}