]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / dpdk / drivers / bus / vmbus / vmbus_channel.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018, Microsoft Corporation.
3 * All Rights Reserved.
4 */
5
6 #include <unistd.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <sys/uio.h>
10
11 #include <rte_eal.h>
12 #include <rte_tailq.h>
13 #include <rte_log.h>
14 #include <rte_malloc.h>
15 #include <rte_bus.h>
16 #include <rte_atomic.h>
17 #include <rte_memory.h>
18 #include <rte_bus_vmbus.h>
19
20 #include "private.h"
21
/*
 * Atomically OR the bits of mask into the 32-bit word at addr.
 */
static inline void
vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
{
	/* GCC atomic builtin; the previous value it returns is not needed */
	(void)__sync_fetch_and_or(addr, mask);
}
28
29 static inline void
30 vmbus_send_interrupt(const struct rte_vmbus_device *dev, uint32_t relid)
31 {
32 uint32_t *int_addr;
33 uint32_t int_mask;
34
35 int_addr = dev->int_page + relid / 32;
36 int_mask = 1u << (relid % 32);
37
38 vmbus_sync_set_bit(int_addr, int_mask);
39 }
40
41 static inline void
42 vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id)
43 {
44 uint32_t *monitor_addr, monitor_mask;
45 unsigned int trigger_index;
46
47 trigger_index = monitor_id / HV_MON_TRIG_LEN;
48 monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
49
50 monitor_addr = &dev->monitor_page->trigs[trigger_index].pending;
51 vmbus_sync_set_bit(monitor_addr, monitor_mask);
52 }
53
54 static void
55 vmbus_set_event(const struct rte_vmbus_device *dev,
56 const struct vmbus_channel *chan)
57 {
58 vmbus_send_interrupt(dev, chan->relid);
59 vmbus_set_monitor(dev, chan->monitor_id);
60 }
61
62 /*
63 * Set the wait between when hypervisor examines the trigger.
64 */
65 void
66 rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
67 const struct vmbus_channel *chan,
68 uint32_t latency)
69 {
70 uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
71 uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
72
73 if (latency >= UINT16_MAX * 100) {
74 VMBUS_LOG(ERR, "invalid latency value %u", latency);
75 return;
76 }
77
78 if (trig_idx >= VMBUS_MONTRIGS_MAX) {
79 VMBUS_LOG(ERR, "invalid monitor trigger %u",
80 trig_idx);
81 return;
82 }
83
84 /* Host value is expressed in 100 nanosecond units */
85 dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
86 }
87
88 /*
89 * Notify host that there are data pending on our TX bufring.
90 *
91 * Since this in userspace, rely on the monitor page.
92 * Can't do a hypercall from userspace.
93 */
94 void
95 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
96 {
97 const struct rte_vmbus_device *dev = chan->device;
98 const struct vmbus_br *tbr = &chan->txbr;
99
100 /* Make sure all updates are done before signaling host */
101 rte_smp_wmb();
102
103 /* If host is ignoring interrupts? */
104 if (tbr->vbr->imask)
105 return;
106
107 vmbus_set_event(dev, chan);
108 }
109
110
111 /* Do a simple send directly using transmit ring. */
112 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
113 void *data, uint32_t dlen,
114 uint64_t xactid, uint32_t flags, bool *need_sig)
115 {
116 struct vmbus_chanpkt pkt;
117 unsigned int pktlen, pad_pktlen;
118 const uint32_t hlen = sizeof(pkt);
119 bool send_evt = false;
120 uint64_t pad = 0;
121 struct iovec iov[3];
122 int error;
123
124 pktlen = hlen + dlen;
125 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
126
127 pkt.hdr.type = type;
128 pkt.hdr.flags = flags;
129 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
130 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
131 pkt.hdr.xactid = xactid;
132
133 iov[0].iov_base = &pkt;
134 iov[0].iov_len = hlen;
135 iov[1].iov_base = data;
136 iov[1].iov_len = dlen;
137 iov[2].iov_base = &pad;
138 iov[2].iov_len = pad_pktlen - pktlen;
139
140 error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
141
142 /*
143 * caller sets need_sig to non-NULL if it will handle
144 * signaling if required later.
145 * if need_sig is NULL, signal now if needed.
146 */
147 if (need_sig)
148 *need_sig |= send_evt;
149 else if (error == 0 && send_evt)
150 rte_vmbus_chan_signal_tx(chan);
151 return error;
152 }
153
154 /* Do a scatter/gather send where the descriptor points to data. */
155 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
156 struct vmbus_gpa sg[], uint32_t sglen,
157 void *data, uint32_t dlen,
158 uint64_t xactid, bool *need_sig)
159 {
160 struct vmbus_chanpkt_sglist pkt;
161 unsigned int pktlen, pad_pktlen, hlen;
162 bool send_evt = false;
163 struct iovec iov[4];
164 uint64_t pad = 0;
165 int error;
166
167 hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
168 pktlen = hlen + dlen;
169 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
170
171 pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
172 pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
173 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
174 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
175 pkt.hdr.xactid = xactid;
176 pkt.rsvd = 0;
177 pkt.gpa_cnt = sglen;
178
179 iov[0].iov_base = &pkt;
180 iov[0].iov_len = sizeof(pkt);
181 iov[1].iov_base = sg;
182 iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
183 iov[2].iov_base = data;
184 iov[2].iov_len = dlen;
185 iov[3].iov_base = &pad;
186 iov[3].iov_len = pad_pktlen - pktlen;
187
188 error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
189
190 /* if caller is batching, just propagate the status */
191 if (need_sig)
192 *need_sig |= send_evt;
193 else if (error == 0 && send_evt)
194 rte_vmbus_chan_signal_tx(chan);
195 return error;
196 }
197
198 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
199 {
200 const struct vmbus_br *br = &channel->rxbr;
201
202 return br->vbr->rindex == br->vbr->windex;
203 }
204
205 /* Signal host after reading N bytes */
206 void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
207 {
208 struct vmbus_br *rbr = &chan->rxbr;
209 uint32_t write_sz, pending_sz;
210
211 /* No need for signaling on older versions */
212 if (!rbr->vbr->feature_bits.feat_pending_send_sz)
213 return;
214
215 /* Make sure reading of pending happens after new read index */
216 rte_mb();
217
218 pending_sz = rbr->vbr->pending_send;
219 if (!pending_sz)
220 return;
221
222 rte_smp_rmb();
223 write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);
224
225 /* If there was space before then host was not blocked */
226 if (write_sz - bytes_read > pending_sz)
227 return;
228
229 /* If pending write will not fit */
230 if (write_sz <= pending_sz)
231 return;
232
233 vmbus_set_event(chan->device, chan);
234 }
235
236 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
237 uint64_t *request_id)
238 {
239 struct vmbus_chanpkt_hdr pkt;
240 uint32_t dlen, hlen, bufferlen = *len;
241 int error;
242
243 *len = 0;
244
245 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
246 if (error)
247 return error;
248
249 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
250 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
251 /* XXX this channel is dead actually. */
252 return -EIO;
253 }
254
255 if (unlikely(pkt.hlen > pkt.tlen)) {
256 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
257 pkt.hlen, pkt.tlen);
258 return -EIO;
259 }
260
261 /* Length are in quad words */
262 hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
263 dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
264 *len = dlen;
265
266 /* If caller buffer is not large enough */
267 if (unlikely(dlen > bufferlen))
268 return -ENOBUFS;
269
270 if (request_id)
271 *request_id = pkt.xactid;
272
273 /* Read data and skip packet header */
274 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
275 if (error)
276 return error;
277
278 rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
279 return 0;
280 }
281
282 /* TODO: replace this with inplace ring buffer (no copy) */
283 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
284 void *data, uint32_t *len)
285 {
286 struct vmbus_chanpkt_hdr pkt;
287 uint32_t dlen, bufferlen = *len;
288 int error;
289
290 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
291 if (error)
292 return error;
293
294 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
295 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
296 /* XXX this channel is dead actually. */
297 return -EIO;
298 }
299
300 if (unlikely(pkt.hlen > pkt.tlen)) {
301 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
302 pkt.hlen, pkt.tlen);
303 return -EIO;
304 }
305
306 /* Length are in quad words */
307 dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
308 *len = dlen;
309
310 /* If caller buffer is not large enough */
311 if (unlikely(dlen > bufferlen))
312 return -ENOBUFS;
313
314 /* Read data and skip packet header */
315 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
316 if (error)
317 return error;
318
319 /* Return the number of bytes read */
320 return dlen + sizeof(uint64_t);
321 }
322
323 int vmbus_chan_create(const struct rte_vmbus_device *device,
324 uint16_t relid, uint16_t subid, uint8_t monitor_id,
325 struct vmbus_channel **new_chan)
326 {
327 struct vmbus_channel *chan;
328 int err;
329
330 chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
331 device->device.numa_node);
332 if (!chan)
333 return -ENOMEM;
334
335 STAILQ_INIT(&chan->subchannel_list);
336 chan->device = device;
337 chan->subchannel_id = subid;
338 chan->relid = relid;
339 chan->monitor_id = monitor_id;
340 *new_chan = chan;
341
342 err = vmbus_uio_map_rings(chan);
343 if (err) {
344 rte_free(chan);
345 return err;
346 }
347
348 return 0;
349 }
350
351 /* Setup the primary channel */
352 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
353 struct vmbus_channel **new_chan)
354 {
355 struct mapped_vmbus_resource *uio_res;
356 int err;
357
358 uio_res = vmbus_uio_find_resource(device);
359 if (!uio_res) {
360 VMBUS_LOG(ERR, "can't find uio resource");
361 return -EINVAL;
362 }
363
364 err = vmbus_chan_create(device, device->relid, 0,
365 device->monitor_id, new_chan);
366 if (!err) {
367 device->primary = *new_chan;
368 uio_res->primary = *new_chan;
369 }
370
371 return err;
372 }
373
374 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
375 {
376 if (vmbus_uio_subchannels_supported(device, device->primary))
377 return VMBUS_MAX_CHANNELS;
378 else
379 return 1;
380 }
381
382 /* Setup secondary channel */
383 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
384 struct vmbus_channel **new_chan)
385 {
386 struct vmbus_channel *chan;
387 int err;
388
389 err = vmbus_uio_get_subchan(primary, &chan);
390 if (err)
391 return err;
392
393 STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
394 *new_chan = chan;
395 return 0;
396 }
397
398 uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
399 {
400 return chan->subchannel_id;
401 }
402
403 void rte_vmbus_chan_close(struct vmbus_channel *chan)
404 {
405 const struct rte_vmbus_device *device = chan->device;
406 struct vmbus_channel *primary = device->primary;
407
408 /*
409 * intentionally leak primary channel because
410 * secondary may still reference it
411 */
412 if (chan != primary) {
413 STAILQ_REMOVE(&primary->subchannel_list, chan,
414 vmbus_channel, next);
415 rte_free(chan);
416 }
417
418 }
419
420 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
421 {
422 const struct vmbus_bufring *vbr = br->vbr;
423 struct vmbus_chanpkt_hdr pkt;
424
425 fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
426 id, vbr->windex, vbr->rindex, vbr->imask,
427 vbr->pending_send, vbr->feature_bits.value);
428 fprintf(f, " size=%u avail write=%u read=%u\n",
429 br->dsize, vmbus_br_availwrite(br, vbr->windex),
430 vmbus_br_availread(br));
431
432 if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
433 fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
434 pkt.type,
435 pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
436 pkt.flags, pkt.xactid);
437 }
438
439 void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
440 {
441 fprintf(f, "channel[%u] relid=%u monitor=%u\n",
442 chan->subchannel_id, chan->relid, chan->monitor_id);
443 vmbus_dump_ring(f, "rxbr", &chan->rxbr);
444 vmbus_dump_ring(f, "txbr", &chan->txbr);
445 }