]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions are | |
6 | * met: | |
7 | * | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the | |
14 | * distribution. | |
15 | * | |
16 | * 3. Neither the name of the authors nor the names of their contributors | |
17 | * may be used to endorse or promote products derived from this | |
18 | * software without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS | |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |
30 | * THE POSSIBILITY OF SUCH DAMAGE. | |
31 | */ | |
32 | ||
33 | /* | |
34 | * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $ | |
35 | * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $ | |
36 | * | |
37 | * Definitions of constants and the structures used by the netmap | |
38 | * framework, for the part visible to both kernel and userspace. | |
39 | * Detailed info on netmap is available with "man netmap" or at | |
40 | * | |
41 | * http://info.iet.unipi.it/~luigi/netmap/ | |
42 | */ | |
43 | ||
44 | #ifndef _NET_NETMAP_H_ | |
45 | #define _NET_NETMAP_H_ | |
46 | ||
47 | /* | |
48 | * --- Netmap data structures --- | |
49 | * | |
50 | * The data structures used by netmap are shown below. Those in | |
51 | * capital letters are in an mmap()ed area shared with userspace, | |
52 | * while others are private to the kernel. | |
53 | * Shared structures do not contain pointers but only memory | |
54 | * offsets, so that addressing is portable between kernel and userspace. | |
55 | ||
56 | ||
57 | softc | |
58 | +----------------+ | |
59 | | standard fields| | |
60 | | if_pspare[0] ----------+ | |
61 | +----------------+ | | |
62 | | | |
63 | +----------------+<------+ | |
64 | |(netmap_adapter)| | |
65 | | | netmap_kring | |
66 | | tx_rings *--------------------------------->+---------------+ | |
67 | | | netmap_kring | ring *---------. | |
68 | | rx_rings *--------->+---------------+ | nr_hwcur | | | |
69 | +----------------+ | ring *--------. | nr_hwavail | V | |
70 | | nr_hwcur | | | selinfo | | | |
71 | | nr_hwavail | | +---------------+ . | |
72 | | selinfo | | | ... | . | |
73 | +---------------+ | |(ntx+1 entries)| | |
74 | | .... | | | | | |
75 | |(nrx+1 entries)| | +---------------+ | |
76 | | | | | |
77 | KERNEL +---------------+ | | |
78 | | | |
79 | ==================================================================== | |
80 | | | |
81 | USERSPACE | NETMAP_RING | |
82 | +---->+-------------+ | |
83 | / | cur | | |
84 | NETMAP_IF (nifp, one per file desc.) / | avail | | |
85 | +---------------+ / | buf_ofs | | |
86 | | ni_tx_rings | / +=============+ | |
87 | | ni_rx_rings | / | buf_idx | slot[0] | |
88 | | | / | len, flags | | |
89 | | | / +-------------+ | |
90 | +===============+ / | buf_idx | slot[1] | |
91 | | txring_ofs[0] | (rel.to nifp)--' | len, flags | | |
92 | | txring_ofs[1] | +-------------+ | |
93 | (num_rings+1 entries) (nr_num_slots entries) | |
94 | | txring_ofs[n] | | buf_idx | slot[n-1] | |
95 | +---------------+ | len, flags | | |
96 | | rxring_ofs[0] | +-------------+ | |
97 | | rxring_ofs[1] | | |
98 | (num_rings+1 entries) | |
99 | | rxring_ofs[n] | | |
100 | +---------------+ | |
101 | ||
102 | * The private descriptor ('softc' or 'adapter') of each interface | |
103 | * is extended with a "struct netmap_adapter" containing netmap-related | |
104 | * info (see description in dev/netmap/netmap_kernel.h). | |
105 | * Among other things, tx_rings and rx_rings point to the arrays of | |
106 | * "struct netmap_kring" which in turn reach the various | |
107 | * "struct netmap_ring", shared with userspace. | |
108 | ||
109 | * The NETMAP_RING is the userspace-visible replica of the NIC ring. | |
110 | * Each slot has the index of a buffer, its length and some flags. | |
111 | * In user space, the buffer address is computed as | |
112 | * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE | |
113 | * In the kernel, buffers do not necessarily need to be contiguous, | |
114 | * and the virtual and physical addresses are derived through | |
115 | * a lookup table. | |
116 | * To associate a different buffer to a slot, applications must | |
117 | * write the new index in buf_idx, and set NS_BUF_CHANGED flag to | |
118 | * make sure that the kernel updates the hardware ring as needed. | |
119 | * | |
120 | * Normally the driver is not requested to report the result of | |
121 | * transmissions (this can dramatically speed up operation). | |
122 | * However the user may request to report completion by setting | |
123 | * NS_REPORT. | |
124 | */ | |
125 | struct netmap_slot { /* one ring slot: buffer index, length and flags */ | |
126 | uint32_t buf_idx; /* index of the buffer associated with this slot */ | |
127 | uint16_t len; /* packet length, to be copied to/from the hw ring */ | |
128 | uint16_t flags; /* bitmask of the NS_* values below */ | |
129 | #define NS_BUF_CHANGED 0x0001 /* set after writing a new buf_idx: must resync the map */ | |
130 | #define NS_REPORT 0x0002 /* ask the hardware to report transmission results, | |
131 | * e.g. by generating an interrupt | |
132 | */ | |
133 | }; | |
134 | ||
135 | /* | |
136 | * Netmap representation of a TX or RX ring (also known as "queue"). | |
137 | * This is a queue implemented as a fixed-size circular array. | |
138 | * At the software level, two fields are important: avail and cur. | |
139 | * | |
140 | * In TX rings: | |
141 | * avail indicates the number of slots available for transmission. | |
142 | * It is updated by the kernel after every netmap system call. | |
143 | * It MUST BE decremented by the application when it appends a | |
144 | * packet. | |
145 | * cur indicates the slot to use for the next packet | |
146 | * to send (i.e. the "tail" of the queue). | |
147 | * It MUST BE incremented by the application before | |
148 | * netmap system calls to reflect the number of newly | |
149 | * sent packets. | |
150 | * It is checked by the kernel on netmap system calls | |
151 | * (normally unmodified by the kernel unless invalid). | |
152 | * | |
153 | * The kernel side of netmap uses two additional fields in its own | |
154 | * private ring structure, netmap_kring: | |
155 | * nr_hwcur is a copy of nr_cur on an NIOCTXSYNC. | |
156 | * nr_hwavail is the number of slots known as available by the | |
157 | * hardware. It is updated on an INTR (inc by the | |
158 | * number of packets sent) and on a NIOCTXSYNC | |
159 | * (decrease by nr_cur - nr_hwcur) | |
160 | * A special case, nr_hwavail is -1 if the transmit | |
161 | * side is idle (no pending transmits). | |
162 | * | |
163 | * In RX rings: | |
164 | * avail is the number of packets available (possibly 0). | |
165 | * It MUST BE decremented by the application when it consumes | |
166 | * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC | |
167 | * cur indicates the first slot that contains a packet not | |
168 | * processed yet (the "head" of the queue). | |
169 | * It MUST BE incremented by the software when it consumes | |
170 | * a packet. | |
171 | * reserved indicates the number of buffers before 'cur' | |
172 | * that the application has still in use. Normally 0, | |
173 | * it MUST BE incremented by the application when it | |
174 | * does not return the buffer immediately, and decremented | |
175 | * when the buffer is finally freed. | |
176 | * | |
177 | * The kernel side of netmap uses two additional fields in the kring: | |
178 | * nr_hwcur is a copy of nr_cur on an NIOCRXSYNC | |
179 | * nr_hwavail is the number of packets available. It is updated | |
180 | * on INTR (inc by the number of new packets arrived) | |
181 | * and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur). | |
182 | * | |
183 | * DATA OWNERSHIP/LOCKING: | |
184 | * The netmap_ring is owned by the user program and it is only | |
185 | * accessed or modified in the upper half of the kernel during | |
186 | * a system call. | |
187 | * | |
188 | * The netmap_kring is only modified by the upper half of the kernel. | |
189 | */ | |
190 | struct netmap_ring { /* userspace-visible TX or RX ring, followed by its slots */ | |
191 | /* | |
192 | * buf_ofs is meant to be used through macros. | |
193 | * It contains the offset of the buffer region from this | |
194 | * descriptor. | |
195 | */ | |
196 | ssize_t buf_ofs; | |
197 | uint32_t num_slots; /* number of slots in the ring. */ | |
198 | uint32_t avail; /* TX: slots available for transmission; RX: packets available */ | |
199 | uint32_t cur; /* TX: slot for the next packet to send; RX: first unprocessed slot */ | |
200 | uint32_t reserved; /* RX: buffers before 'cur' still in use by the application */ | |
201 | ||
202 | uint16_t nr_buf_size; /* NOTE(review): presumably the size of each buffer; confirm */ | |
203 | uint16_t flags; /* bitmask of the NR_* values below */ | |
204 | #define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ | |
205 | ||
206 | struct timeval ts; /* time of last *sync() */ | |
207 | ||
208 | /* The slots follow, so this struct has variable size. */ | |
209 | struct netmap_slot slot[0]; /* array of num_slots slots */ | |
210 | }; | |
211 | ||
212 | ||
213 | /* | |
214 | * Netmap representation of an interface and its queue(s). | |
215 | * There is one netmap_if for each file descriptor on which we want | |
216 | * to select/poll. We assume that each interface has the same number | |
217 | * of receive and transmit queues. | |
218 | * select/poll operates on one or all pairs depending on the value of | |
219 | * nr_ringid passed on the ioctl. | |
220 | */ | |
221 | struct netmap_if { /* per-file-descriptor view of an interface and its rings */ | |
222 | char ni_name[IFNAMSIZ]; /* name of the interface. */ | |
223 | u_int ni_version; /* API version, currently unused */ | |
224 | u_int ni_rx_rings; /* number of rx rings */ | |
225 | u_int ni_tx_rings; /* number of tx rings; if zero, same as ni_rx_rings */ | |
226 | /* | |
227 | * The following array contains the offset of each netmap ring | |
228 | * from this structure. The first ni_tx_rings+1 entries refer | |
229 | * to the tx rings, the next ni_rx_rings+1 refer to the rx rings | |
230 | * (the last entry in each block refers to the host stack rings). | |
231 | * The area is filled up by the kernel on NIOCREGIF, | |
232 | * and then only read by userspace code. | |
233 | */ | |
234 | ssize_t ring_ofs[0]; | |
235 | }; | |
236 | ||
237 | #ifndef NIOCREGIF | |
238 | /* | |
239 | * ioctl names and related fields | |
240 | * | |
241 | * NIOCGINFO takes a struct ifreq, the interface name is the input, | |
242 | * the outputs are the number of queues and the number of descriptors | |
243 | * for each queue (useful to set number of threads etc.). | |
244 | * | |
245 | * NIOCREGIF takes an interface name within a struct ifreq, | |
246 | * and activates netmap mode on the interface (if possible). | |
247 | * | |
248 | * NIOCUNREGIF unregisters the interface associated to the fd. | |
249 | * | |
250 | * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, | |
251 | * whose identity is set in NIOCREGIF through nr_ringid | |
252 | */ | |
253 | ||
254 | /* | |
255 | * struct nmreq overlays a struct ifreq | |
256 | */ | |
257 | struct nmreq { /* argument of NIOCGINFO and NIOCREGIF */ | |
258 | char nr_name[IFNAMSIZ]; /* interface name */ | |
259 | uint32_t nr_version; /* API version */ | |
260 | #define NETMAP_API 3 /* current version */ | |
261 | uint32_t nr_offset; /* nifp offset in the shared region */ | |
262 | uint32_t nr_memsize; /* size of the shared region */ | |
263 | uint32_t nr_tx_slots; /* slots in tx rings */ | |
264 | uint32_t nr_rx_slots; /* slots in rx rings */ | |
265 | uint16_t nr_tx_rings; /* number of tx rings */ | |
266 | uint16_t nr_rx_rings; /* number of rx rings */ | |
267 | uint16_t nr_ringid; /* ring(s) we care about: flags below plus ring number */ | |
268 | #define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */ | |
269 | #define NETMAP_SW_RING 0x2000 /* process the sw ring */ | |
270 | #define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ | |
271 | #define NETMAP_RING_MASK 0xfff /* the ring number */ | |
272 | uint16_t spare1; /* spare, eases compatibility with other versions */ | |
273 | uint32_t spare2[4]; /* spare, eases compatibility with other versions */ | |
274 | }; | |
275 | ||
276 | /* | |
277 | * FreeBSD uses the size value embedded in the _IOWR to determine | |
278 | * how much to copy in/out, so it must match the actual data structure | |
279 | * we pass (struct nmreq). The spare fields in that structure ease | |
280 | * compatibility with other versions. | |
281 | */ | |
282 | #define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ | |
283 | #define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ | |
284 | #define NIOCUNREGIF _IO('i', 147) /* interface unregister */ | |
285 | #define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ | |
286 | #define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ | |
287 | #endif /* !NIOCREGIF */ | |
288 | ||
289 | #endif /* _NET_NETMAP_H_ */ |