]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | .. SPDX-License-Identifier: BSD-3-Clause |
2 | Copyright(c) 2010-2014 Intel Corporation. | |
7c673cae FG |
3 | |
4 | IPv4 Multicast Sample Application | |
5 | ================================= | |
6 | ||
7 | The IPv4 Multicast application is a simple example of packet processing | |
8 | using the Data Plane Development Kit (DPDK). | |
9 | The application performs L3 multicasting. | |
10 | ||
11 | Overview | |
12 | -------- | |
13 | ||
14 | The application demonstrates the use of zero-copy buffers for packet forwarding. | |
15 | The initialization and run-time paths are very similar to those of the :doc:`l2_forward_real_virtual`. | |
16 | This guide highlights the differences between the two applications. | |
17 | There are two key differences from the L2 Forwarding sample application: | |
18 | ||
19 | * The IPv4 Multicast sample application makes use of indirect buffers. | |
20 | ||
21 | * The forwarding decision is taken based on information read from the input packet's IPv4 header. | |
22 | ||
23 | The lookup method is the Four-byte Key (FBK) hash-based method. | |
24 | The lookup table is composed of pairs of destination IPv4 address (the FBK) | |
25 | and a port mask associated with that IPv4 address. | |
26 | ||
9f95a23c TL |
27 | .. note:: |
28 | ||
29 | The max port mask supported in the given hash table is 0xf, so only the first | |
30 | four ports can be supported. | |
31 | If using non-consecutive ports, use the destination IPv4 address accordingly. | |
32 | ||
7c673cae FG |
33 | For convenience and simplicity, this sample application does not take IANA-assigned multicast addresses into account, |
34 | but instead equates the last four bytes of the multicast group (that is, the last four bytes of the destination IP address) | |
35 | with the mask of ports to multicast packets to. | |
36 | Also, the application does not consider the Ethernet addresses; | |
37 | it looks only at the IPv4 destination address for any given packet. | |
38 | ||
9f95a23c TL |
39 | Compiling the Application |
40 | ------------------------- | |
7c673cae | 41 | |
9f95a23c | 42 | To compile the sample application see :doc:`compiling`. |
7c673cae | 43 | |
9f95a23c | 44 | The application is located in the ``ipv4_multicast`` sub-directory. |
7c673cae FG |
45 | |
46 | Running the Application | |
47 | ----------------------- | |
48 | ||
49 | The application has a number of command line options: | |
50 | ||
51 | .. code-block:: console | |
52 | ||
53 | ./build/ipv4_multicast [EAL options] -- -p PORTMASK [-q NQ] | |
54 | ||
55 | where, | |
56 | ||
57 | * -p PORTMASK: Hexadecimal bitmask of ports to configure | |
58 | ||
59 | * -q NQ: determines the number of queues per lcore | |
60 | ||
61 | .. note:: | |
62 | ||
63 | Unlike the basic L2/L3 Forwarding sample applications, | |
64 | NUMA support is not provided in the IPv4 Multicast sample application. | |
65 | ||
66 | Typically, to run the IPv4 Multicast sample application, issue the following command (as root): | |
67 | ||
68 | .. code-block:: console | |
69 | ||
11fdf7f2 | 70 | ./build/ipv4_multicast -l 0-3 -n 3 -- -p 0x3 -q 1 |
7c673cae FG |
71 | |
72 | In this command: | |
73 | ||
9f95a23c | 74 | * The -l option enables cores 0, 1, 2 and 3 |
7c673cae FG |
75 | |
76 | * The -n option specifies 3 memory channels | |
77 | ||
78 | * The -p option enables ports 0 and 1 | |
79 | ||
80 | * The -q option assigns 1 queue to each lcore | |
81 | ||
82 | Refer to the *DPDK Getting Started Guide* for general information on running applications | |
83 | and the Environment Abstraction Layer (EAL) options. | |
84 | ||
85 | Explanation | |
86 | ----------- | |
87 | ||
88 | The following sections provide some explanation of the code. | |
89 | As mentioned in the overview section, | |
90 | the initialization and run-time paths are very similar to those of the :doc:`l2_forward_real_virtual`. | |
91 | The following sections describe aspects that are specific to the IPv4 Multicast sample application. | |
92 | ||
93 | Memory Pool Initialization | |
94 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
95 | ||
96 | The IPv4 Multicast sample application uses three memory pools. | |
97 | Two of the pools are for indirect buffers used for packet duplication purposes. | |
98 | Memory pools for indirect buffers are initialized differently from the memory pool for direct buffers: | |
99 | ||
100 | .. code-block:: c | |
101 | ||
11fdf7f2 TL |
102 | packet_pool = rte_pktmbuf_pool_create("packet_pool", NB_PKT_MBUF, 32, |
103 | 0, PKT_MBUF_DATA_SIZE, rte_socket_id()); | |
104 | header_pool = rte_pktmbuf_pool_create("header_pool", NB_HDR_MBUF, 32, | |
105 | 0, HDR_MBUF_DATA_SIZE, rte_socket_id()); | |
106 | clone_pool = rte_pktmbuf_pool_create("clone_pool", NB_CLONE_MBUF, 32, | |
107 | 0, 0, rte_socket_id()); | |
7c673cae FG |
108 | |
109 | The reason for this is that indirect buffers are not supposed to hold any packet data and | |
110 | therefore can be initialized with a lower amount of reserved memory for each buffer. | |
111 | ||
112 | Hash Initialization | |
113 | ~~~~~~~~~~~~~~~~~~~ | |
114 | ||
115 | The hash object is created and loaded with the pre-configured entries read from a global array: | |
116 | ||
117 | .. code-block:: c | |
118 | ||
119 | static int | |
120 | ||
121 | init_mcast_hash(void) | |
122 | { | |
123 | uint32_t i; | |
124 | mcast_hash_params.socket_id = rte_socket_id(); | |
125 | ||
126 | mcast_hash = rte_fbk_hash_create(&mcast_hash_params); | |
127 | if (mcast_hash == NULL){ | |
128 | return -1; | |
129 | } | |
130 | ||
131 | for (i = 0; i < N_MCAST_GROUPS; i ++){ | |
132 | if (rte_fbk_hash_add_key(mcast_hash, mcast_group_table[i].ip, mcast_group_table[i].port_mask) < 0) { | |
133 | return -1; | |
134 | } | |
135 | } | |
136 | return 0; | |
137 | } | |
138 | ||
139 | Forwarding | |
140 | ~~~~~~~~~~ | |
141 | ||
142 | All forwarding is done inside the mcast_forward() function. | |
143 | Firstly, the Ethernet* header is removed from the packet and the IPv4 address is extracted from the IPv4 header: | |
144 | ||
145 | .. code-block:: c | |
146 | ||
147 | /* Remove the Ethernet header from the input packet */ | |
148 | ||
149 | iphdr = (struct ipv4_hdr *)rte_pktmbuf_adj(m, sizeof(struct ether_hdr)); | |
150 | RTE_ASSERT(iphdr != NULL); | |
151 | dest_addr = rte_be_to_cpu_32(iphdr->dst_addr); | |
152 | ||
153 | Then, the packet is checked to see if it has a multicast destination address and | |
154 | if the routing table has any ports assigned to the destination address: | |
155 | ||
156 | .. code-block:: c | |
157 | ||
158 | if (!IS_IPV4_MCAST(dest_addr) || | |
159 | (hash = rte_fbk_hash_lookup(mcast_hash, dest_addr)) <= 0 || | |
160 | (port_mask = hash & enabled_port_mask) == 0) { | |
161 | rte_pktmbuf_free(m); | |
162 | return; | |
163 | } | |
164 | ||
165 | Then, the number of ports in the destination portmask is calculated with the help of the bitcnt() function: | |
166 | ||
167 | .. code-block:: c | |
168 | ||
169 | /* Get number of bits set. */ | |
170 | ||
171 | static inline uint32_t bitcnt(uint32_t v) | |
172 | { | |
173 | uint32_t n; | |
174 | ||
175 | for (n = 0; v != 0; v &= v - 1, n++) | |
176 | ; | |
177 | return n; | |
178 | } | |
179 | ||
180 | This is done to determine which forwarding algorithm to use. | |
181 | This is explained in more detail in the next section. | |
182 | ||
183 | Thereafter, a destination Ethernet address is constructed: | |
184 | ||
185 | .. code-block:: c | |
186 | ||
187 | /* construct destination Ethernet address */ | |
188 | ||
189 | dst_eth_addr = ETHER_ADDR_FOR_IPV4_MCAST(dest_addr); | |
190 | ||
191 | Since Ethernet addresses are also part of the multicast process, each outgoing packet carries the same destination Ethernet address. | |
192 | The destination Ethernet address is constructed from the lower 23 bits of the multicast group OR-ed | |
193 | with the Ethernet address 01:00:5e:00:00:00, as per RFC 1112: | |
194 | ||
195 | .. code-block:: c | |
196 | ||
197 | #define ETHER_ADDR_FOR_IPV4_MCAST(x) \ | |
198 | (rte_cpu_to_be_64(0x01005e000000ULL | ((x) & 0x7fffff)) >> 16) | |
199 | ||
200 | Then, packets are dispatched to the destination ports according to the portmask associated with a multicast group: | |
201 | ||
202 | .. code-block:: c | |
203 | ||
204 | for (port = 0; use_clone != port_mask; port_mask >>= 1, port++) { | |
205 | /* Prepare output packet and send it out. */ | |
206 | ||
207 | if ((port_mask & 1) != 0) { | |
208 | if (likely ((mc = mcast_out_pkt(m, use_clone)) != NULL)) | |
209 | mcast_send_pkt(mc, &dst_eth_addr.as_addr, qconf, port); | |
210 | else if (use_clone == 0) | |
211 | rte_pktmbuf_free(m); | |
212 | } | |
213 | } | |
214 | ||
215 | The actual packet transmission is done in the mcast_send_pkt() function: | |
216 | ||
217 | .. code-block:: c | |
218 | ||
9f95a23c | 219 | static inline void mcast_send_pkt(struct rte_mbuf *pkt, struct ether_addr *dest_addr, struct lcore_queue_conf *qconf, uint16_t port) |
7c673cae FG |
220 | { |
221 | struct ether_hdr *ethdr; | |
222 | uint16_t len; | |
223 | ||
224 | /* Construct Ethernet header. */ | |
225 | ||
226 | ethdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, (uint16_t) sizeof(*ethdr)); | |
227 | ||
228 | RTE_ASSERT(ethdr != NULL); | |
229 | ||
230 | ether_addr_copy(dest_addr, &ethdr->d_addr); | |
231 | ether_addr_copy(&ports_eth_addr[port], &ethdr->s_addr); | |
232 | ethdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4); | |
233 | ||
234 | /* Put new packet into the output queue */ | |
235 | ||
236 | len = qconf->tx_mbufs[port].len; | |
237 | qconf->tx_mbufs[port].m_table[len] = pkt; | |
238 | qconf->tx_mbufs[port].len = ++len; | |
239 | ||
240 | /* Transmit packets */ | |
241 | ||
242 | if (unlikely(MAX_PKT_BURST == len)) | |
243 | send_burst(qconf, port); | |
244 | } | |
245 | ||
246 | Buffer Cloning | |
247 | ~~~~~~~~~~~~~~ | |
248 | ||
249 | This is the most important part of the application since it demonstrates the use of zero-copy buffer cloning. | |
250 | There are two approaches for creating the outgoing packet and although both are based on the data zero-copy idea, | |
251 | there are some differences in the detail. | |
252 | ||
253 | The first approach creates a clone of the input packet, that is, | |
254 | it walks through all segments of the input packet and, for each segment, | |
255 | creates a new buffer and attaches that new buffer to the segment | |
256 | (refer to rte_pktmbuf_clone() in the rte_mbuf library for more details). | |
257 | A new buffer is then allocated for the packet header and is prepended to the cloned buffer. | |
258 | ||
259 | The second approach does not make a clone; it just increments the reference counter for all input packet segments, | |
260 | allocates a new buffer for the packet header and prepends it to the input packet. | |
261 | ||
262 | Basically, the first approach reuses only the input packet's data, but creates its own copy of the packet's metadata. | |
263 | The second approach reuses both the input packet's data and metadata. | |
264 | ||
265 | The advantage of the first approach is that each outgoing packet has its own copy of the metadata, | |
266 | so we can safely modify the data pointer of the input packet. | |
267 | That allows us to skip creation if the output packet is for the last destination port | |
268 | and instead modify the input packet's header in place. | |
269 | For example, for N destination ports, we need to invoke mcast_out_pkt() (N-1) times. | |
270 | ||
271 | The advantage of the second approach is that there is less work to be done for each outgoing packet, | |
272 | that is, the "clone" operation is skipped completely. | |
273 | However, there is a price to pay. | |
274 | The input packet's metadata must remain intact, so for N destination ports, | |
275 | we need to invoke mcast_out_pkt() (N) times. | |
276 | ||
277 | Therefore, for a small number of outgoing ports (and segments in the input packet), | |
278 | the first approach is faster. | |
279 | As the number of outgoing ports (and/or input segments) grows, the second approach becomes preferable. | |
280 | ||
281 | Depending on the number of segments or the number of ports in the outgoing portmask, | |
282 | either the first (with cloning) or the second (without cloning) approach is taken: | |
283 | ||
284 | .. code-block:: c | |
285 | ||
286 | use_clone = (port_num <= MCAST_CLONE_PORTS && m->pkt.nb_segs <= MCAST_CLONE_SEGS); | |
287 | ||
288 | It is the mcast_out_pkt() function that performs the packet duplication (either with or without actually cloning the buffers): | |
289 | ||
290 | .. code-block:: c | |
291 | ||
292 | static inline struct rte_mbuf *mcast_out_pkt(struct rte_mbuf *pkt, int use_clone) | |
293 | { | |
294 | struct rte_mbuf *hdr; | |
295 | ||
296 | /* Create new mbuf for the header. */ | |
297 | ||
298 | if (unlikely ((hdr = rte_pktmbuf_alloc(header_pool)) == NULL)) | |
299 | return NULL; | |
300 | ||
301 | /* If requested, then make a new clone packet. */ | |
302 | ||
303 | if (use_clone != 0 && unlikely ((pkt = rte_pktmbuf_clone(pkt, clone_pool)) == NULL)) { | |
304 | rte_pktmbuf_free(hdr); | |
305 | return NULL; | |
306 | } | |
307 | ||
308 | /* prepend new header */ | |
309 | ||
310 | hdr->pkt.next = pkt; | |
311 | ||
312 | /* update header's fields */ | |
313 | ||
314 | hdr->pkt.pkt_len = (uint16_t)(hdr->pkt.data_len + pkt->pkt.pkt_len); | |
9f95a23c | 315 | hdr->pkt.nb_segs = pkt->pkt.nb_segs + 1; |
7c673cae FG |
316 | |
317 | /* copy metadata from source packet */ | |
318 | ||
319 | hdr->pkt.in_port = pkt->pkt.in_port; | |
320 | hdr->pkt.vlan_macip = pkt->pkt.vlan_macip; | |
321 | hdr->pkt.hash = pkt->pkt.hash; | |
7c673cae FG |
322 | rte_mbuf_sanity_check(hdr, RTE_MBUF_PKT, 1); |
323 | ||
324 | return hdr; | |
325 | } |