]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | .. BSD LICENSE |
2 | Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
3 | All rights reserved. | |
4 | ||
5 | Redistribution and use in source and binary forms, with or without | |
6 | modification, are permitted provided that the following conditions | |
7 | are met: | |
8 | ||
9 | * Redistributions of source code must retain the above copyright | |
10 | notice, this list of conditions and the following disclaimer. | |
11 | * Redistributions in binary form must reproduce the above copyright | |
12 | notice, this list of conditions and the following disclaimer in | |
13 | the documentation and/or other materials provided with the | |
14 | distribution. | |
15 | * Neither the name of Intel Corporation nor the names of its | |
16 | contributors may be used to endorse or promote products derived | |
17 | from this software without specific prior written permission. | |
18 | ||
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | ||
31 | .. _multi_process_app: | |
32 | ||
33 | Multi-process Sample Application | |
34 | ================================ | |
35 | ||
36 | This chapter describes the example applications for multi-processing that are included in the DPDK. | |
37 | ||
38 | Example Applications | |
39 | -------------------- | |
40 | ||
41 | Building the Sample Applications | |
42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
43 | ||
44 | The multi-process example applications are built in the same way as other sample applications, | |
45 | and as documented in the *DPDK Getting Started Guide*. | |
46 | To build all the example applications: | |
47 | ||
48 | #. Set RTE_SDK and go to the example directory: | |
49 | ||
50 | .. code-block:: console | |
51 | ||
52 | export RTE_SDK=/path/to/rte_sdk | |
53 | cd ${RTE_SDK}/examples/multi_process | |
54 | ||
55 | #. Set the target (a default target will be used if not specified). For example: | |
56 | ||
57 | .. code-block:: console | |
58 | ||
59 | export RTE_TARGET=x86_64-native-linuxapp-gcc | |
60 | ||
61 | See the *DPDK Getting Started Guide* for possible RTE_TARGET values. | |
62 | ||
63 | #. Build the applications: | |
64 | ||
65 | .. code-block:: console | |
66 | ||
67 | make | |
68 | ||
69 | .. note:: | |
70 | ||
71 | If just a specific multi-process application needs to be built, | |
72 | the final make command can be run just in that application's directory, | |
73 | rather than at the top-level multi-process directory. | |
74 | ||
75 | Basic Multi-process Example | |
76 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
77 | ||
78 | The examples/simple_mp folder in the DPDK release contains a basic example application to demonstrate how | |
79 | two DPDK processes can work together using queues and memory pools to share information. | |
80 | ||
81 | Running the Application | |
82 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
83 | ||
84 | To run the application, start one copy of the simple_mp binary in one terminal, | |
85 | passing at least two cores in the coremask, as follows: | |
86 | ||
87 | .. code-block:: console | |
88 | ||
89 | ./build/simple_mp -c 3 -n 4 --proc-type=primary | |
90 | ||
91 | For the first DPDK process run, the proc-type flag can be omitted or set to auto, | |
92 | since all DPDK processes will default to being a primary instance, | |
93 | meaning they have control over the hugepage shared memory regions. | |
94 | The process should start successfully and display a command prompt as follows: | |
95 | ||
96 | .. code-block:: console | |
97 | ||
98 | $ ./build/simple_mp -c 3 -n 4 --proc-type=primary | |
99 | EAL: coremask set to 3 | |
100 | EAL: Detected lcore 0 on socket 0 | |
101 | EAL: Detected lcore 1 on socket 0 | |
102 | EAL: Detected lcore 2 on socket 0 | |
103 | EAL: Detected lcore 3 on socket 0 | |
104 | ... | |
105 | ||
106 | EAL: Requesting 2 pages of size 1073741824 | |
107 | EAL: Requesting 768 pages of size 2097152 | |
108 | EAL: Ask a virtual area of 0x40000000 bytes | |
109 | EAL: Virtual area found at 0x7ff200000000 (size = 0x40000000) | |
110 | ... | |
111 | ||
112 | EAL: check igb_uio module | |
113 | EAL: check module finished | |
114 | EAL: Master core 0 is ready (tid=54e41820) | |
115 | EAL: Core 1 is ready (tid=53b32700) | |
116 | ||
117 | Starting core 1 | |
118 | ||
119 | simple_mp > | |
120 | ||
121 | To run the secondary process to communicate with the primary process, | |
122 | again run the same binary setting at least two cores in the coremask: | |
123 | ||
124 | .. code-block:: console | |
125 | ||
126 | ./build/simple_mp -c C -n 4 --proc-type=secondary | |
127 | ||
128 | When running a secondary process such as that shown above, the proc-type parameter can again be specified as auto. | |
129 | However, omitting the parameter altogether will cause the process to try to start as a primary rather than a secondary process. | |
130 | ||
131 | Once the process type is specified correctly, | |
132 | the process starts up, displaying largely similar status messages to the primary instance as it initializes. | |
133 | Once again, you will be presented with a command prompt. | |
134 | ||
135 | Once both processes are running, messages can be sent between them using the send command. | |
136 | At any stage, either process can be terminated using the quit command. | |
137 | ||
138 | .. code-block:: console | |
139 | ||
140 | EAL: Master core 10 is ready (tid=b5f89820) EAL: Master core 8 is ready (tid=864a3820) | |
141 | EAL: Core 11 is ready (tid=84ffe700) EAL: Core 9 is ready (tid=85995700) | |
142 | Starting core 11 Starting core 9 | |
143 | simple_mp > send hello_secondary simple_mp > core 9: Received 'hello_secondary' | |
144 | simple_mp > core 11: Received 'hello_primary' simple_mp > send hello_primary | |
145 | simple_mp > quit simple_mp > quit | |
146 | ||
147 | .. note:: | |
148 | ||
149 | If the primary instance is terminated, the secondary instance must also be shut-down and restarted after the primary. | |
150 | This is necessary because the primary instance will clear and reset the shared memory regions on startup, | |
151 | invalidating the secondary process's pointers. | |
152 | The secondary process can be stopped and restarted without affecting the primary process. | |
153 | ||
154 | How the Application Works | |
155 | ^^^^^^^^^^^^^^^^^^^^^^^^^ | |
156 | ||
157 | The core of this example application is based on using two queues and a single memory pool in shared memory. | |
158 | These three objects are created at startup by the primary process, | |
159 | since the secondary process cannot create objects in memory as it cannot reserve memory zones, | |
160 | and the secondary process then uses lookup functions to attach to these objects as it starts up. | |
161 | ||
162 | .. code-block:: c | |
163 | ||
164 | if (rte_eal_process_type() == RTE_PROC_PRIMARY){ | |
165 | send_ring = rte_ring_create(_PRI_2_SEC, ring_size, SOCKET0, flags); | |
166 | recv_ring = rte_ring_create(_SEC_2_PRI, ring_size, SOCKET0, flags); | |
167 | message_pool = rte_mempool_create(_MSG_POOL, pool_size, string_size, pool_cache, priv_data_sz, NULL, NULL, NULL, NULL, SOCKET0, flags); | |
168 | } else { | |
169 | recv_ring = rte_ring_lookup(_PRI_2_SEC); | |
170 | send_ring = rte_ring_lookup(_SEC_2_PRI); | |
171 | message_pool = rte_mempool_lookup(_MSG_POOL); | |
172 | } | |
173 | ||
174 | Note, however, that the named ring structure used as send_ring in the primary process is the recv_ring in the secondary process. | |
175 | ||
176 | Once the rings and memory pools are all available in both the primary and secondary processes, | |
177 | the application simply dedicates two threads to sending and receiving messages respectively. | |
178 | The receive thread simply dequeues any messages on the receive ring, prints them, | |
179 | and frees the buffer space used by the messages back to the memory pool. | |
180 | The send thread makes use of the command-prompt library to interactively request user input for messages to send. | |
181 | Once a send command is issued by the user, a buffer is allocated from the memory pool, filled in with the message contents, | |
182 | then enqueued on the appropriate rte_ring. | |
183 | ||
184 | Symmetric Multi-process Example | |
185 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
186 | ||
187 | The second example of DPDK multi-process support demonstrates how a set of processes can run in parallel, | |
188 | with each process performing the same set of packet-processing operations. | |
189 | (Since each process is identical in functionality to the others, | |
190 | we refer to this as symmetric multi-processing, to differentiate it from asymmetric multi-processing - | |
191 | such as a client-server mode of operation seen in the next example, | |
192 | where different processes perform different tasks, yet co-operate to form a packet-processing system.) | |
193 | The following diagram shows the data-flow through the application, using two processes. | |
194 | ||
195 | .. _figure_sym_multi_proc_app: | |
196 | ||
197 | .. figure:: img/sym_multi_proc_app.* | |
198 | ||
199 | Example Data Flow in a Symmetric Multi-process Application | |
200 | ||
201 | ||
202 | As the diagram shows, each process reads packets from each of the network ports in use. | |
203 | RSS is used to distribute incoming packets on each port to different hardware RX queues. | |
204 | Each process reads a different RX queue on each port and so does not contend with any other process for that queue access. | |
205 | Similarly, each process writes outgoing packets to a different TX queue on each port. | |
206 | ||
207 | Running the Application | |
208 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
209 | ||
210 | As with the simple_mp example, the first instance of the symmetric_mp process must be run as the primary instance, | |
211 | though with a number of other application-specific parameters also provided after the EAL arguments. | |
212 | These additional parameters are: | |
213 | ||
214 | * -p <portmask>, where portmask is a hexadecimal bitmask of what ports on the system are to be used. | |
215 | For example: -p 3 to use ports 0 and 1 only. | |
216 | ||
217 | * --num-procs <N>, where N is the total number of symmetric_mp instances that will be run side-by-side to perform packet processing. | |
218 | This parameter is used to configure the appropriate number of receive queues on each network port. | |
219 | ||
220 | * --proc-id <n>, where n is a numeric value in the range 0 <= n < N (number of processes, specified above). | |
221 | This identifies which symmetric_mp instance is being run, so that each process can read a unique receive queue on each network port. | |
222 | ||
223 | The secondary symmetric_mp instances must also have these parameters specified, | |
224 | and the first two must be the same as those passed to the primary instance, or errors result. | |
225 | ||
226 | For example, to run a set of four symmetric_mp instances, running on lcores 1-4, | |
227 | all performing level-2 forwarding of packets between ports 0 and 1, | |
228 | the following commands can be used (assuming run as root): | |
229 | ||
230 | .. code-block:: console | |
231 | ||
232 | # ./build/symmetric_mp -c 2 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=0 | |
233 | # ./build/symmetric_mp -c 4 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=1 | |
234 | # ./build/symmetric_mp -c 8 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=2 | |
235 | # ./build/symmetric_mp -c 10 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=3 | |
236 | ||
237 | .. note:: | |
238 | ||
239 | In the above example, the process type can be explicitly specified as primary or secondary, rather than auto. | |
240 | When using auto, the first process run creates all the memory structures needed for all processes - | |
241 | irrespective of whether it has a proc-id of 0, 1, 2 or 3. | |
242 | ||
243 | .. note:: | |
244 | ||
245 | For the symmetric multi-process example, since all processes work in the same manner, | |
246 | once the hugepage shared memory and the network ports are initialized, | |
247 | it is not necessary to restart all processes if the primary instance dies. | |
248 | Instead, that process can be restarted as a secondary, | |
249 | by explicitly setting the proc-type to secondary on the command line. | |
250 | (All subsequent instances launched will also need this explicitly specified, | |
251 | as auto-detection will detect no primary processes running and therefore attempt to re-initialize shared memory.) | |
252 | ||
253 | How the Application Works | |
254 | ^^^^^^^^^^^^^^^^^^^^^^^^^ | |
255 | ||
256 | The initialization calls in both the primary and secondary instances are the same for the most part, | |
257 | calling the rte_eal_init(), 1 G and 10 G driver initialization and then rte_eal_pci_probe() functions. | |
258 | Thereafter, the initialization done depends on whether the process is configured as a primary or secondary instance. | |
259 | ||
260 | In the primary instance, a memory pool is created for the packet mbufs and the network ports to be used are initialized - | |
261 | the number of RX and TX queues per port being determined by the num-procs parameter passed on the command-line. | |
262 | The structures for the initialized network ports are stored in shared memory and | |
263 | therefore will be accessible by the secondary process as it initializes. | |
264 | ||
265 | .. code-block:: c | |
266 | ||
267 | if (num_ports & 1) | |
268 | rte_exit(EXIT_FAILURE, "Application must use an even number of ports\n"); | |
269 | ||
270 | for(i = 0; i < num_ports; i++){ | |
271 | if(proc_type == RTE_PROC_PRIMARY) | |
272 | if (smp_port_init(ports[i], mp, (uint16_t)num_procs) < 0) | |
273 | rte_exit(EXIT_FAILURE, "Error initializing ports\n"); | |
274 | } | |
275 | ||
276 | In the secondary instance, rather than initializing the network ports, the port information exported by the primary process is used, | |
277 | giving the secondary process access to the hardware and software rings for each network port. | |
278 | Similarly, the memory pool of mbufs is accessed by doing a lookup for it by name: | |
279 | ||
280 | .. code-block:: c | |
281 | ||
282 | mp = (proc_type == RTE_PROC_SECONDARY) ? rte_mempool_lookup(_SMP_MBUF_POOL) : rte_mempool_create(_SMP_MBUF_POOL, NB_MBUFS, MBUF_SIZE, ... ) | |
283 | ||
284 | Once this initialization is complete, the main loop of each process, both primary and secondary, | |
285 | is exactly the same - each process reads from each port using the queue corresponding to its proc-id parameter, | |
286 | and writes to the corresponding transmit queue on the output port. | |
287 | ||
288 | Client-Server Multi-process Example | |
289 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
290 | ||
291 | The third example multi-process application included with the DPDK shows how one can | |
292 | use a client-server type multi-process design to do packet processing. | |
293 | In this example, a single server process performs the packet reception from the ports being used and | |
294 | distributes these packets using round-robin ordering among a set of client processes, | |
295 | which perform the actual packet processing. | |
296 | In this case, the client applications just perform level-2 forwarding of packets by sending each packet out on a different network port. | |
297 | ||
298 | The following diagram shows the data-flow through the application, using two client processes. | |
299 | ||
300 | .. _figure_client_svr_sym_multi_proc_app: | |
301 | ||
302 | .. figure:: img/client_svr_sym_multi_proc_app.* | |
303 | ||
304 | Example Data Flow in a Client-Server Symmetric Multi-process Application | |
305 | ||
306 | ||
307 | Running the Application | |
308 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
309 | ||
310 | The server process must be run initially as the primary process to set up all memory structures for use by the clients. | |
311 | In addition to the EAL parameters, the application-specific parameters are: | |
312 | ||
313 | * -p <portmask>, where portmask is a hexadecimal bitmask of what ports on the system are to be used. | |
314 | For example: -p 3 to use ports 0 and 1 only. | |
315 | ||
316 | * -n <num-clients>, where the num-clients parameter is the number of client processes that will process the packets received | |
317 | by the server application. | |
318 | ||
319 | .. note:: | |
320 | ||
321 | In the server process, a single thread, the master thread, that is, the lowest numbered lcore in the coremask, performs all packet I/O. | |
322 | If a coremask is specified with more than a single lcore bit set in it, | |
323 | an additional lcore will be used for a thread to periodically print packet count statistics. | |
324 | ||
325 | Since the server application stores configuration data in shared memory, including the network ports to be used, | |
326 | the only application parameter needed by a client process is its client instance ID. | |
327 | Therefore, to run a server application on lcore 1 (with lcore 2 printing statistics) along with two client processes running on lcores 3 and 4, | |
328 | the following commands could be used: | |
329 | ||
330 | .. code-block:: console | |
331 | ||
332 | # ./mp_server/build/mp_server -c 6 -n 4 -- -p 3 -n 2 | |
333 | # ./mp_client/build/mp_client -c 8 -n 4 --proc-type=auto -- -n 0 | |
334 | # ./mp_client/build/mp_client -c 10 -n 4 --proc-type=auto -- -n 1 | |
335 | ||
336 | .. note:: | |
337 | ||
338 | If the server application dies and needs to be restarted, all client applications also need to be restarted, | |
339 | as there is no support in the server application for it to run as a secondary process. | |
340 | Any client processes that need restarting can be restarted without affecting the server process. | |
341 | ||
342 | How the Application Works | |
343 | ^^^^^^^^^^^^^^^^^^^^^^^^^ | |
344 | ||
345 | The server process performs the network port and data structure initialization much as the symmetric multi-process application does when run as primary. | |
346 | One additional enhancement in this sample application is that the server process stores its port configuration data in a memory zone in hugepage shared memory. | |
347 | This eliminates the need for the client processes to have the portmask parameter passed into them on the command line, | |
348 | as is done for the symmetric multi-process application, and therefore eliminates mismatched parameters as a potential source of errors. | |
349 | ||
350 | In the same way that the server process is designed to be run as a primary process instance only, | |
351 | the client processes are designed to be run as secondary instances only. | |
352 | They have no code to attempt to create shared memory objects. | |
353 | Instead, handles to all needed rings and memory pools are obtained via calls to rte_ring_lookup() and rte_mempool_lookup(). | |
354 | The network ports for use by the processes are obtained by loading the network port drivers and probing the PCI bus, | |
355 | which will, as in the symmetric multi-process example, | |
356 | automatically get access to the network ports using the settings already configured by the primary/server process. | |
357 | ||
358 | Once all applications are initialized, the server operates by reading packets from each network port in turn and | |
359 | distributing those packets to the client queues (software rings, one for each client process) in round-robin order. | |
360 | On the client side, the packets are read from the rings in as big of bursts as possible, then routed out to a different network port. | |
361 | The routing used is very simple. All packets received on the first NIC port are transmitted back out on the second port and vice versa. | |
362 | Similarly, packets are routed between the 3rd and 4th network ports and so on. | |
363 | The sending of packets is done by writing the packets directly to the network ports; they are not transferred back via the server process. | |
364 | ||
365 | In both the server and the client processes, outgoing packets are buffered before being sent, | |
366 | so as to allow the sending of multiple packets in a single burst to improve efficiency. | |
367 | For example, the client process will buffer packets to send, | |
368 | until either the buffer is full or until we receive no further packets from the server. | |
369 | ||
370 | Master-slave Multi-process Example | |
371 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
372 | ||
373 | The fourth example of DPDK multi-process support demonstrates a master-slave model that | |
374 | provides the capability of application recovery if a slave process crashes or meets unexpected conditions. | |
375 | In addition, it also demonstrates the floating process, | |
376 | which can run among different cores in contrast to the traditional way of binding a process/thread to a specific CPU core, | |
377 | using the local cache mechanism of mempool structures. | |
378 | ||
379 | This application performs the same functionality as the L2 Forwarding sample application, | |
380 | therefore this chapter does not cover that part but describes functionality that is introduced in this multi-process example only. | |
381 | Please refer to :doc:`l2_forward_real_virtual` for more information. | |
382 | ||
383 | Unlike previous examples where all processes are started from the command line with input arguments, in this example, | |
384 | only one process is spawned from the command line and that process creates other processes. | |
385 | The following section describes this in more detail. | |
386 | ||
387 | Master-slave Process Models | |
388 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
389 | ||
390 | The process spawned from the command line is called the *master process* in this document. | |
391 | A process created by the master is called a *slave process*. | |
392 | The application has only one master process, but could have multiple slave processes. | |
393 | ||
394 | Once the master process begins to run, it tries to initialize all the resources such as | |
395 | memory, CPU cores, driver, ports, and so on, as the other examples do. | |
396 | Thereafter, it creates slave processes, as shown in the following figure. | |
397 | ||
398 | .. _figure_master_slave_proc: | |
399 | ||
400 | .. figure:: img/master_slave_proc.* | |
401 | ||
402 | Master-slave Process Workflow | |
403 | ||
404 | ||
405 | The master process calls the rte_eal_mp_remote_launch() EAL function to launch an application function for each pinned thread through the pipe. | |
406 | Then, it waits to check if any slave processes have exited. | |
407 | If so, the process tries to re-initialize the resources that belong to that slave and launch them in the pinned thread entry again. | |
408 | The following section describes the recovery procedures in more detail. | |
409 | ||
410 | For each pinned thread in EAL, after reading any data from the pipe, it tries to call the function that the application specified. | |
411 | In this master specified function, a fork() call creates a slave process that performs the L2 forwarding task. | |
412 | Then, the function waits until the slave exits, is killed or crashes. Thereafter, it notifies the master of this event and returns. | |
413 | Finally, the EAL pinned thread waits until the new function is launched. | |
414 | ||
415 | After discussing the master-slave model, it is necessary to mention another issue, global and static variables. | |
416 | ||
417 | For multiple-thread cases, all global and static variables have only one copy and they can be accessed by any thread if applicable. | |
418 | So, they can be used to sync or share data among threads. | |
419 | ||
420 | In the previous examples, each process has separate global and static variables in memory and they are independent of each other. | |
421 | If it is necessary to share the knowledge, some communication mechanism should be deployed, such as, memzone, ring, shared memory, and so on. | |
422 | The global or static variables are not a valid approach to share data among processes. | |
423 | For variables in this example, on the one hand, the slave process inherits all the knowledge of these variables after being created by the master. | |
424 | On the other hand, other processes cannot know if one or more processes modify them after slave creation since that | |
425 | is the nature of a multiple process address space. | |
426 | But this does not mean that these variables cannot be used to share or sync data; it depends on the use case. | |
427 | The following are the possible use cases: | |
428 | ||
429 | #. The master process starts and initializes a variable and it will never be changed after slave processes are created. This case is OK. | |
430 | ||
431 | #. After the slave processes are created, the master or slave cores need to change a variable, but other processes do not need to know the change. | |
432 | This case is also OK. | |
433 | ||
434 | #. After the slave processes are created, the master or a slave needs to change a variable. | |
435 | In the meantime, one or more other processes need to be aware of the change. | |
436 | In this case, global and static variables cannot be used to share knowledge. Another communication mechanism is needed. | |
437 | A simple approach without lock protection can be a heap buffer allocated by rte_malloc or mem zone. | |
438 | ||
439 | Slave Process Recovery Mechanism | |
440 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
441 | ||
442 | Before talking about the recovery mechanism, it is necessary to know what is needed before a new slave instance can run if a previous one exited. | |
443 | ||
444 | When a slave process exits, the system returns all the resources allocated for this process automatically. | |
445 | However, this does not include the resources that were allocated by the DPDK. All the hardware resources are shared among the processes, | |
446 | which include memzone, mempool, ring, a heap buffer allocated by the rte_malloc library, and so on. | |
447 | If the new instance runs and the allocated resource is not returned, either resource allocation failed or the hardware resource is lost forever. | |
448 | ||
449 | When a slave process runs, it may have dependencies on other processes. | |
450 | They could have execution sequence orders; they could share the ring to communicate; they could share the same port for reception and forwarding; | |
451 | they could use lock structures to do exclusive access in some critical path. | |
452 | What happens to the dependent process(es) if the peer leaves? | |
453 | The consequences vary since the dependency cases are complex. | |
454 | It depends on what the processes had shared. | |
455 | However, it is necessary to notify the peer(s) if one slave exited. | |
456 | Then, the peer(s) will be aware of that and wait until the new instance begins to run. | |
457 | ||
458 | Therefore, to provide the capability to resume the new slave instance if the previous one exited, it is necessary to provide several mechanisms: | |
459 | ||
460 | #. Keep a resource list for each slave process. | |
461 | Before a slave process runs, the master should prepare a resource list. | |
462 | After it exits, the master could either delete the allocated resources and create new ones, | |
463 | or re-initialize those for use by the new instance. | |
464 | ||
465 | #. Set up a notification mechanism for slave process exit cases. After the specific slave leaves, | |
466 | the master should be notified and then help to create a new instance. | |
467 | This mechanism is provided in Section `Master-slave Process Models`_. | |
468 | ||
469 | #. Use a synchronization mechanism among dependent processes. | |
470 | The master should have the capability to stop or kill slave processes that have a dependency on the one that has exited. | |
471 | Then, after the new instance of the exited slave process begins to run, the dependent ones can resume or run from the start. | |
472 | The example sends a STOP command to slave processes dependent on the exited one, then they will exit. | |
473 | Thereafter, the master creates new instances for the exited slave processes. | |
474 | ||
475 | The following diagram describes slave process recovery. | |
476 | ||
477 | .. _figure_slave_proc_recov: | |
478 | ||
479 | .. figure:: img/slave_proc_recov.* | |
480 | ||
481 | Slave Process Recovery Process Flow | |
482 | ||
483 | ||
484 | Floating Process Support | |
485 | ^^^^^^^^^^^^^^^^^^^^^^^^ | |
486 | ||
487 | When the DPDK application runs, there is always a -c option passed in to indicate the cores that are enabled. | |
488 | Then, the DPDK creates a thread for each enabled core. | |
489 | By doing so, it creates a 1:1 mapping between the enabled core and each thread. | |
490 | The enabled core always has an ID, therefore, each thread has a unique core ID in the DPDK execution environment. | |
491 | With the ID, each thread can easily access the structures or resources exclusively belonging to it without using function parameter passing. | |
492 | It can easily use the rte_lcore_id() function to get the value in every function that is called. | |
493 | ||
494 | For threads/processes not created in that way, either pinned to a core or not, they will not own a unique ID and the | |
495 | rte_lcore_id() function will not work in the correct way. | |
496 | However, sometimes these threads/processes still need the unique ID mechanism to do easy access on structures or resources. | |
497 | For example, the DPDK mempool library provides a local cache mechanism | |
498 | (refer to :ref:`mempool_local_cache`) | |
499 | for fast element allocation and freeing. | |
500 | If using a non-unique ID or a fake one, | |
501 | a race condition occurs if two or more threads/processes with the same core ID try to use the local cache. | |
502 | ||
503 | Therefore, unused core IDs from the passing of parameters with the -c option are used to organize the core ID allocation array. | |
504 | Once the floating process is spawned, it tries to allocate a unique core ID from the array and release it on exit. | |
505 | ||
506 | A natural way to spawn a floating process is to use the fork() function and allocate a unique core ID from the unused core ID array. | |
507 | However, it is necessary to write new code to provide a notification mechanism for slave exit | |
508 | and make sure the process recovery mechanism can work with it. | |
509 | ||
510 | To avoid producing redundant code, the Master-Slave process model is still used to spawn floating processes, | |
511 | then cancel the affinity to specific cores. | |
512 | Besides that, clear the core ID assigned to the DPDK spawning a thread that has a 1:1 mapping with the core mask. | |
513 | Thereafter, get a new core ID from the unused core ID allocation array. | |
514 | ||
515 | Run the Application | |
516 | ^^^^^^^^^^^^^^^^^^^ | |
517 | ||
518 | This example has a command line similar to the L2 Forwarding sample application with a few differences. | |
519 | ||
520 | To run the application, start one copy of the l2fwd_fork binary in one terminal. | |
521 | Unlike the L2 Forwarding example, | |
522 | this example requires at least three cores since the master process will wait and be responsible for slave process recovery. | |
523 | The command is as follows: | |
524 | ||
525 | .. code-block:: console | |
526 | ||
527 | #./build/l2fwd_fork -c 1c -n 4 -- -p 3 -f | |
528 | ||
529 | This example provides another -f option to specify the use of floating process. | |
530 | If not specified, the example will use a pinned process to perform the L2 forwarding task. | |
531 | ||
532 | To verify the recovery mechanism, proceed as follows: First, check the PIDs of the slave processes: | |
533 | ||
534 | .. code-block:: console | |
535 | ||
536 | #ps -fe | grep l2fwd_fork | |
537 | root 5136 4843 29 11:11 pts/1 00:00:05 ./build/l2fwd_fork | |
538 | root 5145 5136 98 11:11 pts/1 00:00:11 ./build/l2fwd_fork | |
539 | root 5146 5136 98 11:11 pts/1 00:00:11 ./build/l2fwd_fork | |
540 | ||
541 | Then, kill one of the slaves: | |
542 | ||
543 | .. code-block:: console | |
544 | ||
545 | #kill -9 5145 | |
546 | ||
547 | After 1 or 2 seconds, check whether the slave has resumed: | |
548 | ||
549 | .. code-block:: console | |
550 | ||
551 | #ps -fe | grep l2fwd_fork | |
552 | root 5136 4843 3 11:11 pts/1 00:00:06 ./build/l2fwd_fork | |
553 | root 5247 5136 99 11:14 pts/1 00:00:01 ./build/l2fwd_fork | |
554 | root 5248 5136 99 11:14 pts/1 00:00:01 ./build/l2fwd_fork | |
555 | ||
556 | The traffic generator statistics can also be monitored to see whether the slave processes have resumed. | |
557 | ||
558 | Explanation | |
559 | ^^^^^^^^^^^ | |
560 | ||
561 | As described in previous sections, | |
562 | not all global and static variables need to change to be accessible in multiple processes; | |
563 | it depends on how they are used. | |
564 | In this example, | |
565 | the statistics on dropped/forwarded/received packet counts need to be updated by the slave processes, | |
566 | and the master needs to see the update and print them out. | |
567 | So, it needs to allocate a heap buffer using rte_zmalloc. | |
568 | In addition, if the -f option is specified, | |
569 | an array is needed to store the allocated core ID for the floating process so that the master can return it | |
570 | after a slave has exited accidentally. | |
571 | ||
572 | .. code-block:: c | |
573 | ||
574 | static int | |
575 | l2fwd_malloc_shared_struct(void) | |
576 | { | |
577 | port_statistics = rte_zmalloc("port_stat", sizeof(struct l2fwd_port_statistics) * RTE_MAX_ETHPORTS, 0); | |
578 | ||
579 | if (port_statistics == NULL) | |
580 | return -1; | |
581 | ||
582 | /* allocate mapping_id array */ | |
583 | ||
584 | if (float_proc) { | |
585 | int i; | |
586 | ||
587 | mapping_id = rte_malloc("mapping_id", sizeof(unsigned) * RTE_MAX_LCORE, 0); | |
588 | if (mapping_id == NULL) | |
589 | return -1; | |
590 | ||
591 | for (i = 0 ;i < RTE_MAX_LCORE; i++) | |
592 | mapping_id[i] = INVALID_MAPPING_ID; | |
593 | ||
594 | } | |
595 | return 0; | |
596 | } | |
597 | ||
598 | For each slave process, packets are received from one port and forwarded to another port that another slave is operating on. | |
599 | If the other slave exits accidentally, the port it is operating on may not work normally, | |
600 | so the first slave cannot forward packets to that port. | |
601 | There is a dependency on the port in this case. So, the master should recognize the dependency. | |
602 | The following is the code to detect this dependency: | |
603 | ||
604 | .. code-block:: c | |
605 | ||
606 | for (portid = 0; portid < nb_ports; portid++) { | |
607 | /* skip ports that are not enabled */ | |
608 | ||
609 | if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) | |
610 | continue; | |
611 | ||
612 | /* Find pair ports' lcores */ | |
613 | ||
614 | find_lcore = find_pair_lcore = 0; | |
615 | pair_port = l2fwd_dst_ports[portid]; | |
616 | ||
617 | for (i = 0; i < RTE_MAX_LCORE; i++) { | |
618 | if (!rte_lcore_is_enabled(i)) | |
619 | continue; | |
620 | ||
621 | for (j = 0; j < lcore_queue_conf[i].n_rx_port;j++) { | |
622 | if (lcore_queue_conf[i].rx_port_list[j] == portid) { | |
623 | lcore = i; | |
624 | find_lcore = 1; | |
625 | break; | |
626 | } | |
627 | ||
628 | if (lcore_queue_conf[i].rx_port_list[j] == pair_port) { | |
629 | pair_lcore = i; | |
630 | find_pair_lcore = 1; | |
631 | break; | |
632 | } | |
633 | } | |
634 | ||
635 | if (find_lcore && find_pair_lcore) | |
636 | break; | |
637 | } | |
638 | ||
639 | if (!find_lcore || !find_pair_lcore) | |
640 | rte_exit(EXIT_FAILURE, "Not find port=%d pair\\n", portid); | |
641 | ||
642 | printf("lcore %u and %u paired\\n", lcore, pair_lcore); | |
643 | ||
644 | lcore_resource[lcore].pair_id = pair_lcore; | |
645 | lcore_resource[pair_lcore].pair_id = lcore; | |
646 | } | |
647 | ||
648 | Before launching the slave process, | |
649 | it is necessary to set up the communication channel between the master and slave so that | |
650 | the master can notify the slave if its peer process with the dependency exited. | |
651 | In addition, the master needs to register a callback function in the case where a specific slave exited. | |
652 | ||
653 | .. code-block:: c | |
654 | ||
655 | for (i = 0; i < RTE_MAX_LCORE; i++) { | |
656 | if (lcore_resource[i].enabled) { | |
657 | /* Create ring for master and slave communication */ | |
658 | ||
659 | ret = create_ms_ring(i); | |
660 | if (ret != 0) | |
661 | rte_exit(EXIT_FAILURE, "Create ring for lcore=%u failed",i); | |
662 | ||
663 | if (flib_register_slave_exit_notify(i,slave_exit_cb) != 0) | |
664 | rte_exit(EXIT_FAILURE, "Register master_trace_slave_exit failed"); | |
665 | } | |
666 | } | |
667 | ||
668 | After launching the slave processes, the master waits and prints out the port statistics periodically. | |
669 | If an event indicating that a slave process exited is detected, | |
670 | it sends the STOP command to the peer and waits until it has also exited. | |
671 | Then, it tries to clean up the execution environment and prepare new resources. | |
672 | Finally, the new slave instance is launched. | |
673 | ||
674 | .. code-block:: c | |
675 | ||
676 | while (1) { | |
677 | sleep(1); | |
678 | cur_tsc = rte_rdtsc(); | |
679 | diff_tsc = cur_tsc - prev_tsc; | |
680 | ||
681 | /* if timer is enabled */ | |
682 | ||
683 | if (timer_period > 0) { | |
684 | /* advance the timer */ | |
685 | timer_tsc += diff_tsc; | |
686 | ||
687 | /* if timer has reached its timeout */ | |
688 | if (unlikely(timer_tsc >= (uint64_t) timer_period)) { | |
689 | print_stats(); | |
690 | ||
691 | /* reset the timer */ | |
692 | timer_tsc = 0; | |
693 | } | |
694 | } | |
695 | ||
696 | prev_tsc = cur_tsc; | |
697 | ||
698 | /* Check any slave need restart or recreate */ | |
699 | ||
700 | rte_spinlock_lock(&res_lock); | |
701 | ||
702 | for (i = 0; i < RTE_MAX_LCORE; i++) { | |
703 | struct lcore_resource_struct *res = &lcore_resource[i]; | |
704 | struct lcore_resource_struct *pair = &lcore_resource[res->pair_id]; | |
705 | ||
706 | /* If find slave exited, try to reset pair */ | |
707 | ||
708 | if (res->enabled && res->flags && pair->enabled) { | |
709 | if (!pair->flags) { | |
710 | master_sendcmd_with_ack(pair->lcore_id, CMD_STOP); | |
711 | rte_spinlock_unlock(&res_lock); | |
712 | sleep(1); | |
713 | rte_spinlock_lock(&res_lock); | |
714 | if (pair->flags) | |
715 | continue; | |
716 | } | |
717 | ||
718 | if (reset_pair(res->lcore_id, pair->lcore_id) != 0) | |
719 | rte_exit(EXIT_FAILURE, "failed to reset slave"); | |
720 | ||
721 | res->flags = 0; | |
722 | pair->flags = 0; | |
723 | } | |
724 | } | |
725 | rte_spinlock_unlock(&res_lock); | |
726 | } | |
727 | ||
728 | When the slave process is spawned and starts to run, it checks whether the floating process option is applied. | |
729 | If so, it clears the affinity to a specific core and also sets the unique core ID to 0. | |
730 | Then, it tries to allocate a new core ID. | |
731 | Since the core ID has changed, the resource allocated by the master cannot work, | |
732 | so it remaps the resource to the new core ID slot. | |
733 | ||
734 | .. code-block:: c | |
735 | ||
736 | static int | |
737 | l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy) | |
738 | { | |
739 | unsigned lcore_id = rte_lcore_id(); | |
740 | ||
741 | if (float_proc) { | |
742 | unsigned flcore_id; | |
743 | ||
744 | /* Change it to floating process, also change it's lcore_id */ | |
745 | ||
746 | clear_cpu_affinity(); | |
747 | ||
748 | RTE_PER_LCORE(_lcore_id) = 0; | |
749 | ||
750 | /* Get a lcore_id */ | |
751 | ||
752 | if (flib_assign_lcore_id() < 0 ) { | |
753 | printf("flib_assign_lcore_id failed\n"); | |
754 | return -1; | |
755 | } | |
756 | ||
757 | flcore_id = rte_lcore_id(); | |
758 | ||
759 | /* Set mapping id, so master can return it after slave exited */ | |
760 | ||
761 | mapping_id[lcore_id] = flcore_id; | |
762 | printf("Org lcore_id = %u, cur lcore_id = %u\n",lcore_id, flcore_id); | |
763 | remapping_slave_resource(lcore_id, flcore_id); | |
764 | } | |
765 | ||
766 | l2fwd_main_loop(); | |
767 | ||
768 | /* return lcore_id before return */ | |
769 | if (float_proc) { | |
770 | flib_free_lcore_id(rte_lcore_id()); | |
771 | mapping_id[lcore_id] = INVALID_MAPPING_ID; | |
772 | } | |
773 | return 0; | |
774 | } |