]>
Commit | Line | Data |
---|---|---|
72865317 | 1 | /* |
ff073a71 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
72865317 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
db73f716 | 18 | #include "dpif-netdev.h" |
72865317 | 19 | |
72865317 BP |
20 | #include <ctype.h> |
21 | #include <errno.h> | |
22 | #include <fcntl.h> | |
23 | #include <inttypes.h> | |
72865317 | 24 | #include <netinet/in.h> |
9d82ec47 | 25 | #include <sys/socket.h> |
7f3adc00 | 26 | #include <net/if.h> |
cdee00fd | 27 | #include <stdint.h> |
72865317 BP |
28 | #include <stdlib.h> |
29 | #include <string.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/stat.h> | |
72865317 BP |
32 | #include <unistd.h> |
33 | ||
2c0ea78f | 34 | #include "classifier.h" |
59e6d833 | 35 | #include "cmap.h" |
72865317 | 36 | #include "csum.h" |
614c4892 | 37 | #include "dpif.h" |
72865317 | 38 | #include "dpif-provider.h" |
614c4892 | 39 | #include "dummy.h" |
36956a7d | 40 | #include "dynamic-string.h" |
afae68b1 | 41 | #include "fat-rwlock.h" |
72865317 | 42 | #include "flow.h" |
9f361d6b | 43 | #include "cmap.h" |
6c3eee82 | 44 | #include "latch.h" |
72865317 | 45 | #include "list.h" |
8c301900 | 46 | #include "meta-flow.h" |
72865317 | 47 | #include "netdev.h" |
8617afff | 48 | #include "netdev-dpdk.h" |
de281153 | 49 | #include "netdev-vport.h" |
cdee00fd | 50 | #include "netlink.h" |
f094af7b | 51 | #include "odp-execute.h" |
72865317 BP |
52 | #include "odp-util.h" |
53 | #include "ofp-print.h" | |
54 | #include "ofpbuf.h" | |
5a034064 | 55 | #include "ovs-numa.h" |
61e7deb1 | 56 | #include "ovs-rcu.h" |
91088554 | 57 | #include "packet-dpif.h" |
72865317 BP |
58 | #include "packets.h" |
59 | #include "poll-loop.h" | |
26c6b6cd | 60 | #include "random.h" |
d33ed218 | 61 | #include "seq.h" |
462278db | 62 | #include "shash.h" |
0cbfe35d | 63 | #include "sset.h" |
72865317 | 64 | #include "timeval.h" |
74cc3969 | 65 | #include "unixctl.h" |
72865317 | 66 | #include "util.h" |
72865317 | 67 | #include "vlog.h" |
5136ce49 | 68 | |
d98e6007 | 69 | VLOG_DEFINE_THIS_MODULE(dpif_netdev); |
72865317 | 70 | |
2c0ea78f GS |
71 | /* By default, choose a priority in the middle. */ |
72 | #define NETDEV_RULE_PRIORITY 0x8000 | |
73 | ||
8bb113da | 74 | #define FLOW_DUMP_MAX_BATCH 50 |
adcf00ba AZ |
75 | /* Use per thread recirc_depth to prevent recirculation loop. */ |
76 | #define MAX_RECIRC_DEPTH 5 | |
77 | DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) | |
e4cfed38 | 78 | |
72865317 | 79 | /* Configuration parameters. */ |
72865317 BP |
80 | enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ |
81 | ||
8a4e3a85 BP |
82 | /* Protects against changes to 'dp_netdevs'. */ |
83 | static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; | |
84 | ||
85 | /* Contains all 'struct dp_netdev's. */ | |
86 | static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex) | |
87 | = SHASH_INITIALIZER(&dp_netdevs); | |
88 | ||
623540e4 | 89 | static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600); |
6b31e073 | 90 | |
79df317f | 91 | /* Stores a miniflow with inline values */ |
9bbf1c3d DDP |
92 | |
93 | /* There are fields in the flow structure that we never use. Therefore we can | |
94 | * save a few words of memory */ | |
95 | #define NETDEV_KEY_BUF_SIZE_U32 (FLOW_U32S \ | |
96 | - MINI_N_INLINE \ | |
97 | - FLOW_U32_SIZE(regs) \ | |
98 | - FLOW_U32_SIZE(metadata) \ | |
99 | ) | |
100 | struct netdev_flow_key { | |
101 | struct miniflow flow; | |
102 | uint32_t buf[NETDEV_KEY_BUF_SIZE_U32]; | |
103 | }; | |
104 | ||
105 | /* Exact match cache for frequently used flows | |
106 | * | |
107 | * The cache uses a 32-bit hash of the packet (which can be the RSS hash) to | |
108 | * search its entries for a miniflow that matches exactly the miniflow of the | |
109 | * packet. It stores the 'cls_rule'(rule) that matches the miniflow. | |
110 | * | |
111 | * A cache entry holds a reference to its 'dp_netdev_flow'. | |
112 | * | |
113 | * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS different | |
114 | * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values (each of | |
115 | * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown away). Each | |
116 | * value is the index of a cache entry where the miniflow could be. | |
117 | * | |
118 | * | |
119 | * Thread-safety | |
120 | * ============= | |
121 | * | |
122 | * Each pmd_thread has its own private exact match cache. | |
123 | * If dp_netdev_input is not called from a pmd thread, a mutex is used. | |
124 | */ | |
125 | ||
126 | #define EM_FLOW_HASH_SHIFT 10 | |
127 | #define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT) | |
128 | #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) | |
129 | #define EM_FLOW_HASH_SEGS 2 | |
130 | ||
131 | struct emc_entry { | |
132 | uint32_t hash; | |
0023a1cb | 133 | uint32_t mf_len; |
9bbf1c3d DDP |
134 | struct netdev_flow_key mf; |
135 | struct dp_netdev_flow *flow; | |
136 | }; | |
137 | ||
138 | struct emc_cache { | |
139 | struct emc_entry entries[EM_FLOW_HASH_ENTRIES]; | |
140 | }; | |
141 | ||
142 | /* Iterate in the exact match cache through every entry that might contain a | |
143 | * miniflow with hash 'HASH'. */ | |
144 | #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) \ | |
145 | for (uint32_t i__ = 0, srch_hash__ = (HASH); \ | |
146 | (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \ | |
147 | i__ < EM_FLOW_HASH_SEGS; \ | |
148 | i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT) | |
149 | ||
8a4e3a85 BP |
150 | /* Datapath based on the network device interface from netdev.h. |
151 | * | |
152 | * | |
153 | * Thread-safety | |
154 | * ============= | |
155 | * | |
156 | * Some members, marked 'const', are immutable. Accessing other members | |
157 | * requires synchronization, as noted in more detail below. | |
158 | * | |
159 | * Acquisition order is, from outermost to innermost: | |
160 | * | |
161 | * dp_netdev_mutex (global) | |
59e6d833 | 162 | * port_mutex |
8a4e3a85 | 163 | * flow_mutex |
8a4e3a85 | 164 | */ |
72865317 | 165 | struct dp_netdev { |
8a4e3a85 BP |
166 | const struct dpif_class *const class; |
167 | const char *const name; | |
6b31e073 | 168 | struct dpif *dpif; |
6a8267c5 BP |
169 | struct ovs_refcount ref_cnt; |
170 | atomic_flag destroyed; | |
72865317 | 171 | |
8a4e3a85 BP |
172 | /* Flows. |
173 | * | |
afae68b1 JR |
174 | * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding |
175 | * changes to 'cls' must be made while still holding the 'flow_mutex'. | |
8a4e3a85 BP |
176 | */ |
177 | struct ovs_mutex flow_mutex; | |
afae68b1 | 178 | struct classifier cls; |
9f361d6b | 179 | struct cmap flow_table OVS_GUARDED; /* Flow table. */ |
8a4e3a85 | 180 | |
8a4e3a85 BP |
181 | /* Statistics. |
182 | * | |
51852a57 BP |
183 | * ovsthread_stats is internally synchronized. */ |
184 | struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */ | |
72865317 | 185 | |
8a4e3a85 BP |
186 | /* Ports. |
187 | * | |
59e6d833 BP |
188 | * Protected by RCU. Take the mutex to add or remove ports. */ |
189 | struct ovs_mutex port_mutex; | |
190 | struct cmap ports; | |
d33ed218 | 191 | struct seq *port_seq; /* Incremented whenever a port changes. */ |
6c3eee82 | 192 | |
6b31e073 RW |
193 | /* Protects access to ofproto-dpif-upcall interface during revalidator |
194 | * thread synchronization. */ | |
195 | struct fat_rwlock upcall_rwlock; | |
623540e4 EJ |
196 | upcall_callback *upcall_cb; /* Callback function for executing upcalls. */ |
197 | void *upcall_aux; | |
6b31e073 | 198 | |
65f13b50 AW |
199 | /* Stores all 'struct dp_netdev_pmd_thread's. */ |
200 | struct cmap poll_threads; | |
201 | ||
202 | /* Protects the access of the 'struct dp_netdev_pmd_thread' | |
203 | * instance for non-pmd thread. */ | |
204 | struct ovs_mutex non_pmd_mutex; | |
205 | ||
206 | /* Each pmd thread will store its pointer to | |
207 | * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ | |
208 | ovsthread_key_t per_pmd_key; | |
f2eee189 AW |
209 | |
210 | /* Number of rx queues for each dpdk interface and the cpu mask | |
211 | * for pin of pmd threads. */ | |
212 | size_t n_dpdk_rxqs; | |
213 | char *pmd_cmask; | |
72865317 BP |
214 | }; |
215 | ||
8a4e3a85 | 216 | static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, |
59e6d833 | 217 | odp_port_t); |
ff073a71 | 218 | |
51852a57 BP |
219 | enum dp_stat_type { |
220 | DP_STAT_HIT, /* Packets that matched in the flow table. */ | |
221 | DP_STAT_MISS, /* Packets that did not match. */ | |
222 | DP_STAT_LOST, /* Packets not passed up to the client. */ | |
223 | DP_N_STATS | |
224 | }; | |
225 | ||
226 | /* Contained by struct dp_netdev's 'stats' member. */ | |
227 | struct dp_netdev_stats { | |
228 | struct ovs_mutex mutex; /* Protects 'n'. */ | |
229 | ||
230 | /* Indexed by DP_STAT_*, protected by 'mutex'. */ | |
231 | unsigned long long int n[DP_N_STATS] OVS_GUARDED; | |
232 | }; | |
233 | ||
234 | ||
72865317 BP |
235 | /* A port in a netdev-based datapath. */ |
236 | struct dp_netdev_port { | |
59e6d833 | 237 | struct cmap_node node; /* Node in dp_netdev's 'ports'. */ |
ff073a71 | 238 | odp_port_t port_no; |
72865317 | 239 | struct netdev *netdev; |
4b609110 | 240 | struct netdev_saved_flags *sf; |
55c955bd | 241 | struct netdev_rxq **rxq; |
b284085e | 242 | struct ovs_refcount ref_cnt; |
0cbfe35d | 243 | char *type; /* Port type as requested by user. */ |
72865317 BP |
244 | }; |
245 | ||
8a4e3a85 BP |
246 | /* A flow in dp_netdev's 'flow_table'. |
247 | * | |
248 | * | |
249 | * Thread-safety | |
250 | * ============= | |
251 | * | |
252 | * Except near the beginning or ending of its lifespan, rule 'rule' belongs to | |
253 | * its dp_netdev's classifier. The text below calls this classifier 'cls'. | |
254 | * | |
255 | * Motivation | |
256 | * ---------- | |
257 | * | |
258 | * The thread safety rules described here for "struct dp_netdev_flow" are | |
259 | * motivated by two goals: | |
260 | * | |
261 | * - Prevent threads that read members of "struct dp_netdev_flow" from | |
262 | * reading bad data due to changes by some thread concurrently modifying | |
263 | * those members. | |
264 | * | |
265 | * - Prevent two threads making changes to members of a given "struct | |
266 | * dp_netdev_flow" from interfering with each other. | |
267 | * | |
268 | * | |
269 | * Rules | |
270 | * ----- | |
271 | * | |
ed79f89a DDP |
272 | * A flow 'flow' may be accessed without a risk of being freed during an RCU |
273 | * grace period. Code that needs to hold onto a flow for a while | |
274 | * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref(). | |
8a4e3a85 BP |
275 | * |
276 | * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the | |
ed79f89a DDP |
277 | * flow from being deleted from 'cls' and it doesn't protect members of 'flow' |
278 | * from modification. | |
8a4e3a85 BP |
279 | * |
280 | * Some members, marked 'const', are immutable. Accessing other members | |
281 | * requires synchronization, as noted in more detail below. | |
282 | */ | |
72865317 | 283 | struct dp_netdev_flow { |
9bbf1c3d | 284 | bool dead; |
2c0ea78f | 285 | /* Packet classification. */ |
8a4e3a85 | 286 | const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */ |
2c0ea78f | 287 | |
8a4e3a85 | 288 | /* Hash table index by unmasked flow. */ |
9f361d6b | 289 | const struct cmap_node node; /* In owning dp_netdev's 'flow_table'. */ |
8a4e3a85 | 290 | const struct flow flow; /* The flow that created this entry. */ |
72865317 | 291 | |
ed79f89a DDP |
292 | /* Number of references. |
293 | * The classifier owns one reference. | |
294 | * Any thread trying to keep a rule from being freed should hold its own | |
295 | * reference. */ | |
296 | struct ovs_refcount ref_cnt; | |
297 | ||
8a4e3a85 BP |
298 | /* Statistics. |
299 | * | |
300 | * Reading or writing these members requires 'mutex'. */ | |
679ba04c | 301 | struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */ |
8a4e3a85 | 302 | |
45c626a3 | 303 | /* Actions. */ |
61e7deb1 | 304 | OVSRCU_TYPE(struct dp_netdev_actions *) actions; |
72865317 BP |
305 | }; |
306 | ||
ed79f89a | 307 | static void dp_netdev_flow_unref(struct dp_netdev_flow *); |
9bbf1c3d | 308 | static bool dp_netdev_flow_ref(struct dp_netdev_flow *); |
8a4e3a85 | 309 | |
679ba04c BP |
310 | /* Contained by struct dp_netdev_flow's 'stats' member. */ |
311 | struct dp_netdev_flow_stats { | |
312 | struct ovs_mutex mutex; /* Guards all the other members. */ | |
313 | ||
314 | long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */ | |
315 | long long int packet_count OVS_GUARDED; /* Number of packets matched. */ | |
316 | long long int byte_count OVS_GUARDED; /* Number of bytes matched. */ | |
317 | uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */ | |
318 | }; | |
319 | ||
a84cb64a BP |
320 | /* A set of datapath actions within a "struct dp_netdev_flow". |
321 | * | |
322 | * | |
323 | * Thread-safety | |
324 | * ============= | |
325 | * | |
45c626a3 | 326 | * A struct dp_netdev_actions 'actions' is protected with RCU. */ |
a84cb64a | 327 | struct dp_netdev_actions { |
a84cb64a BP |
328 | /* These members are immutable: they do not change during the struct's |
329 | * lifetime. */ | |
330 | struct nlattr *actions; /* Sequence of OVS_ACTION_ATTR_* attributes. */ | |
331 | unsigned int size; /* Size of 'actions', in bytes. */ | |
332 | }; | |
333 | ||
334 | struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *, | |
335 | size_t); | |
61e7deb1 BP |
336 | struct dp_netdev_actions *dp_netdev_flow_get_actions( |
337 | const struct dp_netdev_flow *); | |
338 | static void dp_netdev_actions_free(struct dp_netdev_actions *); | |
a84cb64a | 339 | |
e4cfed38 PS |
340 | /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate |
341 | * the performance overhead of interrupt processing. Therefore netdev can | |
342 | * not implement rx-wait for these devices. dpif-netdev needs to poll | |
343 | * these device to check for recv buffer. pmd-thread does polling for | |
344 | * devices assigned to itself thread. | |
345 | * | |
346 | * DPDK used PMD for accessing NIC. | |
347 | * | |
65f13b50 AW |
348 | * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for |
349 | * I/O of all non-pmd threads. There will be no actual thread created | |
350 | * for the instance. | |
e4cfed38 | 351 | **/ |
65f13b50 | 352 | struct dp_netdev_pmd_thread { |
6c3eee82 | 353 | struct dp_netdev *dp; |
65f13b50 AW |
354 | struct cmap_node node; /* In 'dp->poll_threads'. */ |
355 | /* Per thread exact-match cache. Note, the instance for cpu core | |
356 | * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly | |
357 | * need to be protected (e.g. by 'dp_netdev_mutex'). All other | |
358 | * instances will only be accessed by its own pmd thread. */ | |
9bbf1c3d | 359 | struct emc_cache flow_cache; |
65f13b50 AW |
360 | struct latch exit_latch; /* For terminating the pmd thread. */ |
361 | atomic_uint change_seq; /* For reloading pmd ports. */ | |
6c3eee82 | 362 | pthread_t thread; |
65f13b50 AW |
363 | int index; /* Idx of this pmd thread among pmd*/ |
364 | /* threads on same numa node. */ | |
365 | int core_id; /* CPU core id of this pmd thread. */ | |
366 | int numa_id; /* numa node id of this pmd thread. */ | |
6c3eee82 BP |
367 | }; |
368 | ||
84067a4c JR |
369 | #define PMD_INITIAL_SEQ 1 |
370 | ||
72865317 BP |
371 | /* Interface to netdev-based datapath. */ |
372 | struct dpif_netdev { | |
373 | struct dpif dpif; | |
374 | struct dp_netdev *dp; | |
d33ed218 | 375 | uint64_t last_port_seq; |
72865317 BP |
376 | }; |
377 | ||
8a4e3a85 | 378 | static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no, |
59e6d833 | 379 | struct dp_netdev_port **portp); |
8a4e3a85 | 380 | static int get_port_by_name(struct dp_netdev *dp, const char *devname, |
59e6d833 | 381 | struct dp_netdev_port **portp); |
8a4e3a85 BP |
382 | static void dp_netdev_free(struct dp_netdev *) |
383 | OVS_REQUIRES(dp_netdev_mutex); | |
72865317 | 384 | static void dp_netdev_flow_flush(struct dp_netdev *); |
8a4e3a85 BP |
385 | static int do_add_port(struct dp_netdev *dp, const char *devname, |
386 | const char *type, odp_port_t port_no) | |
59e6d833 | 387 | OVS_REQUIRES(dp->port_mutex); |
c40b890f | 388 | static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *) |
59e6d833 | 389 | OVS_REQUIRES(dp->port_mutex); |
614c4892 BP |
390 | static int dpif_netdev_open(const struct dpif_class *, const char *name, |
391 | bool create, struct dpif **); | |
65f13b50 | 392 | static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, |
8cbf4f47 DDP |
393 | struct dpif_packet **, int c, |
394 | bool may_steal, struct pkt_metadata *, | |
4edb9ae9 | 395 | const struct nlattr *actions, |
e4cfed38 | 396 | size_t actions_len); |
65f13b50 | 397 | static void dp_netdev_input(struct dp_netdev_pmd_thread *, |
3c33f0ff JR |
398 | struct dpif_packet **, int cnt, |
399 | struct pkt_metadata *); | |
6b31e073 | 400 | static void dp_netdev_disable_upcall(struct dp_netdev *); |
65f13b50 AW |
401 | static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, |
402 | struct dp_netdev *dp, int index, | |
403 | int core_id, int numa_id); | |
f2eee189 | 404 | static void dp_netdev_set_nonpmd(struct dp_netdev *dp); |
65f13b50 AW |
405 | static struct dp_netdev_pmd_thread *dp_netdev_get_nonpmd(struct dp_netdev *dp); |
406 | static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp); | |
407 | static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id); | |
408 | static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id); | |
f2eee189 | 409 | static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp); |
72865317 | 410 | |
9bbf1c3d DDP |
411 | static void emc_clear_entry(struct emc_entry *ce); |
412 | ||
413 | static void | |
414 | emc_cache_init(struct emc_cache *flow_cache) | |
415 | { | |
416 | int i; | |
417 | ||
418 | for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { | |
419 | flow_cache->entries[i].flow = NULL; | |
420 | flow_cache->entries[i].hash = 0; | |
0023a1cb | 421 | flow_cache->entries[i].mf_len = 0; |
9bbf1c3d DDP |
422 | miniflow_initialize(&flow_cache->entries[i].mf.flow, |
423 | flow_cache->entries[i].mf.buf); | |
424 | } | |
425 | } | |
426 | ||
427 | static void | |
428 | emc_cache_uninit(struct emc_cache *flow_cache) | |
429 | { | |
430 | int i; | |
431 | ||
432 | for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { | |
433 | emc_clear_entry(&flow_cache->entries[i]); | |
434 | } | |
435 | } | |
436 | ||
72865317 BP |
437 | static struct dpif_netdev * |
438 | dpif_netdev_cast(const struct dpif *dpif) | |
439 | { | |
cb22974d | 440 | ovs_assert(dpif->dpif_class->open == dpif_netdev_open); |
72865317 BP |
441 | return CONTAINER_OF(dpif, struct dpif_netdev, dpif); |
442 | } | |
443 | ||
444 | static struct dp_netdev * | |
445 | get_dp_netdev(const struct dpif *dpif) | |
446 | { | |
447 | return dpif_netdev_cast(dpif)->dp; | |
448 | } | |
449 | ||
2197d7ab | 450 | static int |
2240af25 DDP |
451 | dpif_netdev_enumerate(struct sset *all_dps, |
452 | const struct dpif_class *dpif_class) | |
2197d7ab GL |
453 | { |
454 | struct shash_node *node; | |
455 | ||
97be1538 | 456 | ovs_mutex_lock(&dp_netdev_mutex); |
2197d7ab | 457 | SHASH_FOR_EACH(node, &dp_netdevs) { |
2240af25 DDP |
458 | struct dp_netdev *dp = node->data; |
459 | if (dpif_class != dp->class) { | |
460 | /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs. | |
461 | * If the class doesn't match, skip this dpif. */ | |
462 | continue; | |
463 | } | |
2197d7ab GL |
464 | sset_add(all_dps, node->name); |
465 | } | |
97be1538 | 466 | ovs_mutex_unlock(&dp_netdev_mutex); |
5279f8fd | 467 | |
2197d7ab GL |
468 | return 0; |
469 | } | |
470 | ||
add90f6f EJ |
471 | static bool |
472 | dpif_netdev_class_is_dummy(const struct dpif_class *class) | |
473 | { | |
474 | return class != &dpif_netdev_class; | |
475 | } | |
476 | ||
0aeaabc8 JP |
477 | static const char * |
478 | dpif_netdev_port_open_type(const struct dpif_class *class, const char *type) | |
479 | { | |
480 | return strcmp(type, "internal") ? type | |
add90f6f | 481 | : dpif_netdev_class_is_dummy(class) ? "dummy" |
0aeaabc8 JP |
482 | : "tap"; |
483 | } | |
484 | ||
72865317 BP |
485 | static struct dpif * |
486 | create_dpif_netdev(struct dp_netdev *dp) | |
487 | { | |
462278db | 488 | uint16_t netflow_id = hash_string(dp->name, 0); |
72865317 | 489 | struct dpif_netdev *dpif; |
72865317 | 490 | |
6a8267c5 | 491 | ovs_refcount_ref(&dp->ref_cnt); |
72865317 | 492 | |
72865317 | 493 | dpif = xmalloc(sizeof *dpif); |
614c4892 | 494 | dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); |
72865317 | 495 | dpif->dp = dp; |
d33ed218 | 496 | dpif->last_port_seq = seq_read(dp->port_seq); |
72865317 BP |
497 | |
498 | return &dpif->dpif; | |
499 | } | |
500 | ||
4e022ec0 AW |
501 | /* Choose an unused, non-zero port number and return it on success. |
502 | * Return ODPP_NONE on failure. */ | |
503 | static odp_port_t | |
e44768b7 | 504 | choose_port(struct dp_netdev *dp, const char *name) |
59e6d833 | 505 | OVS_REQUIRES(dp->port_mutex) |
e44768b7 | 506 | { |
4e022ec0 | 507 | uint32_t port_no; |
e44768b7 JP |
508 | |
509 | if (dp->class != &dpif_netdev_class) { | |
510 | const char *p; | |
511 | int start_no = 0; | |
512 | ||
513 | /* If the port name begins with "br", start the number search at | |
514 | * 100 to make writing tests easier. */ | |
515 | if (!strncmp(name, "br", 2)) { | |
516 | start_no = 100; | |
517 | } | |
518 | ||
519 | /* If the port name contains a number, try to assign that port number. | |
520 | * This can make writing unit tests easier because port numbers are | |
521 | * predictable. */ | |
522 | for (p = name; *p != '\0'; p++) { | |
523 | if (isdigit((unsigned char) *p)) { | |
524 | port_no = start_no + strtol(p, NULL, 10); | |
ff073a71 BP |
525 | if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE) |
526 | && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 527 | return u32_to_odp(port_no); |
e44768b7 JP |
528 | } |
529 | break; | |
530 | } | |
531 | } | |
532 | } | |
533 | ||
ff073a71 BP |
534 | for (port_no = 1; port_no <= UINT16_MAX; port_no++) { |
535 | if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 536 | return u32_to_odp(port_no); |
e44768b7 JP |
537 | } |
538 | } | |
539 | ||
4e022ec0 | 540 | return ODPP_NONE; |
e44768b7 JP |
541 | } |
542 | ||
72865317 | 543 | static int |
614c4892 BP |
544 | create_dp_netdev(const char *name, const struct dpif_class *class, |
545 | struct dp_netdev **dpp) | |
8a4e3a85 | 546 | OVS_REQUIRES(dp_netdev_mutex) |
72865317 BP |
547 | { |
548 | struct dp_netdev *dp; | |
549 | int error; | |
72865317 | 550 | |
462278db | 551 | dp = xzalloc(sizeof *dp); |
8a4e3a85 BP |
552 | shash_add(&dp_netdevs, name, dp); |
553 | ||
554 | *CONST_CAST(const struct dpif_class **, &dp->class) = class; | |
555 | *CONST_CAST(const char **, &dp->name) = xstrdup(name); | |
6a8267c5 | 556 | ovs_refcount_init(&dp->ref_cnt); |
1a65ba85 | 557 | atomic_flag_clear(&dp->destroyed); |
8a4e3a85 BP |
558 | |
559 | ovs_mutex_init(&dp->flow_mutex); | |
560 | classifier_init(&dp->cls, NULL); | |
9f361d6b | 561 | cmap_init(&dp->flow_table); |
8a4e3a85 | 562 | |
51852a57 | 563 | ovsthread_stats_init(&dp->stats); |
ed27e010 | 564 | |
59e6d833 BP |
565 | ovs_mutex_init(&dp->port_mutex); |
566 | cmap_init(&dp->ports); | |
d33ed218 | 567 | dp->port_seq = seq_create(); |
6b31e073 RW |
568 | fat_rwlock_init(&dp->upcall_rwlock); |
569 | ||
570 | /* Disable upcalls by default. */ | |
571 | dp_netdev_disable_upcall(dp); | |
623540e4 | 572 | dp->upcall_aux = NULL; |
6b31e073 | 573 | dp->upcall_cb = NULL; |
e44768b7 | 574 | |
65f13b50 AW |
575 | cmap_init(&dp->poll_threads); |
576 | ovs_mutex_init_recursive(&dp->non_pmd_mutex); | |
577 | ovsthread_key_create(&dp->per_pmd_key, NULL); | |
578 | ||
579 | /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */ | |
580 | ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID); | |
f2eee189 AW |
581 | dp_netdev_set_nonpmd(dp); |
582 | dp->n_dpdk_rxqs = NR_QUEUE; | |
65f13b50 | 583 | |
59e6d833 | 584 | ovs_mutex_lock(&dp->port_mutex); |
4e022ec0 | 585 | error = do_add_port(dp, name, "internal", ODPP_LOCAL); |
59e6d833 | 586 | ovs_mutex_unlock(&dp->port_mutex); |
72865317 BP |
587 | if (error) { |
588 | dp_netdev_free(dp); | |
462278db | 589 | return error; |
72865317 BP |
590 | } |
591 | ||
462278db | 592 | *dpp = dp; |
72865317 BP |
593 | return 0; |
594 | } | |
595 | ||
596 | static int | |
614c4892 | 597 | dpif_netdev_open(const struct dpif_class *class, const char *name, |
4a387741 | 598 | bool create, struct dpif **dpifp) |
72865317 | 599 | { |
462278db | 600 | struct dp_netdev *dp; |
5279f8fd | 601 | int error; |
462278db | 602 | |
97be1538 | 603 | ovs_mutex_lock(&dp_netdev_mutex); |
462278db BP |
604 | dp = shash_find_data(&dp_netdevs, name); |
605 | if (!dp) { | |
5279f8fd | 606 | error = create ? create_dp_netdev(name, class, &dp) : ENODEV; |
72865317 | 607 | } else { |
5279f8fd BP |
608 | error = (dp->class != class ? EINVAL |
609 | : create ? EEXIST | |
610 | : 0); | |
611 | } | |
612 | if (!error) { | |
613 | *dpifp = create_dpif_netdev(dp); | |
6b31e073 | 614 | dp->dpif = *dpifp; |
72865317 | 615 | } |
97be1538 | 616 | ovs_mutex_unlock(&dp_netdev_mutex); |
462278db | 617 | |
5279f8fd | 618 | return error; |
72865317 BP |
619 | } |
620 | ||
88ace79b DDP |
621 | static void |
622 | dp_netdev_destroy_upcall_lock(struct dp_netdev *dp) | |
623 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
624 | { | |
625 | /* Check that upcalls are disabled, i.e. that the rwlock is taken */ | |
626 | ovs_assert(fat_rwlock_tryrdlock(&dp->upcall_rwlock)); | |
627 | ||
628 | /* Before freeing a lock we should release it */ | |
629 | fat_rwlock_unlock(&dp->upcall_rwlock); | |
630 | fat_rwlock_destroy(&dp->upcall_rwlock); | |
631 | } | |
632 | ||
8a4e3a85 BP |
633 | /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' |
634 | * through the 'dp_netdevs' shash while freeing 'dp'. */ | |
1ba530f4 BP |
635 | static void |
636 | dp_netdev_free(struct dp_netdev *dp) | |
8a4e3a85 | 637 | OVS_REQUIRES(dp_netdev_mutex) |
1ba530f4 | 638 | { |
59e6d833 | 639 | struct dp_netdev_port *port; |
51852a57 BP |
640 | struct dp_netdev_stats *bucket; |
641 | int i; | |
4ad28026 | 642 | |
8a4e3a85 BP |
643 | shash_find_and_delete(&dp_netdevs, dp->name); |
644 | ||
65f13b50 AW |
645 | dp_netdev_destroy_all_pmds(dp); |
646 | ovs_mutex_destroy(&dp->non_pmd_mutex); | |
647 | ovsthread_key_delete(dp->per_pmd_key); | |
6c3eee82 | 648 | |
1ba530f4 | 649 | dp_netdev_flow_flush(dp); |
59e6d833 | 650 | ovs_mutex_lock(&dp->port_mutex); |
a532e683 | 651 | CMAP_FOR_EACH (port, node, &dp->ports) { |
c40b890f | 652 | do_del_port(dp, port); |
1ba530f4 | 653 | } |
59e6d833 | 654 | ovs_mutex_unlock(&dp->port_mutex); |
51852a57 BP |
655 | |
656 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
657 | ovs_mutex_destroy(&bucket->mutex); | |
658 | free_cacheline(bucket); | |
659 | } | |
660 | ovsthread_stats_destroy(&dp->stats); | |
f5126b57 | 661 | |
2c0ea78f | 662 | classifier_destroy(&dp->cls); |
9f361d6b | 663 | cmap_destroy(&dp->flow_table); |
8a4e3a85 | 664 | ovs_mutex_destroy(&dp->flow_mutex); |
d33ed218 | 665 | seq_destroy(dp->port_seq); |
59e6d833 | 666 | cmap_destroy(&dp->ports); |
88ace79b DDP |
667 | |
668 | /* Upcalls must be disabled at this point */ | |
669 | dp_netdev_destroy_upcall_lock(dp); | |
9bbf1c3d | 670 | |
f2eee189 | 671 | free(dp->pmd_cmask); |
8a4e3a85 | 672 | free(CONST_CAST(char *, dp->name)); |
72865317 BP |
673 | free(dp); |
674 | } | |
675 | ||
8a4e3a85 BP |
676 | static void |
677 | dp_netdev_unref(struct dp_netdev *dp) | |
678 | { | |
679 | if (dp) { | |
680 | /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't | |
681 | * get a new reference to 'dp' through the 'dp_netdevs' shash. */ | |
682 | ovs_mutex_lock(&dp_netdev_mutex); | |
24f83812 | 683 | if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) { |
8a4e3a85 BP |
684 | dp_netdev_free(dp); |
685 | } | |
686 | ovs_mutex_unlock(&dp_netdev_mutex); | |
687 | } | |
688 | } | |
689 | ||
72865317 BP |
690 | static void |
691 | dpif_netdev_close(struct dpif *dpif) | |
692 | { | |
693 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd | 694 | |
8a4e3a85 | 695 | dp_netdev_unref(dp); |
72865317 BP |
696 | free(dpif); |
697 | } | |
698 | ||
699 | static int | |
7dab847a | 700 | dpif_netdev_destroy(struct dpif *dpif) |
72865317 BP |
701 | { |
702 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd | 703 | |
6a8267c5 | 704 | if (!atomic_flag_test_and_set(&dp->destroyed)) { |
24f83812 | 705 | if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) { |
6a8267c5 BP |
706 | /* Can't happen: 'dpif' still owns a reference to 'dp'. */ |
707 | OVS_NOT_REACHED(); | |
708 | } | |
709 | } | |
5279f8fd | 710 | |
72865317 BP |
711 | return 0; |
712 | } | |
713 | ||
714 | static int | |
a8d9304d | 715 | dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) |
72865317 BP |
716 | { |
717 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
51852a57 BP |
718 | struct dp_netdev_stats *bucket; |
719 | size_t i; | |
5279f8fd | 720 | |
9f361d6b | 721 | stats->n_flows = cmap_count(&dp->flow_table); |
8a4e3a85 | 722 | |
51852a57 BP |
723 | stats->n_hit = stats->n_missed = stats->n_lost = 0; |
724 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
725 | ovs_mutex_lock(&bucket->mutex); | |
726 | stats->n_hit += bucket->n[DP_STAT_HIT]; | |
727 | stats->n_missed += bucket->n[DP_STAT_MISS]; | |
728 | stats->n_lost += bucket->n[DP_STAT_LOST]; | |
729 | ovs_mutex_unlock(&bucket->mutex); | |
730 | } | |
1ce3fa06 | 731 | stats->n_masks = UINT32_MAX; |
847108dc | 732 | stats->n_mask_hit = UINT64_MAX; |
5279f8fd | 733 | |
72865317 BP |
734 | return 0; |
735 | } | |
736 | ||
e4cfed38 | 737 | static void |
65f13b50 | 738 | dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd) |
e4cfed38 | 739 | { |
65f13b50 AW |
740 | int old_seq; |
741 | ||
742 | atomic_add_relaxed(&pmd->change_seq, 1, &old_seq); | |
743 | } | |
e4cfed38 | 744 | |
65f13b50 AW |
745 | /* Causes all pmd threads to reload its tx/rx devices. |
746 | * Must be called after adding/removing ports. */ | |
747 | static void | |
748 | dp_netdev_reload_pmds(struct dp_netdev *dp) | |
749 | { | |
750 | struct dp_netdev_pmd_thread *pmd; | |
e4cfed38 | 751 | |
65f13b50 AW |
752 | CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { |
753 | dp_netdev_reload_pmd__(pmd); | |
84067a4c | 754 | } |
e4cfed38 PS |
755 | } |
756 | ||
59e6d833 BP |
757 | static uint32_t |
758 | hash_port_no(odp_port_t port_no) | |
759 | { | |
760 | return hash_int(odp_to_u32(port_no), 0); | |
761 | } | |
762 | ||
72865317 | 763 | static int |
c3827f61 | 764 | do_add_port(struct dp_netdev *dp, const char *devname, const char *type, |
4e022ec0 | 765 | odp_port_t port_no) |
59e6d833 | 766 | OVS_REQUIRES(dp->port_mutex) |
72865317 | 767 | { |
4b609110 | 768 | struct netdev_saved_flags *sf; |
72865317 BP |
769 | struct dp_netdev_port *port; |
770 | struct netdev *netdev; | |
2499a8ce | 771 | enum netdev_flags flags; |
0cbfe35d | 772 | const char *open_type; |
72865317 | 773 | int error; |
55c955bd | 774 | int i; |
72865317 BP |
775 | |
776 | /* XXX reject devices already in some dp_netdev. */ | |
777 | ||
778 | /* Open and validate network device. */ | |
0aeaabc8 | 779 | open_type = dpif_netdev_port_open_type(dp->class, type); |
0cbfe35d | 780 | error = netdev_open(devname, open_type, &netdev); |
72865317 BP |
781 | if (error) { |
782 | return error; | |
783 | } | |
72865317 BP |
784 | /* XXX reject non-Ethernet devices */ |
785 | ||
2499a8ce AC |
786 | netdev_get_flags(netdev, &flags); |
787 | if (flags & NETDEV_LOOPBACK) { | |
788 | VLOG_ERR("%s: cannot add a loopback device", devname); | |
789 | netdev_close(netdev); | |
790 | return EINVAL; | |
791 | } | |
792 | ||
5a034064 AW |
793 | if (netdev_is_pmd(netdev)) { |
794 | int n_cores = ovs_numa_get_n_cores(); | |
795 | ||
796 | if (n_cores == OVS_CORE_UNSPEC) { | |
797 | VLOG_ERR("%s, cannot get cpu core info", devname); | |
798 | return ENOENT; | |
799 | } | |
800 | /* There can only be ovs_numa_get_n_cores() pmd threads, | |
f2eee189 AW |
801 | * so creates a txq for each. */ |
802 | error = netdev_set_multiq(netdev, n_cores, dp->n_dpdk_rxqs); | |
5a034064 AW |
803 | if (error) { |
804 | VLOG_ERR("%s, cannot set multiq", devname); | |
805 | return errno; | |
806 | } | |
807 | } | |
e4cfed38 PS |
808 | port = xzalloc(sizeof *port); |
809 | port->port_no = port_no; | |
810 | port->netdev = netdev; | |
55c955bd | 811 | port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); |
e4cfed38 | 812 | port->type = xstrdup(type); |
55c955bd PS |
813 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
814 | error = netdev_rxq_open(netdev, &port->rxq[i], i); | |
815 | if (error | |
816 | && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) { | |
817 | VLOG_ERR("%s: cannot receive packets on this network device (%s)", | |
818 | devname, ovs_strerror(errno)); | |
819 | netdev_close(netdev); | |
16bea12c TG |
820 | free(port->type); |
821 | free(port->rxq); | |
822 | free(port); | |
55c955bd PS |
823 | return error; |
824 | } | |
7b6b0ef4 BP |
825 | } |
826 | ||
4b609110 | 827 | error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf); |
72865317 | 828 | if (error) { |
55c955bd PS |
829 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
830 | netdev_rxq_close(port->rxq[i]); | |
831 | } | |
72865317 | 832 | netdev_close(netdev); |
16bea12c | 833 | free(port->type); |
f7791740 | 834 | free(port->rxq); |
e4cfed38 | 835 | free(port); |
72865317 BP |
836 | return error; |
837 | } | |
4b609110 | 838 | port->sf = sf; |
e4cfed38 PS |
839 | |
840 | if (netdev_is_pmd(netdev)) { | |
65f13b50 AW |
841 | dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); |
842 | dp_netdev_reload_pmds(dp); | |
e4cfed38 PS |
843 | } |
844 | ovs_refcount_init(&port->ref_cnt); | |
72865317 | 845 | |
59e6d833 | 846 | cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); |
d33ed218 | 847 | seq_change(dp->port_seq); |
72865317 BP |
848 | |
849 | return 0; | |
850 | } | |
851 | ||
/* dpif 'port_add' implementation: adds 'netdev' to the datapath.  If
 * '*port_nop' is ODPP_NONE a free port number is chosen, otherwise the
 * requested number is used if available.  On success stores the assigned
 * port number in '*port_nop' and returns 0; on failure returns EBUSY
 * (number taken), EFBIG (no free numbers), or the do_add_port() error. */
static int
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
                     odp_port_t *port_nop)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dpif_port;
    odp_port_t port_no;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (*port_nop != ODPP_NONE) {
        /* Caller requested a specific port number; fail if it is in use. */
        port_no = *port_nop;
        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
    } else {
        port_no = choose_port(dp, dpif_port);
        error = port_no == ODPP_NONE ? EFBIG : 0;
    }
    if (!error) {
        *port_nop = port_no;
        error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}
879 | ||
880 | static int | |
4e022ec0 | 881 | dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no) |
72865317 BP |
882 | { |
883 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd BP |
884 | int error; |
885 | ||
59e6d833 | 886 | ovs_mutex_lock(&dp->port_mutex); |
c40b890f BP |
887 | if (port_no == ODPP_LOCAL) { |
888 | error = EINVAL; | |
889 | } else { | |
890 | struct dp_netdev_port *port; | |
891 | ||
892 | error = get_port_by_number(dp, port_no, &port); | |
893 | if (!error) { | |
894 | do_del_port(dp, port); | |
895 | } | |
896 | } | |
59e6d833 | 897 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd BP |
898 | |
899 | return error; | |
72865317 BP |
900 | } |
901 | ||
902 | static bool | |
4e022ec0 | 903 | is_valid_port_number(odp_port_t port_no) |
72865317 | 904 | { |
ff073a71 BP |
905 | return port_no != ODPP_NONE; |
906 | } | |
907 | ||
908 | static struct dp_netdev_port * | |
909 | dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no) | |
910 | { | |
911 | struct dp_netdev_port *port; | |
912 | ||
59e6d833 | 913 | CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) { |
ff073a71 BP |
914 | if (port->port_no == port_no) { |
915 | return port; | |
916 | } | |
917 | } | |
918 | return NULL; | |
72865317 BP |
919 | } |
920 | ||
921 | static int | |
922 | get_port_by_number(struct dp_netdev *dp, | |
4e022ec0 | 923 | odp_port_t port_no, struct dp_netdev_port **portp) |
72865317 BP |
924 | { |
925 | if (!is_valid_port_number(port_no)) { | |
926 | *portp = NULL; | |
927 | return EINVAL; | |
928 | } else { | |
ff073a71 | 929 | *portp = dp_netdev_lookup_port(dp, port_no); |
72865317 BP |
930 | return *portp ? 0 : ENOENT; |
931 | } | |
932 | } | |
933 | ||
b284085e PS |
934 | static void |
935 | port_ref(struct dp_netdev_port *port) | |
936 | { | |
937 | if (port) { | |
938 | ovs_refcount_ref(&port->ref_cnt); | |
939 | } | |
940 | } | |
941 | ||
a1fdee13 AW |
942 | static bool |
943 | port_try_ref(struct dp_netdev_port *port) | |
944 | { | |
945 | if (port) { | |
946 | return ovs_refcount_try_ref_rcu(&port->ref_cnt); | |
947 | } | |
948 | ||
949 | return false; | |
950 | } | |
951 | ||
/* Frees 'port' and everything it owns.  Runs as an RCU callback (scheduled
 * by port_unref() via ovsrcu_postpone()), so no thread can still be reading
 * 'port' when this executes. */
static void
port_destroy__(struct dp_netdev_port *port)
{
    int n_rxq = netdev_n_rxq(port->netdev);
    int i;

    /* NOTE(review): netdev_close() is invoked before netdev_restore_flags();
     * presumably restore_flags only consumes the saved-flags object, but
     * confirm this ordering is intentional. */
    netdev_close(port->netdev);
    netdev_restore_flags(port->sf);

    for (i = 0; i < n_rxq; i++) {
        netdev_rxq_close(port->rxq[i]);
    }
    free(port->rxq);
    free(port->type);
    free(port);
}
968 | ||
/* Releases a reference on 'port'.  When the last reference is dropped the
 * actual destruction is deferred past an RCU grace period, so concurrent
 * readers that obtained 'port' from the ports cmap stay safe. */
static void
port_unref(struct dp_netdev_port *port)
{
    if (port && ovs_refcount_unref_relaxed(&port->ref_cnt) == 1) {
        ovsrcu_postpone(port_destroy__, port);
    }
}
976 | ||
72865317 BP |
977 | static int |
978 | get_port_by_name(struct dp_netdev *dp, | |
979 | const char *devname, struct dp_netdev_port **portp) | |
59e6d833 | 980 | OVS_REQUIRES(dp->port_mutex) |
72865317 BP |
981 | { |
982 | struct dp_netdev_port *port; | |
983 | ||
a532e683 | 984 | CMAP_FOR_EACH (port, node, &dp->ports) { |
3efb6063 | 985 | if (!strcmp(netdev_get_name(port->netdev), devname)) { |
72865317 BP |
986 | *portp = port; |
987 | return 0; | |
988 | } | |
989 | } | |
990 | return ENOENT; | |
991 | } | |
992 | ||
65f13b50 AW |
993 | static int |
994 | get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id) | |
995 | { | |
996 | struct dp_netdev_pmd_thread *pmd; | |
997 | int n_pmds = 0; | |
998 | ||
999 | CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { | |
1000 | if (pmd->numa_id == numa_id) { | |
1001 | n_pmds++; | |
1002 | } | |
1003 | } | |
1004 | ||
1005 | return n_pmds; | |
1006 | } | |
1007 | ||
1008 | /* Returns 'true' if there is a port with pmd netdev and the netdev | |
1009 | * is on numa node 'numa_id'. */ | |
1010 | static bool | |
1011 | has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id) | |
1012 | { | |
1013 | struct dp_netdev_port *port; | |
1014 | ||
1015 | CMAP_FOR_EACH (port, node, &dp->ports) { | |
1016 | if (netdev_is_pmd(port->netdev) | |
1017 | && netdev_get_numa_id(port->netdev) == numa_id) { | |
1018 | return true; | |
1019 | } | |
1020 | } | |
1021 | ||
1022 | return false; | |
1023 | } | |
1024 | ||
1025 | ||
/* Removes 'port' from 'dp'.  Caller must hold dp->port_mutex.
 *
 * The port is first unlinked from the ports cmap (so no new lookups find
 * it), then pmd threads are torn down or reloaded, and only then is the
 * port's reference dropped — keep this order. */
static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
    OVS_REQUIRES(dp->port_mutex)
{
    cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
    seq_change(dp->port_seq);  /* Wake up dpif_netdev_port_poll() waiters. */
    if (netdev_is_pmd(port->netdev)) {
        int numa_id = netdev_get_numa_id(port->netdev);

        /* If there is no netdev on the numa node, deletes the pmd threads
         * for that numa.  Else, just reloads the queues. */
        if (!has_pmd_port_for_numa(dp, numa_id)) {
            dp_netdev_del_pmds_on_numa(dp, numa_id);
        }
        dp_netdev_reload_pmds(dp);
    }

    /* Destruction is deferred by RCU inside port_unref() if this was the
     * last reference. */
    port_unref(port);
}
1045 | ||
1046 | static void | |
4c738a8d BP |
1047 | answer_port_query(const struct dp_netdev_port *port, |
1048 | struct dpif_port *dpif_port) | |
72865317 | 1049 | { |
3efb6063 | 1050 | dpif_port->name = xstrdup(netdev_get_name(port->netdev)); |
0cbfe35d | 1051 | dpif_port->type = xstrdup(port->type); |
4c738a8d | 1052 | dpif_port->port_no = port->port_no; |
72865317 BP |
1053 | } |
1054 | ||
1055 | static int | |
4e022ec0 | 1056 | dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, |
4c738a8d | 1057 | struct dpif_port *dpif_port) |
72865317 BP |
1058 | { |
1059 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1060 | struct dp_netdev_port *port; | |
1061 | int error; | |
1062 | ||
1063 | error = get_port_by_number(dp, port_no, &port); | |
4afba28d | 1064 | if (!error && dpif_port) { |
4c738a8d | 1065 | answer_port_query(port, dpif_port); |
72865317 | 1066 | } |
5279f8fd | 1067 | |
72865317 BP |
1068 | return error; |
1069 | } | |
1070 | ||
/* dpif 'port_query_by_name' implementation.  'dpif_port' may be NULL for a
 * pure existence check.  Takes dp->port_mutex because get_port_by_name()
 * requires it. */
static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    error = get_port_by_name(dp, devname, &port);
    if (!error && dpif_port) {
        answer_port_query(port, dpif_port);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}
1088 | ||
/* Frees 'flow' and everything it owns: its per-thread stats buckets, its
 * classifier rule, and its actions.  Runs after an RCU grace period
 * (scheduled from dp_netdev_flow_unref()), so no reader can still hold
 * 'flow'. */
static void
dp_netdev_flow_free(struct dp_netdev_flow *flow)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
        ovs_mutex_destroy(&bucket->mutex);
        free_cacheline(bucket);   /* Buckets are cacheline-allocated. */
    }
    ovsthread_stats_destroy(&flow->stats);

    /* 'cr' is declared const in the struct; cast away for destruction. */
    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
    dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
    free(flow);
}
1105 | ||
/* Drops a reference on 'flow'; on the last reference, defers the actual
 * destruction past an RCU grace period so concurrent lookups stay safe. */
static void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
{
    if (ovs_refcount_unref_relaxed(&flow->ref_cnt) == 1) {
        ovsrcu_postpone(dp_netdev_flow_free, flow);
    }
}
1112 | ||
/* Removes 'flow' from 'dp': unlinks it from the classifier and the flow
 * table, marks it dead (so cached EMC entries stop matching it), then
 * drops the table's reference.  Keep this order — concurrent readers may
 * still be walking these structures.  Caller must hold dp->flow_mutex. */
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
    struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node);

    classifier_remove(&dp->cls, cr);
    cmap_remove(&dp->flow_table, node, flow_hash(&flow->flow, 0));
    flow->dead = true;

    dp_netdev_flow_unref(flow);
}
1126 | ||
/* Deletes every flow in 'dp'.  CMAP_FOR_EACH tolerates removal of the
 * current node, so removing while iterating is safe here. */
static void
dp_netdev_flow_flush(struct dp_netdev *dp)
{
    struct dp_netdev_flow *netdev_flow;

    ovs_mutex_lock(&dp->flow_mutex);
    CMAP_FOR_EACH (netdev_flow, node, &dp->flow_table) {
        dp_netdev_remove_flow(dp, netdev_flow);
    }
    ovs_mutex_unlock(&dp->flow_mutex);
}
1138 | ||
/* dpif 'flow_flush' implementation: removes all flows.  Always succeeds. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
1147 | ||
/* Iteration state carried between dpif_netdev_port_dump_next() calls. */
struct dp_netdev_port_state {
    struct cmap_position position;  /* Cursor into dp->ports. */
    char *name;                     /* Owned copy of the last returned port
                                     * name; freed on the next call or in
                                     * port_dump_done(). */
};
1152 | ||
1153 | static int | |
1154 | dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) | |
1155 | { | |
1156 | *statep = xzalloc(sizeof(struct dp_netdev_port_state)); | |
1157 | return 0; | |
1158 | } | |
1159 | ||
72865317 | 1160 | static int |
b0ec0f27 | 1161 | dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, |
4c738a8d | 1162 | struct dpif_port *dpif_port) |
72865317 | 1163 | { |
b0ec0f27 | 1164 | struct dp_netdev_port_state *state = state_; |
72865317 | 1165 | struct dp_netdev *dp = get_dp_netdev(dpif); |
59e6d833 | 1166 | struct cmap_node *node; |
ff073a71 | 1167 | int retval; |
72865317 | 1168 | |
59e6d833 | 1169 | node = cmap_next_position(&dp->ports, &state->position); |
ff073a71 BP |
1170 | if (node) { |
1171 | struct dp_netdev_port *port; | |
5279f8fd | 1172 | |
ff073a71 BP |
1173 | port = CONTAINER_OF(node, struct dp_netdev_port, node); |
1174 | ||
1175 | free(state->name); | |
1176 | state->name = xstrdup(netdev_get_name(port->netdev)); | |
1177 | dpif_port->name = state->name; | |
1178 | dpif_port->type = port->type; | |
1179 | dpif_port->port_no = port->port_no; | |
1180 | ||
1181 | retval = 0; | |
1182 | } else { | |
1183 | retval = EOF; | |
72865317 | 1184 | } |
5279f8fd | 1185 | |
ff073a71 | 1186 | return retval; |
b0ec0f27 BP |
1187 | } |
1188 | ||
1189 | static int | |
4c738a8d | 1190 | dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) |
b0ec0f27 | 1191 | { |
4c738a8d BP |
1192 | struct dp_netdev_port_state *state = state_; |
1193 | free(state->name); | |
b0ec0f27 BP |
1194 | free(state); |
1195 | return 0; | |
72865317 BP |
1196 | } |
1197 | ||
1198 | static int | |
67a4917b | 1199 | dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) |
72865317 BP |
1200 | { |
1201 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
d33ed218 | 1202 | uint64_t new_port_seq; |
5279f8fd BP |
1203 | int error; |
1204 | ||
d33ed218 BP |
1205 | new_port_seq = seq_read(dpif->dp->port_seq); |
1206 | if (dpif->last_port_seq != new_port_seq) { | |
1207 | dpif->last_port_seq = new_port_seq; | |
5279f8fd | 1208 | error = ENOBUFS; |
72865317 | 1209 | } else { |
5279f8fd | 1210 | error = EAGAIN; |
72865317 | 1211 | } |
5279f8fd BP |
1212 | |
1213 | return error; | |
72865317 BP |
1214 | } |
1215 | ||
1216 | static void | |
1217 | dpif_netdev_port_poll_wait(const struct dpif *dpif_) | |
1218 | { | |
1219 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
5279f8fd | 1220 | |
d33ed218 | 1221 | seq_wait(dpif->dp->port_seq, dpif->last_port_seq); |
8a4e3a85 BP |
1222 | } |
1223 | ||
1224 | static struct dp_netdev_flow * | |
1225 | dp_netdev_flow_cast(const struct cls_rule *cr) | |
1226 | { | |
1227 | return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL; | |
72865317 BP |
1228 | } |
1229 | ||
9bbf1c3d DDP |
/* Attempts to take a reference on 'flow'; fails (returns false) if the
 * refcount has already hit zero, i.e. the flow is being destroyed. */
static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow)
{
    return ovs_refcount_try_ref_rcu(&flow->ref_cnt);
}
1234 | ||
79df317f DDP |
1235 | /* netdev_flow_key utilities. |
1236 | * | |
1237 | * netdev_flow_key is basically a miniflow. We use these functions | |
1238 | * (netdev_flow_key_clone, netdev_flow_key_equal, ...) instead of the miniflow | |
1239 | * functions (miniflow_clone_inline, miniflow_equal, ...), because: | |
1240 | * | |
1241 | * - Since we are dealing exclusively with miniflows created by | |
1242 | * miniflow_extract(), if the map is different the miniflow is different. | |
1243 | * Therefore we can be faster by comparing the map and the miniflow in a | |
1244 | * single memcmp(). | |
 * - netdev_flow_key's miniflow always has inline values.
1246 | * - These functions can be inlined by the compiler. | |
1247 | * | |
1248 | * The following assertions make sure that what we're doing with miniflow is | |
1249 | * safe | |
1250 | */ | |
1251 | BUILD_ASSERT_DECL(offsetof(struct miniflow, inline_values) | |
1252 | == sizeof(uint64_t)); | |
1253 | BUILD_ASSERT_DECL(offsetof(struct netdev_flow_key, flow) == 0); | |
1254 | ||
/* Reinterprets 'mf' as a netdev_flow_key.  Valid because 'flow' is the
 * first member of struct netdev_flow_key (see the BUILD_ASSERT_DECLs
 * above), so the two share an address.  NOTE(review): this relies on the
 * miniflow actually being embedded in a netdev_flow_key (or having inline
 * values) — confirm at call sites. */
static inline struct netdev_flow_key *
miniflow_to_netdev_flow_key(const struct miniflow *mf)
{
    return (struct netdev_flow_key *) CONST_CAST(struct miniflow *, mf);
}
1260 | ||
1261 | /* Given the number of bits set in the miniflow map, returns the size of the | |
1262 | * netdev_flow key */ | |
1263 | static inline uint32_t | |
1264 | netdev_flow_key_size(uint32_t flow_u32s) | |
1265 | { | |
1266 | return MINIFLOW_VALUES_SIZE(flow_u32s) | |
1267 | + offsetof(struct miniflow, inline_values); | |
1268 | } | |
1269 | ||
/* Used to compare 'netdev_flow_key's (miniflows) in the exact match cache.
 * 'size' is the byte length previously computed by netdev_flow_key_size(),
 * covering both the map and the inline values. */
static inline bool
netdev_flow_key_equal(const struct netdev_flow_key *a,
                      const struct netdev_flow_key *b,
                      uint32_t size)
{
    return memcmp(a, b, size) == 0;
}
1278 | ||
/* Copies 'size' bytes of key 'src' into 'dst'.  'size' must come from
 * netdev_flow_key_size() for 'src' and 'dst' must have room for it. */
static inline void
netdev_flow_key_clone(struct netdev_flow_key *dst,
                      const struct netdev_flow_key *src,
                      uint32_t size)
{
    memcpy(dst, src, size);
}
1286 | ||
9bbf1c3d DDP |
1287 | static inline bool |
1288 | emc_entry_alive(struct emc_entry *ce) | |
1289 | { | |
1290 | return ce->flow && !ce->flow->dead; | |
1291 | } | |
1292 | ||
1293 | static void | |
1294 | emc_clear_entry(struct emc_entry *ce) | |
1295 | { | |
1296 | if (ce->flow) { | |
1297 | dp_netdev_flow_unref(ce->flow); | |
1298 | ce->flow = NULL; | |
1299 | } | |
1300 | } | |
1301 | ||
/* Points EMC entry 'ce' at 'flow', swapping flow references as needed, and,
 * if 'mf' is nonnull, also replaces the cached miniflow key and its hash.
 * If taking a reference on 'flow' fails (flow being destroyed) the entry is
 * left empty. */
static inline void
emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
                 const struct netdev_flow_key *mf, uint32_t hash)
{
    if (ce->flow != flow) {
        /* Drop the old reference before taking the new one. */
        if (ce->flow) {
            dp_netdev_flow_unref(ce->flow);
        }

        if (dp_netdev_flow_ref(flow)) {
            ce->flow = flow;
        } else {
            ce->flow = NULL;
        }
    }
    if (mf) {
        uint32_t mf_len = netdev_flow_key_size(count_1bits(mf->flow.map));

        netdev_flow_key_clone(&ce->mf, mf, mf_len);
        ce->hash = hash;
        ce->mf_len = mf_len;   /* Cached so lookups avoid recomputing it. */
    }
}
1325 | ||
/* Inserts a mapping from miniflow 'mf' (with hash 'hash') to 'flow' into
 * the exact match cache.  If the key is already cached, just repoints that
 * entry at 'flow'; otherwise evicts an entry per the policy below. */
static inline void
emc_insert(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash,
           struct dp_netdev_flow *flow)
{
    struct emc_entry *to_be_replaced = NULL;
    struct emc_entry *current_entry;

    EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) {
        if (current_entry->hash == hash
            && netdev_flow_key_equal(&current_entry->mf,
                                     miniflow_to_netdev_flow_key(mf),
                                     current_entry->mf_len)) {

            /* We found the entry with the 'mf' miniflow */
            emc_change_entry(current_entry, flow, NULL, 0);
            return;
        }

        /* Replacement policy: put the flow in an empty (not alive) entry, or
         * in the first entry where it can be */
        if (!to_be_replaced
            || (emc_entry_alive(to_be_replaced)
                && !emc_entry_alive(current_entry))
            || current_entry->hash < to_be_replaced->hash) {
            to_be_replaced = current_entry;
        }
    }
    /* We didn't find the miniflow in the cache.
     * The 'to_be_replaced' entry is where the new flow will be stored */

    emc_change_entry(to_be_replaced, flow, miniflow_to_netdev_flow_key(mf),
                     hash);
}
1359 | ||
1360 | static inline struct dp_netdev_flow * | |
1361 | emc_lookup(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash) | |
1362 | { | |
1363 | struct emc_entry *current_entry; | |
1364 | ||
1365 | EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) { | |
1366 | if (current_entry->hash == hash && emc_entry_alive(current_entry) | |
79df317f | 1367 | && netdev_flow_key_equal(¤t_entry->mf, |
0023a1cb DDP |
1368 | miniflow_to_netdev_flow_key(mf), |
1369 | current_entry->mf_len)) { | |
9bbf1c3d DDP |
1370 | |
1371 | /* We found the entry with the 'mf' miniflow */ | |
1372 | return current_entry->flow; | |
1373 | } | |
1374 | } | |
1375 | ||
1376 | return NULL; | |
1377 | } | |
1378 | ||
72865317 | 1379 | static struct dp_netdev_flow * |
4f150744 | 1380 | dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key) |
2c0ea78f | 1381 | { |
8a4e3a85 | 1382 | struct dp_netdev_flow *netdev_flow; |
4f150744 | 1383 | struct cls_rule *rule; |
2c0ea78f | 1384 | |
b7648634 | 1385 | classifier_lookup_miniflow_batch(&dp->cls, &key, &rule, 1); |
4f150744 | 1386 | netdev_flow = dp_netdev_flow_cast(rule); |
2c0ea78f | 1387 | |
8a4e3a85 | 1388 | return netdev_flow; |
2c0ea78f GS |
1389 | } |
1390 | ||
1391 | static struct dp_netdev_flow * | |
1392 | dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow) | |
72865317 | 1393 | { |
1763b4b8 | 1394 | struct dp_netdev_flow *netdev_flow; |
72865317 | 1395 | |
9f361d6b | 1396 | CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0), |
1763b4b8 | 1397 | &dp->flow_table) { |
2c0ea78f | 1398 | if (flow_equal(&netdev_flow->flow, flow)) { |
61e7deb1 | 1399 | return netdev_flow; |
72865317 BP |
1400 | } |
1401 | } | |
8a4e3a85 | 1402 | |
72865317 BP |
1403 | return NULL; |
1404 | } | |
1405 | ||
/* Aggregates 'netdev_flow's per-thread stats buckets into '*stats'.
 * Packet/byte counts are summed; 'used' is the most recent timestamp and
 * 'tcp_flags' the union across buckets. */
static void
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
                    struct dpif_flow_stats *stats)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    memset(stats, 0, sizeof *stats);
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
        ovs_mutex_lock(&bucket->mutex);
        stats->n_packets += bucket->packet_count;
        stats->n_bytes += bucket->byte_count;
        stats->used = MAX(stats->used, bucket->used);
        stats->tcp_flags |= bucket->tcp_flags;
        ovs_mutex_unlock(&bucket->mutex);
    }
}
1423 | ||
6fe09f8c JS |
/* Converts 'netdev_flow' into the dpif representation '*flow'.  The mask is
 * serialized into 'buffer', so 'flow->mask' points into 'buffer' and is
 * only valid as long as 'buffer' is.  'flow->actions' points at the flow's
 * live (RCU-protected) actions, not a copy. */
static void
dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
                            struct ofpbuf *buffer, struct dpif_flow *flow)
{
    struct flow_wildcards wc;
    struct dp_netdev_actions *actions;

    minimask_expand(&netdev_flow->cr.match.mask, &wc);
    odp_flow_key_from_mask(buffer, &wc.masks, &netdev_flow->flow,
                           odp_to_u32(wc.masks.in_port.odp_port),
                           SIZE_MAX, true);
    flow->mask = ofpbuf_data(buffer);
    flow->mask_len = ofpbuf_size(buffer);

    actions = dp_netdev_flow_get_actions(netdev_flow);
    flow->actions = actions->actions;
    flow->actions_len = actions->size;

    get_dpif_flow_stats(netdev_flow, &flow->stats);
}
1444 | ||
/* Parses the netlink-formatted mask ('mask_key', 'mask_key_len') for 'flow'
 * into '*mask'.  If no mask is supplied, synthesizes an exact-match mask
 * for every field whose prerequisites 'flow' satisfies (except registers
 * and metadata).  In all cases the datapath in_port is forced to be an
 * exact match.  Returns 0 on success, EINVAL on a malformed mask. */
static int
dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              const struct nlattr *mask_key,
                              uint32_t mask_key_len, const struct flow *flow,
                              struct flow *mask)
{
    if (mask_key_len) {
        enum odp_key_fitness fitness;

        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
        if (fitness) {
            /* This should not happen: it indicates that
             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
             * disagree on the acceptable form of a mask.  Log the problem
             * as an error, with enough details to enable debugging. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

            if (!VLOG_DROP_ERR(&rl)) {
                struct ds s;

                ds_init(&s);
                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
                                true);
                VLOG_ERR("internal error parsing flow mask %s (%s)",
                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
                ds_destroy(&s);
            }

            return EINVAL;
        }
    } else {
        enum mf_field_id id;
        /* No mask key, unwildcard everything except fields whose
         * prerequisites are not met. */
        memset(mask, 0x0, sizeof *mask);

        for (id = 0; id < MFF_N_IDS; ++id) {
            /* Skip registers and metadata. */
            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
                && id != MFF_METADATA) {
                const struct mf_field *mf = mf_from_id(id);
                if (mf_are_prereqs_ok(mf, flow)) {
                    mf_mask_field(mf, mask);
                }
            }
        }
    }

    /* Force unwildcard the in_port.
     *
     * We need to do this even in the case where we unwildcard "everything"
     * above because "everything" only includes the 16-bit OpenFlow port number
     * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
     * port number mask->in_port.odp_port. */
    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);

    return 0;
}
1503 | ||
/* Parses the netlink-formatted flow key ('key', 'key_len') into '*flow'.
 * Returns 0 on success, or EINVAL if the key is malformed or names an
 * invalid in_port.  ODPP_NONE is accepted as in_port (a flow need not have
 * an input port). */
static int
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              struct flow *flow)
{
    odp_port_t in_port;

    if (odp_flow_key_to_flow(key, key_len, flow)) {
        /* This should not happen: it indicates that odp_flow_key_from_flow()
         * and odp_flow_key_to_flow() disagree on the acceptable form of a
         * flow.  Log the problem as an error, with enough details to enable
         * debugging. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    in_port = flow->in_port.odp_port;
    if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
        return EINVAL;
    }

    return 0;
}
1536 | ||
72865317 | 1537 | static int |
6fe09f8c | 1538 | dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) |
72865317 BP |
1539 | { |
1540 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1763b4b8 | 1541 | struct dp_netdev_flow *netdev_flow; |
bc4a05c6 BP |
1542 | struct flow key; |
1543 | int error; | |
36956a7d | 1544 | |
6fe09f8c | 1545 | error = dpif_netdev_flow_from_nlattrs(get->key, get->key_len, &key); |
bc4a05c6 BP |
1546 | if (error) { |
1547 | return error; | |
1548 | } | |
14608a15 | 1549 | |
2c0ea78f | 1550 | netdev_flow = dp_netdev_find_flow(dp, &key); |
8a4e3a85 | 1551 | |
1763b4b8 | 1552 | if (netdev_flow) { |
6fe09f8c | 1553 | dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->flow); |
61e7deb1 | 1554 | } else { |
5279f8fd | 1555 | error = ENOENT; |
72865317 | 1556 | } |
bc4a05c6 | 1557 | |
5279f8fd | 1558 | return error; |
72865317 BP |
1559 | } |
1560 | ||
/* Creates a new flow in 'dp' matching 'match' with the given actions.
 * Always returns 0.  The flow is published to the flow table (cmap) before
 * the classifier — keep that order so a classifier hit always finds its
 * flow-table entry.  Caller must hold dp->flow_mutex. */
static int
dp_netdev_flow_add(struct dp_netdev *dp, struct match *match,
                   const struct nlattr *actions, size_t actions_len)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct dp_netdev_flow *netdev_flow;

    netdev_flow = xzalloc(sizeof *netdev_flow);
    /* 'flow' is const in the struct; initialize through a cast. */
    *CONST_CAST(struct flow *, &netdev_flow->flow) = match->flow;

    ovs_refcount_init(&netdev_flow->ref_cnt);

    ovsthread_stats_init(&netdev_flow->stats);

    ovsrcu_set(&netdev_flow->actions,
               dp_netdev_actions_create(actions, actions_len));

    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
                  match, NETDEV_RULE_PRIORITY);
    cmap_insert(&dp->flow_table,
                CONST_CAST(struct cmap_node *, &netdev_flow->node),
                flow_hash(&match->flow, 0));
    classifier_insert(&dp->cls,
                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));

    /* Debug-log the new flow; format only when DBG logging is enabled. */
    if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
        struct ds ds = DS_EMPTY_INITIALIZER;

        ds_put_cstr(&ds, "flow_add: ");
        match_format(match, &ds, OFP_DEFAULT_PRIORITY);
        ds_put_cstr(&ds, ", actions:");
        format_odp_actions(&ds, actions, actions_len);

        VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));

        ds_destroy(&ds);
    }

    return 0;
}
1601 | ||
1602 | static void | |
1763b4b8 | 1603 | clear_stats(struct dp_netdev_flow *netdev_flow) |
72865317 | 1604 | { |
679ba04c BP |
1605 | struct dp_netdev_flow_stats *bucket; |
1606 | size_t i; | |
1607 | ||
1608 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) { | |
1609 | ovs_mutex_lock(&bucket->mutex); | |
1610 | bucket->used = 0; | |
1611 | bucket->packet_count = 0; | |
1612 | bucket->byte_count = 0; | |
1613 | bucket->tcp_flags = 0; | |
1614 | ovs_mutex_unlock(&bucket->mutex); | |
1615 | } | |
72865317 BP |
1616 | } |
1617 | ||
/* Implements dpif_flow_put() for the userspace datapath: creates or modifies
 * the flow described by 'put' under 'dp->flow_mutex'.
 *
 * Returns 0 on success; otherwise EFBIG (flow table full), ENOENT (modify of
 * a nonexistent flow), EEXIST (create of an existing flow), EINVAL
 * (overlapping flow), or an error from netlink attribute parsing. */
static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct miniflow miniflow;
    struct match match;
    int error;

    /* Decode the flow key and mask from their netlink encodings before
     * taking any locks. */
    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
    if (error) {
        return error;
    }
    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
                                          put->mask, put->mask_len,
                                          &match.flow, &match.wc.masks);
    if (error) {
        return error;
    }
    miniflow_init(&miniflow, &match.flow);

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
    if (!netdev_flow) {
        if (put->flags & DPIF_FP_CREATE) {
            if (cmap_count(&dp->flow_table) < MAX_FLOWS) {
                if (put->stats) {
                    /* New flow: report all-zero statistics. */
                    memset(put->stats, 0, sizeof *put->stats);
                }
                error = dp_netdev_flow_add(dp, &match, put->actions,
                                           put->actions_len);
            } else {
                error = EFBIG;
            }
        } else {
            error = ENOENT;
        }
    } else {
        if (put->flags & DPIF_FP_MODIFY
            && flow_equal(&match.flow, &netdev_flow->flow)) {
            struct dp_netdev_actions *new_actions;
            struct dp_netdev_actions *old_actions;

            new_actions = dp_netdev_actions_create(put->actions,
                                                   put->actions_len);

            /* Swap in the new actions; readers may still hold the old set,
             * so its destruction is postponed until an RCU grace period. */
            old_actions = dp_netdev_flow_get_actions(netdev_flow);
            ovsrcu_set(&netdev_flow->actions, new_actions);

            if (put->stats) {
                get_dpif_flow_stats(netdev_flow, put->stats);
            }
            if (put->flags & DPIF_FP_ZERO_STATS) {
                clear_stats(netdev_flow);
            }

            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
        } else if (put->flags & DPIF_FP_CREATE) {
            error = EEXIST;
        } else {
            /* Overlapping flow. */
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&dp->flow_mutex);
    miniflow_destroy(&miniflow);

    return error;
}
1687 | ||
/* Implements dpif_flow_del(): parses the netlink flow key, looks the flow up
 * and removes it under 'dp->flow_mutex'.  If 'del->stats' is nonnull, the
 * flow's final statistics are returned through it.  Returns 0 on success,
 * ENOENT if no such flow exists, or a key-parsing error. */
static int
dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_find_flow(dp, &key);
    if (netdev_flow) {
        if (del->stats) {
            /* Snapshot statistics before the flow is unlinked. */
            get_dpif_flow_stats(netdev_flow, del->stats);
        }
        dp_netdev_remove_flow(dp, netdev_flow);
    } else {
        error = ENOENT;
    }
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}
1715 | ||
ac64794a BP |
/* State shared by all threads participating in one flow dump operation. */
struct dpif_netdev_flow_dump {
    struct dpif_flow_dump up;   /* Generic dpif dump state (must be first). */
    struct cmap_position pos;   /* Iteration position in dp->flow_table. */
    int status;                 /* 0 while flows remain, EOF when done. */
    struct ovs_mutex mutex;     /* Protects 'pos' and 'status'. */
};
1722 | ||
ac64794a BP |
/* Downcasts a generic 'dump' to its netdev-datapath implementation. */
static struct dpif_netdev_flow_dump *
dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
{
    return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
}
1728 | ||
ac64794a BP |
1729 | static struct dpif_flow_dump * |
1730 | dpif_netdev_flow_dump_create(const struct dpif *dpif_) | |
e723fd32 | 1731 | { |
ac64794a | 1732 | struct dpif_netdev_flow_dump *dump; |
e723fd32 | 1733 | |
ac64794a BP |
1734 | dump = xmalloc(sizeof *dump); |
1735 | dpif_flow_dump_init(&dump->up, dpif_); | |
9f361d6b | 1736 | memset(&dump->pos, 0, sizeof dump->pos); |
ac64794a BP |
1737 | dump->status = 0; |
1738 | ovs_mutex_init(&dump->mutex); | |
1739 | ||
1740 | return &dump->up; | |
e723fd32 JS |
1741 | } |
1742 | ||
/* Releases a flow dump created by dpif_netdev_flow_dump_create().
 * Always returns 0: this implementation has no deferred dump errors. */
static int
dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
{
    struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);

    ovs_mutex_destroy(&dump->mutex);
    free(dump);
    return 0;
}
1752 | ||
ac64794a BP |
/* Per-thread flow dump state: stack-like buffers into which each dumped
 * flow's key and mask are serialized, so the pointers handed back in
 * 'struct dpif_flow' stay valid until this thread's next batch. */
struct dpif_netdev_flow_dump_thread {
    struct dpif_flow_dump_thread up;  /* Generic per-thread state (first). */
    struct dpif_netdev_flow_dump *dump; /* Shared dump this thread serves. */
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH]; /* Key storage. */
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH]; /* Mask storage. */
};
1759 | ||
/* Downcasts a generic per-thread dump state to the netdev implementation. */
static struct dpif_netdev_flow_dump_thread *
dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
{
    return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up);
}
1765 | ||
1766 | static struct dpif_flow_dump_thread * | |
1767 | dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_) | |
1768 | { | |
1769 | struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_); | |
1770 | struct dpif_netdev_flow_dump_thread *thread; | |
1771 | ||
1772 | thread = xmalloc(sizeof *thread); | |
1773 | dpif_flow_dump_thread_init(&thread->up, &dump->up); | |
1774 | thread->dump = dump; | |
1775 | return &thread->up; | |
1776 | } | |
1777 | ||
/* Frees the per-thread dump state; the shared dump itself is released
 * separately by dpif_netdev_flow_dump_destroy(). */
static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    free(dpif_netdev_flow_dump_thread_cast(thread_));
}
1786 | ||
/* Fetches up to 'max_flows' (capped at FLOW_DUMP_MAX_BATCH) flows into
 * 'flows' and returns how many were produced.  Returns 0 once the table has
 * been fully traversed.  Flow pointers are collected under 'dump->mutex';
 * serialization into the caller-visible buffers happens afterwards, outside
 * the lock, into this thread's private keybuf/maskbuf storage. */
static int
dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                           struct dpif_flow *flows, int max_flows)
{
    struct dpif_netdev_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_cast(thread_);
    struct dpif_netdev_flow_dump *dump = thread->dump;
    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
    struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
    int n_flows = 0;
    int i;

    ovs_mutex_lock(&dump->mutex);
    if (!dump->status) {
        /* Advance the shared cmap position, remembering each visited flow. */
        for (n_flows = 0; n_flows < MIN(max_flows, FLOW_DUMP_MAX_BATCH);
             n_flows++) {
            struct cmap_node *node;

            node = cmap_next_position(&dp->flow_table, &dump->pos);
            if (!node) {
                /* Table exhausted: make later calls return immediately. */
                dump->status = EOF;
                break;
            }
            netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow,
                                                 node);
        }
    }
    ovs_mutex_unlock(&dump->mutex);

    for (i = 0; i < n_flows; i++) {
        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
        struct odputil_keybuf *keybuf = &thread->keybuf[i];
        struct dp_netdev_flow *netdev_flow = netdev_flows[i];
        struct dpif_flow *f = &flows[i];
        struct dp_netdev_actions *dp_actions;
        struct flow_wildcards wc;
        struct ofpbuf buf;

        minimask_expand(&netdev_flow->cr.match.mask, &wc);

        /* Key. */
        ofpbuf_use_stack(&buf, keybuf, sizeof *keybuf);
        odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
                               netdev_flow->flow.in_port.odp_port, true);
        f->key = ofpbuf_data(&buf);
        f->key_len = ofpbuf_size(&buf);

        /* Mask. */
        ofpbuf_use_stack(&buf, maskbuf, sizeof *maskbuf);
        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
                               odp_to_u32(wc.masks.in_port.odp_port),
                               SIZE_MAX, true);
        f->mask = ofpbuf_data(&buf);
        f->mask_len = ofpbuf_size(&buf);

        /* Actions.  NOTE(review): 'f->actions' points into an RCU-protected
         * actions set; presumably the caller consumes it within the current
         * grace period — confirm against dpif dump contract. */
        dp_actions = dp_netdev_flow_get_actions(netdev_flow);
        f->actions = dp_actions->actions;
        f->actions_len = dp_actions->size;

        /* Stats. */
        get_dpif_flow_stats(netdev_flow, &f->stats);
    }

    return n_flows;
}
1854 | ||
/* Implements dpif_execute(): runs 'execute->actions' on 'execute->packet'.
 * Returns EINVAL if the packet is shorter than an Ethernet header or longer
 * than UINT16_MAX; 0 otherwise.
 *
 * Thread-safety analysis is disabled because the non-pmd mutex is taken only
 * conditionally, depending on which thread calls in. */
static int
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_pmd_thread *pmd;
    struct dpif_packet packet, *pp;
    struct pkt_metadata *md = &execute->md;

    if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
        ofpbuf_size(execute->packet) > UINT16_MAX) {
        return EINVAL;
    }

    /* Wrap the caller's ofpbuf in a dpif_packet by value. */
    packet.ofpbuf = *execute->packet;
    pp = &packet;

    /* Tries finding the 'pmd'. If NULL is returned, that means
     * the current thread is a non-pmd thread and should use
     * dp_netdev_get_nonpmd(). */
    pmd = ovsthread_getspecific(dp->per_pmd_key);
    if (!pmd) {
        pmd = dp_netdev_get_nonpmd(dp);
    }

    /* If the current thread is non-pmd thread, acquires
     * the 'non_pmd_mutex'.  (Real pmd threads own their context and need no
     * lock here.) */
    if (pmd->core_id == NON_PMD_CORE_ID) {
        ovs_mutex_lock(&dp->non_pmd_mutex);
    }
    dp_netdev_execute_actions(pmd, &pp, 1, false, md, execute->actions,
                              execute->actions_len);
    if (pmd->core_id == NON_PMD_CORE_ID) {
        ovs_mutex_unlock(&dp->non_pmd_mutex);
    }

    /* Even though may_steal is set to false, some actions could modify or
     * reallocate the ofpbuf memory. We need to pass those changes to the
     * caller */
    *execute->packet = packet.ofpbuf;

    return 0;
}
1898 | ||
1a0c894a BP |
1899 | static void |
1900 | dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) | |
1901 | { | |
1902 | size_t i; | |
1903 | ||
1904 | for (i = 0; i < n_ops; i++) { | |
1905 | struct dpif_op *op = ops[i]; | |
1906 | ||
1907 | switch (op->type) { | |
1908 | case DPIF_OP_FLOW_PUT: | |
1909 | op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put); | |
1910 | break; | |
1911 | ||
1912 | case DPIF_OP_FLOW_DEL: | |
1913 | op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del); | |
1914 | break; | |
1915 | ||
1916 | case DPIF_OP_EXECUTE: | |
1917 | op->error = dpif_netdev_execute(dpif, &op->u.execute); | |
1918 | break; | |
6fe09f8c JS |
1919 | |
1920 | case DPIF_OP_FLOW_GET: | |
1921 | op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get); | |
1922 | break; | |
1a0c894a BP |
1923 | } |
1924 | } | |
1925 | } | |
1926 | ||
f2eee189 AW |
1927 | /* Returns true if the configuration for rx queues or cpu mask |
1928 | * is changed. */ | |
1929 | static bool | |
1930 | pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask) | |
1931 | { | |
1932 | if (dp->n_dpdk_rxqs != rxqs) { | |
1933 | return true; | |
1934 | } else { | |
1935 | if (dp->pmd_cmask != NULL && cmask != NULL) { | |
1936 | return strcmp(dp->pmd_cmask, cmask); | |
1937 | } else { | |
1938 | return (dp->pmd_cmask != NULL || cmask != NULL); | |
1939 | } | |
1940 | } | |
1941 | } | |
1942 | ||
/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes.
 *
 * NOTE(review): on a netdev_set_multiq() failure this returns with every pmd
 * thread already destroyed and this port's rxqs closed, without restoring
 * them — the datapath is left partially torn down.  Verify whether callers
 * tolerate that or whether a rollback/restore is required here. */
static int
dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    if (pmd_config_changed(dp, n_rxqs, cmask)) {
        struct dp_netdev_port *port;

        /* All pmd threads must be stopped before queues are reconfigured. */
        dp_netdev_destroy_all_pmds(dp);

        CMAP_FOR_EACH (port, node, &dp->ports) {
            if (netdev_is_pmd(port->netdev)) {
                int i, err;

                /* Closes the existing 'rxq's. */
                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                    netdev_rxq_close(port->rxq[i]);
                    port->rxq[i] = NULL;
                }

                /* Sets the new rx queue config. */
                err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores(),
                                        n_rxqs);
                if (err) {
                    VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
                             " %u", netdev_get_name(port->netdev),
                             n_rxqs);
                    return err;
                }

                /* If the set_multiq() above succeeds, reopens the 'rxq's. */
                port->rxq = xrealloc(port->rxq, sizeof *port->rxq
                                     * netdev_n_rxq(port->netdev));
                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                    netdev_rxq_open(port->netdev, &port->rxq[i], i);
                }
            }
        }
        dp->n_dpdk_rxqs = n_rxqs;

        /* Reconfigures the cpu mask. */
        ovs_numa_set_cpu_mask(cmask);
        free(dp->pmd_cmask);
        dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL;

        /* Restores the non-pmd. */
        dp_netdev_set_nonpmd(dp);
        /* Restores all pmd threads. */
        dp_netdev_reset_pmd_threads(dp);
    }

    return 0;
}
1997 | ||
5bf93d67 EJ |
/* Implements dpif_queue_to_priority(): in the userspace datapath QoS queue
 * ids map directly onto priorities, so this is the identity mapping. */
static int
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                              uint32_t queue_id, uint32_t *priority)
{
    *priority = queue_id;
    return 0;
}
2005 | ||
72865317 | 2006 | \f |
a84cb64a BP |
2007 | /* Creates and returns a new 'struct dp_netdev_actions', with a reference count |
2008 | * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of | |
2009 | * 'ofpacts'. */ | |
2010 | struct dp_netdev_actions * | |
2011 | dp_netdev_actions_create(const struct nlattr *actions, size_t size) | |
2012 | { | |
2013 | struct dp_netdev_actions *netdev_actions; | |
2014 | ||
2015 | netdev_actions = xmalloc(sizeof *netdev_actions); | |
a84cb64a BP |
2016 | netdev_actions->actions = xmemdup(actions, size); |
2017 | netdev_actions->size = size; | |
2018 | ||
2019 | return netdev_actions; | |
2020 | } | |
2021 | ||
/* Returns the current actions of 'flow' via an RCU-protected read; the
 * result stays valid for the caller's current RCU grace period. */
struct dp_netdev_actions *
dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
{
    return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
}
2027 | ||
61e7deb1 BP |
/* Frees an actions set and its copied attribute buffer.  Used directly and
 * as an ovsrcu_postpone() callback after the actions are unpublished. */
static void
dp_netdev_actions_free(struct dp_netdev_actions *actions)
{
    free(actions->actions);
    free(actions);
}
2034 | \f | |
e4cfed38 | 2035 | |
/* Polls one receive queue: receives a batch of packets from 'rxq' and, on
 * success, pushes them through the datapath on behalf of 'pmd'.  EAGAIN
 * (nothing to receive) and EOPNOTSUPP are silently ignored; other errors are
 * rate-limit logged. */
static void
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_port *port,
                           struct netdev_rxq *rxq)
{
    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    int error, cnt;

    error = netdev_rxq_recv(rxq, packets, &cnt);
    if (!error) {
        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);

        /* Fresh input: reset the per-thread recirculation depth counter. */
        *recirc_depth_get() = 0;
        dp_netdev_input(pmd, packets, cnt, &md);
    } else if (error != EAGAIN && error != EOPNOTSUPP) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
                    netdev_get_name(port->netdev), ovs_strerror(error));
    }
}
2057 | ||
/* Implements dpif_run(): polls every rx queue of every non-pmd port on
 * behalf of the non-pmd thread context, under 'dp->non_pmd_mutex'.
 * Pmd-capable ports are serviced by their dedicated pmd threads instead. */
static void
dpif_netdev_run(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_nonpmd(dp);

    ovs_mutex_lock(&dp->non_pmd_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                dp_netdev_process_rxq_port(non_pmd, port, port->rxq[i]);
            }
        }
    }
    ovs_mutex_unlock(&dp->non_pmd_mutex);
}
2077 | ||
/* Implements dpif_wait(): registers a wakeup for every rx queue of every
 * non-pmd port so the poll loop sleeps until a packet arrives.  Holds the
 * global 'dp_netdev_mutex' while walking the port map. */
static void
dpif_netdev_wait(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);

    ovs_mutex_lock(&dp_netdev_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                netdev_rxq_wait(port->rxq[i]);
            }
        }
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}
2096 | ||
/* One rx queue assigned to a pmd thread's poll list, together with the port
 * it belongs to (the port is referenced while it is on the list). */
struct rxq_poll {
    struct dp_netdev_port *port;  /* Owning port; holds a port ref. */
    struct netdev_rxq *rx;        /* Queue to poll. */
};
2101 | ||
/* (Re)builds 'pmd''s poll list: drops references to the previously polled
 * ports, then distributes the rx queues of all pmd ports on this pmd's numa
 * node round-robin across the pmds of that node, keeping the queues whose
 * index maps to 'pmd->index'.  Reallocates '*ppoll_list' as needed and
 * returns the new number of entries. */
static int
pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
                struct rxq_poll **ppoll_list, int poll_cnt)
{
    struct rxq_poll *poll_list = *ppoll_list;
    struct dp_netdev_port *port;
    int n_pmds_on_numa, index, i;

    /* Simple scheduler for netdev rx polling. */
    for (i = 0; i < poll_cnt; i++) {
        /* Release the references taken when the old list was built. */
        port_unref(poll_list[i].port);
    }

    poll_cnt = 0;
    n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
    index = 0;

    CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
        /* Calls port_try_ref() to prevent the main thread
         * from deleting the port. */
        if (port_try_ref(port)) {
            if (netdev_is_pmd(port->netdev)
                && netdev_get_numa_id(port->netdev) == pmd->numa_id) {
                int i;

                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                    /* Round-robin: this pmd keeps every n_pmds_on_numa-th
                     * queue, offset by its own index. */
                    if ((index % n_pmds_on_numa) == pmd->index) {
                        poll_list = xrealloc(poll_list,
                                        sizeof *poll_list * (poll_cnt + 1));

                        /* A longer-lived reference for the list entry. */
                        port_ref(port);
                        poll_list[poll_cnt].port = port;
                        poll_list[poll_cnt].rx = port->rxq[i];
                        poll_cnt++;
                    }
                    index++;
                }
            }
            /* Unrefs the port_try_ref(). */
            port_unref(port);
        }
    }

    *ppoll_list = poll_list;
    return poll_cnt;
}
2148 | ||
/* Main loop of a poll-mode-driver (pmd) thread: pins itself to its core,
 * builds its poll list, then busy-polls the assigned rx queues.  Every 1024
 * iterations it quiesces for RCU and checks 'change_seq'; a change makes it
 * rebuild its queue assignment (reload), and a set exit latch makes it clean
 * up and terminate. */
static void *
pmd_thread_main(void *f_)
{
    struct dp_netdev_pmd_thread *pmd = f_;
    unsigned int lc = 0;
    struct rxq_poll *poll_list;
    unsigned int port_seq = PMD_INITIAL_SEQ;
    int poll_cnt;
    int i;

    poll_cnt = 0;
    poll_list = NULL;

    /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
    ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
    pmd_thread_setaffinity_cpu(pmd->core_id);
reload:
    emc_cache_init(&pmd->flow_cache);
    poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);

    for (;;) {
        int i;

        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(pmd, poll_list[i].port, poll_list[i].rx);
        }

        if (lc++ > 1024) {
            unsigned int seq;

            lc = 0;

            /* Let RCU reclaim deferred memory; this is the only quiescent
             * point in the busy loop. */
            ovsrcu_quiesce();

            atomic_read_relaxed(&pmd->change_seq, &seq);
            if (seq != port_seq) {
                /* Configuration changed: leave the poll loop to reload. */
                port_seq = seq;
                break;
            }
        }
    }

    emc_cache_uninit(&pmd->flow_cache);

    if (!latch_is_set(&pmd->exit_latch)){
        goto reload;
    }

    /* Shutting down: release the port references held by the poll list. */
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    free(poll_list);
    return NULL;
}
2204 | ||
6b31e073 RW |
/* Blocks upcalls by write-locking 'dp->upcall_rwlock'; packet-processing
 * threads take the read lock before invoking the upcall callback. */
static void
dp_netdev_disable_upcall(struct dp_netdev *dp)
    OVS_ACQUIRES(dp->upcall_rwlock)
{
    fat_rwlock_wrlock(&dp->upcall_rwlock);
}
2211 | ||
/* dpif wrapper around dp_netdev_disable_upcall(); analysis is suppressed
 * because the lock is released later via the matching enable call. */
static void
dpif_netdev_disable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_disable_upcall(dp);
}
2219 | ||
/* Re-enables upcalls by releasing the write lock taken in
 * dp_netdev_disable_upcall(). */
static void
dp_netdev_enable_upcall(struct dp_netdev *dp)
    OVS_RELEASES(dp->upcall_rwlock)
{
    fat_rwlock_unlock(&dp->upcall_rwlock);
}
2226 | ||
/* dpif wrapper around dp_netdev_enable_upcall(). */
static void
dpif_netdev_enable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_enable_upcall(dp);
}
2234 | ||
65f13b50 AW |
/* Returns the pointer to the dp_netdev_pmd_thread for non-pmd threads.
 * That entry is keyed by NON_PMD_CORE_ID in 'dp->poll_threads' and is
 * created in dp_netdev_set_nonpmd(), so it must always exist. */
static struct dp_netdev_pmd_thread *
dp_netdev_get_nonpmd(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread *pmd;
    struct cmap_node *pnode;

    pnode = cmap_find(&dp->poll_threads, hash_int(NON_PMD_CORE_ID, 0));
    ovs_assert(pnode);
    pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);

    return pmd;
}
2248 | ||
f2eee189 AW |
/* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads.  No actual
 * thread is spawned; the structure only provides a per-thread context
 * (flow cache etc.) for code running outside dedicated pmd threads. */
static void
dp_netdev_set_nonpmd(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread *non_pmd;

    non_pmd = xzalloc(sizeof *non_pmd);
    dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
                            OVS_NUMA_UNSPEC);
}
2259 | ||
/* Configures the 'pmd' based on the input argument: records its datapath,
 * scheduling index, core and numa node, initializes its exit latch and
 * change sequence, and publishes it in 'dp->poll_threads' keyed by core id. */
static void
dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
                        int index, int core_id, int numa_id)
{
    pmd->dp = dp;
    pmd->index = index;
    pmd->core_id = core_id;
    pmd->numa_id = numa_id;
    latch_init(&pmd->exit_latch);
    atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);
    /* init the 'flow_cache' since there is no
     * actual thread created for NON_PMD_CORE_ID.  (Real pmd threads
     * initialize their own cache in pmd_thread_main().) */
    if (core_id == NON_PMD_CORE_ID) {
        emc_cache_init(&pmd->flow_cache);
    }
    cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
                hash_int(core_id, 0));
}
2279 | ||
/* Stops the pmd thread, removes it from the 'dp->poll_threads'
 * and destroys the struct.  For a real pmd thread this sets the exit latch,
 * kicks the thread out of its poll loop, joins it and unpins its core; for
 * the non-pmd entry it only tears down the flow cache. */
static void
dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
{
    /* Uninit the 'flow_cache' since there is
     * no actual thread uninit it. */
    if (pmd->core_id == NON_PMD_CORE_ID) {
        emc_cache_uninit(&pmd->flow_cache);
    } else {
        latch_set(&pmd->exit_latch);
        /* Bumps change_seq so the thread notices the latch promptly. */
        dp_netdev_reload_pmd__(pmd);
        ovs_numa_unpin_core(pmd->core_id);
        xpthread_join(pmd->thread, NULL);
    }
    cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
    latch_destroy(&pmd->exit_latch);
    free(pmd);
}
6c3eee82 | 2299 | |
65f13b50 AW |
/* Destroys all pmd threads. */
static void
dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread *pmd;

    /* NOTE(review): each iteration removes and frees the current node while
     * the cmap cursor is live — confirm cmap iteration tolerates removal of
     * the node just visited. */
    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
        dp_netdev_del_pmd(pmd);
    }
}
6c3eee82 | 2310 | |
65f13b50 AW |
/* Deletes all pmd threads on numa node 'numa_id'. */
static void
dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
{
    struct dp_netdev_pmd_thread *pmd;

    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
        if (pmd->numa_id == numa_id) {
            dp_netdev_del_pmd(pmd);
        }
    }
}
6c3eee82 | 2323 | |
65f13b50 AW |
/* Checks the numa node id of 'netdev' and starts pmd threads for
 * the numa node.  A no-op if pmd threads already exist there; otherwise
 * spawns one pmd per unpinned core (capped at NR_PMD_THREADS unless a cpu
 * mask was configured). */
static void
dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
{
    int n_pmds;

    if (!ovs_numa_numa_id_is_valid(numa_id)) {
        VLOG_ERR("Cannot create pmd threads due to numa id (%d)"
                 "invalid", numa_id);
        return ;
    }

    n_pmds = get_n_pmd_threads_on_numa(dp, numa_id);

    /* If there are already pmd threads created for the numa node
     * in which 'netdev' is on, do nothing. Else, creates the
     * pmd threads for the numa node. */
    if (!n_pmds) {
        int can_have, n_unpinned, i;

        n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
        if (!n_unpinned) {
            VLOG_ERR("Cannot create pmd threads due to out of unpinned "
                     "cores on numa node");
            return;
        }

        /* If cpu mask is specified, uses all unpinned cores, otherwise
         * tries creating NR_PMD_THREADS pmd threads. */
        can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
        for (i = 0; i < can_have; i++) {
            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
            /* Pin each new pmd to its own free core on this node. */
            int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);

            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
            /* Each thread will distribute all devices rx-queues among
             * themselves. */
            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
        }
        VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
    }
}
6c3eee82 | 2368 | \f |
679ba04c BP |
2369 | static void * |
2370 | dp_netdev_flow_stats_new_cb(void) | |
2371 | { | |
2372 | struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
2373 | ovs_mutex_init(&bucket->mutex); | |
2374 | return bucket; | |
2375 | } | |
2376 | ||
f2eee189 AW |
/* Called after pmd threads config change. Restarts pmd threads with
 * new configuration.  Walks all ports and (re)creates pmd threads on every
 * numa node that hosts a pmd-capable port. */
static void
dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
{
    struct dp_netdev_port *port;

    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (netdev_is_pmd(port->netdev)) {
            int numa_id = netdev_get_numa_id(port->netdev);

            /* No-op if threads already exist on that node. */
            dp_netdev_set_pmds_on_numa(dp, numa_id);
        }
    }
}
2392 | ||
/* Credits 'cnt' packets totalling 'size' bytes with TCP flags 'tcp_flags'
 * to 'netdev_flow', updating this thread's stats bucket under its mutex and
 * refreshing the flow's last-used timestamp. */
static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
                    int cnt, int size,
                    uint16_t tcp_flags)
{
    long long int now = time_msec();
    struct dp_netdev_flow_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
                                        dp_netdev_flow_stats_new_cb);

    ovs_mutex_lock(&bucket->mutex);
    /* MAX() guards against time going backwards across buckets. */
    bucket->used = MAX(now, bucket->used);
    bucket->packet_count += cnt;
    bucket->byte_count += size;
    bucket->tcp_flags |= tcp_flags;
    ovs_mutex_unlock(&bucket->mutex);
}
2411 | ||
51852a57 BP |
2412 | static void * |
2413 | dp_netdev_stats_new_cb(void) | |
2414 | { | |
2415 | struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
2416 | ovs_mutex_init(&bucket->mutex); | |
2417 | return bucket; | |
2418 | } | |
2419 | ||
/* Adds 'cnt' to the datapath-wide counter of kind 'type' (hit/miss/lost...)
 * in this thread's stats bucket, under the bucket mutex. */
static void
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
{
    struct dp_netdev_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
    ovs_mutex_lock(&bucket->mutex);
    bucket->n[type] += cnt;
    ovs_mutex_unlock(&bucket->mutex);
}
2430 | ||
623540e4 EJ |
/* Delivers an upcall of 'type' for 'packet_'/'flow' to the datapath's
 * registered upcall callback, passing through 'userdata', 'actions' and
 * 'put_actions'.  Counts misses, returns ENODEV if no callback is
 * registered, and otherwise returns the callback's result.  Optionally logs
 * the flow key and packet at DBG level (rate limited). */
static int
dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
                 struct flow *flow, struct flow_wildcards *wc,
                 enum dpif_upcall_type type, const struct nlattr *userdata,
                 struct ofpbuf *actions, struct ofpbuf *put_actions)
{
    struct ofpbuf *packet = &packet_->ofpbuf;

    if (type == DPIF_UC_MISS) {
        dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
    }

    if (OVS_UNLIKELY(!dp->upcall_cb)) {
        return ENODEV;
    }

    if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        struct ofpbuf key;
        char *packet_str;

        /* Serialize the flow key only for the debug message. */
        ofpbuf_init(&key, 0);
        odp_flow_key_from_flow(&key, flow, &wc->masks, flow->in_port.odp_port,
                               true);

        packet_str = ofp_packet_to_string(ofpbuf_data(packet),
                                          ofpbuf_size(packet));

        odp_flow_key_format(ofpbuf_data(&key), ofpbuf_size(&key), &ds);

        VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
                 dpif_upcall_type_to_string(type), ds_cstr(&ds), packet_str);

        ofpbuf_uninit(&key);
        free(packet_str);
        ds_destroy(&ds);
    }

    return dp->upcall_cb(packet, flow, type, userdata, actions, wc,
                         put_actions, dp->upcall_aux);
}
2472 | ||
9bbf1c3d DDP |
2473 | static inline uint32_t |
2474 | dpif_netdev_packet_get_dp_hash(struct dpif_packet *packet, | |
2475 | const struct miniflow *mf) | |
2476 | { | |
2477 | uint32_t hash; | |
2478 | ||
2479 | hash = dpif_packet_get_dp_hash(packet); | |
2480 | if (OVS_UNLIKELY(!hash)) { | |
2481 | hash = miniflow_hash_5tuple(mf, 0); | |
2482 | dpif_packet_set_dp_hash(packet, hash); | |
2483 | } | |
2484 | return hash; | |
2485 | } | |
2486 | ||
567bbb2e | 2487 | struct packet_batch { |
8cbf4f47 DDP |
2488 | unsigned int packet_count; |
2489 | unsigned int byte_count; | |
2490 | uint16_t tcp_flags; | |
2491 | ||
2492 | struct dp_netdev_flow *flow; | |
2493 | ||
2494 | struct dpif_packet *packets[NETDEV_MAX_RX_BATCH]; | |
2495 | struct pkt_metadata md; | |
2496 | }; | |
2497 | ||
2498 | static inline void | |
9bbf1c3d DDP |
2499 | packet_batch_update(struct packet_batch *batch, struct dpif_packet *packet, |
2500 | const struct miniflow *mf) | |
8cbf4f47 DDP |
2501 | { |
2502 | batch->tcp_flags |= miniflow_get_tcp_flags(mf); | |
2503 | batch->packets[batch->packet_count++] = packet; | |
2504 | batch->byte_count += ofpbuf_size(&packet->ofpbuf); | |
2505 | } | |
2506 | ||
2507 | static inline void | |
567bbb2e | 2508 | packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow, |
84d6d5eb | 2509 | struct pkt_metadata *md) |
8cbf4f47 DDP |
2510 | { |
2511 | batch->flow = flow; | |
2512 | batch->md = *md; | |
8cbf4f47 DDP |
2513 | |
2514 | batch->packet_count = 0; | |
2515 | batch->byte_count = 0; | |
2516 | batch->tcp_flags = 0; | |
8cbf4f47 DDP |
2517 | } |
2518 | ||
2519 | static inline void | |
65f13b50 AW |
2520 | packet_batch_execute(struct packet_batch *batch, |
2521 | struct dp_netdev_pmd_thread *pmd) | |
8cbf4f47 DDP |
2522 | { |
2523 | struct dp_netdev_actions *actions; | |
2524 | struct dp_netdev_flow *flow = batch->flow; | |
2525 | ||
2526 | dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count, | |
2527 | batch->tcp_flags); | |
2528 | ||
2529 | actions = dp_netdev_flow_get_actions(flow); | |
2530 | ||
65f13b50 AW |
2531 | dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true, |
2532 | &batch->md, actions->actions, actions->size); | |
8cbf4f47 | 2533 | |
65f13b50 | 2534 | dp_netdev_count_packet(pmd->dp, DP_STAT_HIT, batch->packet_count); |
8cbf4f47 DDP |
2535 | } |
2536 | ||
9bbf1c3d DDP |
2537 | static inline bool |
2538 | dp_netdev_queue_batches(struct dpif_packet *pkt, struct pkt_metadata *md, | |
2539 | struct dp_netdev_flow *flow, const struct miniflow *mf, | |
2540 | struct packet_batch *batches, size_t *n_batches, | |
2541 | size_t max_batches) | |
2542 | { | |
2543 | struct packet_batch *batch = NULL; | |
2544 | int j; | |
2545 | ||
2546 | if (OVS_UNLIKELY(!flow)) { | |
2547 | return false; | |
2548 | } | |
2549 | /* XXX: This O(n^2) algortihm makes sense if we're operating under the | |
2550 | * assumption that the number of distinct flows (and therefore the | |
2551 | * number of distinct batches) is quite small. If this turns out not | |
2552 | * to be the case, it may make sense to pre sort based on the | |
2553 | * netdev_flow pointer. That done we can get the appropriate batching | |
2554 | * in O(n * log(n)) instead. */ | |
2555 | for (j = *n_batches - 1; j >= 0; j--) { | |
2556 | if (batches[j].flow == flow) { | |
2557 | batch = &batches[j]; | |
2558 | packet_batch_update(batch, pkt, mf); | |
2559 | return true; | |
2560 | } | |
2561 | } | |
2562 | if (OVS_UNLIKELY(*n_batches >= max_batches)) { | |
2563 | return false; | |
2564 | } | |
2565 | ||
2566 | batch = &batches[(*n_batches)++]; | |
2567 | packet_batch_init(batch, flow, md); | |
2568 | packet_batch_update(batch, pkt, mf); | |
2569 | return true; | |
2570 | } | |
2571 | ||
2572 | static inline void | |
2573 | dpif_packet_swap(struct dpif_packet **a, struct dpif_packet **b) | |
2574 | { | |
2575 | struct dpif_packet *tmp = *a; | |
2576 | *a = *b; | |
2577 | *b = tmp; | |
2578 | } | |
2579 | ||
2580 | /* Try to process all ('cnt') the 'packets' using only the exact match cache | |
2581 | * 'flow_cache'. If a flow is not found for a packet 'packets[i]', or if there | |
2582 | * is no matching batch for a packet's flow, the miniflow is copied into 'keys' | |
2583 | * and the packet pointer is moved at the beginning of the 'packets' array. | |
2584 | * | |
2585 | * The function returns the number of packets that needs to be processed in the | |
2586 | * 'packets' array (they have been moved to the beginning of the vector). | |
2587 | */ | |
2588 | static inline size_t | |
65f13b50 AW |
2589 | emc_processing(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **packets, |
2590 | size_t cnt, struct pkt_metadata *md, | |
2591 | struct netdev_flow_key *keys) | |
72865317 | 2592 | { |
9bbf1c3d DDP |
2593 | struct netdev_flow_key key; |
2594 | struct packet_batch batches[4]; | |
65f13b50 | 2595 | struct emc_cache *flow_cache = &pmd->flow_cache; |
84d6d5eb | 2596 | size_t n_batches, i; |
9bbf1c3d | 2597 | size_t notfound_cnt = 0; |
8cbf4f47 | 2598 | |
9bbf1c3d DDP |
2599 | n_batches = 0; |
2600 | miniflow_initialize(&key.flow, key.buf); | |
84d6d5eb | 2601 | for (i = 0; i < cnt; i++) { |
9bbf1c3d DDP |
2602 | struct dp_netdev_flow *flow; |
2603 | uint32_t hash; | |
2604 | ||
84d6d5eb EJ |
2605 | if (OVS_UNLIKELY(ofpbuf_size(&packets[i]->ofpbuf) < ETH_HEADER_LEN)) { |
2606 | dpif_packet_delete(packets[i]); | |
84d6d5eb EJ |
2607 | continue; |
2608 | } | |
8cbf4f47 | 2609 | |
9bbf1c3d DDP |
2610 | miniflow_extract(&packets[i]->ofpbuf, md, &key.flow); |
2611 | ||
2612 | hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.flow); | |
2613 | ||
2614 | flow = emc_lookup(flow_cache, &key.flow, hash); | |
2615 | if (OVS_UNLIKELY(!dp_netdev_queue_batches(packets[i], md, | |
2616 | flow, &key.flow, | |
2617 | batches, &n_batches, | |
2618 | ARRAY_SIZE(batches)))) { | |
2619 | if (i != notfound_cnt) { | |
2620 | dpif_packet_swap(&packets[i], &packets[notfound_cnt]); | |
2621 | } | |
2622 | ||
2623 | keys[notfound_cnt++] = key; | |
2624 | } | |
2625 | } | |
2626 | ||
2627 | for (i = 0; i < n_batches; i++) { | |
65f13b50 | 2628 | packet_batch_execute(&batches[i], pmd); |
84d6d5eb | 2629 | } |
4f150744 | 2630 | |
9bbf1c3d DDP |
2631 | return notfound_cnt; |
2632 | } | |
2633 | ||
2634 | static inline void | |
65f13b50 | 2635 | fast_path_processing(struct dp_netdev_pmd_thread *pmd, |
9bbf1c3d DDP |
2636 | struct dpif_packet **packets, size_t cnt, |
2637 | struct pkt_metadata *md, struct netdev_flow_key *keys) | |
2638 | { | |
1a0d5831 | 2639 | #if !defined(__CHECKER__) && !defined(_WIN32) |
9bbf1c3d DDP |
2640 | const size_t PKT_ARRAY_SIZE = cnt; |
2641 | #else | |
1a0d5831 | 2642 | /* Sparse or MSVC doesn't like variable length array. */ |
9bbf1c3d DDP |
2643 | enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; |
2644 | #endif | |
2645 | struct packet_batch batches[PKT_ARRAY_SIZE]; | |
2646 | const struct miniflow *mfs[PKT_ARRAY_SIZE]; /* NULL at bad packets. */ | |
2647 | struct cls_rule *rules[PKT_ARRAY_SIZE]; | |
65f13b50 AW |
2648 | struct dp_netdev *dp = pmd->dp; |
2649 | struct emc_cache *flow_cache = &pmd->flow_cache; | |
9bbf1c3d DDP |
2650 | size_t n_batches, i; |
2651 | bool any_miss; | |
2652 | ||
2653 | for (i = 0; i < cnt; i++) { | |
2654 | mfs[i] = &keys[i].flow; | |
2655 | } | |
623540e4 EJ |
2656 | any_miss = !classifier_lookup_miniflow_batch(&dp->cls, mfs, rules, cnt); |
2657 | if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) { | |
2658 | uint64_t actions_stub[512 / 8], slow_stub[512 / 8]; | |
2659 | struct ofpbuf actions, put_actions; | |
2660 | struct match match; | |
2661 | ||
2662 | ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub); | |
2663 | ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub); | |
2664 | ||
2665 | for (i = 0; i < cnt; i++) { | |
2666 | const struct dp_netdev_flow *netdev_flow; | |
2667 | struct ofpbuf *add_actions; | |
2668 | int error; | |
2669 | ||
2670 | if (OVS_LIKELY(rules[i] || !mfs[i])) { | |
2671 | continue; | |
2672 | } | |
2673 | ||
2674 | /* It's possible that an earlier slow path execution installed | |
2675 | * the rule this flow needs. In this case, it's a lot cheaper | |
2676 | * to catch it here than execute a miss. */ | |
2677 | netdev_flow = dp_netdev_lookup_flow(dp, mfs[i]); | |
2678 | if (netdev_flow) { | |
2679 | rules[i] = CONST_CAST(struct cls_rule *, &netdev_flow->cr); | |
2680 | continue; | |
2681 | } | |
2682 | ||
2683 | miniflow_expand(mfs[i], &match.flow); | |
2684 | ||
2685 | ofpbuf_clear(&actions); | |
2686 | ofpbuf_clear(&put_actions); | |
2687 | ||
2688 | error = dp_netdev_upcall(dp, packets[i], &match.flow, &match.wc, | |
2689 | DPIF_UC_MISS, NULL, &actions, | |
2690 | &put_actions); | |
2691 | if (OVS_UNLIKELY(error && error != ENOSPC)) { | |
2692 | continue; | |
2693 | } | |
2694 | ||
2695 | /* We can't allow the packet batching in the next loop to execute | |
2696 | * the actions. Otherwise, if there are any slow path actions, | |
2697 | * we'll send the packet up twice. */ | |
ac8c2081 | 2698 | dp_netdev_execute_actions(pmd, &packets[i], 1, true, md, |
65f13b50 | 2699 | ofpbuf_data(&actions), |
623540e4 EJ |
2700 | ofpbuf_size(&actions)); |
2701 | ||
2702 | add_actions = ofpbuf_size(&put_actions) | |
2703 | ? &put_actions | |
2704 | : &actions; | |
2705 | ||
2706 | ovs_mutex_lock(&dp->flow_mutex); | |
2707 | /* XXX: There's a brief race where this flow could have already | |
2708 | * been installed since we last did the flow lookup. This could be | |
2709 | * solved by moving the mutex lock outside the loop, but that's an | |
2710 | * awful long time to be locking everyone out of making flow | |
2711 | * installs. If we move to a per-core classifier, it would be | |
2712 | * reasonable. */ | |
2713 | if (OVS_LIKELY(error != ENOSPC) | |
2714 | && !dp_netdev_lookup_flow(dp, mfs[i])) { | |
2715 | dp_netdev_flow_add(dp, &match, ofpbuf_data(add_actions), | |
2716 | ofpbuf_size(add_actions)); | |
2717 | } | |
2718 | ovs_mutex_unlock(&dp->flow_mutex); | |
2719 | } | |
2720 | ||
2721 | ofpbuf_uninit(&actions); | |
2722 | ofpbuf_uninit(&put_actions); | |
2723 | fat_rwlock_unlock(&dp->upcall_rwlock); | |
ac8c2081 DDP |
2724 | } else if (OVS_UNLIKELY(any_miss)) { |
2725 | int dropped_cnt = 0; | |
2726 | ||
2727 | for (i = 0; i < cnt; i++) { | |
2728 | if (OVS_UNLIKELY(!rules[i] && mfs[i])) { | |
2729 | dpif_packet_delete(packets[i]); | |
2730 | dropped_cnt++; | |
2731 | } | |
2732 | } | |
2733 | ||
2734 | dp_netdev_count_packet(dp, DP_STAT_LOST, dropped_cnt); | |
623540e4 | 2735 | } |
84d6d5eb EJ |
2736 | |
2737 | n_batches = 0; | |
8cbf4f47 | 2738 | for (i = 0; i < cnt; i++) { |
9bbf1c3d | 2739 | struct dpif_packet *packet = packets[i]; |
84d6d5eb | 2740 | struct dp_netdev_flow *flow; |
8cbf4f47 | 2741 | |
623540e4 | 2742 | if (OVS_UNLIKELY(!rules[i] || !mfs[i])) { |
84d6d5eb EJ |
2743 | continue; |
2744 | } | |
2745 | ||
84d6d5eb | 2746 | flow = dp_netdev_flow_cast(rules[i]); |
65f13b50 AW |
2747 | emc_insert(flow_cache, mfs[i], dpif_packet_get_dp_hash(packet), |
2748 | flow); | |
9bbf1c3d DDP |
2749 | dp_netdev_queue_batches(packet, md, flow, mfs[i], batches, &n_batches, |
2750 | ARRAY_SIZE(batches)); | |
8cbf4f47 DDP |
2751 | } |
2752 | ||
84d6d5eb | 2753 | for (i = 0; i < n_batches; i++) { |
65f13b50 | 2754 | packet_batch_execute(&batches[i], pmd); |
72865317 BP |
2755 | } |
2756 | } | |
2757 | ||
adcf00ba | 2758 | static void |
65f13b50 | 2759 | dp_netdev_input(struct dp_netdev_pmd_thread *pmd, |
9bbf1c3d DDP |
2760 | struct dpif_packet **packets, int cnt, struct pkt_metadata *md) |
2761 | { | |
1a0d5831 | 2762 | #if !defined(__CHECKER__) && !defined(_WIN32) |
9bbf1c3d DDP |
2763 | const size_t PKT_ARRAY_SIZE = cnt; |
2764 | #else | |
1a0d5831 | 2765 | /* Sparse or MSVC doesn't like variable length array. */ |
9bbf1c3d DDP |
2766 | enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; |
2767 | #endif | |
2768 | struct netdev_flow_key keys[PKT_ARRAY_SIZE]; | |
2769 | size_t newcnt; | |
2770 | ||
65f13b50 | 2771 | newcnt = emc_processing(pmd, packets, cnt, md, keys); |
9bbf1c3d | 2772 | if (OVS_UNLIKELY(newcnt)) { |
65f13b50 | 2773 | fast_path_processing(pmd, packets, newcnt, md, keys); |
9bbf1c3d DDP |
2774 | } |
2775 | } | |
2776 | ||
9080a111 | 2777 | struct dp_netdev_execute_aux { |
65f13b50 | 2778 | struct dp_netdev_pmd_thread *pmd; |
9080a111 JR |
2779 | }; |
2780 | ||
6b31e073 | 2781 | static void |
623540e4 EJ |
2782 | dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb, |
2783 | void *aux) | |
6b31e073 RW |
2784 | { |
2785 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
623540e4 | 2786 | dp->upcall_aux = aux; |
6b31e073 RW |
2787 | dp->upcall_cb = cb; |
2788 | } | |
2789 | ||
ac8c2081 DDP |
2790 | static void |
2791 | dp_netdev_drop_packets(struct dpif_packet ** packets, int cnt, bool may_steal) | |
2792 | { | |
2793 | int i; | |
2794 | ||
2795 | if (may_steal) { | |
2796 | for (i = 0; i < cnt; i++) { | |
2797 | dpif_packet_delete(packets[i]); | |
2798 | } | |
2799 | } | |
2800 | } | |
2801 | ||
9080a111 | 2802 | static void |
8cbf4f47 | 2803 | dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, |
572f732a | 2804 | struct pkt_metadata *md, |
09f9da0b | 2805 | const struct nlattr *a, bool may_steal) |
8a4e3a85 | 2806 | OVS_NO_THREAD_SAFETY_ANALYSIS |
9080a111 JR |
2807 | { |
2808 | struct dp_netdev_execute_aux *aux = aux_; | |
623540e4 | 2809 | uint32_t *depth = recirc_depth_get(); |
65f13b50 AW |
2810 | struct dp_netdev_pmd_thread *pmd= aux->pmd; |
2811 | struct dp_netdev *dp= pmd->dp; | |
09f9da0b | 2812 | int type = nl_attr_type(a); |
8a4e3a85 | 2813 | struct dp_netdev_port *p; |
8cbf4f47 | 2814 | int i; |
9080a111 | 2815 | |
09f9da0b JR |
2816 | switch ((enum ovs_action_attr)type) { |
2817 | case OVS_ACTION_ATTR_OUTPUT: | |
623540e4 | 2818 | p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a))); |
26a5075b | 2819 | if (OVS_LIKELY(p)) { |
65f13b50 | 2820 | netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal); |
ac8c2081 | 2821 | return; |
8a4e3a85 | 2822 | } |
09f9da0b JR |
2823 | break; |
2824 | ||
623540e4 EJ |
2825 | case OVS_ACTION_ATTR_USERSPACE: |
2826 | if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) { | |
2827 | const struct nlattr *userdata; | |
2828 | struct ofpbuf actions; | |
2829 | struct flow flow; | |
4fc65926 | 2830 | |
623540e4 EJ |
2831 | userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); |
2832 | ofpbuf_init(&actions, 0); | |
8cbf4f47 | 2833 | |
623540e4 EJ |
2834 | for (i = 0; i < cnt; i++) { |
2835 | int error; | |
2836 | ||
2837 | ofpbuf_clear(&actions); | |
2838 | ||
2839 | flow_extract(&packets[i]->ofpbuf, md, &flow); | |
2840 | error = dp_netdev_upcall(dp, packets[i], &flow, NULL, | |
2841 | DPIF_UC_ACTION, userdata, &actions, | |
2842 | NULL); | |
2843 | if (!error || error == ENOSPC) { | |
ac8c2081 DDP |
2844 | dp_netdev_execute_actions(pmd, &packets[i], 1, may_steal, |
2845 | md, ofpbuf_data(&actions), | |
623540e4 | 2846 | ofpbuf_size(&actions)); |
ac8c2081 | 2847 | } else if (may_steal) { |
623540e4 EJ |
2848 | dpif_packet_delete(packets[i]); |
2849 | } | |
db73f716 | 2850 | } |
623540e4 EJ |
2851 | ofpbuf_uninit(&actions); |
2852 | fat_rwlock_unlock(&dp->upcall_rwlock); | |
6b31e073 | 2853 | |
ac8c2081 DDP |
2854 | return; |
2855 | } | |
09f9da0b | 2856 | break; |
572f732a | 2857 | |
347bf289 AZ |
2858 | case OVS_ACTION_ATTR_HASH: { |
2859 | const struct ovs_action_hash *hash_act; | |
2860 | uint32_t hash; | |
2861 | ||
2862 | hash_act = nl_attr_get(a); | |
8cbf4f47 | 2863 | |
8cbf4f47 DDP |
2864 | for (i = 0; i < cnt; i++) { |
2865 | ||
8cbf4f47 DDP |
2866 | if (hash_act->hash_alg == OVS_HASH_ALG_L4) { |
2867 | /* Hash need not be symmetric, nor does it need to include | |
2868 | * L2 fields. */ | |
9bbf1c3d DDP |
2869 | hash = hash_2words(dpif_packet_get_dp_hash(packets[i]), |
2870 | hash_act->hash_basis); | |
8cbf4f47 DDP |
2871 | } else { |
2872 | VLOG_WARN("Unknown hash algorithm specified " | |
2873 | "for the hash action."); | |
2874 | hash = 2; | |
2875 | } | |
2876 | ||
347bf289 AZ |
2877 | if (!hash) { |
2878 | hash = 1; /* 0 is not valid */ | |
2879 | } | |
2880 | ||
8cbf4f47 DDP |
2881 | if (i == 0) { |
2882 | md->dp_hash = hash; | |
2883 | } | |
9bbf1c3d | 2884 | dpif_packet_set_dp_hash(packets[i], hash); |
347bf289 | 2885 | } |
ac8c2081 | 2886 | return; |
347bf289 AZ |
2887 | } |
2888 | ||
adcf00ba AZ |
2889 | case OVS_ACTION_ATTR_RECIRC: |
2890 | if (*depth < MAX_RECIRC_DEPTH) { | |
572f732a | 2891 | |
adcf00ba | 2892 | (*depth)++; |
8cbf4f47 DDP |
2893 | for (i = 0; i < cnt; i++) { |
2894 | struct dpif_packet *recirc_pkt; | |
2895 | struct pkt_metadata recirc_md = *md; | |
2896 | ||
2897 | recirc_pkt = (may_steal) ? packets[i] | |
2898 | : dpif_packet_clone(packets[i]); | |
2899 | ||
2900 | recirc_md.recirc_id = nl_attr_get_u32(a); | |
2901 | ||
2902 | /* Hash is private to each packet */ | |
61a2647e | 2903 | recirc_md.dp_hash = dpif_packet_get_dp_hash(packets[i]); |
8cbf4f47 | 2904 | |
65f13b50 | 2905 | dp_netdev_input(pmd, &recirc_pkt, 1, |
9bbf1c3d | 2906 | &recirc_md); |
8cbf4f47 | 2907 | } |
adcf00ba AZ |
2908 | (*depth)--; |
2909 | ||
ac8c2081 | 2910 | return; |
adcf00ba | 2911 | } |
ac8c2081 DDP |
2912 | |
2913 | VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); | |
572f732a | 2914 | break; |
572f732a | 2915 | |
09f9da0b JR |
2916 | case OVS_ACTION_ATTR_PUSH_VLAN: |
2917 | case OVS_ACTION_ATTR_POP_VLAN: | |
2918 | case OVS_ACTION_ATTR_PUSH_MPLS: | |
2919 | case OVS_ACTION_ATTR_POP_MPLS: | |
2920 | case OVS_ACTION_ATTR_SET: | |
6d670e7f | 2921 | case OVS_ACTION_ATTR_SET_MASKED: |
09f9da0b JR |
2922 | case OVS_ACTION_ATTR_SAMPLE: |
2923 | case OVS_ACTION_ATTR_UNSPEC: | |
2924 | case __OVS_ACTION_ATTR_MAX: | |
2925 | OVS_NOT_REACHED(); | |
da546e07 | 2926 | } |
ac8c2081 DDP |
2927 | |
2928 | dp_netdev_drop_packets(packets, cnt, may_steal); | |
98403001 BP |
2929 | } |
2930 | ||
4edb9ae9 | 2931 | static void |
65f13b50 | 2932 | dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, |
8cbf4f47 DDP |
2933 | struct dpif_packet **packets, int cnt, |
2934 | bool may_steal, struct pkt_metadata *md, | |
9080a111 | 2935 | const struct nlattr *actions, size_t actions_len) |
72865317 | 2936 | { |
65f13b50 | 2937 | struct dp_netdev_execute_aux aux = {pmd}; |
9080a111 | 2938 | |
8cbf4f47 DDP |
2939 | odp_execute_actions(&aux, packets, cnt, may_steal, md, actions, |
2940 | actions_len, dp_execute_cb); | |
72865317 BP |
2941 | } |
2942 | ||
2943 | const struct dpif_class dpif_netdev_class = { | |
72865317 | 2944 | "netdev", |
2197d7ab | 2945 | dpif_netdev_enumerate, |
0aeaabc8 | 2946 | dpif_netdev_port_open_type, |
72865317 BP |
2947 | dpif_netdev_open, |
2948 | dpif_netdev_close, | |
7dab847a | 2949 | dpif_netdev_destroy, |
e4cfed38 PS |
2950 | dpif_netdev_run, |
2951 | dpif_netdev_wait, | |
72865317 | 2952 | dpif_netdev_get_stats, |
72865317 BP |
2953 | dpif_netdev_port_add, |
2954 | dpif_netdev_port_del, | |
2955 | dpif_netdev_port_query_by_number, | |
2956 | dpif_netdev_port_query_by_name, | |
98403001 | 2957 | NULL, /* port_get_pid */ |
b0ec0f27 BP |
2958 | dpif_netdev_port_dump_start, |
2959 | dpif_netdev_port_dump_next, | |
2960 | dpif_netdev_port_dump_done, | |
72865317 BP |
2961 | dpif_netdev_port_poll, |
2962 | dpif_netdev_port_poll_wait, | |
72865317 | 2963 | dpif_netdev_flow_flush, |
ac64794a BP |
2964 | dpif_netdev_flow_dump_create, |
2965 | dpif_netdev_flow_dump_destroy, | |
2966 | dpif_netdev_flow_dump_thread_create, | |
2967 | dpif_netdev_flow_dump_thread_destroy, | |
704a1e09 | 2968 | dpif_netdev_flow_dump_next, |
1a0c894a | 2969 | dpif_netdev_operate, |
6b31e073 RW |
2970 | NULL, /* recv_set */ |
2971 | NULL, /* handlers_set */ | |
f2eee189 | 2972 | dpif_netdev_pmd_set, |
5bf93d67 | 2973 | dpif_netdev_queue_to_priority, |
6b31e073 RW |
2974 | NULL, /* recv */ |
2975 | NULL, /* recv_wait */ | |
2976 | NULL, /* recv_purge */ | |
2977 | dpif_netdev_register_upcall_cb, | |
2978 | dpif_netdev_enable_upcall, | |
2979 | dpif_netdev_disable_upcall, | |
72865317 | 2980 | }; |
614c4892 | 2981 | |
74cc3969 BP |
2982 | static void |
2983 | dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
2984 | const char *argv[], void *aux OVS_UNUSED) | |
2985 | { | |
59e6d833 BP |
2986 | struct dp_netdev_port *old_port; |
2987 | struct dp_netdev_port *new_port; | |
74cc3969 | 2988 | struct dp_netdev *dp; |
ff073a71 | 2989 | odp_port_t port_no; |
74cc3969 | 2990 | |
8a4e3a85 | 2991 | ovs_mutex_lock(&dp_netdev_mutex); |
74cc3969 BP |
2992 | dp = shash_find_data(&dp_netdevs, argv[1]); |
2993 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
8a4e3a85 | 2994 | ovs_mutex_unlock(&dp_netdev_mutex); |
74cc3969 BP |
2995 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); |
2996 | return; | |
2997 | } | |
8a4e3a85 BP |
2998 | ovs_refcount_ref(&dp->ref_cnt); |
2999 | ovs_mutex_unlock(&dp_netdev_mutex); | |
74cc3969 | 3000 | |
59e6d833 BP |
3001 | ovs_mutex_lock(&dp->port_mutex); |
3002 | if (get_port_by_name(dp, argv[2], &old_port)) { | |
74cc3969 | 3003 | unixctl_command_reply_error(conn, "unknown port"); |
8a4e3a85 | 3004 | goto exit; |
74cc3969 BP |
3005 | } |
3006 | ||
ff073a71 BP |
3007 | port_no = u32_to_odp(atoi(argv[3])); |
3008 | if (!port_no || port_no == ODPP_NONE) { | |
74cc3969 | 3009 | unixctl_command_reply_error(conn, "bad port number"); |
8a4e3a85 | 3010 | goto exit; |
74cc3969 | 3011 | } |
ff073a71 | 3012 | if (dp_netdev_lookup_port(dp, port_no)) { |
74cc3969 | 3013 | unixctl_command_reply_error(conn, "port number already in use"); |
8a4e3a85 | 3014 | goto exit; |
74cc3969 | 3015 | } |
59e6d833 BP |
3016 | |
3017 | /* Remove old port. */ | |
3018 | cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no)); | |
3019 | ovsrcu_postpone(free, old_port); | |
3020 | ||
3021 | /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */ | |
3022 | new_port = xmemdup(old_port, sizeof *old_port); | |
3023 | new_port->port_no = port_no; | |
3024 | cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no)); | |
3025 | ||
d33ed218 | 3026 | seq_change(dp->port_seq); |
74cc3969 | 3027 | unixctl_command_reply(conn, NULL); |
8a4e3a85 BP |
3028 | |
3029 | exit: | |
59e6d833 | 3030 | ovs_mutex_unlock(&dp->port_mutex); |
8a4e3a85 | 3031 | dp_netdev_unref(dp); |
74cc3969 BP |
3032 | } |
3033 | ||
c40b890f BP |
3034 | static void |
3035 | dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
3036 | const char *argv[], void *aux OVS_UNUSED) | |
3037 | { | |
3038 | struct dp_netdev_port *port; | |
3039 | struct dp_netdev *dp; | |
3040 | ||
3041 | ovs_mutex_lock(&dp_netdev_mutex); | |
3042 | dp = shash_find_data(&dp_netdevs, argv[1]); | |
3043 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
3044 | ovs_mutex_unlock(&dp_netdev_mutex); | |
3045 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); | |
3046 | return; | |
3047 | } | |
3048 | ovs_refcount_ref(&dp->ref_cnt); | |
3049 | ovs_mutex_unlock(&dp_netdev_mutex); | |
3050 | ||
3051 | ovs_mutex_lock(&dp->port_mutex); | |
3052 | if (get_port_by_name(dp, argv[2], &port)) { | |
3053 | unixctl_command_reply_error(conn, "unknown port"); | |
3054 | } else if (port->port_no == ODPP_LOCAL) { | |
3055 | unixctl_command_reply_error(conn, "can't delete local port"); | |
3056 | } else { | |
3057 | do_del_port(dp, port); | |
3058 | unixctl_command_reply(conn, NULL); | |
3059 | } | |
3060 | ovs_mutex_unlock(&dp->port_mutex); | |
3061 | ||
3062 | dp_netdev_unref(dp); | |
3063 | } | |
3064 | ||
0cbfe35d BP |
3065 | static void |
3066 | dpif_dummy_register__(const char *type) | |
3067 | { | |
3068 | struct dpif_class *class; | |
3069 | ||
3070 | class = xmalloc(sizeof *class); | |
3071 | *class = dpif_netdev_class; | |
3072 | class->type = xstrdup(type); | |
3073 | dp_register_provider(class); | |
3074 | } | |
3075 | ||
614c4892 | 3076 | void |
0cbfe35d | 3077 | dpif_dummy_register(bool override) |
614c4892 | 3078 | { |
0cbfe35d BP |
3079 | if (override) { |
3080 | struct sset types; | |
3081 | const char *type; | |
3082 | ||
3083 | sset_init(&types); | |
3084 | dp_enumerate_types(&types); | |
3085 | SSET_FOR_EACH (type, &types) { | |
3086 | if (!dp_unregister_provider(type)) { | |
3087 | dpif_dummy_register__(type); | |
3088 | } | |
3089 | } | |
3090 | sset_destroy(&types); | |
614c4892 | 3091 | } |
0cbfe35d BP |
3092 | |
3093 | dpif_dummy_register__("dummy"); | |
74cc3969 BP |
3094 | |
3095 | unixctl_command_register("dpif-dummy/change-port-number", | |
74467d5c | 3096 | "dp port new-number", |
74cc3969 | 3097 | 3, 3, dpif_dummy_change_port_number, NULL); |
74467d5c | 3098 | unixctl_command_register("dpif-dummy/delete-port", "dp port", |
c40b890f | 3099 | 2, 2, dpif_dummy_delete_port, NULL); |
614c4892 | 3100 | } |