]>
Commit | Line | Data |
---|---|---|
72865317 | 1 | /* |
ff073a71 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
72865317 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
db73f716 | 18 | #include "dpif-netdev.h" |
72865317 | 19 | |
72865317 BP |
20 | #include <ctype.h> |
21 | #include <errno.h> | |
22 | #include <fcntl.h> | |
23 | #include <inttypes.h> | |
72865317 | 24 | #include <netinet/in.h> |
9d82ec47 | 25 | #include <sys/socket.h> |
7f3adc00 | 26 | #include <net/if.h> |
cdee00fd | 27 | #include <stdint.h> |
72865317 BP |
28 | #include <stdlib.h> |
29 | #include <string.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/stat.h> | |
72865317 BP |
32 | #include <unistd.h> |
33 | ||
2c0ea78f | 34 | #include "classifier.h" |
59e6d833 | 35 | #include "cmap.h" |
72865317 | 36 | #include "csum.h" |
614c4892 | 37 | #include "dpif.h" |
72865317 | 38 | #include "dpif-provider.h" |
614c4892 | 39 | #include "dummy.h" |
36956a7d | 40 | #include "dynamic-string.h" |
afae68b1 | 41 | #include "fat-rwlock.h" |
72865317 | 42 | #include "flow.h" |
9f361d6b | 43 | #include "cmap.h" |
6c3eee82 | 44 | #include "latch.h" |
72865317 | 45 | #include "list.h" |
8c301900 | 46 | #include "meta-flow.h" |
72865317 | 47 | #include "netdev.h" |
8617afff | 48 | #include "netdev-dpdk.h" |
de281153 | 49 | #include "netdev-vport.h" |
cdee00fd | 50 | #include "netlink.h" |
f094af7b | 51 | #include "odp-execute.h" |
72865317 BP |
52 | #include "odp-util.h" |
53 | #include "ofp-print.h" | |
54 | #include "ofpbuf.h" | |
5a034064 | 55 | #include "ovs-numa.h" |
61e7deb1 | 56 | #include "ovs-rcu.h" |
91088554 | 57 | #include "packet-dpif.h" |
72865317 BP |
58 | #include "packets.h" |
59 | #include "poll-loop.h" | |
26c6b6cd | 60 | #include "random.h" |
d33ed218 | 61 | #include "seq.h" |
462278db | 62 | #include "shash.h" |
0cbfe35d | 63 | #include "sset.h" |
72865317 | 64 | #include "timeval.h" |
74cc3969 | 65 | #include "unixctl.h" |
72865317 | 66 | #include "util.h" |
72865317 | 67 | #include "vlog.h" |
5136ce49 | 68 | |
d98e6007 | 69 | VLOG_DEFINE_THIS_MODULE(dpif_netdev); |
72865317 | 70 | |
2c0ea78f GS |
71 | /* By default, choose a priority in the middle. */ |
72 | #define NETDEV_RULE_PRIORITY 0x8000 | |
73 | ||
8bb113da | 74 | #define FLOW_DUMP_MAX_BATCH 50 |
adcf00ba AZ |
75 | /* Use per thread recirc_depth to prevent recirculation loop. */ |
76 | #define MAX_RECIRC_DEPTH 5 | |
77 | DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) | |
e4cfed38 | 78 | |
72865317 | 79 | /* Configuration parameters. */ |
72865317 BP |
80 | enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ |
81 | ||
8a4e3a85 BP |
82 | /* Protects against changes to 'dp_netdevs'. */ |
83 | static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; | |
84 | ||
85 | /* Contains all 'struct dp_netdev's. */ | |
86 | static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex) | |
87 | = SHASH_INITIALIZER(&dp_netdevs); | |
88 | ||
623540e4 | 89 | static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600); |
6b31e073 | 90 | |
9bbf1c3d DDP |
91 | /* Stores a miniflow */ |
92 | ||
93 | /* There are fields in the flow structure that we never use. Therefore we can | |
94 | * save a few words of memory */ | |
95 | #define NETDEV_KEY_BUF_SIZE_U32 (FLOW_U32S \ | |
96 | - MINI_N_INLINE \ | |
97 | - FLOW_U32_SIZE(regs) \ | |
98 | - FLOW_U32_SIZE(metadata) \ | |
99 | ) | |
100 | struct netdev_flow_key { | |
101 | struct miniflow flow; | |
102 | uint32_t buf[NETDEV_KEY_BUF_SIZE_U32]; | |
103 | }; | |
104 | ||
105 | /* Exact match cache for frequently used flows | |
106 | * | |
107 | * The cache uses a 32-bit hash of the packet (which can be the RSS hash) to | |
108 | * search its entries for a miniflow that matches exactly the miniflow of the | |
109 | * packet. It stores the 'cls_rule'(rule) that matches the miniflow. | |
110 | * | |
111 | * A cache entry holds a reference to its 'dp_netdev_flow'. | |
112 | * | |
113 | * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS different | |
114 | * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values (each of | |
115 | * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown away). Each | |
116 | * value is the index of a cache entry where the miniflow could be. | |
117 | * | |
118 | * | |
119 | * Thread-safety | |
120 | * ============= | |
121 | * | |
122 | * Each pmd_thread has its own private exact match cache. | |
123 | * If dp_netdev_input is not called from a pmd thread, a mutex is used. | |
124 | */ | |
125 | ||
126 | #define EM_FLOW_HASH_SHIFT 10 | |
127 | #define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT) | |
128 | #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) | |
129 | #define EM_FLOW_HASH_SEGS 2 | |
130 | ||
131 | struct emc_entry { | |
132 | uint32_t hash; | |
133 | struct netdev_flow_key mf; | |
134 | struct dp_netdev_flow *flow; | |
135 | }; | |
136 | ||
137 | struct emc_cache { | |
138 | struct emc_entry entries[EM_FLOW_HASH_ENTRIES]; | |
139 | }; | |
140 | ||
141 | /* Iterate in the exact match cache through every entry that might contain a | |
142 | * miniflow with hash 'HASH'. */ | |
143 | #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) \ | |
144 | for (uint32_t i__ = 0, srch_hash__ = (HASH); \ | |
145 | (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \ | |
146 | i__ < EM_FLOW_HASH_SEGS; \ | |
147 | i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT) | |
148 | ||
8a4e3a85 BP |
149 | /* Datapath based on the network device interface from netdev.h. |
150 | * | |
151 | * | |
152 | * Thread-safety | |
153 | * ============= | |
154 | * | |
155 | * Some members, marked 'const', are immutable. Accessing other members | |
156 | * requires synchronization, as noted in more detail below. | |
157 | * | |
158 | * Acquisition order is, from outermost to innermost: | |
159 | * | |
160 | * dp_netdev_mutex (global) | |
59e6d833 | 161 | * port_mutex |
9bbf1c3d | 162 | * emc_mutex |
8a4e3a85 | 163 | * flow_mutex |
8a4e3a85 | 164 | */ |
72865317 | 165 | struct dp_netdev { |
8a4e3a85 BP |
166 | const struct dpif_class *const class; |
167 | const char *const name; | |
6b31e073 | 168 | struct dpif *dpif; |
6a8267c5 BP |
169 | struct ovs_refcount ref_cnt; |
170 | atomic_flag destroyed; | |
72865317 | 171 | |
8a4e3a85 BP |
172 | /* Flows. |
173 | * | |
afae68b1 JR |
174 | * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding |
175 | * changes to 'cls' must be made while still holding the 'flow_mutex'. | |
8a4e3a85 BP |
176 | */ |
177 | struct ovs_mutex flow_mutex; | |
afae68b1 | 178 | struct classifier cls; |
9f361d6b | 179 | struct cmap flow_table OVS_GUARDED; /* Flow table. */ |
8a4e3a85 | 180 | |
8a4e3a85 BP |
181 | /* Statistics. |
182 | * | |
51852a57 BP |
183 | * ovsthread_stats is internally synchronized. */ |
184 | struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */ | |
72865317 | 185 | |
8a4e3a85 BP |
186 | /* Ports. |
187 | * | |
59e6d833 BP |
188 | * Protected by RCU. Take the mutex to add or remove ports. */ |
189 | struct ovs_mutex port_mutex; | |
190 | struct cmap ports; | |
d33ed218 | 191 | struct seq *port_seq; /* Incremented whenever a port changes. */ |
6c3eee82 | 192 | |
6b31e073 RW |
193 | /* Protects access to ofproto-dpif-upcall interface during revalidator |
194 | * thread synchronization. */ | |
195 | struct fat_rwlock upcall_rwlock; | |
623540e4 EJ |
196 | upcall_callback *upcall_cb; /* Callback function for executing upcalls. */ |
197 | void *upcall_aux; | |
6b31e073 | 198 | |
6c3eee82 BP |
199 | /* Forwarding threads. */ |
200 | struct latch exit_latch; | |
e4cfed38 PS |
201 | struct pmd_thread *pmd_threads; |
202 | size_t n_pmd_threads; | |
203 | int pmd_count; | |
9bbf1c3d DDP |
204 | |
205 | /* Exact match cache for non-pmd devices. | |
206 | * Pmd devices use instead each thread's flow_cache for this purpose. | |
207 | * Protected by emc_mutex */ | |
208 | struct emc_cache flow_cache OVS_GUARDED; | |
209 | struct ovs_mutex emc_mutex; | |
72865317 BP |
210 | }; |
211 | ||
8a4e3a85 | 212 | static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, |
59e6d833 | 213 | odp_port_t); |
ff073a71 | 214 | |
51852a57 BP |
215 | enum dp_stat_type { |
216 | DP_STAT_HIT, /* Packets that matched in the flow table. */ | |
217 | DP_STAT_MISS, /* Packets that did not match. */ | |
218 | DP_STAT_LOST, /* Packets not passed up to the client. */ | |
219 | DP_N_STATS | |
220 | }; | |
221 | ||
222 | /* Contained by struct dp_netdev's 'stats' member. */ | |
223 | struct dp_netdev_stats { | |
224 | struct ovs_mutex mutex; /* Protects 'n'. */ | |
225 | ||
226 | /* Indexed by DP_STAT_*, protected by 'mutex'. */ | |
227 | unsigned long long int n[DP_N_STATS] OVS_GUARDED; | |
228 | }; | |
229 | ||
230 | ||
72865317 BP |
231 | /* A port in a netdev-based datapath. */ |
232 | struct dp_netdev_port { | |
59e6d833 | 233 | struct cmap_node node; /* Node in dp_netdev's 'ports'. */ |
ff073a71 | 234 | odp_port_t port_no; |
72865317 | 235 | struct netdev *netdev; |
4b609110 | 236 | struct netdev_saved_flags *sf; |
55c955bd | 237 | struct netdev_rxq **rxq; |
b284085e | 238 | struct ovs_refcount ref_cnt; |
0cbfe35d | 239 | char *type; /* Port type as requested by user. */ |
72865317 BP |
240 | }; |
241 | ||
8a4e3a85 BP |
242 | /* A flow in dp_netdev's 'flow_table'. |
243 | * | |
244 | * | |
245 | * Thread-safety | |
246 | * ============= | |
247 | * | |
248 | * Except near the beginning or ending of its lifespan, rule 'rule' belongs to | |
249 | * its dp_netdev's classifier. The text below calls this classifier 'cls'. | |
250 | * | |
251 | * Motivation | |
252 | * ---------- | |
253 | * | |
254 | * The thread safety rules described here for "struct dp_netdev_flow" are | |
255 | * motivated by two goals: | |
256 | * | |
257 | * - Prevent threads that read members of "struct dp_netdev_flow" from | |
258 | * reading bad data due to changes by some thread concurrently modifying | |
259 | * those members. | |
260 | * | |
261 | * - Prevent two threads making changes to members of a given "struct | |
262 | * dp_netdev_flow" from interfering with each other. | |
263 | * | |
264 | * | |
265 | * Rules | |
266 | * ----- | |
267 | * | |
ed79f89a DDP |
268 | * A flow 'flow' may be accessed without a risk of being freed during an RCU |
269 | * grace period. Code that needs to hold onto a flow for a while | |
270 | * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref(). | |
8a4e3a85 BP |
271 | * |
272 | * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the | |
ed79f89a DDP |
273 | * flow from being deleted from 'cls' and it doesn't protect members of 'flow' |
274 | * from modification. | |
8a4e3a85 BP |
275 | * |
276 | * Some members, marked 'const', are immutable. Accessing other members | |
277 | * requires synchronization, as noted in more detail below. | |
278 | */ | |
72865317 | 279 | struct dp_netdev_flow { |
9bbf1c3d | 280 | bool dead; |
2c0ea78f | 281 | /* Packet classification. */ |
8a4e3a85 | 282 | const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */ |
2c0ea78f | 283 | |
8a4e3a85 | 284 | /* Hash table index by unmasked flow. */ |
9f361d6b | 285 | const struct cmap_node node; /* In owning dp_netdev's 'flow_table'. */ |
8a4e3a85 | 286 | const struct flow flow; /* The flow that created this entry. */ |
72865317 | 287 | |
ed79f89a DDP |
288 | /* Number of references. |
289 | * The classifier owns one reference. | |
290 | * Any thread trying to keep a rule from being freed should hold its own | |
291 | * reference. */ | |
292 | struct ovs_refcount ref_cnt; | |
293 | ||
8a4e3a85 BP |
294 | /* Statistics. |
295 | * | |
296 | * Reading or writing these members requires 'mutex'. */ | |
679ba04c | 297 | struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */ |
8a4e3a85 | 298 | |
45c626a3 | 299 | /* Actions. */ |
61e7deb1 | 300 | OVSRCU_TYPE(struct dp_netdev_actions *) actions; |
72865317 BP |
301 | }; |
302 | ||
ed79f89a | 303 | static void dp_netdev_flow_unref(struct dp_netdev_flow *); |
9bbf1c3d | 304 | static bool dp_netdev_flow_ref(struct dp_netdev_flow *); |
8a4e3a85 | 305 | |
679ba04c BP |
306 | /* Contained by struct dp_netdev_flow's 'stats' member. */ |
307 | struct dp_netdev_flow_stats { | |
308 | struct ovs_mutex mutex; /* Guards all the other members. */ | |
309 | ||
310 | long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */ | |
311 | long long int packet_count OVS_GUARDED; /* Number of packets matched. */ | |
312 | long long int byte_count OVS_GUARDED; /* Number of bytes matched. */ | |
313 | uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */ | |
314 | }; | |
315 | ||
a84cb64a BP |
/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' is protected with RCU. */
struct dp_netdev_actions {
    /* These members are immutable: they do not change during the struct's
     * lifetime. */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};
329 | ||
330 | struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *, | |
331 | size_t); | |
61e7deb1 BP |
332 | struct dp_netdev_actions *dp_netdev_flow_get_actions( |
333 | const struct dp_netdev_flow *); | |
334 | static void dp_netdev_actions_free(struct dp_netdev_actions *); | |
a84cb64a | 335 | |
e4cfed38 PS |
336 | /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate |
337 | * the performance overhead of interrupt processing. Therefore netdev can | |
338 | * not implement rx-wait for these devices. dpif-netdev needs to poll | |
339 | * these device to check for recv buffer. pmd-thread does polling for | |
340 | * devices assigned to itself thread. | |
341 | * | |
342 | * DPDK used PMD for accessing NIC. | |
343 | * | |
344 | * A thread that receives packets from PMD ports, looks them up in the flow | |
345 | * table, and executes the actions it finds. | |
346 | **/ | |
347 | struct pmd_thread { | |
6c3eee82 | 348 | struct dp_netdev *dp; |
9bbf1c3d | 349 | struct emc_cache flow_cache; |
6c3eee82 | 350 | pthread_t thread; |
e4cfed38 PS |
351 | int id; |
352 | atomic_uint change_seq; | |
6c3eee82 BP |
353 | }; |
354 | ||
84067a4c JR |
355 | #define PMD_INITIAL_SEQ 1 |
356 | ||
72865317 BP |
357 | /* Interface to netdev-based datapath. */ |
358 | struct dpif_netdev { | |
359 | struct dpif dpif; | |
360 | struct dp_netdev *dp; | |
d33ed218 | 361 | uint64_t last_port_seq; |
72865317 BP |
362 | }; |
363 | ||
8a4e3a85 | 364 | static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no, |
59e6d833 | 365 | struct dp_netdev_port **portp); |
8a4e3a85 | 366 | static int get_port_by_name(struct dp_netdev *dp, const char *devname, |
59e6d833 | 367 | struct dp_netdev_port **portp); |
8a4e3a85 BP |
368 | static void dp_netdev_free(struct dp_netdev *) |
369 | OVS_REQUIRES(dp_netdev_mutex); | |
72865317 | 370 | static void dp_netdev_flow_flush(struct dp_netdev *); |
8a4e3a85 BP |
371 | static int do_add_port(struct dp_netdev *dp, const char *devname, |
372 | const char *type, odp_port_t port_no) | |
59e6d833 | 373 | OVS_REQUIRES(dp->port_mutex); |
c40b890f | 374 | static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *) |
59e6d833 | 375 | OVS_REQUIRES(dp->port_mutex); |
614c4892 BP |
376 | static int dpif_netdev_open(const struct dpif_class *, const char *name, |
377 | bool create, struct dpif **); | |
8a4e3a85 | 378 | static void dp_netdev_execute_actions(struct dp_netdev *dp, |
8cbf4f47 DDP |
379 | struct dpif_packet **, int c, |
380 | bool may_steal, struct pkt_metadata *, | |
9bbf1c3d | 381 | struct emc_cache *flow_cache, |
4edb9ae9 | 382 | const struct nlattr *actions, |
e4cfed38 | 383 | size_t actions_len); |
3c33f0ff JR |
384 | static void dp_netdev_input(struct dp_netdev *, struct emc_cache *, |
385 | struct dpif_packet **, int cnt, | |
386 | struct pkt_metadata *); | |
e4cfed38 PS |
387 | |
388 | static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n); | |
6b31e073 | 389 | static void dp_netdev_disable_upcall(struct dp_netdev *); |
72865317 | 390 | |
9bbf1c3d DDP |
391 | static void emc_clear_entry(struct emc_entry *ce); |
392 | ||
393 | static void | |
394 | emc_cache_init(struct emc_cache *flow_cache) | |
395 | { | |
396 | int i; | |
397 | ||
398 | for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { | |
399 | flow_cache->entries[i].flow = NULL; | |
400 | flow_cache->entries[i].hash = 0; | |
401 | miniflow_initialize(&flow_cache->entries[i].mf.flow, | |
402 | flow_cache->entries[i].mf.buf); | |
403 | } | |
404 | } | |
405 | ||
406 | static void | |
407 | emc_cache_uninit(struct emc_cache *flow_cache) | |
408 | { | |
409 | int i; | |
410 | ||
411 | for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { | |
412 | emc_clear_entry(&flow_cache->entries[i]); | |
413 | } | |
414 | } | |
415 | ||
72865317 BP |
416 | static struct dpif_netdev * |
417 | dpif_netdev_cast(const struct dpif *dpif) | |
418 | { | |
cb22974d | 419 | ovs_assert(dpif->dpif_class->open == dpif_netdev_open); |
72865317 BP |
420 | return CONTAINER_OF(dpif, struct dpif_netdev, dpif); |
421 | } | |
422 | ||
423 | static struct dp_netdev * | |
424 | get_dp_netdev(const struct dpif *dpif) | |
425 | { | |
426 | return dpif_netdev_cast(dpif)->dp; | |
427 | } | |
428 | ||
2197d7ab | 429 | static int |
2240af25 DDP |
430 | dpif_netdev_enumerate(struct sset *all_dps, |
431 | const struct dpif_class *dpif_class) | |
2197d7ab GL |
432 | { |
433 | struct shash_node *node; | |
434 | ||
97be1538 | 435 | ovs_mutex_lock(&dp_netdev_mutex); |
2197d7ab | 436 | SHASH_FOR_EACH(node, &dp_netdevs) { |
2240af25 DDP |
437 | struct dp_netdev *dp = node->data; |
438 | if (dpif_class != dp->class) { | |
439 | /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs. | |
440 | * If the class doesn't match, skip this dpif. */ | |
441 | continue; | |
442 | } | |
2197d7ab GL |
443 | sset_add(all_dps, node->name); |
444 | } | |
97be1538 | 445 | ovs_mutex_unlock(&dp_netdev_mutex); |
5279f8fd | 446 | |
2197d7ab GL |
447 | return 0; |
448 | } | |
449 | ||
add90f6f EJ |
450 | static bool |
451 | dpif_netdev_class_is_dummy(const struct dpif_class *class) | |
452 | { | |
453 | return class != &dpif_netdev_class; | |
454 | } | |
455 | ||
0aeaabc8 JP |
/* Maps the user-visible port type to the netdev type to actually open:
 * "internal" becomes "dummy" for dummy classes and "tap" otherwise; every
 * other type is passed through unchanged. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal")) {
        return type;
    }
    return dpif_netdev_class_is_dummy(class) ? "dummy" : "tap";
}
463 | ||
72865317 BP |
464 | static struct dpif * |
465 | create_dpif_netdev(struct dp_netdev *dp) | |
466 | { | |
462278db | 467 | uint16_t netflow_id = hash_string(dp->name, 0); |
72865317 | 468 | struct dpif_netdev *dpif; |
72865317 | 469 | |
6a8267c5 | 470 | ovs_refcount_ref(&dp->ref_cnt); |
72865317 | 471 | |
72865317 | 472 | dpif = xmalloc(sizeof *dpif); |
614c4892 | 473 | dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); |
72865317 | 474 | dpif->dp = dp; |
d33ed218 | 475 | dpif->last_port_seq = seq_read(dp->port_seq); |
72865317 BP |
476 | |
477 | return &dpif->dpif; | |
478 | } | |
479 | ||
4e022ec0 AW |
480 | /* Choose an unused, non-zero port number and return it on success. |
481 | * Return ODPP_NONE on failure. */ | |
482 | static odp_port_t | |
e44768b7 | 483 | choose_port(struct dp_netdev *dp, const char *name) |
59e6d833 | 484 | OVS_REQUIRES(dp->port_mutex) |
e44768b7 | 485 | { |
4e022ec0 | 486 | uint32_t port_no; |
e44768b7 JP |
487 | |
488 | if (dp->class != &dpif_netdev_class) { | |
489 | const char *p; | |
490 | int start_no = 0; | |
491 | ||
492 | /* If the port name begins with "br", start the number search at | |
493 | * 100 to make writing tests easier. */ | |
494 | if (!strncmp(name, "br", 2)) { | |
495 | start_no = 100; | |
496 | } | |
497 | ||
498 | /* If the port name contains a number, try to assign that port number. | |
499 | * This can make writing unit tests easier because port numbers are | |
500 | * predictable. */ | |
501 | for (p = name; *p != '\0'; p++) { | |
502 | if (isdigit((unsigned char) *p)) { | |
503 | port_no = start_no + strtol(p, NULL, 10); | |
ff073a71 BP |
504 | if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE) |
505 | && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 506 | return u32_to_odp(port_no); |
e44768b7 JP |
507 | } |
508 | break; | |
509 | } | |
510 | } | |
511 | } | |
512 | ||
ff073a71 BP |
513 | for (port_no = 1; port_no <= UINT16_MAX; port_no++) { |
514 | if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 515 | return u32_to_odp(port_no); |
e44768b7 JP |
516 | } |
517 | } | |
518 | ||
4e022ec0 | 519 | return ODPP_NONE; |
e44768b7 JP |
520 | } |
521 | ||
72865317 | 522 | static int |
614c4892 BP |
523 | create_dp_netdev(const char *name, const struct dpif_class *class, |
524 | struct dp_netdev **dpp) | |
8a4e3a85 | 525 | OVS_REQUIRES(dp_netdev_mutex) |
72865317 BP |
526 | { |
527 | struct dp_netdev *dp; | |
528 | int error; | |
72865317 | 529 | |
462278db | 530 | dp = xzalloc(sizeof *dp); |
8a4e3a85 BP |
531 | shash_add(&dp_netdevs, name, dp); |
532 | ||
533 | *CONST_CAST(const struct dpif_class **, &dp->class) = class; | |
534 | *CONST_CAST(const char **, &dp->name) = xstrdup(name); | |
6a8267c5 | 535 | ovs_refcount_init(&dp->ref_cnt); |
1a65ba85 | 536 | atomic_flag_clear(&dp->destroyed); |
8a4e3a85 BP |
537 | |
538 | ovs_mutex_init(&dp->flow_mutex); | |
539 | classifier_init(&dp->cls, NULL); | |
9f361d6b | 540 | cmap_init(&dp->flow_table); |
8a4e3a85 | 541 | |
51852a57 | 542 | ovsthread_stats_init(&dp->stats); |
ed27e010 | 543 | |
59e6d833 BP |
544 | ovs_mutex_init(&dp->port_mutex); |
545 | cmap_init(&dp->ports); | |
d33ed218 | 546 | dp->port_seq = seq_create(); |
6c3eee82 | 547 | latch_init(&dp->exit_latch); |
6b31e073 RW |
548 | fat_rwlock_init(&dp->upcall_rwlock); |
549 | ||
550 | /* Disable upcalls by default. */ | |
551 | dp_netdev_disable_upcall(dp); | |
623540e4 | 552 | dp->upcall_aux = NULL; |
6b31e073 | 553 | dp->upcall_cb = NULL; |
e44768b7 | 554 | |
59e6d833 | 555 | ovs_mutex_lock(&dp->port_mutex); |
4e022ec0 | 556 | error = do_add_port(dp, name, "internal", ODPP_LOCAL); |
59e6d833 | 557 | ovs_mutex_unlock(&dp->port_mutex); |
72865317 BP |
558 | if (error) { |
559 | dp_netdev_free(dp); | |
462278db | 560 | return error; |
72865317 BP |
561 | } |
562 | ||
3c33f0ff | 563 | ovs_mutex_init_recursive(&dp->emc_mutex); |
9bbf1c3d DDP |
564 | emc_cache_init(&dp->flow_cache); |
565 | ||
462278db | 566 | *dpp = dp; |
72865317 BP |
567 | return 0; |
568 | } | |
569 | ||
570 | static int | |
614c4892 | 571 | dpif_netdev_open(const struct dpif_class *class, const char *name, |
4a387741 | 572 | bool create, struct dpif **dpifp) |
72865317 | 573 | { |
462278db | 574 | struct dp_netdev *dp; |
5279f8fd | 575 | int error; |
462278db | 576 | |
97be1538 | 577 | ovs_mutex_lock(&dp_netdev_mutex); |
462278db BP |
578 | dp = shash_find_data(&dp_netdevs, name); |
579 | if (!dp) { | |
5279f8fd | 580 | error = create ? create_dp_netdev(name, class, &dp) : ENODEV; |
72865317 | 581 | } else { |
5279f8fd BP |
582 | error = (dp->class != class ? EINVAL |
583 | : create ? EEXIST | |
584 | : 0); | |
585 | } | |
586 | if (!error) { | |
587 | *dpifp = create_dpif_netdev(dp); | |
6b31e073 | 588 | dp->dpif = *dpifp; |
72865317 | 589 | } |
97be1538 | 590 | ovs_mutex_unlock(&dp_netdev_mutex); |
462278db | 591 | |
5279f8fd | 592 | return error; |
72865317 BP |
593 | } |
594 | ||
8a4e3a85 BP |
595 | /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' |
596 | * through the 'dp_netdevs' shash while freeing 'dp'. */ | |
1ba530f4 BP |
597 | static void |
598 | dp_netdev_free(struct dp_netdev *dp) | |
8a4e3a85 | 599 | OVS_REQUIRES(dp_netdev_mutex) |
1ba530f4 | 600 | { |
59e6d833 | 601 | struct dp_netdev_port *port; |
51852a57 BP |
602 | struct dp_netdev_stats *bucket; |
603 | int i; | |
4ad28026 | 604 | |
8a4e3a85 BP |
605 | shash_find_and_delete(&dp_netdevs, dp->name); |
606 | ||
e4cfed38 PS |
607 | dp_netdev_set_pmd_threads(dp, 0); |
608 | free(dp->pmd_threads); | |
6c3eee82 | 609 | |
1ba530f4 | 610 | dp_netdev_flow_flush(dp); |
59e6d833 | 611 | ovs_mutex_lock(&dp->port_mutex); |
a532e683 | 612 | CMAP_FOR_EACH (port, node, &dp->ports) { |
c40b890f | 613 | do_del_port(dp, port); |
1ba530f4 | 614 | } |
59e6d833 | 615 | ovs_mutex_unlock(&dp->port_mutex); |
51852a57 BP |
616 | |
617 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
618 | ovs_mutex_destroy(&bucket->mutex); | |
619 | free_cacheline(bucket); | |
620 | } | |
621 | ovsthread_stats_destroy(&dp->stats); | |
f5126b57 | 622 | |
2c0ea78f | 623 | classifier_destroy(&dp->cls); |
9f361d6b | 624 | cmap_destroy(&dp->flow_table); |
8a4e3a85 | 625 | ovs_mutex_destroy(&dp->flow_mutex); |
d33ed218 | 626 | seq_destroy(dp->port_seq); |
59e6d833 | 627 | cmap_destroy(&dp->ports); |
6b31e073 | 628 | fat_rwlock_destroy(&dp->upcall_rwlock); |
6c3eee82 | 629 | latch_destroy(&dp->exit_latch); |
9bbf1c3d DDP |
630 | |
631 | emc_cache_uninit(&dp->flow_cache); | |
632 | ovs_mutex_destroy(&dp->emc_mutex); | |
633 | ||
8a4e3a85 | 634 | free(CONST_CAST(char *, dp->name)); |
72865317 BP |
635 | free(dp); |
636 | } | |
637 | ||
8a4e3a85 BP |
638 | static void |
639 | dp_netdev_unref(struct dp_netdev *dp) | |
640 | { | |
641 | if (dp) { | |
642 | /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't | |
643 | * get a new reference to 'dp' through the 'dp_netdevs' shash. */ | |
644 | ovs_mutex_lock(&dp_netdev_mutex); | |
24f83812 | 645 | if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) { |
8a4e3a85 BP |
646 | dp_netdev_free(dp); |
647 | } | |
648 | ovs_mutex_unlock(&dp_netdev_mutex); | |
649 | } | |
650 | } | |
651 | ||
72865317 BP |
/* Closes 'dpif', dropping its reference to the underlying dp_netdev. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_unref(dp);
    free(dpif);
}
660 | ||
661 | static int | |
7dab847a | 662 | dpif_netdev_destroy(struct dpif *dpif) |
72865317 BP |
663 | { |
664 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd | 665 | |
6a8267c5 | 666 | if (!atomic_flag_test_and_set(&dp->destroyed)) { |
24f83812 | 667 | if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) { |
6a8267c5 BP |
668 | /* Can't happen: 'dpif' still owns a reference to 'dp'. */ |
669 | OVS_NOT_REACHED(); | |
670 | } | |
671 | } | |
5279f8fd | 672 | |
72865317 BP |
673 | return 0; |
674 | } | |
675 | ||
676 | static int | |
a8d9304d | 677 | dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) |
72865317 BP |
678 | { |
679 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
51852a57 BP |
680 | struct dp_netdev_stats *bucket; |
681 | size_t i; | |
5279f8fd | 682 | |
9f361d6b | 683 | stats->n_flows = cmap_count(&dp->flow_table); |
8a4e3a85 | 684 | |
51852a57 BP |
685 | stats->n_hit = stats->n_missed = stats->n_lost = 0; |
686 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
687 | ovs_mutex_lock(&bucket->mutex); | |
688 | stats->n_hit += bucket->n[DP_STAT_HIT]; | |
689 | stats->n_missed += bucket->n[DP_STAT_MISS]; | |
690 | stats->n_lost += bucket->n[DP_STAT_LOST]; | |
691 | ovs_mutex_unlock(&bucket->mutex); | |
692 | } | |
1ce3fa06 | 693 | stats->n_masks = UINT32_MAX; |
847108dc | 694 | stats->n_mask_hit = UINT64_MAX; |
5279f8fd | 695 | |
72865317 BP |
696 | return 0; |
697 | } | |
698 | ||
e4cfed38 PS |
699 | static void |
700 | dp_netdev_reload_pmd_threads(struct dp_netdev *dp) | |
701 | { | |
702 | int i; | |
703 | ||
704 | for (i = 0; i < dp->n_pmd_threads; i++) { | |
705 | struct pmd_thread *f = &dp->pmd_threads[i]; | |
84067a4c | 706 | int old_seq; |
e4cfed38 | 707 | |
91a96379 | 708 | atomic_add_relaxed(&f->change_seq, 1, &old_seq); |
84067a4c | 709 | } |
e4cfed38 PS |
710 | } |
711 | ||
59e6d833 BP |
712 | static uint32_t |
713 | hash_port_no(odp_port_t port_no) | |
714 | { | |
715 | return hash_int(odp_to_u32(port_no), 0); | |
716 | } | |
717 | ||
72865317 | 718 | static int |
c3827f61 | 719 | do_add_port(struct dp_netdev *dp, const char *devname, const char *type, |
4e022ec0 | 720 | odp_port_t port_no) |
59e6d833 | 721 | OVS_REQUIRES(dp->port_mutex) |
72865317 | 722 | { |
4b609110 | 723 | struct netdev_saved_flags *sf; |
72865317 BP |
724 | struct dp_netdev_port *port; |
725 | struct netdev *netdev; | |
2499a8ce | 726 | enum netdev_flags flags; |
0cbfe35d | 727 | const char *open_type; |
72865317 | 728 | int error; |
55c955bd | 729 | int i; |
72865317 BP |
730 | |
731 | /* XXX reject devices already in some dp_netdev. */ | |
732 | ||
733 | /* Open and validate network device. */ | |
0aeaabc8 | 734 | open_type = dpif_netdev_port_open_type(dp->class, type); |
0cbfe35d | 735 | error = netdev_open(devname, open_type, &netdev); |
72865317 BP |
736 | if (error) { |
737 | return error; | |
738 | } | |
72865317 BP |
739 | /* XXX reject non-Ethernet devices */ |
740 | ||
2499a8ce AC |
741 | netdev_get_flags(netdev, &flags); |
742 | if (flags & NETDEV_LOOPBACK) { | |
743 | VLOG_ERR("%s: cannot add a loopback device", devname); | |
744 | netdev_close(netdev); | |
745 | return EINVAL; | |
746 | } | |
747 | ||
5a034064 AW |
748 | if (netdev_is_pmd(netdev)) { |
749 | int n_cores = ovs_numa_get_n_cores(); | |
750 | ||
751 | if (n_cores == OVS_CORE_UNSPEC) { | |
752 | VLOG_ERR("%s, cannot get cpu core info", devname); | |
753 | return ENOENT; | |
754 | } | |
755 | /* There can only be ovs_numa_get_n_cores() pmd threads, | |
756 | * so creates a tx_q for each. */ | |
757 | error = netdev_set_multiq(netdev, n_cores, NR_QUEUE); | |
758 | if (error) { | |
759 | VLOG_ERR("%s, cannot set multiq", devname); | |
760 | return errno; | |
761 | } | |
762 | } | |
e4cfed38 PS |
763 | port = xzalloc(sizeof *port); |
764 | port->port_no = port_no; | |
765 | port->netdev = netdev; | |
55c955bd | 766 | port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); |
e4cfed38 | 767 | port->type = xstrdup(type); |
55c955bd PS |
768 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
769 | error = netdev_rxq_open(netdev, &port->rxq[i], i); | |
770 | if (error | |
771 | && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) { | |
772 | VLOG_ERR("%s: cannot receive packets on this network device (%s)", | |
773 | devname, ovs_strerror(errno)); | |
774 | netdev_close(netdev); | |
16bea12c TG |
775 | free(port->type); |
776 | free(port->rxq); | |
777 | free(port); | |
55c955bd PS |
778 | return error; |
779 | } | |
7b6b0ef4 BP |
780 | } |
781 | ||
4b609110 | 782 | error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf); |
72865317 | 783 | if (error) { |
55c955bd PS |
784 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
785 | netdev_rxq_close(port->rxq[i]); | |
786 | } | |
72865317 | 787 | netdev_close(netdev); |
16bea12c | 788 | free(port->type); |
f7791740 | 789 | free(port->rxq); |
e4cfed38 | 790 | free(port); |
72865317 BP |
791 | return error; |
792 | } | |
4b609110 | 793 | port->sf = sf; |
e4cfed38 PS |
794 | |
795 | if (netdev_is_pmd(netdev)) { | |
796 | dp->pmd_count++; | |
db73f716 | 797 | dp_netdev_set_pmd_threads(dp, NR_PMD_THREADS); |
e4cfed38 PS |
798 | dp_netdev_reload_pmd_threads(dp); |
799 | } | |
800 | ovs_refcount_init(&port->ref_cnt); | |
72865317 | 801 | |
59e6d833 | 802 | cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); |
d33ed218 | 803 | seq_change(dp->port_seq); |
72865317 BP |
804 | |
805 | return 0; | |
806 | } | |
807 | ||
/* dpif 'port_add' implementation: attaches 'netdev' to the userspace
 * datapath.  If '*port_nop' is a specific port number, that number is
 * requested (EBUSY if already taken); if it is ODPP_NONE, a free number is
 * chosen and returned through '*port_nop'.  Returns 0 on success, otherwise a
 * positive errno value (EFBIG when the datapath is out of port numbers). */
static int
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
                     odp_port_t *port_nop)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dpif_port;
    odp_port_t port_no;
    int error;

    /* 'port_mutex' serializes port additions/removals and name lookups. */
    ovs_mutex_lock(&dp->port_mutex);
    dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (*port_nop != ODPP_NONE) {
        /* Caller requested a specific port number; honor it if free. */
        port_no = *port_nop;
        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
    } else {
        /* Pick a port number, preferring one derived from the name. */
        port_no = choose_port(dp, dpif_port);
        error = port_no == ODPP_NONE ? EFBIG : 0;
    }
    if (!error) {
        *port_nop = port_no;
        error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}
835 | ||
836 | static int | |
4e022ec0 | 837 | dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no) |
72865317 BP |
838 | { |
839 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd BP |
840 | int error; |
841 | ||
59e6d833 | 842 | ovs_mutex_lock(&dp->port_mutex); |
c40b890f BP |
843 | if (port_no == ODPP_LOCAL) { |
844 | error = EINVAL; | |
845 | } else { | |
846 | struct dp_netdev_port *port; | |
847 | ||
848 | error = get_port_by_number(dp, port_no, &port); | |
849 | if (!error) { | |
850 | do_del_port(dp, port); | |
851 | } | |
852 | } | |
59e6d833 | 853 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd BP |
854 | |
855 | return error; | |
72865317 BP |
856 | } |
857 | ||
858 | static bool | |
4e022ec0 | 859 | is_valid_port_number(odp_port_t port_no) |
72865317 | 860 | { |
ff073a71 BP |
861 | return port_no != ODPP_NONE; |
862 | } | |
863 | ||
864 | static struct dp_netdev_port * | |
865 | dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no) | |
866 | { | |
867 | struct dp_netdev_port *port; | |
868 | ||
59e6d833 | 869 | CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) { |
ff073a71 BP |
870 | if (port->port_no == port_no) { |
871 | return port; | |
872 | } | |
873 | } | |
874 | return NULL; | |
72865317 BP |
875 | } |
876 | ||
877 | static int | |
878 | get_port_by_number(struct dp_netdev *dp, | |
4e022ec0 | 879 | odp_port_t port_no, struct dp_netdev_port **portp) |
72865317 BP |
880 | { |
881 | if (!is_valid_port_number(port_no)) { | |
882 | *portp = NULL; | |
883 | return EINVAL; | |
884 | } else { | |
ff073a71 | 885 | *portp = dp_netdev_lookup_port(dp, port_no); |
72865317 BP |
886 | return *portp ? 0 : ENOENT; |
887 | } | |
888 | } | |
889 | ||
b284085e PS |
890 | static void |
891 | port_ref(struct dp_netdev_port *port) | |
892 | { | |
893 | if (port) { | |
894 | ovs_refcount_ref(&port->ref_cnt); | |
895 | } | |
896 | } | |
897 | ||
a1fdee13 AW |
898 | static bool |
899 | port_try_ref(struct dp_netdev_port *port) | |
900 | { | |
901 | if (port) { | |
902 | return ovs_refcount_try_ref_rcu(&port->ref_cnt); | |
903 | } | |
904 | ||
905 | return false; | |
906 | } | |
907 | ||
/* Actually destroys 'port' and everything it owns.  Runs via
 * ovsrcu_postpone() from port_unref() after the RCU grace period, so no
 * reader can still hold a pointer to 'port'. */
static void
port_destroy__(struct dp_netdev_port *port)
{
    int n_rxq = netdev_n_rxq(port->netdev);
    int i;

    netdev_close(port->netdev);
    /* Restores (and releases) the flags saved when the port was added,
     * e.g. turning promiscuous mode back off. */
    netdev_restore_flags(port->sf);

    for (i = 0; i < n_rxq; i++) {
        netdev_rxq_close(port->rxq[i]);
    }
    free(port->rxq);
    free(port->type);
    free(port);
}
924 | ||
/* Drops one reference to 'port' (null is a no-op).  When the last reference
 * is released, destruction is deferred past the current RCU grace period so
 * that concurrent lockless readers stay safe. */
static void
port_unref(struct dp_netdev_port *port)
{
    if (port && ovs_refcount_unref_relaxed(&port->ref_cnt) == 1) {
        ovsrcu_postpone(port_destroy__, port);
    }
}
932 | ||
72865317 BP |
933 | static int |
934 | get_port_by_name(struct dp_netdev *dp, | |
935 | const char *devname, struct dp_netdev_port **portp) | |
59e6d833 | 936 | OVS_REQUIRES(dp->port_mutex) |
72865317 BP |
937 | { |
938 | struct dp_netdev_port *port; | |
939 | ||
a532e683 | 940 | CMAP_FOR_EACH (port, node, &dp->ports) { |
3efb6063 | 941 | if (!strcmp(netdev_get_name(port->netdev), devname)) { |
72865317 BP |
942 | *portp = port; |
943 | return 0; | |
944 | } | |
945 | } | |
946 | return ENOENT; | |
947 | } | |
948 | ||
/* Unlinks 'port' from 'dp' and drops the datapath's reference to it.
 * Caller must hold dp->port_mutex. */
static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
    OVS_REQUIRES(dp->port_mutex)
{
    cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
    /* Wake up anyone blocked in dpif_netdev_port_poll(). */
    seq_change(dp->port_seq);
    if (netdev_is_pmd(port->netdev)) {
        /* PMD threads cache the set of polled ports; force a reload so they
         * stop polling this one. */
        dp_netdev_reload_pmd_threads(dp);
    }

    port_unref(port);
}
961 | ||
962 | static void | |
4c738a8d BP |
963 | answer_port_query(const struct dp_netdev_port *port, |
964 | struct dpif_port *dpif_port) | |
72865317 | 965 | { |
3efb6063 | 966 | dpif_port->name = xstrdup(netdev_get_name(port->netdev)); |
0cbfe35d | 967 | dpif_port->type = xstrdup(port->type); |
4c738a8d | 968 | dpif_port->port_no = port->port_no; |
72865317 BP |
969 | } |
970 | ||
971 | static int | |
4e022ec0 | 972 | dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, |
4c738a8d | 973 | struct dpif_port *dpif_port) |
72865317 BP |
974 | { |
975 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
976 | struct dp_netdev_port *port; | |
977 | int error; | |
978 | ||
979 | error = get_port_by_number(dp, port_no, &port); | |
4afba28d | 980 | if (!error && dpif_port) { |
4c738a8d | 981 | answer_port_query(port, dpif_port); |
72865317 | 982 | } |
5279f8fd | 983 | |
72865317 BP |
984 | return error; |
985 | } | |
986 | ||
987 | static int | |
988 | dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname, | |
4c738a8d | 989 | struct dpif_port *dpif_port) |
72865317 BP |
990 | { |
991 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
992 | struct dp_netdev_port *port; | |
993 | int error; | |
994 | ||
59e6d833 | 995 | ovs_mutex_lock(&dp->port_mutex); |
72865317 | 996 | error = get_port_by_name(dp, devname, &port); |
4afba28d | 997 | if (!error && dpif_port) { |
4c738a8d | 998 | answer_port_query(port, dpif_port); |
72865317 | 999 | } |
59e6d833 | 1000 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd | 1001 | |
72865317 BP |
1002 | return error; |
1003 | } | |
1004 | ||
/* Frees 'flow' together with its per-thread stats buckets, classifier rule
 * and actions.  Must only run once no thread can reach 'flow' any more; it is
 * normally scheduled through ovsrcu_postpone() by dp_netdev_flow_unref(). */
static void
dp_netdev_flow_free(struct dp_netdev_flow *flow)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
        ovs_mutex_destroy(&bucket->mutex);
        free_cacheline(bucket);
    }
    ovsthread_stats_destroy(&flow->stats);

    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
    dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
    free(flow);
}
1021 | ||
/* Drops one reference to 'flow'.  The last release defers the actual free
 * past the current RCU grace period, keeping lockless readers safe. */
static void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
{
    if (ovs_refcount_unref_relaxed(&flow->ref_cnt) == 1) {
        ovsrcu_postpone(dp_netdev_flow_free, flow);
    }
}
1028 | ||
/* Removes 'flow' from the classifier and the flow table, marks it dead so
 * that exact-match cache entries pointing at it stop matching, and drops the
 * datapath's reference.  RCU readers may keep using 'flow' until the grace
 * period expires.  Caller must hold dp->flow_mutex. */
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
    struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node);

    classifier_remove(&dp->cls, cr);
    cmap_remove(&dp->flow_table, node, flow_hash(&flow->flow, 0));
    /* EMC entries check this flag (emc_entry_alive()) before trusting a
     * cached flow. */
    flow->dead = true;

    dp_netdev_flow_unref(flow);
}
1042 | ||
1043 | static void | |
1044 | dp_netdev_flow_flush(struct dp_netdev *dp) | |
1045 | { | |
78c8df12 | 1046 | struct dp_netdev_flow *netdev_flow; |
72865317 | 1047 | |
8a4e3a85 | 1048 | ovs_mutex_lock(&dp->flow_mutex); |
6bc3bb82 | 1049 | CMAP_FOR_EACH (netdev_flow, node, &dp->flow_table) { |
8a4e3a85 | 1050 | dp_netdev_remove_flow(dp, netdev_flow); |
72865317 | 1051 | } |
8a4e3a85 | 1052 | ovs_mutex_unlock(&dp->flow_mutex); |
72865317 BP |
1053 | } |
1054 | ||
/* dpif 'flow_flush' implementation; always succeeds. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));

    return 0;
}
1063 | ||
/* Iterator state carried across dpif_netdev_port_dump_next() calls. */
struct dp_netdev_port_state {
    struct cmap_position position;  /* Cursor into dp->ports. */
    char *name;                     /* Owned copy of the last returned name. */
};
1068 | ||
1069 | static int | |
1070 | dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) | |
1071 | { | |
1072 | *statep = xzalloc(sizeof(struct dp_netdev_port_state)); | |
1073 | return 0; | |
1074 | } | |
1075 | ||
72865317 | 1076 | static int |
b0ec0f27 | 1077 | dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, |
4c738a8d | 1078 | struct dpif_port *dpif_port) |
72865317 | 1079 | { |
b0ec0f27 | 1080 | struct dp_netdev_port_state *state = state_; |
72865317 | 1081 | struct dp_netdev *dp = get_dp_netdev(dpif); |
59e6d833 | 1082 | struct cmap_node *node; |
ff073a71 | 1083 | int retval; |
72865317 | 1084 | |
59e6d833 | 1085 | node = cmap_next_position(&dp->ports, &state->position); |
ff073a71 BP |
1086 | if (node) { |
1087 | struct dp_netdev_port *port; | |
5279f8fd | 1088 | |
ff073a71 BP |
1089 | port = CONTAINER_OF(node, struct dp_netdev_port, node); |
1090 | ||
1091 | free(state->name); | |
1092 | state->name = xstrdup(netdev_get_name(port->netdev)); | |
1093 | dpif_port->name = state->name; | |
1094 | dpif_port->type = port->type; | |
1095 | dpif_port->port_no = port->port_no; | |
1096 | ||
1097 | retval = 0; | |
1098 | } else { | |
1099 | retval = EOF; | |
72865317 | 1100 | } |
5279f8fd | 1101 | |
ff073a71 | 1102 | return retval; |
b0ec0f27 BP |
1103 | } |
1104 | ||
1105 | static int | |
4c738a8d | 1106 | dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) |
b0ec0f27 | 1107 | { |
4c738a8d BP |
1108 | struct dp_netdev_port_state *state = state_; |
1109 | free(state->name); | |
b0ec0f27 BP |
1110 | free(state); |
1111 | return 0; | |
72865317 BP |
1112 | } |
1113 | ||
1114 | static int | |
67a4917b | 1115 | dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) |
72865317 BP |
1116 | { |
1117 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
d33ed218 | 1118 | uint64_t new_port_seq; |
5279f8fd BP |
1119 | int error; |
1120 | ||
d33ed218 BP |
1121 | new_port_seq = seq_read(dpif->dp->port_seq); |
1122 | if (dpif->last_port_seq != new_port_seq) { | |
1123 | dpif->last_port_seq = new_port_seq; | |
5279f8fd | 1124 | error = ENOBUFS; |
72865317 | 1125 | } else { |
5279f8fd | 1126 | error = EAGAIN; |
72865317 | 1127 | } |
5279f8fd BP |
1128 | |
1129 | return error; | |
72865317 BP |
1130 | } |
1131 | ||
1132 | static void | |
1133 | dpif_netdev_port_poll_wait(const struct dpif *dpif_) | |
1134 | { | |
1135 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
5279f8fd | 1136 | |
d33ed218 | 1137 | seq_wait(dpif->dp->port_seq, dpif->last_port_seq); |
8a4e3a85 BP |
1138 | } |
1139 | ||
1140 | static struct dp_netdev_flow * | |
1141 | dp_netdev_flow_cast(const struct cls_rule *cr) | |
1142 | { | |
1143 | return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL; | |
72865317 BP |
1144 | } |
1145 | ||
9bbf1c3d DDP |
1146 | static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow) |
1147 | { | |
1148 | return ovs_refcount_try_ref_rcu(&flow->ref_cnt); | |
1149 | } | |
1150 | ||
1151 | static inline bool | |
1152 | emc_entry_alive(struct emc_entry *ce) | |
1153 | { | |
1154 | return ce->flow && !ce->flow->dead; | |
1155 | } | |
1156 | ||
1157 | static void | |
1158 | emc_clear_entry(struct emc_entry *ce) | |
1159 | { | |
1160 | if (ce->flow) { | |
1161 | dp_netdev_flow_unref(ce->flow); | |
1162 | ce->flow = NULL; | |
1163 | } | |
1164 | } | |
1165 | ||
/* Points EMC entry 'ce' at 'flow', transferring the cached reference as
 * needed.  If 'mf' is nonnull, also stores a copy of the miniflow and its
 * 'hash' so later lookups can match against this entry. */
static inline void
emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
                 const struct miniflow *mf, uint32_t hash)
{
    if (ce->flow != flow) {
        if (ce->flow) {
            dp_netdev_flow_unref(ce->flow);
        }

        if (dp_netdev_flow_ref(flow)) {
            ce->flow = flow;
        } else {
            /* 'flow' died before we could grab a reference; leave the entry
             * empty rather than cache a dangling pointer. */
            ce->flow = NULL;
        }
    }
    if (mf) {
        miniflow_clone_inline(&ce->mf.flow, mf, count_1bits(mf->map));
        ce->hash = hash;
    }
}
1186 | ||
/* Inserts (or refreshes) the mapping 'mf'/'hash' -> 'flow' into the
 * exact-match cache 'cache'.  Each hash maps to a small set of candidate
 * positions; an existing entry for the same miniflow is updated in place,
 * otherwise a victim entry is chosen and overwritten. */
static inline void
emc_insert(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash,
           struct dp_netdev_flow *flow)
{
    struct emc_entry *to_be_replaced = NULL;
    struct emc_entry *current_entry;

    EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) {
        if (current_entry->hash == hash
            && miniflow_equal(&current_entry->mf.flow, mf)) {

            /* We found the entry with the 'mf' miniflow */
            emc_change_entry(current_entry, flow, NULL, 0);
            return;
        }

        /* Replacement policy: put the flow in an empty (not alive) entry, or
         * in the first entry where it can be */
        if (!to_be_replaced
            || (emc_entry_alive(to_be_replaced)
                && !emc_entry_alive(current_entry))
            || current_entry->hash < to_be_replaced->hash) {
            to_be_replaced = current_entry;
        }
    }
    /* We didn't find the miniflow in the cache.
     * The 'to_be_replaced' entry is where the new flow will be stored */

    emc_change_entry(to_be_replaced, flow, mf, hash);
}
1217 | ||
1218 | static inline struct dp_netdev_flow * | |
1219 | emc_lookup(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash) | |
1220 | { | |
1221 | struct emc_entry *current_entry; | |
1222 | ||
1223 | EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) { | |
1224 | if (current_entry->hash == hash && emc_entry_alive(current_entry) | |
1225 | && miniflow_equal(¤t_entry->mf.flow, mf)) { | |
1226 | ||
1227 | /* We found the entry with the 'mf' miniflow */ | |
1228 | return current_entry->flow; | |
1229 | } | |
1230 | } | |
1231 | ||
1232 | return NULL; | |
1233 | } | |
1234 | ||
72865317 | 1235 | static struct dp_netdev_flow * |
4f150744 | 1236 | dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key) |
2c0ea78f | 1237 | { |
8a4e3a85 | 1238 | struct dp_netdev_flow *netdev_flow; |
4f150744 | 1239 | struct cls_rule *rule; |
2c0ea78f | 1240 | |
b7648634 | 1241 | classifier_lookup_miniflow_batch(&dp->cls, &key, &rule, 1); |
4f150744 | 1242 | netdev_flow = dp_netdev_flow_cast(rule); |
2c0ea78f | 1243 | |
8a4e3a85 | 1244 | return netdev_flow; |
2c0ea78f GS |
1245 | } |
1246 | ||
1247 | static struct dp_netdev_flow * | |
1248 | dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow) | |
72865317 | 1249 | { |
1763b4b8 | 1250 | struct dp_netdev_flow *netdev_flow; |
72865317 | 1251 | |
9f361d6b | 1252 | CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0), |
1763b4b8 | 1253 | &dp->flow_table) { |
2c0ea78f | 1254 | if (flow_equal(&netdev_flow->flow, flow)) { |
61e7deb1 | 1255 | return netdev_flow; |
72865317 BP |
1256 | } |
1257 | } | |
8a4e3a85 | 1258 | |
72865317 BP |
1259 | return NULL; |
1260 | } | |
1261 | ||
/* Aggregates 'netdev_flow''s per-thread statistics buckets into '*stats'.
 * Each bucket is read under its own mutex, so the totals are consistent per
 * bucket but the overall snapshot is only approximate under concurrency. */
static void
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
                    struct dpif_flow_stats *stats)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    memset(stats, 0, sizeof *stats);
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
        ovs_mutex_lock(&bucket->mutex);
        stats->n_packets += bucket->packet_count;
        stats->n_bytes += bucket->byte_count;
        /* 'used' is the most recent use time across all threads. */
        stats->used = MAX(stats->used, bucket->used);
        stats->tcp_flags |= bucket->tcp_flags;
        ovs_mutex_unlock(&bucket->mutex);
    }
}
1279 | ||
/* Converts 'netdev_flow' into the dpif representation '*flow'.  The mask is
 * serialized into 'buffer', which must outlive '*flow'; the actions pointer
 * refers to the flow's RCU-protected actions and is only valid within the
 * current RCU quiescent period. */
static void
dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
                            struct ofpbuf *buffer, struct dpif_flow *flow)
{
    struct flow_wildcards wc;
    struct dp_netdev_actions *actions;

    minimask_expand(&netdev_flow->cr.match.mask, &wc);
    odp_flow_key_from_mask(buffer, &wc.masks, &netdev_flow->flow,
                           odp_to_u32(wc.masks.in_port.odp_port),
                           SIZE_MAX, true);
    flow->mask = ofpbuf_data(buffer);
    flow->mask_len = ofpbuf_size(buffer);

    actions = dp_netdev_flow_get_actions(netdev_flow);
    flow->actions = actions->actions;
    flow->actions_len = actions->size;

    get_dpif_flow_stats(netdev_flow, &flow->stats);
}
1300 | ||
/* Parses the Netlink-formatted mask 'mask_key' (of length 'mask_key_len')
 * into '*mask'.  If no mask was supplied, synthesizes an exact-match mask for
 * every field whose prerequisites are satisfied by 'flow' (skipping registers
 * and metadata).  Returns 0 on success, EINVAL if the mask does not parse. */
static int
dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              const struct nlattr *mask_key,
                              uint32_t mask_key_len, const struct flow *flow,
                              struct flow *mask)
{
    if (mask_key_len) {
        enum odp_key_fitness fitness;

        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
        if (fitness) {
            /* This should not happen: it indicates that
             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
             * disagree on the acceptable form of a mask.  Log the problem
             * as an error, with enough details to enable debugging. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

            if (!VLOG_DROP_ERR(&rl)) {
                struct ds s;

                ds_init(&s);
                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
                                true);
                VLOG_ERR("internal error parsing flow mask %s (%s)",
                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
                ds_destroy(&s);
            }

            return EINVAL;
        }
    } else {
        enum mf_field_id id;
        /* No mask key, unwildcard everything except fields whose
         * prerequisities are not met. */
        memset(mask, 0x0, sizeof *mask);

        for (id = 0; id < MFF_N_IDS; ++id) {
            /* Skip registers and metadata. */
            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
                && id != MFF_METADATA) {
                const struct mf_field *mf = mf_from_id(id);
                if (mf_are_prereqs_ok(mf, flow)) {
                    mf_mask_field(mf, mask);
                }
            }
        }
    }

    /* Force unwildcard the in_port.
     *
     * We need to do this even in the case where we unwildcard "everything"
     * above because "everything" only includes the 16-bit OpenFlow port number
     * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
     * port number mask->in_port.odp_port. */
    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);

    return 0;
}
1359 | ||
/* Parses the Netlink-formatted flow key 'key' (of length 'key_len') into
 * '*flow'.  Returns 0 on success, EINVAL if the key does not parse or names
 * an invalid input port. */
static int
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              struct flow *flow)
{
    odp_port_t in_port;

    if (odp_flow_key_to_flow(key, key_len, flow)) {
        /* This should not happen: it indicates that odp_flow_key_from_flow()
         * and odp_flow_key_to_flow() disagree on the acceptable form of a
         * flow.  Log the problem as an error, with enough details to enable
         * debugging. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    /* ODPP_NONE is tolerated here so that flows that never match on an input
     * port can still be expressed. */
    in_port = flow->in_port.odp_port;
    if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
        return EINVAL;
    }

    return 0;
}
1392 | ||
72865317 | 1393 | static int |
6fe09f8c | 1394 | dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) |
72865317 BP |
1395 | { |
1396 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1763b4b8 | 1397 | struct dp_netdev_flow *netdev_flow; |
bc4a05c6 BP |
1398 | struct flow key; |
1399 | int error; | |
36956a7d | 1400 | |
6fe09f8c | 1401 | error = dpif_netdev_flow_from_nlattrs(get->key, get->key_len, &key); |
bc4a05c6 BP |
1402 | if (error) { |
1403 | return error; | |
1404 | } | |
14608a15 | 1405 | |
2c0ea78f | 1406 | netdev_flow = dp_netdev_find_flow(dp, &key); |
8a4e3a85 | 1407 | |
1763b4b8 | 1408 | if (netdev_flow) { |
6fe09f8c | 1409 | dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->flow); |
61e7deb1 | 1410 | } else { |
5279f8fd | 1411 | error = ENOENT; |
72865317 | 1412 | } |
bc4a05c6 | 1413 | |
5279f8fd | 1414 | return error; |
72865317 BP |
1415 | } |
1416 | ||
/* Creates a new datapath flow for 'match' with the given 'actions' and
 * publishes it in the flow table and classifier.  Caller must hold
 * dp->flow_mutex.  Always returns 0. */
static int
dp_netdev_flow_add(struct dp_netdev *dp, struct match *match,
                   const struct nlattr *actions, size_t actions_len)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct dp_netdev_flow *netdev_flow;

    netdev_flow = xzalloc(sizeof *netdev_flow);
    /* 'flow' is declared const; cast away constness for one-time init. */
    *CONST_CAST(struct flow *, &netdev_flow->flow) = match->flow;

    ovs_refcount_init(&netdev_flow->ref_cnt);

    ovsthread_stats_init(&netdev_flow->stats);

    ovsrcu_set(&netdev_flow->actions,
               dp_netdev_actions_create(actions, actions_len));

    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
                  match, NETDEV_RULE_PRIORITY);
    /* Publish in the exact-match table first, then in the classifier, so the
     * flow is fully initialized by the time lookups can reach it. */
    cmap_insert(&dp->flow_table,
                CONST_CAST(struct cmap_node *, &netdev_flow->node),
                flow_hash(&match->flow, 0));
    classifier_insert(&dp->cls,
                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));

    if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
        struct ds ds = DS_EMPTY_INITIALIZER;

        ds_put_cstr(&ds, "flow_add: ");
        match_format(match, &ds, OFP_DEFAULT_PRIORITY);
        ds_put_cstr(&ds, ", actions:");
        format_odp_actions(&ds, actions, actions_len);

        VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));

        ds_destroy(&ds);
    }

    return 0;
}
1457 | ||
1458 | static void | |
1763b4b8 | 1459 | clear_stats(struct dp_netdev_flow *netdev_flow) |
72865317 | 1460 | { |
679ba04c BP |
1461 | struct dp_netdev_flow_stats *bucket; |
1462 | size_t i; | |
1463 | ||
1464 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) { | |
1465 | ovs_mutex_lock(&bucket->mutex); | |
1466 | bucket->used = 0; | |
1467 | bucket->packet_count = 0; | |
1468 | bucket->byte_count = 0; | |
1469 | bucket->tcp_flags = 0; | |
1470 | ovs_mutex_unlock(&bucket->mutex); | |
1471 | } | |
72865317 BP |
1472 | } |
1473 | ||
/* dpif 'flow_put' implementation: creates or modifies a flow according to
 * 'put->flags' (DPIF_FP_CREATE / DPIF_FP_MODIFY / DPIF_FP_ZERO_STATS).
 * Returns 0 on success; EFBIG when the flow table is full, ENOENT when
 * asked to modify a missing flow, EEXIST when asked to create a duplicate,
 * EINVAL for an overlapping (same match, different key) flow. */
static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct miniflow miniflow;
    struct match match;
    int error;

    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
    if (error) {
        return error;
    }
    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
                                          put->mask, put->mask_len,
                                          &match.flow, &match.wc.masks);
    if (error) {
        return error;
    }
    miniflow_init(&miniflow, &match.flow);

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
    if (!netdev_flow) {
        if (put->flags & DPIF_FP_CREATE) {
            if (cmap_count(&dp->flow_table) < MAX_FLOWS) {
                if (put->stats) {
                    /* New flow: report all-zero statistics. */
                    memset(put->stats, 0, sizeof *put->stats);
                }
                error = dp_netdev_flow_add(dp, &match, put->actions,
                                           put->actions_len);
            } else {
                error = EFBIG;
            }
        } else {
            error = ENOENT;
        }
    } else {
        if (put->flags & DPIF_FP_MODIFY
            && flow_equal(&match.flow, &netdev_flow->flow)) {
            struct dp_netdev_actions *new_actions;
            struct dp_netdev_actions *old_actions;

            new_actions = dp_netdev_actions_create(put->actions,
                                                   put->actions_len);

            /* Swap in the new actions; readers may still be executing the
             * old ones, so their destruction is RCU-deferred below. */
            old_actions = dp_netdev_flow_get_actions(netdev_flow);
            ovsrcu_set(&netdev_flow->actions, new_actions);

            if (put->stats) {
                get_dpif_flow_stats(netdev_flow, put->stats);
            }
            if (put->flags & DPIF_FP_ZERO_STATS) {
                clear_stats(netdev_flow);
            }

            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
        } else if (put->flags & DPIF_FP_CREATE) {
            error = EEXIST;
        } else {
            /* Overlapping flow. */
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&dp->flow_mutex);
    miniflow_destroy(&miniflow);

    return error;
}
1543 | ||
/* dpif 'flow_del' implementation: removes the flow that exactly matches
 * 'del->key', optionally reporting its final statistics through
 * 'del->stats'.  Returns ENOENT if no such flow exists. */
static int
dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_find_flow(dp, &key);
    if (netdev_flow) {
        if (del->stats) {
            /* Snapshot stats before the flow is torn down. */
            get_dpif_flow_stats(netdev_flow, del->stats);
        }
        dp_netdev_remove_flow(dp, netdev_flow);
    } else {
        error = ENOENT;
    }
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}
1571 | ||
/* Shared state of a flow dump, referenced by every dump thread. */
struct dpif_netdev_flow_dump {
    struct dpif_flow_dump up;   /* Generic dpif dump state. */
    struct cmap_position pos;   /* Shared cursor into dp->flow_table. */
    int status;                 /* 0 while dumping, EOF once exhausted. */
    struct ovs_mutex mutex;     /* Protects 'pos' and 'status'. */
};
1578 | ||
ac64794a BP |
1579 | static struct dpif_netdev_flow_dump * |
1580 | dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump) | |
72865317 | 1581 | { |
ac64794a | 1582 | return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up); |
e723fd32 JS |
1583 | } |
1584 | ||
ac64794a BP |
1585 | static struct dpif_flow_dump * |
1586 | dpif_netdev_flow_dump_create(const struct dpif *dpif_) | |
e723fd32 | 1587 | { |
ac64794a | 1588 | struct dpif_netdev_flow_dump *dump; |
e723fd32 | 1589 | |
ac64794a BP |
1590 | dump = xmalloc(sizeof *dump); |
1591 | dpif_flow_dump_init(&dump->up, dpif_); | |
9f361d6b | 1592 | memset(&dump->pos, 0, sizeof dump->pos); |
ac64794a BP |
1593 | dump->status = 0; |
1594 | ovs_mutex_init(&dump->mutex); | |
1595 | ||
1596 | return &dump->up; | |
e723fd32 JS |
1597 | } |
1598 | ||
1599 | static int | |
ac64794a | 1600 | dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_) |
e723fd32 | 1601 | { |
ac64794a | 1602 | struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_); |
e723fd32 | 1603 | |
ac64794a BP |
1604 | ovs_mutex_destroy(&dump->mutex); |
1605 | free(dump); | |
704a1e09 BP |
1606 | return 0; |
1607 | } | |
1608 | ||
/* Per-thread state of a flow dump. */
struct dpif_netdev_flow_dump_thread {
    struct dpif_flow_dump_thread up;     /* Generic per-thread dump state. */
    struct dpif_netdev_flow_dump *dump;  /* Shared dump being iterated. */
    /* Per-batch storage backing the key/mask pointers handed to callers. */
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
};
1615 | ||
1616 | static struct dpif_netdev_flow_dump_thread * | |
1617 | dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread) | |
1618 | { | |
1619 | return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up); | |
1620 | } | |
1621 | ||
1622 | static struct dpif_flow_dump_thread * | |
1623 | dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_) | |
1624 | { | |
1625 | struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_); | |
1626 | struct dpif_netdev_flow_dump_thread *thread; | |
1627 | ||
1628 | thread = xmalloc(sizeof *thread); | |
1629 | dpif_flow_dump_thread_init(&thread->up, &dump->up); | |
1630 | thread->dump = dump; | |
1631 | return &thread->up; | |
1632 | } | |
1633 | ||
/* dpif 'flow_dump_thread_destroy' implementation: frees per-thread state. */
static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    free(dpif_netdev_flow_dump_thread_cast(thread_));
}
1642 | ||
/* Fills 'flows' with up to 'max_flows' flows from the datapath, advancing the
 * dump's shared position.  Returns the number of flows filled in (0 when the
 * dump is exhausted).
 *
 * The shared cmap cursor in 'dump' is protected by 'dump->mutex'; the flows
 * themselves are RCU-protected, so the pointers grabbed under the mutex stay
 * valid while they are serialized below without the lock held. */
static int
dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                           struct dpif_flow *flows, int max_flows)
{
    struct dpif_netdev_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_cast(thread_);
    struct dpif_netdev_flow_dump *dump = thread->dump;
    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
    struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
    int n_flows = 0;
    int i;

    /* Grab a batch of flow pointers under the mutex. */
    ovs_mutex_lock(&dump->mutex);
    if (!dump->status) {
        for (n_flows = 0; n_flows < MIN(max_flows, FLOW_DUMP_MAX_BATCH);
             n_flows++) {
            struct cmap_node *node;

            node = cmap_next_position(&dp->flow_table, &dump->pos);
            if (!node) {
                /* Table exhausted: make every other thread stop too. */
                dump->status = EOF;
                break;
            }
            netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow,
                                                 node);
        }
    }
    ovs_mutex_unlock(&dump->mutex);

    /* Serialize each flow's key, mask, actions and stats into the caller's
     * 'flows' array, using this thread's private stack buffers. */
    for (i = 0; i < n_flows; i++) {
        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
        struct odputil_keybuf *keybuf = &thread->keybuf[i];
        struct dp_netdev_flow *netdev_flow = netdev_flows[i];
        struct dpif_flow *f = &flows[i];
        struct dp_netdev_actions *dp_actions;
        struct flow_wildcards wc;
        struct ofpbuf buf;

        minimask_expand(&netdev_flow->cr.match.mask, &wc);

        /* Key. */
        ofpbuf_use_stack(&buf, keybuf, sizeof *keybuf);
        odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
                               netdev_flow->flow.in_port.odp_port, true);
        f->key = ofpbuf_data(&buf);
        f->key_len = ofpbuf_size(&buf);

        /* Mask. */
        ofpbuf_use_stack(&buf, maskbuf, sizeof *maskbuf);
        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
                               odp_to_u32(wc.masks.in_port.odp_port),
                               SIZE_MAX, true);
        f->mask = ofpbuf_data(&buf);
        f->mask_len = ofpbuf_size(&buf);

        /* Actions. */
        dp_actions = dp_netdev_flow_get_actions(netdev_flow);
        f->actions = dp_actions->actions;
        f->actions_len = dp_actions->size;

        /* Stats. */
        get_dpif_flow_stats(netdev_flow, &f->stats);
    }

    return n_flows;
}
1710 | ||
/* Executes the actions in 'execute' on the packet it carries.  Returns 0 on
 * success, EINVAL if the packet is shorter than an Ethernet header or larger
 * than 64 kB. */
static int
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dpif_packet packet, *pp;
    struct pkt_metadata *md = &execute->md;

    if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
        ofpbuf_size(execute->packet) > UINT16_MAX) {
        return EINVAL;
    }

    /* Wrap the caller's ofpbuf in a dpif_packet without copying the data. */
    packet.ofpbuf = *execute->packet;
    pp = &packet;

    /* The non-pmd exact-match cache 'dp->flow_cache' is shared with
     * dpif_netdev_run(), so it may only be used under 'emc_mutex'. */
    ovs_mutex_lock(&dp->emc_mutex);
    dp_netdev_execute_actions(dp, &pp, 1, false, md,
                              &dp->flow_cache, execute->actions,
                              execute->actions_len);
    ovs_mutex_unlock(&dp->emc_mutex);

    /* Even though may_steal is set to false, some actions could modify or
     * reallocate the ofpbuf memory. We need to pass those changes to the
     * caller */
    *execute->packet = packet.ofpbuf;

    return 0;
}
1739 | ||
1a0c894a BP |
1740 | static void |
1741 | dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) | |
1742 | { | |
1743 | size_t i; | |
1744 | ||
1745 | for (i = 0; i < n_ops; i++) { | |
1746 | struct dpif_op *op = ops[i]; | |
1747 | ||
1748 | switch (op->type) { | |
1749 | case DPIF_OP_FLOW_PUT: | |
1750 | op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put); | |
1751 | break; | |
1752 | ||
1753 | case DPIF_OP_FLOW_DEL: | |
1754 | op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del); | |
1755 | break; | |
1756 | ||
1757 | case DPIF_OP_EXECUTE: | |
1758 | op->error = dpif_netdev_execute(dpif, &op->u.execute); | |
1759 | break; | |
6fe09f8c JS |
1760 | |
1761 | case DPIF_OP_FLOW_GET: | |
1762 | op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get); | |
1763 | break; | |
1a0c894a BP |
1764 | } |
1765 | } | |
1766 | } | |
1767 | ||
5bf93d67 EJ |
1768 | static int |
1769 | dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
1770 | uint32_t queue_id, uint32_t *priority) | |
1771 | { | |
1772 | *priority = queue_id; | |
1773 | return 0; | |
1774 | } | |
1775 | ||
72865317 | 1776 | \f |
a84cb64a BP |
1777 | /* Creates and returns a new 'struct dp_netdev_actions', with a reference count |
1778 | * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of | |
1779 | * 'ofpacts'. */ | |
1780 | struct dp_netdev_actions * | |
1781 | dp_netdev_actions_create(const struct nlattr *actions, size_t size) | |
1782 | { | |
1783 | struct dp_netdev_actions *netdev_actions; | |
1784 | ||
1785 | netdev_actions = xmalloc(sizeof *netdev_actions); | |
a84cb64a BP |
1786 | netdev_actions->actions = xmemdup(actions, size); |
1787 | netdev_actions->size = size; | |
1788 | ||
1789 | return netdev_actions; | |
1790 | } | |
1791 | ||
/* Returns the actions currently attached to 'flow'.  The result is
 * RCU-protected: it stays valid until the calling thread next quiesces. */
struct dp_netdev_actions *
dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
{
    return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
}
1797 | ||
61e7deb1 BP |
1798 | static void |
1799 | dp_netdev_actions_free(struct dp_netdev_actions *actions) | |
a84cb64a | 1800 | { |
61e7deb1 BP |
1801 | free(actions->actions); |
1802 | free(actions); | |
a84cb64a BP |
1803 | } |
1804 | \f | |
e4cfed38 | 1805 | |
/* Receives one batch of packets from 'rxq' on 'port' and feeds them into the
 * datapath input path with 'flow_cache' as the exact-match cache.  EAGAIN (no
 * packets ready) and EOPNOTSUPP are expected and ignored; any other receive
 * error is logged, rate-limited. */
static void
dp_netdev_process_rxq_port(struct dp_netdev *dp,
                           struct emc_cache *flow_cache,
                           struct dp_netdev_port *port,
                           struct netdev_rxq *rxq)
{
    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    int error, cnt;

    error = netdev_rxq_recv(rxq, packets, &cnt);
    if (!error) {
        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);

        /* Fresh input from a device: reset this thread's recirculation
         * depth. */
        *recirc_depth_get() = 0;
        dp_netdev_input(dp, flow_cache, packets, cnt, &md);
    } else if (error != EAGAIN && error != EOPNOTSUPP) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
                    netdev_get_name(port->netdev), ovs_strerror(error));
    }
}
1828 | ||
/* Polls every rx queue of every non-pmd port once.  (pmd-capable ports are
 * serviced by the dedicated pmd threads instead.)  Holds 'emc_mutex' for the
 * whole pass because the shared exact-match cache 'dp->flow_cache' is also
 * used by dpif_netdev_execute(). */
static void
dpif_netdev_run(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);

    ovs_mutex_lock(&dp->emc_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                dp_netdev_process_rxq_port(dp, &dp->flow_cache, port,
                                           port->rxq[i]);
            }
        }
    }
    ovs_mutex_unlock(&dp->emc_mutex);
}
1848 | ||
/* Arranges for the caller to wake up when any non-pmd rx queue becomes
 * readable.  Takes the global 'dp_netdev_mutex' to keep the port set stable
 * while the waits are registered. */
static void
dpif_netdev_wait(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);

    ovs_mutex_lock(&dp_netdev_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                netdev_rxq_wait(port->rxq[i]);
            }
        }
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}
1867 | ||
/* One rx queue that a pmd thread polls, paired with the port that owns it
 * (the port is referenced so it cannot be destroyed while on a poll list). */
struct rxq_poll {
    struct dp_netdev_port *port;  /* Holds a port_ref(). */
    struct netdev_rxq *rx;        /* Queue belonging to 'port'. */
};
1872 | ||
/* Rebuilds pmd thread 'f''s poll list: drops the port references held by the
 * old list, then round-robins all pmd-capable rx queues across the
 * 'dp->n_pmd_threads' threads, keeping the queues whose global index maps to
 * this thread's id.  Returns the new poll count and stores the (possibly
 * reallocated) array back through 'ppoll_list'. */
static int
pmd_load_queues(struct pmd_thread *f,
                struct rxq_poll **ppoll_list, int poll_cnt)
{
    struct dp_netdev *dp = f->dp;
    struct rxq_poll *poll_list = *ppoll_list;
    struct dp_netdev_port *port;
    int id = f->id;
    int index;
    int i;

    /* Simple scheduler for netdev rx polling. */
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    poll_cnt = 0;
    index = 0;

    CMAP_FOR_EACH (port, node, &f->dp->ports) {
        /* Calls port_try_ref() to prevent the main thread
         * from deleting the port. */
        if (port_try_ref(port)) {
            if (netdev_is_pmd(port->netdev)) {
                int i;

                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                    if ((index % dp->n_pmd_threads) == id) {
                        poll_list = xrealloc(poll_list,
                                        sizeof *poll_list * (poll_cnt + 1));

                        /* Long-lived reference, held by the poll list and
                         * released on the next rebuild. */
                        port_ref(port);
                        poll_list[poll_cnt].port = port;
                        poll_list[poll_cnt].rx = port->rxq[i];
                        poll_cnt++;
                    }
                    index++;
                }
            }
            /* Unrefs the port_try_ref(). */
            port_unref(port);
        }
    }

    *ppoll_list = poll_list;
    return poll_cnt;
}
1920 | ||
/* Main loop of a poll-mode-driver thread: pins itself to a CPU, claims its
 * share of the rx queues, then busy-polls them.  Every 1024 iterations it
 * quiesces RCU and re-reads 'change_seq'; a changed sequence number makes it
 * rebuild its queue list (and exit instead, if the datapath's exit latch has
 * been set). */
static void *
pmd_thread_main(void *f_)
{
    struct pmd_thread *f = f_;
    struct dp_netdev *dp = f->dp;
    unsigned int lc = 0;
    struct rxq_poll *poll_list;
    unsigned int port_seq = PMD_INITIAL_SEQ;
    int poll_cnt;
    int i;

    poll_cnt = 0;
    poll_list = NULL;

    pmd_thread_setaffinity_cpu(f->id);
reload:
    /* The per-thread exact-match cache is (re)built on every reload. */
    emc_cache_init(&f->flow_cache);
    poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);

    for (;;) {
        int i;

        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(dp, &f->flow_cache, poll_list[i].port,
                                       poll_list[i].rx);
        }

        /* Only check for reconfiguration (and let RCU reclaim memory)
         * occasionally, to keep the fast path cheap. */
        if (lc++ > 1024) {
            unsigned int seq;

            lc = 0;

            ovsrcu_quiesce();

            atomic_read_relaxed(&f->change_seq, &seq);
            if (seq != port_seq) {
                port_seq = seq;
                break;
            }
        }
    }

    emc_cache_uninit(&f->flow_cache);

    if (!latch_is_set(&f->dp->exit_latch)){
        goto reload;
    }

    /* Shutting down: drop the poll list's port references. */
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    free(poll_list);
    return NULL;
}
1976 | ||
/* Blocks all upcalls on 'dp' by write-locking 'upcall_rwlock'; undone by
 * dp_netdev_enable_upcall(). */
static void
dp_netdev_disable_upcall(struct dp_netdev *dp)
    OVS_ACQUIRES(dp->upcall_rwlock)
{
    fat_rwlock_wrlock(&dp->upcall_rwlock);
}
1983 | ||
/* dpif-level wrapper for dp_netdev_disable_upcall().  Thread-safety analysis
 * is suppressed because the lock is released by a different dpif call. */
static void
dpif_netdev_disable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_disable_upcall(dp);
}
1991 | ||
/* Re-enables upcalls on 'dp' by releasing the write lock taken by
 * dp_netdev_disable_upcall(). */
static void
dp_netdev_enable_upcall(struct dp_netdev *dp)
    OVS_RELEASES(dp->upcall_rwlock)
{
    fat_rwlock_unlock(&dp->upcall_rwlock);
}
1998 | ||
/* dpif-level wrapper for dp_netdev_enable_upcall().  Thread-safety analysis
 * is suppressed because the matching lock was taken by a different call. */
static void
dpif_netdev_enable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_enable_upcall(dp);
}
2006 | ||
/* Resizes the pool of pmd threads to 'n'.  All existing pmd threads are told
 * to exit (via the exit latch plus a reload) and joined before the new set is
 * started; each new thread partitions the pmd rx queues among the pool in
 * pmd_load_queues().  No-op if the pool already has 'n' threads. */
static void
dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
{
    int i;

    if (n == dp->n_pmd_threads) {
        return;
    }

    /* Stop existing threads. */
    latch_set(&dp->exit_latch);
    dp_netdev_reload_pmd_threads(dp);
    for (i = 0; i < dp->n_pmd_threads; i++) {
        struct pmd_thread *f = &dp->pmd_threads[i];

        xpthread_join(f->thread, NULL);
    }
    latch_poll(&dp->exit_latch);
    free(dp->pmd_threads);

    /* Start new threads. */
    dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
    dp->n_pmd_threads = n;

    for (i = 0; i < n; i++) {
        struct pmd_thread *f = &dp->pmd_threads[i];

        f->dp = dp;
        f->id = i;
        atomic_init(&f->change_seq, PMD_INITIAL_SEQ);

        /* Each thread will distribute all devices rx-queues among
         * themselves. */
        f->thread = ovs_thread_create("pmd", pmd_thread_main, f);
    }
}
e4cfed38 | 2043 | |
6c3eee82 | 2044 | \f |
679ba04c BP |
2045 | static void * |
2046 | dp_netdev_flow_stats_new_cb(void) | |
2047 | { | |
2048 | struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
2049 | ovs_mutex_init(&bucket->mutex); | |
2050 | return bucket; | |
2051 | } | |
2052 | ||
/* Folds a batch's packet count 'cnt', byte total 'size' and OR'd TCP flags
 * into 'netdev_flow''s per-thread statistics bucket, and advances the
 * last-used timestamp. */
static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
                    int cnt, int size,
                    uint16_t tcp_flags)
{
    long long int now = time_msec();
    struct dp_netdev_flow_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
                                        dp_netdev_flow_stats_new_cb);

    ovs_mutex_lock(&bucket->mutex);
    /* MAX() guards against time going backwards between threads. */
    bucket->used = MAX(now, bucket->used);
    bucket->packet_count += cnt;
    bucket->byte_count += size;
    bucket->tcp_flags |= tcp_flags;
    ovs_mutex_unlock(&bucket->mutex);
}
2071 | ||
51852a57 BP |
2072 | static void * |
2073 | dp_netdev_stats_new_cb(void) | |
2074 | { | |
2075 | struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
2076 | ovs_mutex_init(&bucket->mutex); | |
2077 | return bucket; | |
2078 | } | |
2079 | ||
/* Adds 'cnt' to datapath counter 'type' in the calling thread's stats
 * bucket. */
static void
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
{
    struct dp_netdev_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
    ovs_mutex_lock(&bucket->mutex);
    bucket->n[type] += cnt;
    ovs_mutex_unlock(&bucket->mutex);
}
2090 | ||
/* Delivers an upcall of 'type' for 'packet_' to the registered callback.
 *
 * Returns ENODEV if no callback is registered, otherwise whatever the
 * callback returns.  DPIF_UC_MISS upcalls are counted in the datapath miss
 * statistics.  With debug logging on, the flow key and packet are formatted
 * and logged first (rate-limited by 'upcall_rl'). */
static int
dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
                 struct flow *flow, struct flow_wildcards *wc,
                 enum dpif_upcall_type type, const struct nlattr *userdata,
                 struct ofpbuf *actions, struct ofpbuf *put_actions)
{
    struct ofpbuf *packet = &packet_->ofpbuf;

    if (type == DPIF_UC_MISS) {
        dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
    }

    if (OVS_UNLIKELY(!dp->upcall_cb)) {
        return ENODEV;
    }

    if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        struct ofpbuf key;
        char *packet_str;

        ofpbuf_init(&key, 0);
        odp_flow_key_from_flow(&key, flow, &wc->masks, flow->in_port.odp_port,
                               true);

        packet_str = ofp_packet_to_string(ofpbuf_data(packet),
                                          ofpbuf_size(packet));

        odp_flow_key_format(ofpbuf_data(&key), ofpbuf_size(&key), &ds);

        VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
                 dpif_upcall_type_to_string(type), ds_cstr(&ds), packet_str);

        ofpbuf_uninit(&key);
        free(packet_str);
        ds_destroy(&ds);
    }

    return dp->upcall_cb(packet, flow, type, userdata, actions, wc,
                         put_actions, dp->upcall_aux);
}
2132 | ||
/* Returns the datapath hash for 'packet'.  If the netdev did not supply one
 * (hash of zero), computes a 5-tuple hash from 'mf' and caches it on the
 * packet before returning it. */
static inline uint32_t
dpif_netdev_packet_get_dp_hash(struct dpif_packet *packet,
                               const struct miniflow *mf)
{
    uint32_t hash = dpif_packet_get_dp_hash(packet);

    if (OVS_UNLIKELY(!hash)) {
        hash = miniflow_hash_5tuple(mf, 0);
        dpif_packet_set_dp_hash(packet, hash);
    }

    return hash;
}
2146 | ||
/* A group of packets from the same receive burst that all matched the same
 * flow, so their statistics can be credited once and the flow's actions run
 * on all of them together. */
struct packet_batch {
    unsigned int packet_count;    /* Number of entries in 'packets'. */
    unsigned int byte_count;      /* Total payload bytes across the batch. */
    uint16_t tcp_flags;           /* OR of all packets' TCP flags. */

    struct dp_netdev_flow *flow;  /* Flow that every packet matched. */

    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    struct pkt_metadata md;       /* Metadata shared by all packets. */
};
2157 | ||
2158 | static inline void | |
9bbf1c3d DDP |
2159 | packet_batch_update(struct packet_batch *batch, struct dpif_packet *packet, |
2160 | const struct miniflow *mf) | |
8cbf4f47 DDP |
2161 | { |
2162 | batch->tcp_flags |= miniflow_get_tcp_flags(mf); | |
2163 | batch->packets[batch->packet_count++] = packet; | |
2164 | batch->byte_count += ofpbuf_size(&packet->ofpbuf); | |
2165 | } | |
2166 | ||
2167 | static inline void | |
567bbb2e | 2168 | packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow, |
84d6d5eb | 2169 | struct pkt_metadata *md) |
8cbf4f47 DDP |
2170 | { |
2171 | batch->flow = flow; | |
2172 | batch->md = *md; | |
8cbf4f47 DDP |
2173 | |
2174 | batch->packet_count = 0; | |
2175 | batch->byte_count = 0; | |
2176 | batch->tcp_flags = 0; | |
8cbf4f47 DDP |
2177 | } |
2178 | ||
/* Credits the batch's totals to its flow's statistics, then executes the
 * flow's current actions on all of the batch's packets at once.  The batch
 * owns its packets, so they are handed over with may_steal == true. */
static inline void
packet_batch_execute(struct packet_batch *batch, struct dp_netdev *dp,
                     struct emc_cache *flow_cache)
{
    struct dp_netdev_actions *actions;
    struct dp_netdev_flow *flow = batch->flow;

    dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count,
                        batch->tcp_flags);

    /* RCU-protected snapshot of the flow's actions. */
    actions = dp_netdev_flow_get_actions(flow);

    dp_netdev_execute_actions(dp, batch->packets, batch->packet_count, true,
                              &batch->md, flow_cache,
                              actions->actions, actions->size);

    dp_netdev_count_packet(dp, DP_STAT_HIT, batch->packet_count);
}
2197 | ||
/* Adds 'pkt' to an existing batch for 'flow', or starts a new batch if there
 * is room.  Returns false when 'flow' is null or all 'max_batches' slots are
 * taken, in which case the caller must handle the packet some other way. */
static inline bool
dp_netdev_queue_batches(struct dpif_packet *pkt, struct pkt_metadata *md,
                        struct dp_netdev_flow *flow, const struct miniflow *mf,
                        struct packet_batch *batches, size_t *n_batches,
                        size_t max_batches)
{
    struct packet_batch *batch = NULL;
    int j;

    if (OVS_UNLIKELY(!flow)) {
        return false;
    }
    /* XXX: This O(n^2) algorithm makes sense if we're operating under the
     * assumption that the number of distinct flows (and therefore the
     * number of distinct batches) is quite small.  If this turns out not
     * to be the case, it may make sense to pre sort based on the
     * netdev_flow pointer.  That done we can get the appropriate batching
     * in O(n * log(n)) instead. */
    for (j = *n_batches - 1; j >= 0; j--) {
        if (batches[j].flow == flow) {
            batch = &batches[j];
            packet_batch_update(batch, pkt, mf);
            return true;
        }
    }
    if (OVS_UNLIKELY(*n_batches >= max_batches)) {
        return false;
    }

    batch = &batches[(*n_batches)++];
    packet_batch_init(batch, flow, md);
    packet_batch_update(batch, pkt, mf);
    return true;
}
2232 | ||
/* Exchanges the packet pointers stored at 'a' and 'b'. */
static inline void
dpif_packet_swap(struct dpif_packet **a, struct dpif_packet **b)
{
    struct dpif_packet *saved = *a;

    *a = *b;
    *b = saved;
}
2240 | ||
/* Try to process all ('cnt') the 'packets' using only the exact match cache
 * 'flow_cache'. If a flow is not found for a packet 'packets[i]', or if there
 * is no matching batch for a packet's flow, the miniflow is copied into 'keys'
 * and the packet pointer is moved at the beginning of the 'packets' array.
 *
 * The function returns the number of packets that needs to be processed in the
 * 'packets' array (they have been moved to the beginning of the vector).
 */
static inline size_t
emc_processing(struct dp_netdev *dp, struct emc_cache *flow_cache,
               struct dpif_packet **packets, size_t cnt,
               struct pkt_metadata *md, struct netdev_flow_key *keys)
{
    struct netdev_flow_key key;
    struct packet_batch batches[4];
    size_t n_batches, i;
    size_t notfound_cnt = 0;

    n_batches = 0;
    /* 'key' is reused for every packet; only misses are copied to 'keys'. */
    miniflow_initialize(&key.flow, key.buf);
    for (i = 0; i < cnt; i++) {
        struct dp_netdev_flow *flow;
        uint32_t hash;

        /* Drop runt packets that cannot even hold an Ethernet header. */
        if (OVS_UNLIKELY(ofpbuf_size(&packets[i]->ofpbuf) < ETH_HEADER_LEN)) {
            dpif_packet_delete(packets[i]);
            continue;
        }

        miniflow_extract(&packets[i]->ofpbuf, md, &key.flow);

        hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.flow);

        flow = emc_lookup(flow_cache, &key.flow, hash);
        if (OVS_UNLIKELY(!dp_netdev_queue_batches(packets[i], md,
                                                  flow, &key.flow,
                                                  batches, &n_batches,
                                                  ARRAY_SIZE(batches)))) {
            /* Compact the misses at the front of 'packets' so the caller can
             * run the slow path over a contiguous prefix. */
            if (i != notfound_cnt) {
                dpif_packet_swap(&packets[i], &packets[notfound_cnt]);
            }

            keys[notfound_cnt++] = key;
        }
    }

    for (i = 0; i < n_batches; i++) {
        packet_batch_execute(&batches[i], dp, flow_cache);
    }

    return notfound_cnt;
}
2293 | ||
/* Handles the packets that missed the exact-match cache: looks up all 'keys'
 * in the classifier in one batch, performs miss upcalls (and installs the
 * resulting flows) for packets without a classifier match, then batches the
 * matched packets per flow and executes their actions.  Successful lookups
 * are also inserted into 'flow_cache' for future fast-path hits. */
static inline void
fast_path_processing(struct dp_netdev *dp, struct emc_cache *flow_cache,
                     struct dpif_packet **packets, size_t cnt,
                     struct pkt_metadata *md, struct netdev_flow_key *keys)
{
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length array. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
#endif
    struct packet_batch batches[PKT_ARRAY_SIZE];
    const struct miniflow *mfs[PKT_ARRAY_SIZE]; /* NULL at bad packets. */
    struct cls_rule *rules[PKT_ARRAY_SIZE];
    size_t n_batches, i;
    bool any_miss;

    for (i = 0; i < cnt; i++) {
        mfs[i] = &keys[i].flow;
    }
    any_miss = !classifier_lookup_miniflow_batch(&dp->cls, mfs, rules, cnt);
    /* Upcalls are skipped entirely when the rwlock is write-held (upcalls
     * disabled); tryrdlock keeps the fast path from blocking. */
    if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
        uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
        struct ofpbuf actions, put_actions;
        struct match match;

        ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
        ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);

        for (i = 0; i < cnt; i++) {
            const struct dp_netdev_flow *netdev_flow;
            struct ofpbuf *add_actions;
            int error;

            if (OVS_LIKELY(rules[i] || !mfs[i])) {
                continue;
            }

            /* It's possible that an earlier slow path execution installed
             * the rule this flow needs.  In this case, it's a lot cheaper
             * to catch it here than execute a miss. */
            netdev_flow = dp_netdev_lookup_flow(dp, mfs[i]);
            if (netdev_flow) {
                rules[i] = CONST_CAST(struct cls_rule *, &netdev_flow->cr);
                continue;
            }

            miniflow_expand(mfs[i], &match.flow);

            ofpbuf_clear(&actions);
            ofpbuf_clear(&put_actions);

            error = dp_netdev_upcall(dp, packets[i], &match.flow, &match.wc,
                                     DPIF_UC_MISS, NULL, &actions,
                                     &put_actions);
            if (OVS_UNLIKELY(error && error != ENOSPC)) {
                continue;
            }

            /* We can't allow the packet batching in the next loop to execute
             * the actions.  Otherwise, if there are any slow path actions,
             * we'll send the packet up twice. */
            dp_netdev_execute_actions(dp, &packets[i], 1, false, md,
                                      flow_cache, ofpbuf_data(&actions),
                                      ofpbuf_size(&actions));

            add_actions = ofpbuf_size(&put_actions)
                ? &put_actions
                : &actions;

            ovs_mutex_lock(&dp->flow_mutex);
            /* XXX: There's a brief race where this flow could have already
             * been installed since we last did the flow lookup.  This could
             * be solved by moving the mutex lock outside the loop, but that's
             * an awful long time to be locking everyone out of making flow
             * installs.  If we move to a per-core classifier, it would be
             * reasonable. */
            if (OVS_LIKELY(error != ENOSPC)
                && !dp_netdev_lookup_flow(dp, mfs[i])) {
                dp_netdev_flow_add(dp, &match, ofpbuf_data(add_actions),
                                   ofpbuf_size(add_actions));
            }
            ovs_mutex_unlock(&dp->flow_mutex);
        }

        ofpbuf_uninit(&actions);
        ofpbuf_uninit(&put_actions);
        fat_rwlock_unlock(&dp->upcall_rwlock);
    }

    n_batches = 0;
    for (i = 0; i < cnt; i++) {
        struct dpif_packet *packet = packets[i];
        struct dp_netdev_flow *flow;

        if (OVS_UNLIKELY(!rules[i] || !mfs[i])) {
            continue;
        }

        flow = dp_netdev_flow_cast(rules[i]);
        /* Prime the exact-match cache so the next packet of this flow stays
         * on the fast path. */
        emc_insert(flow_cache, mfs[i], dpif_packet_get_dp_hash(packet), flow);
        dp_netdev_queue_batches(packet, md, flow, mfs[i], batches, &n_batches,
                                ARRAY_SIZE(batches));
    }

    for (i = 0; i < n_batches; i++) {
        packet_batch_execute(&batches[i], dp, flow_cache);
    }
}
2403 | ||
/* Datapath entry point for a batch of received packets: first tries the
 * exact-match cache, then runs whatever is left through the classifier-based
 * fast path. */
static void
dp_netdev_input(struct dp_netdev *dp, struct emc_cache *flow_cache,
                struct dpif_packet **packets, int cnt, struct pkt_metadata *md)
{
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length array. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
#endif
    struct netdev_flow_key keys[PKT_ARRAY_SIZE];
    size_t newcnt;

    /* emc_processing() compacts EMC misses to the front of 'packets' and
     * fills 'keys' with their miniflows. */
    newcnt = emc_processing(dp, flow_cache, packets, cnt, md, keys);
    if (OVS_UNLIKELY(newcnt)) {
        fast_path_processing(dp, flow_cache, packets, newcnt, md, keys);
    }
}
2422 | ||
/* Auxiliary state threaded through the odp action-execution callback. */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;          /* Datapath executing the actions. */
    struct emc_cache *flow_cache;  /* Exact-match cache of this thread. */
};
2427 | ||
6b31e073 | 2428 | static void |
623540e4 EJ |
2429 | dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb, |
2430 | void *aux) | |
6b31e073 RW |
2431 | { |
2432 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
623540e4 | 2433 | dp->upcall_aux = aux; |
6b31e073 RW |
2434 | dp->upcall_cb = cb; |
2435 | } | |
2436 | ||
9080a111 | 2437 | static void |
8cbf4f47 | 2438 | dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, |
572f732a | 2439 | struct pkt_metadata *md, |
09f9da0b | 2440 | const struct nlattr *a, bool may_steal) |
8a4e3a85 | 2441 | OVS_NO_THREAD_SAFETY_ANALYSIS |
9080a111 JR |
2442 | { |
2443 | struct dp_netdev_execute_aux *aux = aux_; | |
623540e4 EJ |
2444 | uint32_t *depth = recirc_depth_get(); |
2445 | struct dp_netdev *dp = aux->dp; | |
09f9da0b | 2446 | int type = nl_attr_type(a); |
8a4e3a85 | 2447 | struct dp_netdev_port *p; |
8cbf4f47 | 2448 | int i; |
9080a111 | 2449 | |
09f9da0b JR |
2450 | switch ((enum ovs_action_attr)type) { |
2451 | case OVS_ACTION_ATTR_OUTPUT: | |
623540e4 | 2452 | p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a))); |
26a5075b | 2453 | if (OVS_LIKELY(p)) { |
f00fa8cb | 2454 | netdev_send(p->netdev, NETDEV_QID_NONE, packets, cnt, may_steal); |
26a5075b DDP |
2455 | } else if (may_steal) { |
2456 | for (i = 0; i < cnt; i++) { | |
2457 | dpif_packet_delete(packets[i]); | |
2458 | } | |
8a4e3a85 | 2459 | } |
09f9da0b JR |
2460 | break; |
2461 | ||
623540e4 EJ |
2462 | case OVS_ACTION_ATTR_USERSPACE: |
2463 | if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) { | |
2464 | const struct nlattr *userdata; | |
2465 | struct ofpbuf actions; | |
2466 | struct flow flow; | |
4fc65926 | 2467 | |
623540e4 EJ |
2468 | userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); |
2469 | ofpbuf_init(&actions, 0); | |
8cbf4f47 | 2470 | |
623540e4 EJ |
2471 | for (i = 0; i < cnt; i++) { |
2472 | int error; | |
2473 | ||
2474 | ofpbuf_clear(&actions); | |
2475 | ||
2476 | flow_extract(&packets[i]->ofpbuf, md, &flow); | |
2477 | error = dp_netdev_upcall(dp, packets[i], &flow, NULL, | |
2478 | DPIF_UC_ACTION, userdata, &actions, | |
2479 | NULL); | |
2480 | if (!error || error == ENOSPC) { | |
2481 | dp_netdev_execute_actions(dp, &packets[i], 1, false, md, | |
9bbf1c3d | 2482 | aux->flow_cache, |
623540e4 EJ |
2483 | ofpbuf_data(&actions), |
2484 | ofpbuf_size(&actions)); | |
2485 | } | |
8cbf4f47 | 2486 | |
623540e4 EJ |
2487 | if (may_steal) { |
2488 | dpif_packet_delete(packets[i]); | |
2489 | } | |
db73f716 | 2490 | } |
623540e4 EJ |
2491 | ofpbuf_uninit(&actions); |
2492 | fat_rwlock_unlock(&dp->upcall_rwlock); | |
8cbf4f47 | 2493 | } |
6b31e073 | 2494 | |
09f9da0b | 2495 | break; |
572f732a | 2496 | |
347bf289 AZ |
2497 | case OVS_ACTION_ATTR_HASH: { |
2498 | const struct ovs_action_hash *hash_act; | |
2499 | uint32_t hash; | |
2500 | ||
2501 | hash_act = nl_attr_get(a); | |
8cbf4f47 | 2502 | |
8cbf4f47 DDP |
2503 | for (i = 0; i < cnt; i++) { |
2504 | ||
8cbf4f47 DDP |
2505 | if (hash_act->hash_alg == OVS_HASH_ALG_L4) { |
2506 | /* Hash need not be symmetric, nor does it need to include | |
2507 | * L2 fields. */ | |
9bbf1c3d DDP |
2508 | hash = hash_2words(dpif_packet_get_dp_hash(packets[i]), |
2509 | hash_act->hash_basis); | |
8cbf4f47 DDP |
2510 | } else { |
2511 | VLOG_WARN("Unknown hash algorithm specified " | |
2512 | "for the hash action."); | |
2513 | hash = 2; | |
2514 | } | |
2515 | ||
347bf289 AZ |
2516 | if (!hash) { |
2517 | hash = 1; /* 0 is not valid */ | |
2518 | } | |
2519 | ||
8cbf4f47 DDP |
2520 | if (i == 0) { |
2521 | md->dp_hash = hash; | |
2522 | } | |
9bbf1c3d | 2523 | dpif_packet_set_dp_hash(packets[i], hash); |
347bf289 | 2524 | } |
347bf289 AZ |
2525 | break; |
2526 | } | |
2527 | ||
adcf00ba AZ |
2528 | case OVS_ACTION_ATTR_RECIRC: |
2529 | if (*depth < MAX_RECIRC_DEPTH) { | |
572f732a | 2530 | |
adcf00ba | 2531 | (*depth)++; |
8cbf4f47 DDP |
2532 | for (i = 0; i < cnt; i++) { |
2533 | struct dpif_packet *recirc_pkt; | |
2534 | struct pkt_metadata recirc_md = *md; | |
2535 | ||
2536 | recirc_pkt = (may_steal) ? packets[i] | |
2537 | : dpif_packet_clone(packets[i]); | |
2538 | ||
2539 | recirc_md.recirc_id = nl_attr_get_u32(a); | |
2540 | ||
2541 | /* Hash is private to each packet */ | |
61a2647e | 2542 | recirc_md.dp_hash = dpif_packet_get_dp_hash(packets[i]); |
8cbf4f47 | 2543 | |
9bbf1c3d DDP |
2544 | dp_netdev_input(dp, aux->flow_cache, &recirc_pkt, 1, |
2545 | &recirc_md); | |
8cbf4f47 | 2546 | } |
adcf00ba AZ |
2547 | (*depth)--; |
2548 | ||
adcf00ba AZ |
2549 | break; |
2550 | } else { | |
2551 | VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); | |
26a5075b DDP |
2552 | if (may_steal) { |
2553 | for (i = 0; i < cnt; i++) { | |
2554 | dpif_packet_delete(packets[i]); | |
2555 | } | |
2556 | } | |
adcf00ba | 2557 | } |
572f732a | 2558 | break; |
572f732a | 2559 | |
09f9da0b JR |
2560 | case OVS_ACTION_ATTR_PUSH_VLAN: |
2561 | case OVS_ACTION_ATTR_POP_VLAN: | |
2562 | case OVS_ACTION_ATTR_PUSH_MPLS: | |
2563 | case OVS_ACTION_ATTR_POP_MPLS: | |
2564 | case OVS_ACTION_ATTR_SET: | |
6d670e7f | 2565 | case OVS_ACTION_ATTR_SET_MASKED: |
09f9da0b JR |
2566 | case OVS_ACTION_ATTR_SAMPLE: |
2567 | case OVS_ACTION_ATTR_UNSPEC: | |
2568 | case __OVS_ACTION_ATTR_MAX: | |
2569 | OVS_NOT_REACHED(); | |
da546e07 | 2570 | } |
98403001 BP |
2571 | } |
2572 | ||
4edb9ae9 | 2573 | static void |
8cbf4f47 DDP |
2574 | dp_netdev_execute_actions(struct dp_netdev *dp, |
2575 | struct dpif_packet **packets, int cnt, | |
2576 | bool may_steal, struct pkt_metadata *md, | |
9bbf1c3d | 2577 | struct emc_cache *flow_cache, |
9080a111 | 2578 | const struct nlattr *actions, size_t actions_len) |
72865317 | 2579 | { |
9bbf1c3d | 2580 | struct dp_netdev_execute_aux aux = {dp, flow_cache}; |
9080a111 | 2581 | |
8cbf4f47 DDP |
2582 | odp_execute_actions(&aux, packets, cnt, may_steal, md, actions, |
2583 | actions_len, dp_execute_cb); | |
72865317 BP |
2584 | } |
2585 | ||
/* The dpif provider table for the userspace ("netdev") datapath.
 *
 * Entries set to NULL are operations this datapath does not implement via
 * the generic hooks: upcalls are delivered through the registered callback
 * (dpif_netdev_register_upcall_cb) rather than the recv_* path, so the
 * recv-related slots are intentionally NULL. */
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_operate,
    NULL,                       /* recv_set */
    NULL,                       /* handlers_set */
    dpif_netdev_queue_to_priority,
    NULL,                       /* recv */
    NULL,                       /* recv_wait */
    NULL,                       /* recv_purge */
    dpif_netdev_register_upcall_cb,
    dpif_netdev_enable_upcall,
    dpif_netdev_disable_upcall,
};
614c4892 | 2623 | |
74cc3969 BP |
2624 | static void |
2625 | dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
2626 | const char *argv[], void *aux OVS_UNUSED) | |
2627 | { | |
59e6d833 BP |
2628 | struct dp_netdev_port *old_port; |
2629 | struct dp_netdev_port *new_port; | |
74cc3969 | 2630 | struct dp_netdev *dp; |
ff073a71 | 2631 | odp_port_t port_no; |
74cc3969 | 2632 | |
8a4e3a85 | 2633 | ovs_mutex_lock(&dp_netdev_mutex); |
74cc3969 BP |
2634 | dp = shash_find_data(&dp_netdevs, argv[1]); |
2635 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
8a4e3a85 | 2636 | ovs_mutex_unlock(&dp_netdev_mutex); |
74cc3969 BP |
2637 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); |
2638 | return; | |
2639 | } | |
8a4e3a85 BP |
2640 | ovs_refcount_ref(&dp->ref_cnt); |
2641 | ovs_mutex_unlock(&dp_netdev_mutex); | |
74cc3969 | 2642 | |
59e6d833 BP |
2643 | ovs_mutex_lock(&dp->port_mutex); |
2644 | if (get_port_by_name(dp, argv[2], &old_port)) { | |
74cc3969 | 2645 | unixctl_command_reply_error(conn, "unknown port"); |
8a4e3a85 | 2646 | goto exit; |
74cc3969 BP |
2647 | } |
2648 | ||
ff073a71 BP |
2649 | port_no = u32_to_odp(atoi(argv[3])); |
2650 | if (!port_no || port_no == ODPP_NONE) { | |
74cc3969 | 2651 | unixctl_command_reply_error(conn, "bad port number"); |
8a4e3a85 | 2652 | goto exit; |
74cc3969 | 2653 | } |
ff073a71 | 2654 | if (dp_netdev_lookup_port(dp, port_no)) { |
74cc3969 | 2655 | unixctl_command_reply_error(conn, "port number already in use"); |
8a4e3a85 | 2656 | goto exit; |
74cc3969 | 2657 | } |
59e6d833 BP |
2658 | |
2659 | /* Remove old port. */ | |
2660 | cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no)); | |
2661 | ovsrcu_postpone(free, old_port); | |
2662 | ||
2663 | /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */ | |
2664 | new_port = xmemdup(old_port, sizeof *old_port); | |
2665 | new_port->port_no = port_no; | |
2666 | cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no)); | |
2667 | ||
d33ed218 | 2668 | seq_change(dp->port_seq); |
74cc3969 | 2669 | unixctl_command_reply(conn, NULL); |
8a4e3a85 BP |
2670 | |
2671 | exit: | |
59e6d833 | 2672 | ovs_mutex_unlock(&dp->port_mutex); |
8a4e3a85 | 2673 | dp_netdev_unref(dp); |
74cc3969 BP |
2674 | } |
2675 | ||
c40b890f BP |
2676 | static void |
2677 | dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
2678 | const char *argv[], void *aux OVS_UNUSED) | |
2679 | { | |
2680 | struct dp_netdev_port *port; | |
2681 | struct dp_netdev *dp; | |
2682 | ||
2683 | ovs_mutex_lock(&dp_netdev_mutex); | |
2684 | dp = shash_find_data(&dp_netdevs, argv[1]); | |
2685 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
2686 | ovs_mutex_unlock(&dp_netdev_mutex); | |
2687 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); | |
2688 | return; | |
2689 | } | |
2690 | ovs_refcount_ref(&dp->ref_cnt); | |
2691 | ovs_mutex_unlock(&dp_netdev_mutex); | |
2692 | ||
2693 | ovs_mutex_lock(&dp->port_mutex); | |
2694 | if (get_port_by_name(dp, argv[2], &port)) { | |
2695 | unixctl_command_reply_error(conn, "unknown port"); | |
2696 | } else if (port->port_no == ODPP_LOCAL) { | |
2697 | unixctl_command_reply_error(conn, "can't delete local port"); | |
2698 | } else { | |
2699 | do_del_port(dp, port); | |
2700 | unixctl_command_reply(conn, NULL); | |
2701 | } | |
2702 | ovs_mutex_unlock(&dp->port_mutex); | |
2703 | ||
2704 | dp_netdev_unref(dp); | |
2705 | } | |
2706 | ||
0cbfe35d BP |
2707 | static void |
2708 | dpif_dummy_register__(const char *type) | |
2709 | { | |
2710 | struct dpif_class *class; | |
2711 | ||
2712 | class = xmalloc(sizeof *class); | |
2713 | *class = dpif_netdev_class; | |
2714 | class->type = xstrdup(type); | |
2715 | dp_register_provider(class); | |
2716 | } | |
2717 | ||
614c4892 | 2718 | void |
0cbfe35d | 2719 | dpif_dummy_register(bool override) |
614c4892 | 2720 | { |
0cbfe35d BP |
2721 | if (override) { |
2722 | struct sset types; | |
2723 | const char *type; | |
2724 | ||
2725 | sset_init(&types); | |
2726 | dp_enumerate_types(&types); | |
2727 | SSET_FOR_EACH (type, &types) { | |
2728 | if (!dp_unregister_provider(type)) { | |
2729 | dpif_dummy_register__(type); | |
2730 | } | |
2731 | } | |
2732 | sset_destroy(&types); | |
614c4892 | 2733 | } |
0cbfe35d BP |
2734 | |
2735 | dpif_dummy_register__("dummy"); | |
74cc3969 BP |
2736 | |
2737 | unixctl_command_register("dpif-dummy/change-port-number", | |
74467d5c | 2738 | "dp port new-number", |
74cc3969 | 2739 | 3, 3, dpif_dummy_change_port_number, NULL); |
74467d5c | 2740 | unixctl_command_register("dpif-dummy/delete-port", "dp port", |
c40b890f | 2741 | 2, 2, dpif_dummy_delete_port, NULL); |
614c4892 | 2742 | } |