]>
Commit | Line | Data |
---|---|---|
72865317 | 1 | /* |
ff073a71 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
72865317 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpif.h" | |
19 | ||
72865317 BP |
20 | #include <ctype.h> |
21 | #include <errno.h> | |
22 | #include <fcntl.h> | |
23 | #include <inttypes.h> | |
72865317 | 24 | #include <netinet/in.h> |
9d82ec47 | 25 | #include <sys/socket.h> |
7f3adc00 | 26 | #include <net/if.h> |
cdee00fd | 27 | #include <stdint.h> |
72865317 BP |
28 | #include <stdlib.h> |
29 | #include <string.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/stat.h> | |
72865317 BP |
32 | #include <unistd.h> |
33 | ||
2c0ea78f | 34 | #include "classifier.h" |
59e6d833 | 35 | #include "cmap.h" |
72865317 | 36 | #include "csum.h" |
614c4892 | 37 | #include "dpif.h" |
72865317 | 38 | #include "dpif-provider.h" |
614c4892 | 39 | #include "dummy.h" |
36956a7d | 40 | #include "dynamic-string.h" |
72865317 BP |
41 | #include "flow.h" |
42 | #include "hmap.h" | |
6c3eee82 | 43 | #include "latch.h" |
72865317 | 44 | #include "list.h" |
8c301900 | 45 | #include "meta-flow.h" |
72865317 | 46 | #include "netdev.h" |
8617afff | 47 | #include "netdev-dpdk.h" |
de281153 | 48 | #include "netdev-vport.h" |
cdee00fd | 49 | #include "netlink.h" |
f094af7b | 50 | #include "odp-execute.h" |
72865317 BP |
51 | #include "odp-util.h" |
52 | #include "ofp-print.h" | |
53 | #include "ofpbuf.h" | |
61e7deb1 | 54 | #include "ovs-rcu.h" |
72865317 BP |
55 | #include "packets.h" |
56 | #include "poll-loop.h" | |
26c6b6cd | 57 | #include "random.h" |
d33ed218 | 58 | #include "seq.h" |
462278db | 59 | #include "shash.h" |
0cbfe35d | 60 | #include "sset.h" |
72865317 | 61 | #include "timeval.h" |
74cc3969 | 62 | #include "unixctl.h" |
72865317 | 63 | #include "util.h" |
72865317 | 64 | #include "vlog.h" |
5136ce49 | 65 | |
d98e6007 | 66 | VLOG_DEFINE_THIS_MODULE(dpif_netdev); |
72865317 | 67 | |
2c0ea78f GS |
68 | /* By default, choose a priority in the middle. */ |
69 | #define NETDEV_RULE_PRIORITY 0x8000 | |
70 | ||
e4cfed38 | 71 | #define NR_THREADS 1 |
adcf00ba AZ |
72 | /* Use per thread recirc_depth to prevent recirculation loop. */ |
73 | #define MAX_RECIRC_DEPTH 5 | |
74 | DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) | |
e4cfed38 | 75 | |
72865317 | 76 | /* Configuration parameters. */ |
72865317 BP |
77 | enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ |
78 | ||
856081f6 | 79 | /* Queues. */ |
856081f6 BP |
80 | enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */ |
81 | enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 }; | |
82 | BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN)); | |
83 | ||
8a4e3a85 BP |
84 | /* Protects against changes to 'dp_netdevs'. */ |
85 | static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; | |
86 | ||
87 | /* Contains all 'struct dp_netdev's. */ | |
88 | static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex) | |
89 | = SHASH_INITIALIZER(&dp_netdevs); | |
90 | ||
d88b629b BP |
91 | struct dp_netdev_upcall { |
92 | struct dpif_upcall upcall; /* Queued upcall information. */ | |
93 | struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */ | |
94 | }; | |
95 | ||
63be20be | 96 | /* A queue passing packets from a struct dp_netdev to its clients (handlers). |
8a4e3a85 BP |
97 | * |
98 | * | |
99 | * Thread-safety | |
100 | * ============= | |
101 | * | |
63be20be AW |
102 | * Any access at all requires the owning 'dp_netdev''s queue_rwlock and |
103 | * its own mutex. */ | |
856081f6 | 104 | struct dp_netdev_queue { |
63be20be AW |
105 | struct ovs_mutex mutex; |
106 | struct seq *seq; /* Incremented whenever a packet is queued. */ | |
f5126b57 BP |
107 | struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED; |
108 | unsigned int head OVS_GUARDED; | |
109 | unsigned int tail OVS_GUARDED; | |
856081f6 BP |
110 | }; |
111 | ||
8a4e3a85 BP |
112 | /* Datapath based on the network device interface from netdev.h. |
113 | * | |
114 | * | |
115 | * Thread-safety | |
116 | * ============= | |
117 | * | |
118 | * Some members, marked 'const', are immutable. Accessing other members | |
119 | * requires synchronization, as noted in more detail below. | |
120 | * | |
121 | * Acquisition order is, from outermost to innermost: | |
122 | * | |
123 | * dp_netdev_mutex (global) | |
59e6d833 | 124 | * port_mutex |
8a4e3a85 BP |
125 | * flow_mutex |
126 | * cls.rwlock | |
63be20be | 127 | * queue_rwlock |
8a4e3a85 | 128 | */ |
72865317 | 129 | struct dp_netdev { |
8a4e3a85 BP |
130 | const struct dpif_class *const class; |
131 | const char *const name; | |
6a8267c5 BP |
132 | struct ovs_refcount ref_cnt; |
133 | atomic_flag destroyed; | |
72865317 | 134 | |
8a4e3a85 BP |
135 | /* Flows. |
136 | * | |
137 | * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock. | |
138 | * | |
139 | * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then | |
140 | * the 'cls->rwlock' write lock. (The outer 'flow_mutex' allows writers to | |
141 | * atomically perform multiple operations on 'cls' and 'flow_table'.) | |
142 | */ | |
143 | struct ovs_mutex flow_mutex; | |
144 | struct classifier cls; /* Classifier. Protected by cls.rwlock. */ | |
145 | struct hmap flow_table OVS_GUARDED; /* Flow table. */ | |
146 | ||
147 | /* Queues. | |
148 | * | |
63be20be AW |
149 | * 'queue_rwlock' protects the modification of 'handler_queues' and |
150 | * 'n_handlers'. The queue elements are protected by its | |
151 | * 'handler_queues''s mutex. */ | |
152 | struct fat_rwlock queue_rwlock; | |
153 | struct dp_netdev_queue *handler_queues; | |
154 | uint32_t n_handlers; | |
72865317 | 155 | |
8a4e3a85 BP |
156 | /* Statistics. |
157 | * | |
51852a57 BP |
158 | * ovsthread_stats is internally synchronized. */ |
159 | struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */ | |
72865317 | 160 | |
8a4e3a85 BP |
161 | /* Ports. |
162 | * | |
59e6d833 BP |
163 | * Protected by RCU. Take the mutex to add or remove ports. */ |
164 | struct ovs_mutex port_mutex; | |
165 | struct cmap ports; | |
d33ed218 | 166 | struct seq *port_seq; /* Incremented whenever a port changes. */ |
6c3eee82 BP |
167 | |
168 | /* Forwarding threads. */ | |
169 | struct latch exit_latch; | |
e4cfed38 PS |
170 | struct pmd_thread *pmd_threads; |
171 | size_t n_pmd_threads; | |
172 | int pmd_count; | |
72865317 BP |
173 | }; |
174 | ||
8a4e3a85 | 175 | static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, |
59e6d833 | 176 | odp_port_t); |
ff073a71 | 177 | |
/* Indexes into the 'n' array of struct dp_netdev_stats. */
enum dp_stat_type {
    DP_STAT_HIT,                /* Packets that matched in the flow table. */
    DP_STAT_MISS,               /* Packets that did not match. */
    DP_STAT_LOST,               /* Packets not passed up to the client. */
    DP_N_STATS                  /* Number of counters; not a counter. */
};
184 | ||
185 | /* Contained by struct dp_netdev's 'stats' member. */ | |
186 | struct dp_netdev_stats { | |
187 | struct ovs_mutex mutex; /* Protects 'n'. */ | |
188 | ||
189 | /* Indexed by DP_STAT_*, protected by 'mutex'. */ | |
190 | unsigned long long int n[DP_N_STATS] OVS_GUARDED; | |
191 | }; | |
192 | ||
193 | ||
72865317 BP |
194 | /* A port in a netdev-based datapath. */ |
195 | struct dp_netdev_port { | |
59e6d833 | 196 | struct cmap_node node; /* Node in dp_netdev's 'ports'. */ |
ff073a71 | 197 | odp_port_t port_no; |
72865317 | 198 | struct netdev *netdev; |
4b609110 | 199 | struct netdev_saved_flags *sf; |
55c955bd | 200 | struct netdev_rxq **rxq; |
b284085e | 201 | struct ovs_refcount ref_cnt; |
0cbfe35d | 202 | char *type; /* Port type as requested by user. */ |
72865317 BP |
203 | }; |
204 | ||
8a4e3a85 BP |
205 | /* A flow in dp_netdev's 'flow_table'. |
206 | * | |
207 | * | |
208 | * Thread-safety | |
209 | * ============= | |
210 | * | |
211 | * Except near the beginning or ending of its lifespan, rule 'rule' belongs to | |
212 | * its dp_netdev's classifier. The text below calls this classifier 'cls'. | |
213 | * | |
214 | * Motivation | |
215 | * ---------- | |
216 | * | |
217 | * The thread safety rules described here for "struct dp_netdev_flow" are | |
218 | * motivated by two goals: | |
219 | * | |
220 | * - Prevent threads that read members of "struct dp_netdev_flow" from | |
221 | * reading bad data due to changes by some thread concurrently modifying | |
222 | * those members. | |
223 | * | |
224 | * - Prevent two threads making changes to members of a given "struct | |
225 | * dp_netdev_flow" from interfering with each other. | |
226 | * | |
227 | * | |
228 | * Rules | |
229 | * ----- | |
230 | * | |
231 | * A flow 'flow' may be accessed without a risk of being freed by code that | |
232 | * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to | |
233 | * 'flow->ref_cnt' (or both). Code that needs to hold onto a flow for a while | |
234 | * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt' | |
235 | * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'. | |
236 | * | |
237 | * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the | |
238 | * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't | |
45c626a3 | 239 | * protect members of 'flow' from modification. |
8a4e3a85 BP |
240 | * |
241 | * Some members, marked 'const', are immutable. Accessing other members | |
242 | * requires synchronization, as noted in more detail below. | |
243 | */ | |
72865317 | 244 | struct dp_netdev_flow { |
2c0ea78f | 245 | /* Packet classification. */ |
8a4e3a85 | 246 | const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */ |
2c0ea78f | 247 | |
8a4e3a85 BP |
248 | /* Hash table index by unmasked flow. */ |
249 | const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */ | |
250 | const struct flow flow; /* The flow that created this entry. */ | |
72865317 | 251 | |
8a4e3a85 BP |
252 | /* Statistics. |
253 | * | |
254 | * Reading or writing these members requires 'mutex'. */ | |
679ba04c | 255 | struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */ |
8a4e3a85 | 256 | |
45c626a3 | 257 | /* Actions. */ |
61e7deb1 | 258 | OVSRCU_TYPE(struct dp_netdev_actions *) actions; |
72865317 BP |
259 | }; |
260 | ||
61e7deb1 | 261 | static void dp_netdev_flow_free(struct dp_netdev_flow *); |
8a4e3a85 | 262 | |
679ba04c BP |
263 | /* Contained by struct dp_netdev_flow's 'stats' member. */ |
264 | struct dp_netdev_flow_stats { | |
265 | struct ovs_mutex mutex; /* Guards all the other members. */ | |
266 | ||
267 | long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */ | |
268 | long long int packet_count OVS_GUARDED; /* Number of packets matched. */ | |
269 | long long int byte_count OVS_GUARDED; /* Number of bytes matched. */ | |
270 | uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */ | |
271 | }; | |
272 | ||
/* The set of datapath actions attached to a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' is protected with RCU. */
struct dp_netdev_actions {
    /* Both members are immutable: they never change during the lifetime of
     * the struct. */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};
286 | ||
287 | struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *, | |
288 | size_t); | |
61e7deb1 BP |
289 | struct dp_netdev_actions *dp_netdev_flow_get_actions( |
290 | const struct dp_netdev_flow *); | |
291 | static void dp_netdev_actions_free(struct dp_netdev_actions *); | |
a84cb64a | 292 | |
e4cfed38 PS |
293 | /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate |
294 | * the performance overhead of interrupt processing. Therefore netdev can | |
295 | * not implement rx-wait for these devices. dpif-netdev needs to poll | |
296 | * these device to check for recv buffer. pmd-thread does polling for | |
297 | * devices assigned to itself thread. | |
298 | * | |
299 | * DPDK used PMD for accessing NIC. | |
300 | * | |
301 | * A thread that receives packets from PMD ports, looks them up in the flow | |
302 | * table, and executes the actions it finds. | |
303 | **/ | |
304 | struct pmd_thread { | |
6c3eee82 BP |
305 | struct dp_netdev *dp; |
306 | pthread_t thread; | |
e4cfed38 PS |
307 | int id; |
308 | atomic_uint change_seq; | |
6c3eee82 BP |
309 | }; |
310 | ||
72865317 BP |
311 | /* Interface to netdev-based datapath. */ |
312 | struct dpif_netdev { | |
313 | struct dpif dpif; | |
314 | struct dp_netdev *dp; | |
d33ed218 | 315 | uint64_t last_port_seq; |
72865317 BP |
316 | }; |
317 | ||
8a4e3a85 | 318 | static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no, |
59e6d833 | 319 | struct dp_netdev_port **portp); |
8a4e3a85 | 320 | static int get_port_by_name(struct dp_netdev *dp, const char *devname, |
59e6d833 | 321 | struct dp_netdev_port **portp); |
8a4e3a85 BP |
322 | static void dp_netdev_free(struct dp_netdev *) |
323 | OVS_REQUIRES(dp_netdev_mutex); | |
72865317 | 324 | static void dp_netdev_flow_flush(struct dp_netdev *); |
8a4e3a85 BP |
325 | static int do_add_port(struct dp_netdev *dp, const char *devname, |
326 | const char *type, odp_port_t port_no) | |
59e6d833 | 327 | OVS_REQUIRES(dp->port_mutex); |
c40b890f | 328 | static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *) |
59e6d833 | 329 | OVS_REQUIRES(dp->port_mutex); |
63be20be AW |
330 | static void dp_netdev_destroy_all_queues(struct dp_netdev *dp) |
331 | OVS_REQ_WRLOCK(dp->queue_rwlock); | |
614c4892 BP |
332 | static int dpif_netdev_open(const struct dpif_class *, const char *name, |
333 | bool create, struct dpif **); | |
f5126b57 | 334 | static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *, |
63be20be | 335 | int queue_no, int type, |
4f150744 | 336 | const struct miniflow *, |
e4cfed38 | 337 | const struct nlattr *userdata); |
8a4e3a85 | 338 | static void dp_netdev_execute_actions(struct dp_netdev *dp, |
4f150744 JR |
339 | const struct miniflow *, |
340 | struct ofpbuf *, bool may_steal, | |
8a4e3a85 | 341 | struct pkt_metadata *, |
4edb9ae9 | 342 | const struct nlattr *actions, |
e4cfed38 | 343 | size_t actions_len); |
758c456d | 344 | static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, |
e4cfed38 PS |
345 | struct pkt_metadata *); |
346 | ||
347 | static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n); | |
72865317 BP |
348 | |
349 | static struct dpif_netdev * | |
350 | dpif_netdev_cast(const struct dpif *dpif) | |
351 | { | |
cb22974d | 352 | ovs_assert(dpif->dpif_class->open == dpif_netdev_open); |
72865317 BP |
353 | return CONTAINER_OF(dpif, struct dpif_netdev, dpif); |
354 | } | |
355 | ||
356 | static struct dp_netdev * | |
357 | get_dp_netdev(const struct dpif *dpif) | |
358 | { | |
359 | return dpif_netdev_cast(dpif)->dp; | |
360 | } | |
361 | ||
2197d7ab GL |
362 | static int |
363 | dpif_netdev_enumerate(struct sset *all_dps) | |
364 | { | |
365 | struct shash_node *node; | |
366 | ||
97be1538 | 367 | ovs_mutex_lock(&dp_netdev_mutex); |
2197d7ab GL |
368 | SHASH_FOR_EACH(node, &dp_netdevs) { |
369 | sset_add(all_dps, node->name); | |
370 | } | |
97be1538 | 371 | ovs_mutex_unlock(&dp_netdev_mutex); |
5279f8fd | 372 | |
2197d7ab GL |
373 | return 0; |
374 | } | |
375 | ||
add90f6f EJ |
376 | static bool |
377 | dpif_netdev_class_is_dummy(const struct dpif_class *class) | |
378 | { | |
379 | return class != &dpif_netdev_class; | |
380 | } | |
381 | ||
/* Returns the netdev type to open for a port whose requested type is 'type'.
 * Every type except "internal" is returned unchanged; "internal" becomes
 * "dummy" for a dummy datapath class and "tap" otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }
    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}
389 | ||
72865317 BP |
390 | static struct dpif * |
391 | create_dpif_netdev(struct dp_netdev *dp) | |
392 | { | |
462278db | 393 | uint16_t netflow_id = hash_string(dp->name, 0); |
72865317 | 394 | struct dpif_netdev *dpif; |
72865317 | 395 | |
6a8267c5 | 396 | ovs_refcount_ref(&dp->ref_cnt); |
72865317 | 397 | |
72865317 | 398 | dpif = xmalloc(sizeof *dpif); |
614c4892 | 399 | dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); |
72865317 | 400 | dpif->dp = dp; |
d33ed218 | 401 | dpif->last_port_seq = seq_read(dp->port_seq); |
72865317 BP |
402 | |
403 | return &dpif->dpif; | |
404 | } | |
405 | ||
4e022ec0 AW |
406 | /* Choose an unused, non-zero port number and return it on success. |
407 | * Return ODPP_NONE on failure. */ | |
408 | static odp_port_t | |
e44768b7 | 409 | choose_port(struct dp_netdev *dp, const char *name) |
59e6d833 | 410 | OVS_REQUIRES(dp->port_mutex) |
e44768b7 | 411 | { |
4e022ec0 | 412 | uint32_t port_no; |
e44768b7 JP |
413 | |
414 | if (dp->class != &dpif_netdev_class) { | |
415 | const char *p; | |
416 | int start_no = 0; | |
417 | ||
418 | /* If the port name begins with "br", start the number search at | |
419 | * 100 to make writing tests easier. */ | |
420 | if (!strncmp(name, "br", 2)) { | |
421 | start_no = 100; | |
422 | } | |
423 | ||
424 | /* If the port name contains a number, try to assign that port number. | |
425 | * This can make writing unit tests easier because port numbers are | |
426 | * predictable. */ | |
427 | for (p = name; *p != '\0'; p++) { | |
428 | if (isdigit((unsigned char) *p)) { | |
429 | port_no = start_no + strtol(p, NULL, 10); | |
ff073a71 BP |
430 | if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE) |
431 | && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 432 | return u32_to_odp(port_no); |
e44768b7 JP |
433 | } |
434 | break; | |
435 | } | |
436 | } | |
437 | } | |
438 | ||
ff073a71 BP |
439 | for (port_no = 1; port_no <= UINT16_MAX; port_no++) { |
440 | if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) { | |
4e022ec0 | 441 | return u32_to_odp(port_no); |
e44768b7 JP |
442 | } |
443 | } | |
444 | ||
4e022ec0 | 445 | return ODPP_NONE; |
e44768b7 JP |
446 | } |
447 | ||
72865317 | 448 | static int |
614c4892 BP |
449 | create_dp_netdev(const char *name, const struct dpif_class *class, |
450 | struct dp_netdev **dpp) | |
8a4e3a85 | 451 | OVS_REQUIRES(dp_netdev_mutex) |
72865317 BP |
452 | { |
453 | struct dp_netdev *dp; | |
454 | int error; | |
72865317 | 455 | |
462278db | 456 | dp = xzalloc(sizeof *dp); |
8a4e3a85 BP |
457 | shash_add(&dp_netdevs, name, dp); |
458 | ||
459 | *CONST_CAST(const struct dpif_class **, &dp->class) = class; | |
460 | *CONST_CAST(const char **, &dp->name) = xstrdup(name); | |
6a8267c5 | 461 | ovs_refcount_init(&dp->ref_cnt); |
1a65ba85 | 462 | atomic_flag_clear(&dp->destroyed); |
8a4e3a85 BP |
463 | |
464 | ovs_mutex_init(&dp->flow_mutex); | |
465 | classifier_init(&dp->cls, NULL); | |
466 | hmap_init(&dp->flow_table); | |
467 | ||
63be20be | 468 | fat_rwlock_init(&dp->queue_rwlock); |
ed27e010 | 469 | |
51852a57 | 470 | ovsthread_stats_init(&dp->stats); |
ed27e010 | 471 | |
59e6d833 BP |
472 | ovs_mutex_init(&dp->port_mutex); |
473 | cmap_init(&dp->ports); | |
d33ed218 | 474 | dp->port_seq = seq_create(); |
6c3eee82 | 475 | latch_init(&dp->exit_latch); |
e44768b7 | 476 | |
59e6d833 | 477 | ovs_mutex_lock(&dp->port_mutex); |
4e022ec0 | 478 | error = do_add_port(dp, name, "internal", ODPP_LOCAL); |
59e6d833 | 479 | ovs_mutex_unlock(&dp->port_mutex); |
72865317 BP |
480 | if (error) { |
481 | dp_netdev_free(dp); | |
462278db | 482 | return error; |
72865317 BP |
483 | } |
484 | ||
462278db | 485 | *dpp = dp; |
72865317 BP |
486 | return 0; |
487 | } | |
488 | ||
489 | static int | |
614c4892 | 490 | dpif_netdev_open(const struct dpif_class *class, const char *name, |
4a387741 | 491 | bool create, struct dpif **dpifp) |
72865317 | 492 | { |
462278db | 493 | struct dp_netdev *dp; |
5279f8fd | 494 | int error; |
462278db | 495 | |
97be1538 | 496 | ovs_mutex_lock(&dp_netdev_mutex); |
462278db BP |
497 | dp = shash_find_data(&dp_netdevs, name); |
498 | if (!dp) { | |
5279f8fd | 499 | error = create ? create_dp_netdev(name, class, &dp) : ENODEV; |
72865317 | 500 | } else { |
5279f8fd BP |
501 | error = (dp->class != class ? EINVAL |
502 | : create ? EEXIST | |
503 | : 0); | |
504 | } | |
505 | if (!error) { | |
506 | *dpifp = create_dpif_netdev(dp); | |
72865317 | 507 | } |
97be1538 | 508 | ovs_mutex_unlock(&dp_netdev_mutex); |
462278db | 509 | |
5279f8fd | 510 | return error; |
72865317 BP |
511 | } |
512 | ||
513 | static void | |
1ba530f4 | 514 | dp_netdev_purge_queues(struct dp_netdev *dp) |
63be20be | 515 | OVS_REQ_WRLOCK(dp->queue_rwlock) |
72865317 BP |
516 | { |
517 | int i; | |
518 | ||
63be20be AW |
519 | for (i = 0; i < dp->n_handlers; i++) { |
520 | struct dp_netdev_queue *q = &dp->handler_queues[i]; | |
856081f6 | 521 | |
63be20be | 522 | ovs_mutex_lock(&q->mutex); |
1ba530f4 | 523 | while (q->tail != q->head) { |
d88b629b | 524 | struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK]; |
da546e07 | 525 | ofpbuf_uninit(&u->upcall.packet); |
d88b629b | 526 | ofpbuf_uninit(&u->buf); |
856081f6 | 527 | } |
63be20be | 528 | ovs_mutex_unlock(&q->mutex); |
72865317 | 529 | } |
1ba530f4 BP |
530 | } |
531 | ||
8a4e3a85 BP |
532 | /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' |
533 | * through the 'dp_netdevs' shash while freeing 'dp'. */ | |
1ba530f4 BP |
534 | static void |
535 | dp_netdev_free(struct dp_netdev *dp) | |
8a4e3a85 | 536 | OVS_REQUIRES(dp_netdev_mutex) |
1ba530f4 | 537 | { |
59e6d833 | 538 | struct dp_netdev_port *port; |
51852a57 | 539 | struct dp_netdev_stats *bucket; |
59e6d833 | 540 | struct cmap_cursor cursor; |
51852a57 | 541 | int i; |
4ad28026 | 542 | |
8a4e3a85 BP |
543 | shash_find_and_delete(&dp_netdevs, dp->name); |
544 | ||
e4cfed38 PS |
545 | dp_netdev_set_pmd_threads(dp, 0); |
546 | free(dp->pmd_threads); | |
6c3eee82 | 547 | |
1ba530f4 | 548 | dp_netdev_flow_flush(dp); |
59e6d833 BP |
549 | ovs_mutex_lock(&dp->port_mutex); |
550 | CMAP_FOR_EACH (port, node, &cursor, &dp->ports) { | |
c40b890f | 551 | do_del_port(dp, port); |
1ba530f4 | 552 | } |
59e6d833 | 553 | ovs_mutex_unlock(&dp->port_mutex); |
51852a57 BP |
554 | |
555 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
556 | ovs_mutex_destroy(&bucket->mutex); | |
557 | free_cacheline(bucket); | |
558 | } | |
559 | ovsthread_stats_destroy(&dp->stats); | |
f5126b57 | 560 | |
63be20be AW |
561 | fat_rwlock_wrlock(&dp->queue_rwlock); |
562 | dp_netdev_destroy_all_queues(dp); | |
563 | fat_rwlock_unlock(&dp->queue_rwlock); | |
564 | ||
565 | fat_rwlock_destroy(&dp->queue_rwlock); | |
f5126b57 | 566 | |
2c0ea78f | 567 | classifier_destroy(&dp->cls); |
72865317 | 568 | hmap_destroy(&dp->flow_table); |
8a4e3a85 | 569 | ovs_mutex_destroy(&dp->flow_mutex); |
d33ed218 | 570 | seq_destroy(dp->port_seq); |
59e6d833 | 571 | cmap_destroy(&dp->ports); |
6c3eee82 | 572 | latch_destroy(&dp->exit_latch); |
8a4e3a85 | 573 | free(CONST_CAST(char *, dp->name)); |
72865317 BP |
574 | free(dp); |
575 | } | |
576 | ||
8a4e3a85 BP |
577 | static void |
578 | dp_netdev_unref(struct dp_netdev *dp) | |
579 | { | |
580 | if (dp) { | |
581 | /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't | |
582 | * get a new reference to 'dp' through the 'dp_netdevs' shash. */ | |
583 | ovs_mutex_lock(&dp_netdev_mutex); | |
584 | if (ovs_refcount_unref(&dp->ref_cnt) == 1) { | |
585 | dp_netdev_free(dp); | |
586 | } | |
587 | ovs_mutex_unlock(&dp_netdev_mutex); | |
588 | } | |
589 | } | |
590 | ||
/* Closes 'dpif': drops its reference on the underlying datapath and frees
 * the handle itself. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    struct dp_netdev *datapath = get_dp_netdev(dpif);

    dp_netdev_unref(datapath);
    free(dpif);
}
599 | ||
600 | static int | |
7dab847a | 601 | dpif_netdev_destroy(struct dpif *dpif) |
72865317 BP |
602 | { |
603 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd | 604 | |
6a8267c5 BP |
605 | if (!atomic_flag_test_and_set(&dp->destroyed)) { |
606 | if (ovs_refcount_unref(&dp->ref_cnt) == 1) { | |
607 | /* Can't happen: 'dpif' still owns a reference to 'dp'. */ | |
608 | OVS_NOT_REACHED(); | |
609 | } | |
610 | } | |
5279f8fd | 611 | |
72865317 BP |
612 | return 0; |
613 | } | |
614 | ||
615 | static int | |
a8d9304d | 616 | dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) |
72865317 BP |
617 | { |
618 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
51852a57 BP |
619 | struct dp_netdev_stats *bucket; |
620 | size_t i; | |
5279f8fd | 621 | |
06f81620 | 622 | fat_rwlock_rdlock(&dp->cls.rwlock); |
f180c2e2 | 623 | stats->n_flows = hmap_count(&dp->flow_table); |
06f81620 | 624 | fat_rwlock_unlock(&dp->cls.rwlock); |
8a4e3a85 | 625 | |
51852a57 BP |
626 | stats->n_hit = stats->n_missed = stats->n_lost = 0; |
627 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) { | |
628 | ovs_mutex_lock(&bucket->mutex); | |
629 | stats->n_hit += bucket->n[DP_STAT_HIT]; | |
630 | stats->n_missed += bucket->n[DP_STAT_MISS]; | |
631 | stats->n_lost += bucket->n[DP_STAT_LOST]; | |
632 | ovs_mutex_unlock(&bucket->mutex); | |
633 | } | |
1ce3fa06 | 634 | stats->n_masks = UINT32_MAX; |
847108dc | 635 | stats->n_mask_hit = UINT64_MAX; |
5279f8fd | 636 | |
72865317 BP |
637 | return 0; |
638 | } | |
639 | ||
e4cfed38 PS |
640 | static void |
641 | dp_netdev_reload_pmd_threads(struct dp_netdev *dp) | |
642 | { | |
643 | int i; | |
644 | ||
645 | for (i = 0; i < dp->n_pmd_threads; i++) { | |
646 | struct pmd_thread *f = &dp->pmd_threads[i]; | |
647 | int id; | |
648 | ||
649 | atomic_add(&f->change_seq, 1, &id); | |
650 | } | |
651 | } | |
652 | ||
59e6d833 BP |
653 | static uint32_t |
654 | hash_port_no(odp_port_t port_no) | |
655 | { | |
656 | return hash_int(odp_to_u32(port_no), 0); | |
657 | } | |
658 | ||
72865317 | 659 | static int |
c3827f61 | 660 | do_add_port(struct dp_netdev *dp, const char *devname, const char *type, |
4e022ec0 | 661 | odp_port_t port_no) |
59e6d833 | 662 | OVS_REQUIRES(dp->port_mutex) |
72865317 | 663 | { |
4b609110 | 664 | struct netdev_saved_flags *sf; |
72865317 BP |
665 | struct dp_netdev_port *port; |
666 | struct netdev *netdev; | |
2499a8ce | 667 | enum netdev_flags flags; |
0cbfe35d | 668 | const char *open_type; |
72865317 | 669 | int error; |
55c955bd | 670 | int i; |
72865317 BP |
671 | |
672 | /* XXX reject devices already in some dp_netdev. */ | |
673 | ||
674 | /* Open and validate network device. */ | |
0aeaabc8 | 675 | open_type = dpif_netdev_port_open_type(dp->class, type); |
0cbfe35d | 676 | error = netdev_open(devname, open_type, &netdev); |
72865317 BP |
677 | if (error) { |
678 | return error; | |
679 | } | |
72865317 BP |
680 | /* XXX reject non-Ethernet devices */ |
681 | ||
2499a8ce AC |
682 | netdev_get_flags(netdev, &flags); |
683 | if (flags & NETDEV_LOOPBACK) { | |
684 | VLOG_ERR("%s: cannot add a loopback device", devname); | |
685 | netdev_close(netdev); | |
686 | return EINVAL; | |
687 | } | |
688 | ||
e4cfed38 PS |
689 | port = xzalloc(sizeof *port); |
690 | port->port_no = port_no; | |
691 | port->netdev = netdev; | |
55c955bd | 692 | port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); |
e4cfed38 | 693 | port->type = xstrdup(type); |
55c955bd PS |
694 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
695 | error = netdev_rxq_open(netdev, &port->rxq[i], i); | |
696 | if (error | |
697 | && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) { | |
698 | VLOG_ERR("%s: cannot receive packets on this network device (%s)", | |
699 | devname, ovs_strerror(errno)); | |
700 | netdev_close(netdev); | |
701 | return error; | |
702 | } | |
7b6b0ef4 BP |
703 | } |
704 | ||
4b609110 | 705 | error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf); |
72865317 | 706 | if (error) { |
55c955bd PS |
707 | for (i = 0; i < netdev_n_rxq(netdev); i++) { |
708 | netdev_rxq_close(port->rxq[i]); | |
709 | } | |
72865317 | 710 | netdev_close(netdev); |
f7791740 | 711 | free(port->rxq); |
e4cfed38 | 712 | free(port); |
72865317 BP |
713 | return error; |
714 | } | |
4b609110 | 715 | port->sf = sf; |
e4cfed38 PS |
716 | |
717 | if (netdev_is_pmd(netdev)) { | |
718 | dp->pmd_count++; | |
719 | dp_netdev_set_pmd_threads(dp, NR_THREADS); | |
720 | dp_netdev_reload_pmd_threads(dp); | |
721 | } | |
722 | ovs_refcount_init(&port->ref_cnt); | |
72865317 | 723 | |
59e6d833 | 724 | cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); |
d33ed218 | 725 | seq_change(dp->port_seq); |
72865317 BP |
726 | |
727 | return 0; | |
728 | } | |
729 | ||
247527db BP |
730 | static int |
731 | dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev, | |
4e022ec0 | 732 | odp_port_t *port_nop) |
247527db BP |
733 | { |
734 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
3aa30359 BP |
735 | char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; |
736 | const char *dpif_port; | |
4e022ec0 | 737 | odp_port_t port_no; |
5279f8fd | 738 | int error; |
247527db | 739 | |
59e6d833 | 740 | ovs_mutex_lock(&dp->port_mutex); |
3aa30359 | 741 | dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); |
4e022ec0 | 742 | if (*port_nop != ODPP_NONE) { |
ff073a71 BP |
743 | port_no = *port_nop; |
744 | error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0; | |
232dfa4a | 745 | } else { |
3aa30359 | 746 | port_no = choose_port(dp, dpif_port); |
5279f8fd | 747 | error = port_no == ODPP_NONE ? EFBIG : 0; |
232dfa4a | 748 | } |
5279f8fd | 749 | if (!error) { |
247527db | 750 | *port_nop = port_no; |
5279f8fd | 751 | error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no); |
247527db | 752 | } |
59e6d833 | 753 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd BP |
754 | |
755 | return error; | |
72865317 BP |
756 | } |
757 | ||
758 | static int | |
4e022ec0 | 759 | dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no) |
72865317 BP |
760 | { |
761 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
5279f8fd BP |
762 | int error; |
763 | ||
59e6d833 | 764 | ovs_mutex_lock(&dp->port_mutex); |
c40b890f BP |
765 | if (port_no == ODPP_LOCAL) { |
766 | error = EINVAL; | |
767 | } else { | |
768 | struct dp_netdev_port *port; | |
769 | ||
770 | error = get_port_by_number(dp, port_no, &port); | |
771 | if (!error) { | |
772 | do_del_port(dp, port); | |
773 | } | |
774 | } | |
59e6d833 | 775 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd BP |
776 | |
777 | return error; | |
72865317 BP |
778 | } |
779 | ||
780 | static bool | |
4e022ec0 | 781 | is_valid_port_number(odp_port_t port_no) |
72865317 | 782 | { |
ff073a71 BP |
783 | return port_no != ODPP_NONE; |
784 | } | |
785 | ||
786 | static struct dp_netdev_port * | |
787 | dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no) | |
788 | { | |
789 | struct dp_netdev_port *port; | |
790 | ||
59e6d833 | 791 | CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) { |
ff073a71 BP |
792 | if (port->port_no == port_no) { |
793 | return port; | |
794 | } | |
795 | } | |
796 | return NULL; | |
72865317 BP |
797 | } |
798 | ||
799 | static int | |
800 | get_port_by_number(struct dp_netdev *dp, | |
4e022ec0 | 801 | odp_port_t port_no, struct dp_netdev_port **portp) |
72865317 BP |
802 | { |
803 | if (!is_valid_port_number(port_no)) { | |
804 | *portp = NULL; | |
805 | return EINVAL; | |
806 | } else { | |
ff073a71 | 807 | *portp = dp_netdev_lookup_port(dp, port_no); |
72865317 BP |
808 | return *portp ? 0 : ENOENT; |
809 | } | |
810 | } | |
811 | ||
b284085e PS |
812 | static void |
813 | port_ref(struct dp_netdev_port *port) | |
814 | { | |
815 | if (port) { | |
816 | ovs_refcount_ref(&port->ref_cnt); | |
817 | } | |
818 | } | |
819 | ||
820 | static void | |
59e6d833 | 821 | port_destroy__(struct dp_netdev_port *port) |
b284085e | 822 | { |
98de6beb | 823 | int n_rxq = netdev_n_rxq(port->netdev); |
59e6d833 | 824 | int i; |
55c955bd | 825 | |
59e6d833 BP |
826 | netdev_close(port->netdev); |
827 | netdev_restore_flags(port->sf); | |
55c955bd | 828 | |
59e6d833 BP |
829 | for (i = 0; i < n_rxq; i++) { |
830 | netdev_rxq_close(port->rxq[i]); | |
831 | } | |
832 | free(port->rxq); | |
833 | free(port->type); | |
834 | free(port); | |
835 | } | |
836 | ||
837 | static void | |
838 | port_unref(struct dp_netdev_port *port) | |
839 | { | |
840 | if (port && ovs_refcount_unref(&port->ref_cnt) == 1) { | |
841 | ovsrcu_postpone(port_destroy__, port); | |
b284085e PS |
842 | } |
843 | } | |
844 | ||
72865317 BP |
845 | static int |
846 | get_port_by_name(struct dp_netdev *dp, | |
847 | const char *devname, struct dp_netdev_port **portp) | |
59e6d833 | 848 | OVS_REQUIRES(dp->port_mutex) |
72865317 BP |
849 | { |
850 | struct dp_netdev_port *port; | |
59e6d833 | 851 | struct cmap_cursor cursor; |
72865317 | 852 | |
59e6d833 | 853 | CMAP_FOR_EACH (port, node, &cursor, &dp->ports) { |
3efb6063 | 854 | if (!strcmp(netdev_get_name(port->netdev), devname)) { |
72865317 BP |
855 | *portp = port; |
856 | return 0; | |
857 | } | |
858 | } | |
859 | return ENOENT; | |
860 | } | |
861 | ||
c40b890f BP |
862 | static void |
863 | do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) | |
59e6d833 | 864 | OVS_REQUIRES(dp->port_mutex) |
72865317 | 865 | { |
c40b890f | 866 | cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no)); |
d33ed218 | 867 | seq_change(dp->port_seq); |
e4cfed38 PS |
868 | if (netdev_is_pmd(port->netdev)) { |
869 | dp_netdev_reload_pmd_threads(dp); | |
870 | } | |
72865317 | 871 | |
b284085e | 872 | port_unref(port); |
72865317 BP |
873 | } |
874 | ||
875 | static void | |
4c738a8d BP |
876 | answer_port_query(const struct dp_netdev_port *port, |
877 | struct dpif_port *dpif_port) | |
72865317 | 878 | { |
3efb6063 | 879 | dpif_port->name = xstrdup(netdev_get_name(port->netdev)); |
0cbfe35d | 880 | dpif_port->type = xstrdup(port->type); |
4c738a8d | 881 | dpif_port->port_no = port->port_no; |
72865317 BP |
882 | } |
883 | ||
884 | static int | |
4e022ec0 | 885 | dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, |
4c738a8d | 886 | struct dpif_port *dpif_port) |
72865317 BP |
887 | { |
888 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
889 | struct dp_netdev_port *port; | |
890 | int error; | |
891 | ||
892 | error = get_port_by_number(dp, port_no, &port); | |
4afba28d | 893 | if (!error && dpif_port) { |
4c738a8d | 894 | answer_port_query(port, dpif_port); |
72865317 | 895 | } |
5279f8fd | 896 | |
72865317 BP |
897 | return error; |
898 | } | |
899 | ||
900 | static int | |
901 | dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname, | |
4c738a8d | 902 | struct dpif_port *dpif_port) |
72865317 BP |
903 | { |
904 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
905 | struct dp_netdev_port *port; | |
906 | int error; | |
907 | ||
59e6d833 | 908 | ovs_mutex_lock(&dp->port_mutex); |
72865317 | 909 | error = get_port_by_name(dp, devname, &port); |
4afba28d | 910 | if (!error && dpif_port) { |
4c738a8d | 911 | answer_port_query(port, dpif_port); |
72865317 | 912 | } |
59e6d833 | 913 | ovs_mutex_unlock(&dp->port_mutex); |
5279f8fd | 914 | |
72865317 BP |
915 | return error; |
916 | } | |
917 | ||
61e7deb1 BP |
918 | static void |
919 | dp_netdev_flow_free(struct dp_netdev_flow *flow) | |
920 | { | |
921 | struct dp_netdev_flow_stats *bucket; | |
922 | size_t i; | |
923 | ||
924 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) { | |
925 | ovs_mutex_destroy(&bucket->mutex); | |
926 | free_cacheline(bucket); | |
927 | } | |
928 | ovsthread_stats_destroy(&flow->stats); | |
929 | ||
930 | cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr)); | |
931 | dp_netdev_actions_free(dp_netdev_flow_get_actions(flow)); | |
61e7deb1 BP |
932 | free(flow); |
933 | } | |
934 | ||
72865317 | 935 | static void |
8a4e3a85 BP |
936 | dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow) |
937 | OVS_REQ_WRLOCK(dp->cls.rwlock) | |
938 | OVS_REQUIRES(dp->flow_mutex) | |
72865317 | 939 | { |
8a4e3a85 BP |
940 | struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr); |
941 | struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node); | |
2c0ea78f | 942 | |
8a4e3a85 BP |
943 | classifier_remove(&dp->cls, cr); |
944 | hmap_remove(&dp->flow_table, node); | |
61e7deb1 | 945 | ovsrcu_postpone(dp_netdev_flow_free, flow); |
72865317 BP |
946 | } |
947 | ||
948 | static void | |
949 | dp_netdev_flow_flush(struct dp_netdev *dp) | |
950 | { | |
1763b4b8 | 951 | struct dp_netdev_flow *netdev_flow, *next; |
72865317 | 952 | |
8a4e3a85 | 953 | ovs_mutex_lock(&dp->flow_mutex); |
06f81620 | 954 | fat_rwlock_wrlock(&dp->cls.rwlock); |
1763b4b8 | 955 | HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) { |
8a4e3a85 | 956 | dp_netdev_remove_flow(dp, netdev_flow); |
72865317 | 957 | } |
06f81620 | 958 | fat_rwlock_unlock(&dp->cls.rwlock); |
8a4e3a85 | 959 | ovs_mutex_unlock(&dp->flow_mutex); |
72865317 BP |
960 | } |
961 | ||
/* Removes all flows from the datapath behind 'dpif'.  Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
970 | ||
b0ec0f27 | 971 | struct dp_netdev_port_state { |
59e6d833 | 972 | struct cmap_position position; |
4c738a8d | 973 | char *name; |
b0ec0f27 BP |
974 | }; |
975 | ||
976 | static int | |
977 | dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) | |
978 | { | |
979 | *statep = xzalloc(sizeof(struct dp_netdev_port_state)); | |
980 | return 0; | |
981 | } | |
982 | ||
72865317 | 983 | static int |
b0ec0f27 | 984 | dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, |
4c738a8d | 985 | struct dpif_port *dpif_port) |
72865317 | 986 | { |
b0ec0f27 | 987 | struct dp_netdev_port_state *state = state_; |
72865317 | 988 | struct dp_netdev *dp = get_dp_netdev(dpif); |
59e6d833 | 989 | struct cmap_node *node; |
ff073a71 | 990 | int retval; |
72865317 | 991 | |
59e6d833 | 992 | node = cmap_next_position(&dp->ports, &state->position); |
ff073a71 BP |
993 | if (node) { |
994 | struct dp_netdev_port *port; | |
5279f8fd | 995 | |
ff073a71 BP |
996 | port = CONTAINER_OF(node, struct dp_netdev_port, node); |
997 | ||
998 | free(state->name); | |
999 | state->name = xstrdup(netdev_get_name(port->netdev)); | |
1000 | dpif_port->name = state->name; | |
1001 | dpif_port->type = port->type; | |
1002 | dpif_port->port_no = port->port_no; | |
1003 | ||
1004 | retval = 0; | |
1005 | } else { | |
1006 | retval = EOF; | |
72865317 | 1007 | } |
5279f8fd | 1008 | |
ff073a71 | 1009 | return retval; |
b0ec0f27 BP |
1010 | } |
1011 | ||
1012 | static int | |
4c738a8d | 1013 | dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) |
b0ec0f27 | 1014 | { |
4c738a8d BP |
1015 | struct dp_netdev_port_state *state = state_; |
1016 | free(state->name); | |
b0ec0f27 BP |
1017 | free(state); |
1018 | return 0; | |
72865317 BP |
1019 | } |
1020 | ||
1021 | static int | |
67a4917b | 1022 | dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) |
72865317 BP |
1023 | { |
1024 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
d33ed218 | 1025 | uint64_t new_port_seq; |
5279f8fd BP |
1026 | int error; |
1027 | ||
d33ed218 BP |
1028 | new_port_seq = seq_read(dpif->dp->port_seq); |
1029 | if (dpif->last_port_seq != new_port_seq) { | |
1030 | dpif->last_port_seq = new_port_seq; | |
5279f8fd | 1031 | error = ENOBUFS; |
72865317 | 1032 | } else { |
5279f8fd | 1033 | error = EAGAIN; |
72865317 | 1034 | } |
5279f8fd BP |
1035 | |
1036 | return error; | |
72865317 BP |
1037 | } |
1038 | ||
1039 | static void | |
1040 | dpif_netdev_port_poll_wait(const struct dpif *dpif_) | |
1041 | { | |
1042 | struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); | |
5279f8fd | 1043 | |
d33ed218 | 1044 | seq_wait(dpif->dp->port_seq, dpif->last_port_seq); |
8a4e3a85 BP |
1045 | } |
1046 | ||
1047 | static struct dp_netdev_flow * | |
1048 | dp_netdev_flow_cast(const struct cls_rule *cr) | |
1049 | { | |
1050 | return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL; | |
72865317 BP |
1051 | } |
1052 | ||
72865317 | 1053 | static struct dp_netdev_flow * |
4f150744 | 1054 | dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key) |
8a4e3a85 | 1055 | OVS_EXCLUDED(dp->cls.rwlock) |
2c0ea78f | 1056 | { |
8a4e3a85 | 1057 | struct dp_netdev_flow *netdev_flow; |
4f150744 | 1058 | struct cls_rule *rule; |
2c0ea78f | 1059 | |
06f81620 | 1060 | fat_rwlock_rdlock(&dp->cls.rwlock); |
4f150744 JR |
1061 | rule = classifier_lookup_miniflow_first(&dp->cls, key); |
1062 | netdev_flow = dp_netdev_flow_cast(rule); | |
06f81620 | 1063 | fat_rwlock_unlock(&dp->cls.rwlock); |
2c0ea78f | 1064 | |
8a4e3a85 | 1065 | return netdev_flow; |
2c0ea78f GS |
1066 | } |
1067 | ||
1068 | static struct dp_netdev_flow * | |
1069 | dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow) | |
8a4e3a85 | 1070 | OVS_REQ_RDLOCK(dp->cls.rwlock) |
72865317 | 1071 | { |
1763b4b8 | 1072 | struct dp_netdev_flow *netdev_flow; |
72865317 | 1073 | |
2c0ea78f | 1074 | HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0), |
1763b4b8 | 1075 | &dp->flow_table) { |
2c0ea78f | 1076 | if (flow_equal(&netdev_flow->flow, flow)) { |
61e7deb1 | 1077 | return netdev_flow; |
72865317 BP |
1078 | } |
1079 | } | |
8a4e3a85 | 1080 | |
72865317 BP |
1081 | return NULL; |
1082 | } | |
1083 | ||
1084 | static void | |
1763b4b8 GS |
1085 | get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow, |
1086 | struct dpif_flow_stats *stats) | |
feebdea2 | 1087 | { |
679ba04c BP |
1088 | struct dp_netdev_flow_stats *bucket; |
1089 | size_t i; | |
1090 | ||
1091 | memset(stats, 0, sizeof *stats); | |
1092 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) { | |
1093 | ovs_mutex_lock(&bucket->mutex); | |
1094 | stats->n_packets += bucket->packet_count; | |
1095 | stats->n_bytes += bucket->byte_count; | |
1096 | stats->used = MAX(stats->used, bucket->used); | |
1097 | stats->tcp_flags |= bucket->tcp_flags; | |
1098 | ovs_mutex_unlock(&bucket->mutex); | |
1099 | } | |
72865317 BP |
1100 | } |
1101 | ||
36956a7d | 1102 | static int |
8c301900 JR |
1103 | dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, |
1104 | const struct nlattr *mask_key, | |
1105 | uint32_t mask_key_len, const struct flow *flow, | |
1106 | struct flow *mask) | |
1107 | { | |
1108 | if (mask_key_len) { | |
80e44883 BP |
1109 | enum odp_key_fitness fitness; |
1110 | ||
1111 | fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow); | |
1112 | if (fitness) { | |
8c301900 JR |
1113 | /* This should not happen: it indicates that |
1114 | * odp_flow_key_from_mask() and odp_flow_key_to_mask() | |
1115 | * disagree on the acceptable form of a mask. Log the problem | |
1116 | * as an error, with enough details to enable debugging. */ | |
1117 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
1118 | ||
1119 | if (!VLOG_DROP_ERR(&rl)) { | |
1120 | struct ds s; | |
1121 | ||
1122 | ds_init(&s); | |
1123 | odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s, | |
1124 | true); | |
80e44883 BP |
1125 | VLOG_ERR("internal error parsing flow mask %s (%s)", |
1126 | ds_cstr(&s), odp_key_fitness_to_string(fitness)); | |
8c301900 JR |
1127 | ds_destroy(&s); |
1128 | } | |
1129 | ||
1130 | return EINVAL; | |
1131 | } | |
8c301900 JR |
1132 | } else { |
1133 | enum mf_field_id id; | |
1134 | /* No mask key, unwildcard everything except fields whose | |
1135 | * prerequisities are not met. */ | |
1136 | memset(mask, 0x0, sizeof *mask); | |
1137 | ||
1138 | for (id = 0; id < MFF_N_IDS; ++id) { | |
1139 | /* Skip registers and metadata. */ | |
1140 | if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS) | |
1141 | && id != MFF_METADATA) { | |
1142 | const struct mf_field *mf = mf_from_id(id); | |
1143 | if (mf_are_prereqs_ok(mf, flow)) { | |
1144 | mf_mask_field(mf, mask); | |
1145 | } | |
1146 | } | |
1147 | } | |
1148 | } | |
1149 | ||
f3f750e5 BP |
1150 | /* Force unwildcard the in_port. |
1151 | * | |
1152 | * We need to do this even in the case where we unwildcard "everything" | |
1153 | * above because "everything" only includes the 16-bit OpenFlow port number | |
1154 | * mask->in_port.ofp_port, which only covers half of the 32-bit datapath | |
1155 | * port number mask->in_port.odp_port. */ | |
1156 | mask->in_port.odp_port = u32_to_odp(UINT32_MAX); | |
1157 | ||
8c301900 JR |
1158 | return 0; |
1159 | } | |
1160 | ||
1161 | static int | |
1162 | dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, | |
1163 | struct flow *flow) | |
36956a7d | 1164 | { |
586ddea5 BP |
1165 | odp_port_t in_port; |
1166 | ||
8c301900 | 1167 | if (odp_flow_key_to_flow(key, key_len, flow)) { |
36956a7d | 1168 | /* This should not happen: it indicates that odp_flow_key_from_flow() |
8c301900 JR |
1169 | * and odp_flow_key_to_flow() disagree on the acceptable form of a |
1170 | * flow. Log the problem as an error, with enough details to enable | |
1171 | * debugging. */ | |
36956a7d BP |
1172 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); |
1173 | ||
1174 | if (!VLOG_DROP_ERR(&rl)) { | |
1175 | struct ds s; | |
1176 | ||
1177 | ds_init(&s); | |
8c301900 | 1178 | odp_flow_format(key, key_len, NULL, 0, NULL, &s, true); |
36956a7d BP |
1179 | VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s)); |
1180 | ds_destroy(&s); | |
1181 | } | |
1182 | ||
1183 | return EINVAL; | |
1184 | } | |
1185 | ||
586ddea5 BP |
1186 | in_port = flow->in_port.odp_port; |
1187 | if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) { | |
18886b60 BP |
1188 | return EINVAL; |
1189 | } | |
1190 | ||
36956a7d BP |
1191 | return 0; |
1192 | } | |
1193 | ||
72865317 | 1194 | static int |
693c4a01 | 1195 | dpif_netdev_flow_get(const struct dpif *dpif, |
feebdea2 | 1196 | const struct nlattr *nl_key, size_t nl_key_len, |
c97fb132 | 1197 | struct ofpbuf **actionsp, struct dpif_flow_stats *stats) |
72865317 BP |
1198 | { |
1199 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1763b4b8 | 1200 | struct dp_netdev_flow *netdev_flow; |
bc4a05c6 BP |
1201 | struct flow key; |
1202 | int error; | |
36956a7d | 1203 | |
feebdea2 | 1204 | error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key); |
bc4a05c6 BP |
1205 | if (error) { |
1206 | return error; | |
1207 | } | |
14608a15 | 1208 | |
06f81620 | 1209 | fat_rwlock_rdlock(&dp->cls.rwlock); |
2c0ea78f | 1210 | netdev_flow = dp_netdev_find_flow(dp, &key); |
06f81620 | 1211 | fat_rwlock_unlock(&dp->cls.rwlock); |
8a4e3a85 | 1212 | |
1763b4b8 | 1213 | if (netdev_flow) { |
5279f8fd | 1214 | if (stats) { |
1763b4b8 | 1215 | get_dpif_flow_stats(netdev_flow, stats); |
5279f8fd | 1216 | } |
679ba04c | 1217 | |
5279f8fd | 1218 | if (actionsp) { |
61e7deb1 | 1219 | struct dp_netdev_actions *actions; |
8a4e3a85 | 1220 | |
61e7deb1 | 1221 | actions = dp_netdev_flow_get_actions(netdev_flow); |
8a4e3a85 | 1222 | *actionsp = ofpbuf_clone_data(actions->actions, actions->size); |
5279f8fd | 1223 | } |
61e7deb1 | 1224 | } else { |
5279f8fd | 1225 | error = ENOENT; |
72865317 | 1226 | } |
bc4a05c6 | 1227 | |
5279f8fd | 1228 | return error; |
72865317 BP |
1229 | } |
1230 | ||
72865317 | 1231 | static int |
2c0ea78f GS |
1232 | dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow, |
1233 | const struct flow_wildcards *wc, | |
1234 | const struct nlattr *actions, | |
1235 | size_t actions_len) | |
8a4e3a85 | 1236 | OVS_REQUIRES(dp->flow_mutex) |
72865317 | 1237 | { |
1763b4b8 | 1238 | struct dp_netdev_flow *netdev_flow; |
2c0ea78f | 1239 | struct match match; |
72865317 | 1240 | |
1763b4b8 | 1241 | netdev_flow = xzalloc(sizeof *netdev_flow); |
8a4e3a85 | 1242 | *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow; |
8a4e3a85 | 1243 | |
679ba04c BP |
1244 | ovsthread_stats_init(&netdev_flow->stats); |
1245 | ||
61e7deb1 BP |
1246 | ovsrcu_set(&netdev_flow->actions, |
1247 | dp_netdev_actions_create(actions, actions_len)); | |
2c0ea78f GS |
1248 | |
1249 | match_init(&match, flow, wc); | |
8a4e3a85 BP |
1250 | cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr), |
1251 | &match, NETDEV_RULE_PRIORITY); | |
06f81620 | 1252 | fat_rwlock_wrlock(&dp->cls.rwlock); |
8a4e3a85 BP |
1253 | classifier_insert(&dp->cls, |
1254 | CONST_CAST(struct cls_rule *, &netdev_flow->cr)); | |
1255 | hmap_insert(&dp->flow_table, | |
1256 | CONST_CAST(struct hmap_node *, &netdev_flow->node), | |
1257 | flow_hash(flow, 0)); | |
06f81620 | 1258 | fat_rwlock_unlock(&dp->cls.rwlock); |
72865317 | 1259 | |
72865317 BP |
1260 | return 0; |
1261 | } | |
1262 | ||
1263 | static void | |
1763b4b8 | 1264 | clear_stats(struct dp_netdev_flow *netdev_flow) |
72865317 | 1265 | { |
679ba04c BP |
1266 | struct dp_netdev_flow_stats *bucket; |
1267 | size_t i; | |
1268 | ||
1269 | OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) { | |
1270 | ovs_mutex_lock(&bucket->mutex); | |
1271 | bucket->used = 0; | |
1272 | bucket->packet_count = 0; | |
1273 | bucket->byte_count = 0; | |
1274 | bucket->tcp_flags = 0; | |
1275 | ovs_mutex_unlock(&bucket->mutex); | |
1276 | } | |
72865317 BP |
1277 | } |
1278 | ||
1279 | static int | |
89625d1e | 1280 | dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) |
72865317 BP |
1281 | { |
1282 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1763b4b8 | 1283 | struct dp_netdev_flow *netdev_flow; |
2c0ea78f | 1284 | struct flow flow; |
4f150744 | 1285 | struct miniflow miniflow; |
2c0ea78f | 1286 | struct flow_wildcards wc; |
36956a7d BP |
1287 | int error; |
1288 | ||
8c301900 JR |
1289 | error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow); |
1290 | if (error) { | |
1291 | return error; | |
1292 | } | |
1293 | error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len, | |
1294 | put->mask, put->mask_len, | |
1295 | &flow, &wc.masks); | |
36956a7d BP |
1296 | if (error) { |
1297 | return error; | |
1298 | } | |
4f150744 | 1299 | miniflow_init(&miniflow, &flow); |
72865317 | 1300 | |
8a4e3a85 | 1301 | ovs_mutex_lock(&dp->flow_mutex); |
4f150744 | 1302 | netdev_flow = dp_netdev_lookup_flow(dp, &miniflow); |
1763b4b8 | 1303 | if (!netdev_flow) { |
89625d1e | 1304 | if (put->flags & DPIF_FP_CREATE) { |
72865317 | 1305 | if (hmap_count(&dp->flow_table) < MAX_FLOWS) { |
89625d1e BP |
1306 | if (put->stats) { |
1307 | memset(put->stats, 0, sizeof *put->stats); | |
feebdea2 | 1308 | } |
2c0ea78f | 1309 | error = dp_netdev_flow_add(dp, &flow, &wc, put->actions, |
5279f8fd | 1310 | put->actions_len); |
72865317 | 1311 | } else { |
5279f8fd | 1312 | error = EFBIG; |
72865317 BP |
1313 | } |
1314 | } else { | |
5279f8fd | 1315 | error = ENOENT; |
72865317 BP |
1316 | } |
1317 | } else { | |
2c0ea78f GS |
1318 | if (put->flags & DPIF_FP_MODIFY |
1319 | && flow_equal(&flow, &netdev_flow->flow)) { | |
8a4e3a85 BP |
1320 | struct dp_netdev_actions *new_actions; |
1321 | struct dp_netdev_actions *old_actions; | |
1322 | ||
1323 | new_actions = dp_netdev_actions_create(put->actions, | |
1324 | put->actions_len); | |
1325 | ||
61e7deb1 BP |
1326 | old_actions = dp_netdev_flow_get_actions(netdev_flow); |
1327 | ovsrcu_set(&netdev_flow->actions, new_actions); | |
679ba04c | 1328 | |
a84cb64a BP |
1329 | if (put->stats) { |
1330 | get_dpif_flow_stats(netdev_flow, put->stats); | |
1331 | } | |
1332 | if (put->flags & DPIF_FP_ZERO_STATS) { | |
1333 | clear_stats(netdev_flow); | |
72865317 | 1334 | } |
8a4e3a85 | 1335 | |
61e7deb1 | 1336 | ovsrcu_postpone(dp_netdev_actions_free, old_actions); |
2c0ea78f | 1337 | } else if (put->flags & DPIF_FP_CREATE) { |
5279f8fd | 1338 | error = EEXIST; |
2c0ea78f GS |
1339 | } else { |
1340 | /* Overlapping flow. */ | |
1341 | error = EINVAL; | |
72865317 BP |
1342 | } |
1343 | } | |
8a4e3a85 | 1344 | ovs_mutex_unlock(&dp->flow_mutex); |
5279f8fd BP |
1345 | |
1346 | return error; | |
72865317 BP |
1347 | } |
1348 | ||
72865317 | 1349 | static int |
b99d3cee | 1350 | dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del) |
72865317 BP |
1351 | { |
1352 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
1763b4b8 | 1353 | struct dp_netdev_flow *netdev_flow; |
14608a15 | 1354 | struct flow key; |
36956a7d BP |
1355 | int error; |
1356 | ||
b99d3cee | 1357 | error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key); |
36956a7d BP |
1358 | if (error) { |
1359 | return error; | |
1360 | } | |
72865317 | 1361 | |
8a4e3a85 | 1362 | ovs_mutex_lock(&dp->flow_mutex); |
06f81620 | 1363 | fat_rwlock_wrlock(&dp->cls.rwlock); |
2c0ea78f | 1364 | netdev_flow = dp_netdev_find_flow(dp, &key); |
1763b4b8 | 1365 | if (netdev_flow) { |
b99d3cee | 1366 | if (del->stats) { |
1763b4b8 | 1367 | get_dpif_flow_stats(netdev_flow, del->stats); |
feebdea2 | 1368 | } |
8a4e3a85 | 1369 | dp_netdev_remove_flow(dp, netdev_flow); |
72865317 | 1370 | } else { |
5279f8fd | 1371 | error = ENOENT; |
72865317 | 1372 | } |
06f81620 | 1373 | fat_rwlock_unlock(&dp->cls.rwlock); |
8a4e3a85 | 1374 | ovs_mutex_unlock(&dp->flow_mutex); |
5279f8fd BP |
1375 | |
1376 | return error; | |
72865317 BP |
1377 | } |
1378 | ||
ac64794a BP |
1379 | struct dpif_netdev_flow_dump { |
1380 | struct dpif_flow_dump up; | |
e723fd32 JS |
1381 | uint32_t bucket; |
1382 | uint32_t offset; | |
d2ad7ef1 JS |
1383 | int status; |
1384 | struct ovs_mutex mutex; | |
e723fd32 JS |
1385 | }; |
1386 | ||
ac64794a BP |
1387 | static struct dpif_netdev_flow_dump * |
1388 | dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump) | |
72865317 | 1389 | { |
ac64794a | 1390 | return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up); |
e723fd32 JS |
1391 | } |
1392 | ||
ac64794a BP |
1393 | static struct dpif_flow_dump * |
1394 | dpif_netdev_flow_dump_create(const struct dpif *dpif_) | |
e723fd32 | 1395 | { |
ac64794a | 1396 | struct dpif_netdev_flow_dump *dump; |
e723fd32 | 1397 | |
ac64794a BP |
1398 | dump = xmalloc(sizeof *dump); |
1399 | dpif_flow_dump_init(&dump->up, dpif_); | |
1400 | dump->bucket = 0; | |
1401 | dump->offset = 0; | |
1402 | dump->status = 0; | |
1403 | ovs_mutex_init(&dump->mutex); | |
1404 | ||
1405 | return &dump->up; | |
e723fd32 JS |
1406 | } |
1407 | ||
1408 | static int | |
ac64794a | 1409 | dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_) |
e723fd32 | 1410 | { |
ac64794a | 1411 | struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_); |
e723fd32 | 1412 | |
ac64794a BP |
1413 | ovs_mutex_destroy(&dump->mutex); |
1414 | free(dump); | |
704a1e09 BP |
1415 | return 0; |
1416 | } | |
1417 | ||
ac64794a BP |
1418 | struct dpif_netdev_flow_dump_thread { |
1419 | struct dpif_flow_dump_thread up; | |
1420 | struct dpif_netdev_flow_dump *dump; | |
1421 | struct odputil_keybuf keybuf; | |
1422 | struct odputil_keybuf maskbuf; | |
1423 | }; | |
1424 | ||
1425 | static struct dpif_netdev_flow_dump_thread * | |
1426 | dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread) | |
1427 | { | |
1428 | return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up); | |
1429 | } | |
1430 | ||
1431 | static struct dpif_flow_dump_thread * | |
1432 | dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_) | |
1433 | { | |
1434 | struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_); | |
1435 | struct dpif_netdev_flow_dump_thread *thread; | |
1436 | ||
1437 | thread = xmalloc(sizeof *thread); | |
1438 | dpif_flow_dump_thread_init(&thread->up, &dump->up); | |
1439 | thread->dump = dump; | |
1440 | return &thread->up; | |
1441 | } | |
1442 | ||
/* Frees the per-thread dump state 'thread_'. */
static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    free(dpif_netdev_flow_dump_thread_cast(thread_));
}
1451 | ||
61e7deb1 | 1452 | /* XXX the caller must use 'actions' without quiescing */ |
704a1e09 | 1453 | static int |
ac64794a BP |
1454 | dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, |
1455 | struct dpif_flow *f, int max_flows OVS_UNUSED) | |
1456 | { | |
1457 | struct dpif_netdev_flow_dump_thread *thread | |
1458 | = dpif_netdev_flow_dump_thread_cast(thread_); | |
1459 | struct dpif_netdev_flow_dump *dump = thread->dump; | |
1460 | struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif); | |
1461 | struct dp_netdev *dp = get_dp_netdev(&dpif->dpif); | |
1763b4b8 | 1462 | struct dp_netdev_flow *netdev_flow; |
fbfe01de | 1463 | struct flow_wildcards wc; |
ac64794a BP |
1464 | struct dp_netdev_actions *dp_actions; |
1465 | struct ofpbuf buf; | |
d2ad7ef1 | 1466 | int error; |
14608a15 | 1467 | |
ac64794a BP |
1468 | ovs_mutex_lock(&dump->mutex); |
1469 | error = dump->status; | |
d2ad7ef1 JS |
1470 | if (!error) { |
1471 | struct hmap_node *node; | |
1472 | ||
1473 | fat_rwlock_rdlock(&dp->cls.rwlock); | |
ac64794a | 1474 | node = hmap_at_position(&dp->flow_table, &dump->bucket, &dump->offset); |
d2ad7ef1 JS |
1475 | if (node) { |
1476 | netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node); | |
d2ad7ef1 JS |
1477 | } |
1478 | fat_rwlock_unlock(&dp->cls.rwlock); | |
1479 | if (!node) { | |
ac64794a | 1480 | dump->status = error = EOF; |
d2ad7ef1 | 1481 | } |
8a4e3a85 | 1482 | } |
ac64794a | 1483 | ovs_mutex_unlock(&dump->mutex); |
d2ad7ef1 | 1484 | if (error) { |
ac64794a | 1485 | return 0; |
72865317 | 1486 | } |
704a1e09 | 1487 | |
fbfe01de AZ |
1488 | minimask_expand(&netdev_flow->cr.match.mask, &wc); |
1489 | ||
ac64794a BP |
1490 | /* Key. */ |
1491 | ofpbuf_use_stack(&buf, &thread->keybuf, sizeof thread->keybuf); | |
1492 | odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks, | |
1493 | netdev_flow->flow.in_port.odp_port, true); | |
1494 | f->key = ofpbuf_data(&buf); | |
1495 | f->key_len = ofpbuf_size(&buf); | |
1496 | ||
1497 | /* Mask. */ | |
1498 | ofpbuf_use_stack(&buf, &thread->maskbuf, sizeof thread->maskbuf); | |
1499 | odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow, | |
1500 | odp_to_u32(wc.masks.in_port.odp_port), | |
1501 | SIZE_MAX, true); | |
1502 | f->mask = ofpbuf_data(&buf); | |
1503 | f->mask_len = ofpbuf_size(&buf); | |
45c626a3 | 1504 | |
ac64794a BP |
1505 | /* Actions. */ |
1506 | dp_actions = dp_netdev_flow_get_actions(netdev_flow); | |
1507 | f->actions = dp_actions->actions; | |
1508 | f->actions_len = dp_actions->size; | |
704a1e09 | 1509 | |
ac64794a BP |
1510 | /* Stats. */ |
1511 | get_dpif_flow_stats(netdev_flow, &f->stats); | |
feebdea2 | 1512 | |
ac64794a | 1513 | return 1; |
72865317 BP |
1514 | } |
1515 | ||
1516 | static int | |
758c456d | 1517 | dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) |
72865317 BP |
1518 | { |
1519 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
758c456d | 1520 | struct pkt_metadata *md = &execute->md; |
27bbe15d JR |
1521 | struct { |
1522 | struct miniflow flow; | |
1523 | uint32_t buf[FLOW_U32S]; | |
1524 | } key; | |
72865317 | 1525 | |
1f317cb5 PS |
1526 | if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN || |
1527 | ofpbuf_size(execute->packet) > UINT16_MAX) { | |
72865317 BP |
1528 | return EINVAL; |
1529 | } | |
1530 | ||
758c456d | 1531 | /* Extract flow key. */ |
27bbe15d JR |
1532 | miniflow_initialize(&key.flow, key.buf); |
1533 | miniflow_extract(execute->packet, md, &key.flow); | |
8a4e3a85 | 1534 | |
27bbe15d | 1535 | dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md, |
df1e5a3b | 1536 | execute->actions, execute->actions_len); |
8a4e3a85 | 1537 | |
758c456d | 1538 | return 0; |
72865317 BP |
1539 | } |
1540 | ||
63be20be AW |
1541 | static void |
1542 | dp_netdev_destroy_all_queues(struct dp_netdev *dp) | |
1543 | OVS_REQ_WRLOCK(dp->queue_rwlock) | |
1544 | { | |
1545 | size_t i; | |
1546 | ||
1547 | dp_netdev_purge_queues(dp); | |
1548 | ||
1549 | for (i = 0; i < dp->n_handlers; i++) { | |
1550 | struct dp_netdev_queue *q = &dp->handler_queues[i]; | |
1551 | ||
1552 | ovs_mutex_destroy(&q->mutex); | |
1553 | seq_destroy(q->seq); | |
1554 | } | |
1555 | free(dp->handler_queues); | |
1556 | dp->handler_queues = NULL; | |
1557 | dp->n_handlers = 0; | |
1558 | } | |
1559 | ||
1560 | static void | |
1561 | dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers) | |
1562 | OVS_REQ_WRLOCK(dp->queue_rwlock) | |
1563 | { | |
1564 | if (dp->n_handlers != n_handlers) { | |
1565 | size_t i; | |
1566 | ||
1567 | dp_netdev_destroy_all_queues(dp); | |
1568 | ||
1569 | dp->n_handlers = n_handlers; | |
1570 | dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues); | |
1571 | ||
1572 | for (i = 0; i < n_handlers; i++) { | |
1573 | struct dp_netdev_queue *q = &dp->handler_queues[i]; | |
1574 | ||
1575 | ovs_mutex_init(&q->mutex); | |
1576 | q->seq = seq_create(); | |
1577 | } | |
1578 | } | |
1579 | } | |
1580 | ||
72865317 | 1581 | static int |
63be20be | 1582 | dpif_netdev_recv_set(struct dpif *dpif, bool enable) |
72865317 | 1583 | { |
63be20be AW |
1584 | struct dp_netdev *dp = get_dp_netdev(dpif); |
1585 | ||
1586 | if ((dp->handler_queues != NULL) == enable) { | |
1587 | return 0; | |
1588 | } | |
1589 | ||
1590 | fat_rwlock_wrlock(&dp->queue_rwlock); | |
1591 | if (!enable) { | |
1592 | dp_netdev_destroy_all_queues(dp); | |
1593 | } else { | |
1594 | dp_netdev_refresh_queues(dp, 1); | |
1595 | } | |
1596 | fat_rwlock_unlock(&dp->queue_rwlock); | |
1597 | ||
82272ede | 1598 | return 0; |
72865317 BP |
1599 | } |
1600 | ||
1954e6bb | 1601 | static int |
63be20be | 1602 | dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers) |
1954e6bb | 1603 | { |
63be20be AW |
1604 | struct dp_netdev *dp = get_dp_netdev(dpif); |
1605 | ||
1606 | fat_rwlock_wrlock(&dp->queue_rwlock); | |
1607 | if (dp->handler_queues) { | |
1608 | dp_netdev_refresh_queues(dp, n_handlers); | |
1609 | } | |
1610 | fat_rwlock_unlock(&dp->queue_rwlock); | |
1611 | ||
1954e6bb AW |
1612 | return 0; |
1613 | } | |
1614 | ||
5bf93d67 EJ |
1615 | static int |
1616 | dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
1617 | uint32_t queue_id, uint32_t *priority) | |
1618 | { | |
1619 | *priority = queue_id; | |
1620 | return 0; | |
1621 | } | |
1622 | ||
63be20be AW |
1623 | static bool |
1624 | dp_netdev_recv_check(const struct dp_netdev *dp, const uint32_t handler_id) | |
1625 | OVS_REQ_RDLOCK(dp->queue_rwlock) | |
72865317 | 1626 | { |
63be20be | 1627 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); |
72865317 | 1628 | |
63be20be AW |
1629 | if (!dp->handler_queues) { |
1630 | VLOG_WARN_RL(&rl, "receiving upcall disabled"); | |
1631 | return false; | |
72865317 | 1632 | } |
63be20be AW |
1633 | |
1634 | if (handler_id >= dp->n_handlers) { | |
1635 | VLOG_WARN_RL(&rl, "handler index out of bound"); | |
1636 | return false; | |
1637 | } | |
1638 | ||
1639 | return true; | |
72865317 BP |
1640 | } |
1641 | ||
1642 | static int | |
63be20be | 1643 | dpif_netdev_recv(struct dpif *dpif, uint32_t handler_id, |
1954e6bb | 1644 | struct dpif_upcall *upcall, struct ofpbuf *buf) |
72865317 | 1645 | { |
f5126b57 | 1646 | struct dp_netdev *dp = get_dp_netdev(dpif); |
5279f8fd | 1647 | struct dp_netdev_queue *q; |
63be20be AW |
1648 | int error = 0; |
1649 | ||
1650 | fat_rwlock_rdlock(&dp->queue_rwlock); | |
5279f8fd | 1651 | |
63be20be AW |
1652 | if (!dp_netdev_recv_check(dp, handler_id)) { |
1653 | error = EAGAIN; | |
1654 | goto out; | |
1655 | } | |
1656 | ||
1657 | q = &dp->handler_queues[handler_id]; | |
1658 | ovs_mutex_lock(&q->mutex); | |
1659 | if (q->head != q->tail) { | |
d88b629b BP |
1660 | struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK]; |
1661 | ||
1662 | *upcall = u->upcall; | |
b3907fbc | 1663 | |
90a7c55e | 1664 | ofpbuf_uninit(buf); |
d88b629b | 1665 | *buf = u->buf; |
72865317 | 1666 | } else { |
5279f8fd | 1667 | error = EAGAIN; |
72865317 | 1668 | } |
63be20be AW |
1669 | ovs_mutex_unlock(&q->mutex); |
1670 | ||
1671 | out: | |
1672 | fat_rwlock_unlock(&dp->queue_rwlock); | |
5279f8fd BP |
1673 | |
1674 | return error; | |
72865317 BP |
1675 | } |
1676 | ||
1677 | static void | |
63be20be | 1678 | dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id) |
72865317 | 1679 | { |
d33ed218 | 1680 | struct dp_netdev *dp = get_dp_netdev(dpif); |
63be20be | 1681 | struct dp_netdev_queue *q; |
d33ed218 | 1682 | uint64_t seq; |
5279f8fd | 1683 | |
63be20be AW |
1684 | fat_rwlock_rdlock(&dp->queue_rwlock); |
1685 | ||
1686 | if (!dp_netdev_recv_check(dp, handler_id)) { | |
1687 | goto out; | |
1688 | } | |
1689 | ||
1690 | q = &dp->handler_queues[handler_id]; | |
1691 | ovs_mutex_lock(&q->mutex); | |
1692 | seq = seq_read(q->seq); | |
1693 | if (q->head != q->tail) { | |
72865317 | 1694 | poll_immediate_wake(); |
d33ed218 | 1695 | } else { |
63be20be | 1696 | seq_wait(q->seq, seq); |
72865317 | 1697 | } |
63be20be AW |
1698 | |
1699 | ovs_mutex_unlock(&q->mutex); | |
1700 | ||
1701 | out: | |
1702 | fat_rwlock_unlock(&dp->queue_rwlock); | |
72865317 | 1703 | } |
1ba530f4 BP |
1704 | |
1705 | static void | |
1706 | dpif_netdev_recv_purge(struct dpif *dpif) | |
1707 | { | |
1708 | struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif); | |
f5126b57 | 1709 | |
63be20be | 1710 | fat_rwlock_wrlock(&dpif_netdev->dp->queue_rwlock); |
1ba530f4 | 1711 | dp_netdev_purge_queues(dpif_netdev->dp); |
63be20be | 1712 | fat_rwlock_unlock(&dpif_netdev->dp->queue_rwlock); |
1ba530f4 | 1713 | } |
72865317 | 1714 | \f |
a84cb64a BP |
1715 | /* Creates and returns a new 'struct dp_netdev_actions', with a reference count |
1716 | * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of | |
1717 | * 'ofpacts'. */ | |
1718 | struct dp_netdev_actions * | |
1719 | dp_netdev_actions_create(const struct nlattr *actions, size_t size) | |
1720 | { | |
1721 | struct dp_netdev_actions *netdev_actions; | |
1722 | ||
1723 | netdev_actions = xmalloc(sizeof *netdev_actions); | |
a84cb64a BP |
1724 | netdev_actions->actions = xmemdup(actions, size); |
1725 | netdev_actions->size = size; | |
1726 | ||
1727 | return netdev_actions; | |
1728 | } | |
1729 | ||
a84cb64a | 1730 | struct dp_netdev_actions * |
61e7deb1 | 1731 | dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow) |
a84cb64a | 1732 | { |
61e7deb1 | 1733 | return ovsrcu_get(struct dp_netdev_actions *, &flow->actions); |
a84cb64a BP |
1734 | } |
1735 | ||
61e7deb1 BP |
1736 | static void |
1737 | dp_netdev_actions_free(struct dp_netdev_actions *actions) | |
a84cb64a | 1738 | { |
61e7deb1 BP |
1739 | free(actions->actions); |
1740 | free(actions); | |
a84cb64a BP |
1741 | } |
1742 | \f | |
e4cfed38 | 1743 | |
5794e276 | 1744 | static void |
f7791740 | 1745 | dp_netdev_process_rxq_port(struct dp_netdev *dp, |
e4cfed38 | 1746 | struct dp_netdev_port *port, |
f7791740 | 1747 | struct netdev_rxq *rxq) |
e4cfed38 PS |
1748 | { |
1749 | struct ofpbuf *packet[NETDEV_MAX_RX_BATCH]; | |
1750 | int error, c; | |
1751 | ||
f7791740 | 1752 | error = netdev_rxq_recv(rxq, packet, &c); |
e4cfed38 PS |
1753 | if (!error) { |
1754 | struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no); | |
1755 | int i; | |
1756 | ||
1757 | for (i = 0; i < c; i++) { | |
1758 | dp_netdev_port_input(dp, packet[i], &md); | |
1759 | } | |
1760 | } else if (error != EAGAIN && error != EOPNOTSUPP) { | |
1761 | static struct vlog_rate_limit rl | |
1762 | = VLOG_RATE_LIMIT_INIT(1, 5); | |
1763 | ||
1764 | VLOG_ERR_RL(&rl, "error receiving data from %s: %s", | |
1765 | netdev_get_name(port->netdev), | |
1766 | ovs_strerror(error)); | |
1767 | } | |
1768 | } | |
1769 | ||
1770 | static void | |
1771 | dpif_netdev_run(struct dpif *dpif) | |
1772 | { | |
1773 | struct dp_netdev_port *port; | |
1774 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
59e6d833 | 1775 | struct cmap_cursor cursor; |
e4cfed38 | 1776 | |
59e6d833 | 1777 | CMAP_FOR_EACH (port, node, &cursor, &dp->ports) { |
55c955bd PS |
1778 | if (!netdev_is_pmd(port->netdev)) { |
1779 | int i; | |
1780 | ||
1781 | for (i = 0; i < netdev_n_rxq(port->netdev); i++) { | |
1782 | dp_netdev_process_rxq_port(dp, port, port->rxq[i]); | |
1783 | } | |
e4cfed38 PS |
1784 | } |
1785 | } | |
e4cfed38 PS |
1786 | } |
1787 | ||
1788 | static void | |
1789 | dpif_netdev_wait(struct dpif *dpif) | |
1790 | { | |
1791 | struct dp_netdev_port *port; | |
1792 | struct dp_netdev *dp = get_dp_netdev(dpif); | |
59e6d833 | 1793 | struct cmap_cursor cursor; |
e4cfed38 | 1794 | |
59e6d833 BP |
1795 | ovs_mutex_lock(&dp_netdev_mutex); |
1796 | CMAP_FOR_EACH (port, node, &cursor, &dp->ports) { | |
55c955bd PS |
1797 | if (!netdev_is_pmd(port->netdev)) { |
1798 | int i; | |
1799 | ||
1800 | for (i = 0; i < netdev_n_rxq(port->netdev); i++) { | |
1801 | netdev_rxq_wait(port->rxq[i]); | |
1802 | } | |
e4cfed38 PS |
1803 | } |
1804 | } | |
59e6d833 | 1805 | ovs_mutex_unlock(&dp_netdev_mutex); |
e4cfed38 PS |
1806 | } |
1807 | ||
/* One entry of a pmd thread's poll list: an rx queue together with the port
 * it was taken from. */
struct rxq_poll {
    struct dp_netdev_port *port;    /* Port that 'rx' belongs to. */
    struct netdev_rxq *rx;          /* Rx queue of 'port' to poll. */
};
1812 | ||
1813 | static int | |
1814 | pmd_load_queues(struct pmd_thread *f, | |
f7791740 | 1815 | struct rxq_poll **ppoll_list, int poll_cnt) |
e4cfed38 PS |
1816 | { |
1817 | struct dp_netdev *dp = f->dp; | |
f7791740 | 1818 | struct rxq_poll *poll_list = *ppoll_list; |
e4cfed38 | 1819 | struct dp_netdev_port *port; |
59e6d833 | 1820 | struct cmap_cursor cursor; |
e4cfed38 PS |
1821 | int id = f->id; |
1822 | int index; | |
1823 | int i; | |
1824 | ||
1825 | /* Simple scheduler for netdev rx polling. */ | |
e4cfed38 PS |
1826 | for (i = 0; i < poll_cnt; i++) { |
1827 | port_unref(poll_list[i].port); | |
1828 | } | |
1829 | ||
1830 | poll_cnt = 0; | |
1831 | index = 0; | |
1832 | ||
59e6d833 | 1833 | CMAP_FOR_EACH (port, node, &cursor, &f->dp->ports) { |
e4cfed38 | 1834 | if (netdev_is_pmd(port->netdev)) { |
55c955bd PS |
1835 | int i; |
1836 | ||
1837 | for (i = 0; i < netdev_n_rxq(port->netdev); i++) { | |
1838 | if ((index % dp->n_pmd_threads) == id) { | |
1839 | poll_list = xrealloc(poll_list, sizeof *poll_list * (poll_cnt + 1)); | |
e4cfed38 | 1840 | |
55c955bd PS |
1841 | port_ref(port); |
1842 | poll_list[poll_cnt].port = port; | |
1843 | poll_list[poll_cnt].rx = port->rxq[i]; | |
1844 | poll_cnt++; | |
1845 | } | |
1846 | index++; | |
e4cfed38 | 1847 | } |
e4cfed38 PS |
1848 | } |
1849 | } | |
1850 | ||
e4cfed38 PS |
1851 | *ppoll_list = poll_list; |
1852 | return poll_cnt; | |
1853 | } | |
1854 | ||
6c3eee82 | 1855 | static void * |
e4cfed38 | 1856 | pmd_thread_main(void *f_) |
6c3eee82 | 1857 | { |
e4cfed38 | 1858 | struct pmd_thread *f = f_; |
6c3eee82 | 1859 | struct dp_netdev *dp = f->dp; |
e4cfed38 | 1860 | unsigned int lc = 0; |
f7791740 | 1861 | struct rxq_poll *poll_list; |
e4cfed38 PS |
1862 | unsigned int port_seq; |
1863 | int poll_cnt; | |
1864 | int i; | |
6c3eee82 | 1865 | |
e4cfed38 PS |
1866 | poll_cnt = 0; |
1867 | poll_list = NULL; | |
1868 | ||
8617afff | 1869 | pmd_thread_setaffinity_cpu(f->id); |
e4cfed38 PS |
1870 | reload: |
1871 | poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt); | |
1872 | atomic_read(&f->change_seq, &port_seq); | |
6c3eee82 | 1873 | |
e4cfed38 PS |
1874 | for (;;) { |
1875 | unsigned int c_port_seq; | |
6c3eee82 BP |
1876 | int i; |
1877 | ||
e4cfed38 | 1878 | for (i = 0; i < poll_cnt; i++) { |
55c955bd | 1879 | dp_netdev_process_rxq_port(dp, poll_list[i].port, poll_list[i].rx); |
e4cfed38 PS |
1880 | } |
1881 | ||
1882 | if (lc++ > 1024) { | |
1883 | ovsrcu_quiesce(); | |
6c3eee82 | 1884 | |
e4cfed38 PS |
1885 | /* TODO: need completely userspace based signaling method. |
1886 | * to keep this thread entirely in userspace. | |
1887 | * For now using atomic counter. */ | |
1888 | lc = 0; | |
1889 | atomic_read_explicit(&f->change_seq, &c_port_seq, memory_order_consume); | |
1890 | if (c_port_seq != port_seq) { | |
6c3eee82 BP |
1891 | break; |
1892 | } | |
1893 | } | |
e4cfed38 | 1894 | } |
6c3eee82 | 1895 | |
e4cfed38 PS |
1896 | if (!latch_is_set(&f->dp->exit_latch)){ |
1897 | goto reload; | |
1898 | } | |
6c3eee82 | 1899 | |
e4cfed38 PS |
1900 | for (i = 0; i < poll_cnt; i++) { |
1901 | port_unref(poll_list[i].port); | |
6c3eee82 | 1902 | } |
6c3eee82 | 1903 | |
e4cfed38 | 1904 | free(poll_list); |
6c3eee82 BP |
1905 | return NULL; |
1906 | } | |
1907 | ||
1908 | static void | |
e4cfed38 | 1909 | dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n) |
6c3eee82 BP |
1910 | { |
1911 | int i; | |
1912 | ||
e4cfed38 | 1913 | if (n == dp->n_pmd_threads) { |
6c3eee82 BP |
1914 | return; |
1915 | } | |
1916 | ||
1917 | /* Stop existing threads. */ | |
1918 | latch_set(&dp->exit_latch); | |
e4cfed38 PS |
1919 | dp_netdev_reload_pmd_threads(dp); |
1920 | for (i = 0; i < dp->n_pmd_threads; i++) { | |
1921 | struct pmd_thread *f = &dp->pmd_threads[i]; | |
6c3eee82 BP |
1922 | |
1923 | xpthread_join(f->thread, NULL); | |
1924 | } | |
1925 | latch_poll(&dp->exit_latch); | |
e4cfed38 | 1926 | free(dp->pmd_threads); |
6c3eee82 BP |
1927 | |
1928 | /* Start new threads. */ | |
e4cfed38 PS |
1929 | dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads); |
1930 | dp->n_pmd_threads = n; | |
1931 | ||
6c3eee82 | 1932 | for (i = 0; i < n; i++) { |
e4cfed38 | 1933 | struct pmd_thread *f = &dp->pmd_threads[i]; |
6c3eee82 BP |
1934 | |
1935 | f->dp = dp; | |
e4cfed38 PS |
1936 | f->id = i; |
1937 | atomic_store(&f->change_seq, 1); | |
1938 | ||
1939 | /* Each thread will distribute all devices rx-queues among | |
1940 | * themselves. */ | |
8ba0a522 | 1941 | f->thread = ovs_thread_create("pmd", pmd_thread_main, f); |
6c3eee82 BP |
1942 | } |
1943 | } | |
e4cfed38 | 1944 | |
6c3eee82 | 1945 | \f |
679ba04c BP |
1946 | static void * |
1947 | dp_netdev_flow_stats_new_cb(void) | |
1948 | { | |
1949 | struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
1950 | ovs_mutex_init(&bucket->mutex); | |
1951 | return bucket; | |
1952 | } | |
1953 | ||
72865317 | 1954 | static void |
1763b4b8 | 1955 | dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, |
855dd13c | 1956 | const struct ofpbuf *packet, |
4f150744 | 1957 | const struct miniflow *key) |
72865317 | 1958 | { |
4f150744 | 1959 | uint16_t tcp_flags = miniflow_get_tcp_flags(key); |
679ba04c BP |
1960 | long long int now = time_msec(); |
1961 | struct dp_netdev_flow_stats *bucket; | |
1962 | ||
1963 | bucket = ovsthread_stats_bucket_get(&netdev_flow->stats, | |
1964 | dp_netdev_flow_stats_new_cb); | |
1965 | ||
1966 | ovs_mutex_lock(&bucket->mutex); | |
1967 | bucket->used = MAX(now, bucket->used); | |
1968 | bucket->packet_count++; | |
1f317cb5 | 1969 | bucket->byte_count += ofpbuf_size(packet); |
679ba04c BP |
1970 | bucket->tcp_flags |= tcp_flags; |
1971 | ovs_mutex_unlock(&bucket->mutex); | |
72865317 BP |
1972 | } |
1973 | ||
51852a57 BP |
1974 | static void * |
1975 | dp_netdev_stats_new_cb(void) | |
1976 | { | |
1977 | struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket); | |
1978 | ovs_mutex_init(&bucket->mutex); | |
1979 | return bucket; | |
1980 | } | |
1981 | ||
1982 | static void | |
1983 | dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type) | |
1984 | { | |
1985 | struct dp_netdev_stats *bucket; | |
1986 | ||
1987 | bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb); | |
1988 | ovs_mutex_lock(&bucket->mutex); | |
1989 | bucket->n[type]++; | |
1990 | ovs_mutex_unlock(&bucket->mutex); | |
1991 | } | |
1992 | ||
72865317 | 1993 | static void |
adcf00ba AZ |
1994 | dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet, |
1995 | struct pkt_metadata *md) | |
72865317 | 1996 | { |
1763b4b8 | 1997 | struct dp_netdev_flow *netdev_flow; |
27bbe15d JR |
1998 | struct { |
1999 | struct miniflow flow; | |
2000 | uint32_t buf[FLOW_U32S]; | |
2001 | } key; | |
72865317 | 2002 | |
1f317cb5 | 2003 | if (ofpbuf_size(packet) < ETH_HEADER_LEN) { |
df1e5a3b | 2004 | ofpbuf_delete(packet); |
1805876e BP |
2005 | return; |
2006 | } | |
27bbe15d JR |
2007 | miniflow_initialize(&key.flow, key.buf); |
2008 | miniflow_extract(packet, md, &key.flow); | |
4f150744 | 2009 | |
27bbe15d | 2010 | netdev_flow = dp_netdev_lookup_flow(dp, &key.flow); |
1763b4b8 | 2011 | if (netdev_flow) { |
a84cb64a BP |
2012 | struct dp_netdev_actions *actions; |
2013 | ||
27bbe15d | 2014 | dp_netdev_flow_used(netdev_flow, packet, &key.flow); |
679ba04c | 2015 | |
61e7deb1 | 2016 | actions = dp_netdev_flow_get_actions(netdev_flow); |
27bbe15d | 2017 | dp_netdev_execute_actions(dp, &key.flow, packet, true, md, |
a84cb64a | 2018 | actions->actions, actions->size); |
51852a57 | 2019 | dp_netdev_count_packet(dp, DP_STAT_HIT); |
63be20be | 2020 | } else if (dp->handler_queues) { |
51852a57 | 2021 | dp_netdev_count_packet(dp, DP_STAT_MISS); |
63be20be | 2022 | dp_netdev_output_userspace(dp, packet, |
27bbe15d | 2023 | miniflow_hash_5tuple(&key.flow, 0) |
4f150744 | 2024 | % dp->n_handlers, |
27bbe15d | 2025 | DPIF_UC_MISS, &key.flow, NULL); |
72865317 BP |
2026 | } |
2027 | } | |
2028 | ||
adcf00ba AZ |
2029 | static void |
2030 | dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, | |
2031 | struct pkt_metadata *md) | |
adcf00ba AZ |
2032 | { |
2033 | uint32_t *recirc_depth = recirc_depth_get(); | |
2034 | ||
2035 | *recirc_depth = 0; | |
2036 | dp_netdev_input(dp, packet, md); | |
2037 | } | |
2038 | ||
72865317 | 2039 | static int |
da546e07 | 2040 | dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, |
4f150744 | 2041 | int queue_no, int type, const struct miniflow *key, |
e995e3df | 2042 | const struct nlattr *userdata) |
72865317 | 2043 | { |
63be20be | 2044 | struct dp_netdev_queue *q; |
f5126b57 BP |
2045 | int error; |
2046 | ||
63be20be AW |
2047 | fat_rwlock_rdlock(&dp->queue_rwlock); |
2048 | q = &dp->handler_queues[queue_no]; | |
2049 | ovs_mutex_lock(&q->mutex); | |
e995e3df BP |
2050 | if (q->head - q->tail < MAX_QUEUE_LEN) { |
2051 | struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK]; | |
2052 | struct dpif_upcall *upcall = &u->upcall; | |
2053 | struct ofpbuf *buf = &u->buf; | |
2054 | size_t buf_size; | |
4f150744 | 2055 | struct flow flow; |
e995e3df | 2056 | |
63be20be | 2057 | upcall->type = type; |
e995e3df BP |
2058 | |
2059 | /* Allocate buffer big enough for everything. */ | |
da546e07 | 2060 | buf_size = ODPUTIL_FLOW_KEY_BYTES; |
e995e3df BP |
2061 | if (userdata) { |
2062 | buf_size += NLA_ALIGN(userdata->nla_len); | |
2063 | } | |
2064 | ofpbuf_init(buf, buf_size); | |
72865317 | 2065 | |
e995e3df | 2066 | /* Put ODP flow. */ |
4f150744 | 2067 | miniflow_expand(key, &flow); |
7ce2769e | 2068 | odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true); |
1f317cb5 PS |
2069 | upcall->key = ofpbuf_data(buf); |
2070 | upcall->key_len = ofpbuf_size(buf); | |
d88b629b | 2071 | |
e995e3df BP |
2072 | /* Put userdata. */ |
2073 | if (userdata) { | |
2074 | upcall->userdata = ofpbuf_put(buf, userdata, | |
2075 | NLA_ALIGN(userdata->nla_len)); | |
2076 | } | |
856081f6 | 2077 | |
143859ec | 2078 | upcall->packet = *packet; |
856081f6 | 2079 | |
63be20be | 2080 | seq_change(q->seq); |
d33ed218 | 2081 | |
f5126b57 | 2082 | error = 0; |
e995e3df | 2083 | } else { |
51852a57 | 2084 | dp_netdev_count_packet(dp, DP_STAT_LOST); |
143859ec | 2085 | ofpbuf_delete(packet); |
f5126b57 | 2086 | error = ENOBUFS; |
e995e3df | 2087 | } |
63be20be AW |
2088 | ovs_mutex_unlock(&q->mutex); |
2089 | fat_rwlock_unlock(&dp->queue_rwlock); | |
f5126b57 BP |
2090 | |
2091 | return error; | |
72865317 BP |
2092 | } |
2093 | ||
/* Context passed as the opaque 'aux_' argument to dp_execute_cb(). */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;           /* Datapath the actions execute in. */
    const struct miniflow *key;     /* Flow key used for hashing and for
                                     * userspace upcalls. */
};
2098 | ||
2099 | static void | |
758c456d | 2100 | dp_execute_cb(void *aux_, struct ofpbuf *packet, |
572f732a | 2101 | struct pkt_metadata *md, |
09f9da0b | 2102 | const struct nlattr *a, bool may_steal) |
8a4e3a85 | 2103 | OVS_NO_THREAD_SAFETY_ANALYSIS |
9080a111 JR |
2104 | { |
2105 | struct dp_netdev_execute_aux *aux = aux_; | |
09f9da0b | 2106 | int type = nl_attr_type(a); |
8a4e3a85 | 2107 | struct dp_netdev_port *p; |
adcf00ba | 2108 | uint32_t *depth = recirc_depth_get(); |
9080a111 | 2109 | |
09f9da0b JR |
2110 | switch ((enum ovs_action_attr)type) { |
2111 | case OVS_ACTION_ATTR_OUTPUT: | |
8a4e3a85 BP |
2112 | p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a))); |
2113 | if (p) { | |
40d26f04 | 2114 | netdev_send(p->netdev, packet, may_steal); |
8a4e3a85 | 2115 | } |
09f9da0b JR |
2116 | break; |
2117 | ||
2118 | case OVS_ACTION_ATTR_USERSPACE: { | |
143859ec | 2119 | struct ofpbuf *userspace_packet; |
09f9da0b | 2120 | const struct nlattr *userdata; |
4fc65926 | 2121 | |
09f9da0b | 2122 | userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); |
143859ec | 2123 | userspace_packet = may_steal ? packet : ofpbuf_clone(packet); |
da546e07 | 2124 | |
143859ec | 2125 | dp_netdev_output_userspace(aux->dp, userspace_packet, |
4f150744 | 2126 | miniflow_hash_5tuple(aux->key, 0) |
63be20be AW |
2127 | % aux->dp->n_handlers, |
2128 | DPIF_UC_ACTION, aux->key, | |
09f9da0b | 2129 | userdata); |
09f9da0b | 2130 | break; |
da546e07 | 2131 | } |
572f732a | 2132 | |
347bf289 AZ |
2133 | case OVS_ACTION_ATTR_HASH: { |
2134 | const struct ovs_action_hash *hash_act; | |
2135 | uint32_t hash; | |
2136 | ||
2137 | hash_act = nl_attr_get(a); | |
2138 | if (hash_act->hash_alg == OVS_HASH_ALG_L4) { | |
4f150744 JR |
2139 | /* Hash need not be symmetric, nor does it need to include |
2140 | * L2 fields. */ | |
62ac1f20 | 2141 | hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis); |
347bf289 AZ |
2142 | if (!hash) { |
2143 | hash = 1; /* 0 is not valid */ | |
2144 | } | |
2145 | ||
2146 | } else { | |
2147 | VLOG_WARN("Unknown hash algorithm specified for the hash action."); | |
2148 | hash = 2; | |
2149 | } | |
2150 | ||
2151 | md->dp_hash = hash; | |
2152 | break; | |
2153 | } | |
2154 | ||
adcf00ba AZ |
2155 | case OVS_ACTION_ATTR_RECIRC: |
2156 | if (*depth < MAX_RECIRC_DEPTH) { | |
4347b9b3 | 2157 | struct pkt_metadata recirc_md = *md; |
adcf00ba | 2158 | struct ofpbuf *recirc_packet; |
9b516652 | 2159 | |
adcf00ba | 2160 | recirc_packet = may_steal ? packet : ofpbuf_clone(packet); |
347bf289 | 2161 | recirc_md.recirc_id = nl_attr_get_u32(a); |
572f732a | 2162 | |
adcf00ba | 2163 | (*depth)++; |
4347b9b3 | 2164 | dp_netdev_input(aux->dp, recirc_packet, &recirc_md); |
adcf00ba AZ |
2165 | (*depth)--; |
2166 | ||
adcf00ba AZ |
2167 | break; |
2168 | } else { | |
2169 | VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); | |
2170 | } | |
572f732a | 2171 | break; |
572f732a | 2172 | |
09f9da0b JR |
2173 | case OVS_ACTION_ATTR_PUSH_VLAN: |
2174 | case OVS_ACTION_ATTR_POP_VLAN: | |
2175 | case OVS_ACTION_ATTR_PUSH_MPLS: | |
2176 | case OVS_ACTION_ATTR_POP_MPLS: | |
2177 | case OVS_ACTION_ATTR_SET: | |
2178 | case OVS_ACTION_ATTR_SAMPLE: | |
2179 | case OVS_ACTION_ATTR_UNSPEC: | |
2180 | case __OVS_ACTION_ATTR_MAX: | |
2181 | OVS_NOT_REACHED(); | |
da546e07 | 2182 | } |
98403001 BP |
2183 | } |
2184 | ||
4edb9ae9 | 2185 | static void |
4f150744 | 2186 | dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key, |
df1e5a3b PS |
2187 | struct ofpbuf *packet, bool may_steal, |
2188 | struct pkt_metadata *md, | |
9080a111 | 2189 | const struct nlattr *actions, size_t actions_len) |
72865317 | 2190 | { |
9080a111 | 2191 | struct dp_netdev_execute_aux aux = {dp, key}; |
9080a111 | 2192 | |
df1e5a3b PS |
2193 | odp_execute_actions(&aux, packet, may_steal, md, |
2194 | actions, actions_len, dp_execute_cb); | |
72865317 BP |
2195 | } |
2196 | ||
/* The "netdev" datapath interface provider.
 *
 * The initializers are positional, so their order must match the member
 * order of 'struct dpif_class'.  Members this provider does not implement
 * are NULL and labelled with the member name. */
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_get,
    dpif_netdev_flow_put,
    dpif_netdev_flow_del,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_execute,
    NULL,                       /* operate */
    dpif_netdev_recv_set,
    dpif_netdev_handlers_set,
    dpif_netdev_queue_to_priority,
    dpif_netdev_recv,
    dpif_netdev_recv_wait,
    dpif_netdev_recv_purge,
};
614c4892 | 2235 | |
74cc3969 BP |
2236 | static void |
2237 | dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
2238 | const char *argv[], void *aux OVS_UNUSED) | |
2239 | { | |
59e6d833 BP |
2240 | struct dp_netdev_port *old_port; |
2241 | struct dp_netdev_port *new_port; | |
74cc3969 | 2242 | struct dp_netdev *dp; |
ff073a71 | 2243 | odp_port_t port_no; |
74cc3969 | 2244 | |
8a4e3a85 | 2245 | ovs_mutex_lock(&dp_netdev_mutex); |
74cc3969 BP |
2246 | dp = shash_find_data(&dp_netdevs, argv[1]); |
2247 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
8a4e3a85 | 2248 | ovs_mutex_unlock(&dp_netdev_mutex); |
74cc3969 BP |
2249 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); |
2250 | return; | |
2251 | } | |
8a4e3a85 BP |
2252 | ovs_refcount_ref(&dp->ref_cnt); |
2253 | ovs_mutex_unlock(&dp_netdev_mutex); | |
74cc3969 | 2254 | |
59e6d833 BP |
2255 | ovs_mutex_lock(&dp->port_mutex); |
2256 | if (get_port_by_name(dp, argv[2], &old_port)) { | |
74cc3969 | 2257 | unixctl_command_reply_error(conn, "unknown port"); |
8a4e3a85 | 2258 | goto exit; |
74cc3969 BP |
2259 | } |
2260 | ||
ff073a71 BP |
2261 | port_no = u32_to_odp(atoi(argv[3])); |
2262 | if (!port_no || port_no == ODPP_NONE) { | |
74cc3969 | 2263 | unixctl_command_reply_error(conn, "bad port number"); |
8a4e3a85 | 2264 | goto exit; |
74cc3969 | 2265 | } |
ff073a71 | 2266 | if (dp_netdev_lookup_port(dp, port_no)) { |
74cc3969 | 2267 | unixctl_command_reply_error(conn, "port number already in use"); |
8a4e3a85 | 2268 | goto exit; |
74cc3969 | 2269 | } |
59e6d833 BP |
2270 | |
2271 | /* Remove old port. */ | |
2272 | cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no)); | |
2273 | ovsrcu_postpone(free, old_port); | |
2274 | ||
2275 | /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */ | |
2276 | new_port = xmemdup(old_port, sizeof *old_port); | |
2277 | new_port->port_no = port_no; | |
2278 | cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no)); | |
2279 | ||
d33ed218 | 2280 | seq_change(dp->port_seq); |
74cc3969 | 2281 | unixctl_command_reply(conn, NULL); |
8a4e3a85 BP |
2282 | |
2283 | exit: | |
59e6d833 | 2284 | ovs_mutex_unlock(&dp->port_mutex); |
8a4e3a85 | 2285 | dp_netdev_unref(dp); |
74cc3969 BP |
2286 | } |
2287 | ||
c40b890f BP |
2288 | static void |
2289 | dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
2290 | const char *argv[], void *aux OVS_UNUSED) | |
2291 | { | |
2292 | struct dp_netdev_port *port; | |
2293 | struct dp_netdev *dp; | |
2294 | ||
2295 | ovs_mutex_lock(&dp_netdev_mutex); | |
2296 | dp = shash_find_data(&dp_netdevs, argv[1]); | |
2297 | if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { | |
2298 | ovs_mutex_unlock(&dp_netdev_mutex); | |
2299 | unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); | |
2300 | return; | |
2301 | } | |
2302 | ovs_refcount_ref(&dp->ref_cnt); | |
2303 | ovs_mutex_unlock(&dp_netdev_mutex); | |
2304 | ||
2305 | ovs_mutex_lock(&dp->port_mutex); | |
2306 | if (get_port_by_name(dp, argv[2], &port)) { | |
2307 | unixctl_command_reply_error(conn, "unknown port"); | |
2308 | } else if (port->port_no == ODPP_LOCAL) { | |
2309 | unixctl_command_reply_error(conn, "can't delete local port"); | |
2310 | } else { | |
2311 | do_del_port(dp, port); | |
2312 | unixctl_command_reply(conn, NULL); | |
2313 | } | |
2314 | ovs_mutex_unlock(&dp->port_mutex); | |
2315 | ||
2316 | dp_netdev_unref(dp); | |
2317 | } | |
2318 | ||
0cbfe35d BP |
2319 | static void |
2320 | dpif_dummy_register__(const char *type) | |
2321 | { | |
2322 | struct dpif_class *class; | |
2323 | ||
2324 | class = xmalloc(sizeof *class); | |
2325 | *class = dpif_netdev_class; | |
2326 | class->type = xstrdup(type); | |
2327 | dp_register_provider(class); | |
2328 | } | |
2329 | ||
614c4892 | 2330 | void |
0cbfe35d | 2331 | dpif_dummy_register(bool override) |
614c4892 | 2332 | { |
0cbfe35d BP |
2333 | if (override) { |
2334 | struct sset types; | |
2335 | const char *type; | |
2336 | ||
2337 | sset_init(&types); | |
2338 | dp_enumerate_types(&types); | |
2339 | SSET_FOR_EACH (type, &types) { | |
2340 | if (!dp_unregister_provider(type)) { | |
2341 | dpif_dummy_register__(type); | |
2342 | } | |
2343 | } | |
2344 | sset_destroy(&types); | |
614c4892 | 2345 | } |
0cbfe35d BP |
2346 | |
2347 | dpif_dummy_register__("dummy"); | |
74cc3969 BP |
2348 | |
2349 | unixctl_command_register("dpif-dummy/change-port-number", | |
2350 | "DP PORT NEW-NUMBER", | |
2351 | 3, 3, dpif_dummy_change_port_number, NULL); | |
c40b890f BP |
2352 | unixctl_command_register("dpif-dummy/delete-port", "DP PORT", |
2353 | 2, 2, dpif_dummy_delete_port, NULL); | |
614c4892 | 2354 | } |