]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/openvswitch/datapath.c
UBUNTU: Ubuntu-5.15.0-39.42
[mirror_ubuntu-jammy-kernel.git] / net / openvswitch / datapath.c
CommitLineData
c9422999 1// SPDX-License-Identifier: GPL-2.0-only
ccb1352e 2/*
ad552007 3 * Copyright (c) 2007-2014 Nicira, Inc.
ccb1352e
JG
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/if_arp.h>
11#include <linux/if_vlan.h>
12#include <linux/in.h>
13#include <linux/ip.h>
14#include <linux/jhash.h>
15#include <linux/delay.h>
16#include <linux/time.h>
17#include <linux/etherdevice.h>
18#include <linux/genetlink.h>
19#include <linux/kernel.h>
20#include <linux/kthread.h>
21#include <linux/mutex.h>
22#include <linux/percpu.h>
23#include <linux/rcupdate.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
ccb1352e
JG
26#include <linux/ethtool.h>
27#include <linux/wait.h>
ccb1352e
JG
28#include <asm/div64.h>
29#include <linux/highmem.h>
30#include <linux/netfilter_bridge.h>
31#include <linux/netfilter_ipv4.h>
32#include <linux/inetdevice.h>
33#include <linux/list.h>
34#include <linux/openvswitch.h>
35#include <linux/rculist.h>
36#include <linux/dmi.h>
ccb1352e 37#include <net/genetlink.h>
46df7b81
PS
38#include <net/net_namespace.h>
39#include <net/netns/generic.h>
ccb1352e
JG
40
41#include "datapath.h"
42#include "flow.h"
e80857cc 43#include "flow_table.h"
e6445719 44#include "flow_netlink.h"
96fbc13d 45#include "meter.h"
c4ab7b56 46#include "openvswitch_trace.h"
ccb1352e 47#include "vport-internal_dev.h"
cff63a52 48#include "vport-netdev.h"
ccb1352e 49
/* Per-network-namespace generic-netlink id used to find struct ovs_net. */
unsigned int ovs_net_id __read_mostly;

/* Generic netlink families; defined later in this file. */
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

/* Netlink attribute policy for flow commands; defined later in this file. */
static const struct nla_policy flow_policy[];

/* Multicast groups on which flow/datapath/vport notifications are sent. */
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};
69
fb5d1e9e
JR
70/* Check if need to build a reply message.
71 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
9b67aa4a
SG
72static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
73 unsigned int group)
fb5d1e9e
JR
74{
75 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
f8403a2e 76 genl_has_listeners(family, genl_info_net(info), group);
fb5d1e9e
JR
77}
78
/* Send @skb as a notification for @family; genl_notify() derives the
 * multicast group from @info. GFP_KERNEL: callers run in process context. */
static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}
84
/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath, port, set operations on
 * vports, etc.) and writes to other state (flow table modifications, setting
 * of miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
101
/* Serializes all modifications to datapath state; see "DOC: Locking". */
static DEFINE_MUTEX(ovs_mutex);

/* Acquire the global OVS mutex. */
void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

/* Release the global OVS mutex. */
void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}
113
#ifdef CONFIG_LOCKDEP
/* Lockdep assertion helper: report whether ovs_mutex is held.
 * If lockdep has shut itself off (debug_locks == 0) we cannot tell,
 * so return 1 to avoid false-positive assertion failures. */
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif
123
/* Forward declarations for helpers defined later in this file. */
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
/* Must be called with rcu_read_lock or ovs_mutex.
 * A datapath is named after the netdev of its local (OVSP_LOCAL) port. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

	return ovs_vport_name(vport);
}
144
12eb18f7 145static int get_dpifindex(const struct datapath *dp)
ccb1352e
JG
146{
147 struct vport *local;
148 int ifindex;
149
150 rcu_read_lock();
151
15eac2a7 152 local = ovs_vport_rcu(dp, OVSP_LOCAL);
ccb1352e 153 if (local)
be4ace6e 154 ifindex = local->dev->ifindex;
ccb1352e
JG
155 else
156 ifindex = 0;
157
158 rcu_read_unlock();
159
160 return ifindex;
161}
162
/* RCU callback: free a datapath once all RCU readers have finished. */
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	/* No readers remain at this point, so a raw dereference is safe. */
	kfree(rcu_dereference_raw(dp->upcall_portids));
	kfree(dp);
}
174
/* Map @port_no to its bucket in dp->ports; the mask works because
 * DP_VPORT_HASH_BUCKETS is a power of two. */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
180
/* Called with ovs_mutex or RCU read lock.
 * Find the vport with number @port_no in @dp, or NULL if none. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	/* lockdep_ovsl_is_held() tells the RCU list walker that holding
	 * ovs_mutex is a valid alternative to rcu_read_lock here. */
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}
195
/* Called with ovs_mutex.
 * Create a vport from @parms and, on success, hash it into its datapath's
 * port table.  Returns the vport or an ERR_PTR from ovs_vport_add(). */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}
210
/* Remove @p from its datapath and destroy it.  Requires ovs_mutex. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
221
/* Must be called with rcu_read_lock.
 *
 * Main receive path: look up the flow for @skb/@key and execute its
 * actions, or send an OVS_PACKET_CMD_MISS upcall to userspace when no
 * flow matches.  Consumes @skb on the miss path; on the hit path the
 * skb is handed to ovs_execute_actions().  Per-CPU stats are updated
 * either way. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;

		/* Pick the netlink portid to deliver the miss to: either the
		 * per-CPU dispatch table or the input vport's portid. */
		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
			upcall.portid =
			    ovs_dp_get_upcall_portid(dp, smp_processor_id());
		else
			upcall.portid = ovs_vport_find_upcall_portid(p, skb);

		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}
279
/* Deliver @skb to userspace at @upcall_info->portid, segmenting first if it
 * is a GSO skb.  @cutlen bytes are trimmed from the tail of the copied
 * packet data.  Does not free @skb.  On any failure the per-CPU n_lost
 * counter is bumped and the error returned. */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (trace_ovs_dp_upcall_enabled())
		trace_ovs_dp_upcall(dp, skb, key, upcall_info);

	/* portid 0 means no userspace consumer is attached. */
	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}
314
/* Software-segment a GSO @skb and queue each resulting segment to userspace
 * individually.  Stops queuing on the first error but always walks the whole
 * segment list again to free every segment.  Does not free the original
 * @skb (the caller owns it). */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;

	}

	/* Free all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}
361
/* Compute the worst-case netlink message size for an upcall carrying
 * @hdrlen bytes of packet data and @actions_attrlen bytes of actions, so
 * that genlmsg_new() can allocate a large enough skb up front. */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}
389
7f8a436e
JS
390static void pad_packet(struct datapath *dp, struct sk_buff *skb)
391{
392 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
393 size_t plen = NLA_ALIGN(skb->len) - skb->len;
394
395 if (plen > 0)
b080db58 396 skb_put_zero(skb, plen);
7f8a436e
JS
397 }
398}
399
/* Build an OVS_PACKET genetlink message for @skb (flow key, optional
 * userdata/tunnel/actions/MRU/hash attributes, then the packet data via
 * skb_zerocopy) and unicast it to @upcall_info->portid.
 *
 * Does not free @skb.  A hardware-accelerated VLAN tag is first pushed
 * into the payload on a clone (@nskb) so userspace sees it in-band.
 * All failure paths funnel through "out", which frees the not-yet-sent
 * message and the clone. */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	/* The packet must fit in a single netlink attribute. */
	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;

	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	/* Ownership passed to genlmsg_unicast(); don't free below. */
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}
557
/* Handler for OVS_PACKET_CMD_EXECUTE: userspace injects a packet plus a
 * flow key and actions; rebuild the skb, construct a temporary sw_flow,
 * and execute the actions on the chosen input vport.  Errors unwind via
 * the goto chain at the bottom. */
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	/* OVS_PACKET_ATTR_PROBE suppresses error logging for feature probes. */
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	/* Restore the skb hash that was exported with the upcall, if any. */
	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	/* Fall back to the local port when the key's in_port is absent. */
	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}
658
/* Attribute validation policy for OVS_PACKET_* commands. */
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },	/* at least an Ethernet header */
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
667
/* Operations table for the OVS_PACKET genetlink family. */
static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};
675
/* Definition of the OVS_PACKET genetlink family (packet execute/upcalls). */
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.module = THIS_MODULE,
};
688
/* Aggregate per-CPU datapath counters into @stats and @mega_stats for
 * reporting to userspace. */
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		/* Retry loop gives a consistent snapshot of this CPU's
		 * counters even while it is concurrently updating them. */
		do {
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}
720
74ed7ab9
JS
721static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
722{
723 return ovs_identifier_is_ufid(sfid) &&
724 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
725}
726
727static bool should_fill_mask(uint32_t ufid_flags)
728{
729 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
730}
731
732static bool should_fill_actions(uint32_t ufid_flags)
c3ff8cfe 733{
74ed7ab9
JS
734 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
735}
736
/* Worst-case netlink message size for a flow dump/notification, so the
 * reply skb can be allocated before filling it. */
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
	 * see ovs_nla_put_identifier()
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
768
/* Called with ovs_mutex or RCU read lock.
 * Append the flow's USED/STATS/TCP_FLAGS attributes to @skb; attributes
 * whose value is zero are skipped entirely. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if ((u8)ntohs(tcp_flags) &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}
796
/* Called with ovs_mutex or RCU read lock.
 * Append the flow's OVS_FLOW_ATTR_ACTIONS attribute to @skb. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'. This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them. (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			/* skb_orig_len != 0 means this is a dump and the skb
			 * already contains other flows: report the error. */
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}
836
/* Called with ovs_mutex or RCU read lock.
 * Serialize @flow into a complete genetlink message on @skb: identifier,
 * then (subject to @ufid_flags) key, mask, stats, and actions.  On error
 * the partially-built message is cancelled. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
886
/* May not be called with RCU read lock.
 * Allocate a reply skb sized for a flow message, or return NULL when no
 * reply is needed (no NLM_F_ECHO and nobody is listening) unless @always
 * forces one.  Returns ERR_PTR(-ENOMEM) on allocation failure. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}
907
/* Called with ovs_mutex.
 * Allocate and fill a complete flow reply for @flow.  May return NULL
 * (no reply needed) or an ERR_PTR; callers must handle both. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	/* The skb was sized by ovs_flow_cmd_msg_size(), so filling it
	 * should never fail; warn loudly if it somehow does. */
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}
931
37bdc87b 932static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ccb1352e 933{
7f8a436e 934 struct net *net = sock_net(skb->sk);
ccb1352e
JG
935 struct nlattr **a = info->attrs;
936 struct ovs_header *ovs_header = info->userhdr;
74ed7ab9 937 struct sw_flow *flow = NULL, *new_flow;
03f0d916 938 struct sw_flow_mask mask;
ccb1352e
JG
939 struct sk_buff *reply;
940 struct datapath *dp;
37bdc87b 941 struct sw_flow_actions *acts;
03f0d916 942 struct sw_flow_match match;
74ed7ab9 943 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
ccb1352e 944 int error;
05da5898 945 bool log = !a[OVS_FLOW_ATTR_PROBE];
ccb1352e 946
893f139b 947 /* Must have key and actions. */
ccb1352e 948 error = -EINVAL;
426cda5c 949 if (!a[OVS_FLOW_ATTR_KEY]) {
05da5898 950 OVS_NLERR(log, "Flow key attr not present in new flow.");
ccb1352e 951 goto error;
426cda5c
JG
952 }
953 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
05da5898 954 OVS_NLERR(log, "Flow actions attr not present in new flow.");
893f139b 955 goto error;
426cda5c 956 }
03f0d916 957
893f139b
JR
958 /* Most of the time we need to allocate a new flow, do it before
959 * locking.
960 */
961 new_flow = ovs_flow_alloc();
962 if (IS_ERR(new_flow)) {
963 error = PTR_ERR(new_flow);
964 goto error;
965 }
966
967 /* Extract key. */
2279994d 968 ovs_match_init(&match, &new_flow->key, false, &mask);
c2ac6673 969 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
05da5898 970 a[OVS_FLOW_ATTR_MASK], log);
ccb1352e 971 if (error)
893f139b 972 goto err_kfree_flow;
ccb1352e 973
74ed7ab9
JS
974 /* Extract flow identifier. */
975 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
190aa3e7 976 &new_flow->key, log);
74ed7ab9
JS
977 if (error)
978 goto err_kfree_flow;
74f84a57 979
190aa3e7 980 /* unmasked key is needed to match when ufid is not used. */
981 if (ovs_identifier_is_key(&new_flow->id))
982 match.key = new_flow->id.unmasked_key;
983
984 ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
985
893f139b 986 /* Validate actions. */
7f8a436e
JS
987 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
988 &new_flow->key, &acts, log);
37bdc87b 989 if (error) {
05da5898 990 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
2fdb957d 991 goto err_kfree_flow;
893f139b
JR
992 }
993
74ed7ab9
JS
994 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
995 ufid_flags);
893f139b
JR
996 if (IS_ERR(reply)) {
997 error = PTR_ERR(reply);
998 goto err_kfree_acts;
ccb1352e
JG
999 }
1000
8e4e1713 1001 ovs_lock();
7f8a436e 1002 dp = get_dp(net, ovs_header->dp_ifindex);
893f139b
JR
1003 if (unlikely(!dp)) {
1004 error = -ENODEV;
8e4e1713 1005 goto err_unlock_ovs;
893f139b 1006 }
74ed7ab9 1007
03f0d916 1008 /* Check if this is a duplicate flow */
74ed7ab9
JS
1009 if (ovs_identifier_is_ufid(&new_flow->id))
1010 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
1011 if (!flow)
190aa3e7 1012 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
893f139b
JR
1013 if (likely(!flow)) {
1014 rcu_assign_pointer(new_flow->sf_acts, acts);
ccb1352e
JG
1015
1016 /* Put flow in bucket. */
893f139b
JR
1017 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1018 if (unlikely(error)) {
618ed0c8 1019 acts = NULL;
893f139b
JR
1020 goto err_unlock_ovs;
1021 }
1022
1023 if (unlikely(reply)) {
1024 error = ovs_flow_cmd_fill_info(new_flow,
1025 ovs_header->dp_ifindex,
1026 reply, info->snd_portid,
1027 info->snd_seq, 0,
74ed7ab9
JS
1028 OVS_FLOW_CMD_NEW,
1029 ufid_flags);
893f139b 1030 BUG_ON(error < 0);
618ed0c8 1031 }
893f139b 1032 ovs_unlock();
ccb1352e 1033 } else {
37bdc87b
JR
1034 struct sw_flow_actions *old_acts;
1035
ccb1352e
JG
1036 /* Bail out if we're not allowed to modify an existing flow.
1037 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1038 * because Generic Netlink treats the latter as a dump
1039 * request. We also accept NLM_F_EXCL in case that bug ever
1040 * gets fixed.
1041 */
893f139b
JR
1042 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1043 | NLM_F_EXCL))) {
1044 error = -EEXIST;
8e4e1713 1045 goto err_unlock_ovs;
893f139b 1046 }
74ed7ab9
JS
1047 /* The flow identifier has to be the same for flow updates.
1048 * Look for any overlapping flow.
1049 */
1050 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1051 if (ovs_identifier_is_key(&flow->id))
1052 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1053 &match);
1054 else /* UFID matches but key is different */
1055 flow = NULL;
4a46b24e
AW
1056 if (!flow) {
1057 error = -ENOENT;
1058 goto err_unlock_ovs;
1059 }
893f139b 1060 }
37bdc87b
JR
1061 /* Update actions. */
1062 old_acts = ovsl_dereference(flow->sf_acts);
1063 rcu_assign_pointer(flow->sf_acts, acts);
37bdc87b 1064
893f139b
JR
1065 if (unlikely(reply)) {
1066 error = ovs_flow_cmd_fill_info(flow,
1067 ovs_header->dp_ifindex,
1068 reply, info->snd_portid,
1069 info->snd_seq, 0,
74ed7ab9
JS
1070 OVS_FLOW_CMD_NEW,
1071 ufid_flags);
893f139b
JR
1072 BUG_ON(error < 0);
1073 }
1074 ovs_unlock();
37bdc87b 1075
34ae932a 1076 ovs_nla_free_flow_actions_rcu(old_acts);
893f139b 1077 ovs_flow_free(new_flow, false);
37bdc87b 1078 }
893f139b
JR
1079
1080 if (reply)
1081 ovs_notify(&dp_flow_genl_family, reply, info);
37bdc87b
JR
1082 return 0;
1083
37bdc87b
JR
1084err_unlock_ovs:
1085 ovs_unlock();
893f139b
JR
1086 kfree_skb(reply);
1087err_kfree_acts:
34ae932a 1088 ovs_nla_free_flow_actions(acts);
893f139b
JR
1089err_kfree_flow:
1090 ovs_flow_free(new_flow, false);
37bdc87b
JR
1091error:
1092 return error;
1093}
ccb1352e 1094
2fdb957d 1095/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
cf3266ad
TZ
1096static noinline_for_stack
1097struct sw_flow_actions *get_flow_actions(struct net *net,
1098 const struct nlattr *a,
1099 const struct sw_flow_key *key,
1100 const struct sw_flow_mask *mask,
1101 bool log)
6b205b2c
JG
1102{
1103 struct sw_flow_actions *acts;
1104 struct sw_flow_key masked_key;
1105 int error;
1106
ae5f2fb1 1107 ovs_flow_mask_key(&masked_key, key, true, mask);
7f8a436e 1108 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
6b205b2c 1109 if (error) {
05da5898
JR
1110 OVS_NLERR(log,
1111 "Actions may not be safe on all matching packets");
6b205b2c
JG
1112 return ERR_PTR(error);
1113 }
1114
1115 return acts;
1116}
1117
9cc9a5cb
TZ
1118/* Factor out match-init and action-copy to avoid
1119 * "Wframe-larger-than=1024" warning. Because mask is only
1120 * used to get actions, we new a function to save some
1121 * stack space.
1122 *
1123 * If there are not key and action attrs, we return 0
1124 * directly. In the case, the caller will also not use the
1125 * match as before. If there is action attr, we try to get
1126 * actions and save them to *acts. Before returning from
1127 * the function, we reset the match->mask pointer. Because
1128 * we should not to return match object with dangling reference
1129 * to mask.
1130 * */
26063790
AB
1131static noinline_for_stack int
1132ovs_nla_init_match_and_action(struct net *net,
1133 struct sw_flow_match *match,
1134 struct sw_flow_key *key,
1135 struct nlattr **a,
1136 struct sw_flow_actions **acts,
1137 bool log)
9cc9a5cb
TZ
1138{
1139 struct sw_flow_mask mask;
1140 int error = 0;
1141
1142 if (a[OVS_FLOW_ATTR_KEY]) {
1143 ovs_match_init(match, key, true, &mask);
1144 error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1145 a[OVS_FLOW_ATTR_MASK], log);
1146 if (error)
1147 goto error;
1148 }
1149
1150 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1151 if (!a[OVS_FLOW_ATTR_KEY]) {
1152 OVS_NLERR(log,
1153 "Flow key attribute not present in set flow.");
5829e62a
CJ
1154 error = -EINVAL;
1155 goto error;
9cc9a5cb
TZ
1156 }
1157
1158 *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1159 &mask, log);
1160 if (IS_ERR(*acts)) {
1161 error = PTR_ERR(*acts);
1162 goto error;
1163 }
1164 }
1165
1166 /* On success, error is 0. */
1167error:
1168 match->mask = NULL;
1169 return error;
1170}
1171
37bdc87b
JR
1172static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1173{
7f8a436e 1174 struct net *net = sock_net(skb->sk);
37bdc87b
JR
1175 struct nlattr **a = info->attrs;
1176 struct ovs_header *ovs_header = info->userhdr;
6b205b2c 1177 struct sw_flow_key key;
37bdc87b 1178 struct sw_flow *flow;
37bdc87b
JR
1179 struct sk_buff *reply = NULL;
1180 struct datapath *dp;
893f139b 1181 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
37bdc87b 1182 struct sw_flow_match match;
74ed7ab9
JS
1183 struct sw_flow_id sfid;
1184 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
6f15cdbf 1185 int error = 0;
05da5898 1186 bool log = !a[OVS_FLOW_ATTR_PROBE];
74ed7ab9 1187 bool ufid_present;
37bdc87b 1188
74ed7ab9 1189 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
9cc9a5cb 1190 if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
6f15cdbf
SG
1191 OVS_NLERR(log,
1192 "Flow set message rejected, Key attribute missing.");
9cc9a5cb 1193 return -EINVAL;
6f15cdbf 1194 }
9cc9a5cb
TZ
1195
1196 error = ovs_nla_init_match_and_action(net, &match, &key, a,
1197 &acts, log);
37bdc87b
JR
1198 if (error)
1199 goto error;
1200
9cc9a5cb 1201 if (acts) {
2fdb957d 1202 /* Can allocate before locking if have acts. */
74ed7ab9
JS
1203 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1204 ufid_flags);
893f139b
JR
1205 if (IS_ERR(reply)) {
1206 error = PTR_ERR(reply);
1207 goto err_kfree_acts;
be52c9e9 1208 }
37bdc87b 1209 }
0e9796b4 1210
37bdc87b 1211 ovs_lock();
7f8a436e 1212 dp = get_dp(net, ovs_header->dp_ifindex);
893f139b
JR
1213 if (unlikely(!dp)) {
1214 error = -ENODEV;
37bdc87b 1215 goto err_unlock_ovs;
893f139b 1216 }
37bdc87b 1217 /* Check that the flow exists. */
74ed7ab9
JS
1218 if (ufid_present)
1219 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1220 else
1221 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
893f139b
JR
1222 if (unlikely(!flow)) {
1223 error = -ENOENT;
37bdc87b 1224 goto err_unlock_ovs;
893f139b 1225 }
4a46b24e 1226
37bdc87b 1227 /* Update actions, if present. */
893f139b 1228 if (likely(acts)) {
37bdc87b
JR
1229 old_acts = ovsl_dereference(flow->sf_acts);
1230 rcu_assign_pointer(flow->sf_acts, acts);
893f139b
JR
1231
1232 if (unlikely(reply)) {
1233 error = ovs_flow_cmd_fill_info(flow,
1234 ovs_header->dp_ifindex,
1235 reply, info->snd_portid,
1236 info->snd_seq, 0,
804fe108 1237 OVS_FLOW_CMD_SET,
74ed7ab9 1238 ufid_flags);
893f139b
JR
1239 BUG_ON(error < 0);
1240 }
1241 } else {
1242 /* Could not alloc without acts before locking. */
1243 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
804fe108 1244 info, OVS_FLOW_CMD_SET, false,
74ed7ab9
JS
1245 ufid_flags);
1246
b5ffe634 1247 if (IS_ERR(reply)) {
893f139b
JR
1248 error = PTR_ERR(reply);
1249 goto err_unlock_ovs;
1250 }
ccb1352e 1251 }
37bdc87b 1252
37bdc87b
JR
1253 /* Clear stats. */
1254 if (a[OVS_FLOW_ATTR_CLEAR])
1255 ovs_flow_stats_clear(flow);
8e4e1713 1256 ovs_unlock();
ccb1352e 1257
893f139b
JR
1258 if (reply)
1259 ovs_notify(&dp_flow_genl_family, reply, info);
1260 if (old_acts)
34ae932a 1261 ovs_nla_free_flow_actions_rcu(old_acts);
fb5d1e9e 1262
ccb1352e
JG
1263 return 0;
1264
8e4e1713
PS
1265err_unlock_ovs:
1266 ovs_unlock();
893f139b
JR
1267 kfree_skb(reply);
1268err_kfree_acts:
34ae932a 1269 ovs_nla_free_flow_actions(acts);
ccb1352e
JG
1270error:
1271 return error;
1272}
1273
1274static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1275{
1276 struct nlattr **a = info->attrs;
1277 struct ovs_header *ovs_header = info->userhdr;
c2ac6673 1278 struct net *net = sock_net(skb->sk);
ccb1352e
JG
1279 struct sw_flow_key key;
1280 struct sk_buff *reply;
1281 struct sw_flow *flow;
1282 struct datapath *dp;
03f0d916 1283 struct sw_flow_match match;
74ed7ab9
JS
1284 struct sw_flow_id ufid;
1285 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1286 int err = 0;
05da5898 1287 bool log = !a[OVS_FLOW_ATTR_PROBE];
74ed7ab9 1288 bool ufid_present;
ccb1352e 1289
74ed7ab9
JS
1290 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1291 if (a[OVS_FLOW_ATTR_KEY]) {
2279994d 1292 ovs_match_init(&match, &key, true, NULL);
c2ac6673 1293 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
74ed7ab9
JS
1294 log);
1295 } else if (!ufid_present) {
05da5898
JR
1296 OVS_NLERR(log,
1297 "Flow get message rejected, Key attribute missing.");
74ed7ab9 1298 err = -EINVAL;
03f0d916 1299 }
ccb1352e
JG
1300 if (err)
1301 return err;
1302
8e4e1713 1303 ovs_lock();
46df7b81 1304 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
8e4e1713
PS
1305 if (!dp) {
1306 err = -ENODEV;
1307 goto unlock;
1308 }
ccb1352e 1309
74ed7ab9
JS
1310 if (ufid_present)
1311 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1312 else
1313 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
4a46b24e 1314 if (!flow) {
8e4e1713
PS
1315 err = -ENOENT;
1316 goto unlock;
1317 }
ccb1352e 1318
0e9796b4 1319 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
804fe108 1320 OVS_FLOW_CMD_GET, true, ufid_flags);
8e4e1713
PS
1321 if (IS_ERR(reply)) {
1322 err = PTR_ERR(reply);
1323 goto unlock;
1324 }
ccb1352e 1325
8e4e1713 1326 ovs_unlock();
ccb1352e 1327 return genlmsg_reply(reply, info);
8e4e1713
PS
1328unlock:
1329 ovs_unlock();
1330 return err;
ccb1352e
JG
1331}
1332
1333static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1334{
1335 struct nlattr **a = info->attrs;
1336 struct ovs_header *ovs_header = info->userhdr;
c2ac6673 1337 struct net *net = sock_net(skb->sk);
ccb1352e
JG
1338 struct sw_flow_key key;
1339 struct sk_buff *reply;
74ed7ab9 1340 struct sw_flow *flow = NULL;
ccb1352e 1341 struct datapath *dp;
03f0d916 1342 struct sw_flow_match match;
74ed7ab9
JS
1343 struct sw_flow_id ufid;
1344 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
ccb1352e 1345 int err;
05da5898 1346 bool log = !a[OVS_FLOW_ATTR_PROBE];
74ed7ab9 1347 bool ufid_present;
ccb1352e 1348
74ed7ab9
JS
1349 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1350 if (a[OVS_FLOW_ATTR_KEY]) {
2279994d 1351 ovs_match_init(&match, &key, true, NULL);
c2ac6673
JS
1352 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1353 NULL, log);
aed06778
JR
1354 if (unlikely(err))
1355 return err;
1356 }
1357
8e4e1713 1358 ovs_lock();
46df7b81 1359 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
aed06778 1360 if (unlikely(!dp)) {
8e4e1713
PS
1361 err = -ENODEV;
1362 goto unlock;
1363 }
46df7b81 1364
74ed7ab9 1365 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
b637e498 1366 err = ovs_flow_tbl_flush(&dp->table);
8e4e1713
PS
1367 goto unlock;
1368 }
03f0d916 1369
74ed7ab9
JS
1370 if (ufid_present)
1371 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1372 else
1373 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
4a46b24e 1374 if (unlikely(!flow)) {
8e4e1713
PS
1375 err = -ENOENT;
1376 goto unlock;
1377 }
ccb1352e 1378
b637e498 1379 ovs_flow_tbl_remove(&dp->table, flow);
aed06778 1380 ovs_unlock();
ccb1352e 1381
aed06778 1382 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
74ed7ab9 1383 &flow->id, info, false, ufid_flags);
aed06778 1384 if (likely(reply)) {
b90f5aa4 1385 if (!IS_ERR(reply)) {
aed06778
JR
1386 rcu_read_lock(); /*To keep RCU checker happy. */
1387 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1388 reply, info->snd_portid,
1389 info->snd_seq, 0,
74ed7ab9
JS
1390 OVS_FLOW_CMD_DEL,
1391 ufid_flags);
aed06778 1392 rcu_read_unlock();
8a574f86
PA
1393 if (WARN_ON_ONCE(err < 0)) {
1394 kfree_skb(reply);
1395 goto out_free;
1396 }
aed06778
JR
1397
1398 ovs_notify(&dp_flow_genl_family, reply, info);
1399 } else {
cf3266ad
TZ
1400 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1401 PTR_ERR(reply));
aed06778 1402 }
fb5d1e9e 1403 }
ccb1352e 1404
8a574f86 1405out_free:
aed06778 1406 ovs_flow_free(flow, true);
ccb1352e 1407 return 0;
8e4e1713
PS
1408unlock:
1409 ovs_unlock();
1410 return err;
ccb1352e
JG
1411}
1412
1413static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1414{
74ed7ab9 1415 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
ccb1352e 1416 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
b637e498 1417 struct table_instance *ti;
ccb1352e 1418 struct datapath *dp;
74ed7ab9
JS
1419 u32 ufid_flags;
1420 int err;
1421
8cb08174
JB
1422 err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1423 OVS_FLOW_ATTR_MAX, flow_policy, NULL);
74ed7ab9
JS
1424 if (err)
1425 return err;
1426 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
ccb1352e 1427
d57170b1 1428 rcu_read_lock();
cc3a5ae6 1429 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
8e4e1713 1430 if (!dp) {
d57170b1 1431 rcu_read_unlock();
ccb1352e 1432 return -ENODEV;
8e4e1713 1433 }
ccb1352e 1434
b637e498 1435 ti = rcu_dereference(dp->table.ti);
ccb1352e
JG
1436 for (;;) {
1437 struct sw_flow *flow;
1438 u32 bucket, obj;
1439
1440 bucket = cb->args[0];
1441 obj = cb->args[1];
b637e498 1442 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
ccb1352e
JG
1443 if (!flow)
1444 break;
1445
0e9796b4 1446 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
15e47304 1447 NETLINK_CB(cb->skb).portid,
ccb1352e 1448 cb->nlh->nlmsg_seq, NLM_F_MULTI,
804fe108 1449 OVS_FLOW_CMD_GET, ufid_flags) < 0)
ccb1352e
JG
1450 break;
1451
1452 cb->args[0] = bucket;
1453 cb->args[1] = obj;
1454 }
d57170b1 1455 rcu_read_unlock();
ccb1352e
JG
1456 return skb->len;
1457}
1458
0c200ef9
PS
1459static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1460 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
05da5898 1461 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
0c200ef9
PS
1462 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1463 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
05da5898 1464 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
74ed7ab9
JS
1465 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1466 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
0c200ef9
PS
1467};
1468
66a9b928 1469static const struct genl_small_ops dp_flow_genl_ops[] = {
ccb1352e 1470 { .cmd = OVS_FLOW_CMD_NEW,
ef6243ac 1471 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4a92602a 1472 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
37bdc87b 1473 .doit = ovs_flow_cmd_new
ccb1352e
JG
1474 },
1475 { .cmd = OVS_FLOW_CMD_DEL,
ef6243ac 1476 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4a92602a 1477 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
ccb1352e
JG
1478 .doit = ovs_flow_cmd_del
1479 },
1480 { .cmd = OVS_FLOW_CMD_GET,
ef6243ac 1481 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
ccb1352e 1482 .flags = 0, /* OK for unprivileged users. */
ccb1352e
JG
1483 .doit = ovs_flow_cmd_get,
1484 .dumpit = ovs_flow_cmd_dump
1485 },
1486 { .cmd = OVS_FLOW_CMD_SET,
ef6243ac 1487 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4a92602a 1488 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
37bdc87b 1489 .doit = ovs_flow_cmd_set,
ccb1352e
JG
1490 },
1491};
1492
56989f6d 1493static struct genl_family dp_flow_genl_family __ro_after_init = {
ccb1352e 1494 .hdrsize = sizeof(struct ovs_header),
0c200ef9
PS
1495 .name = OVS_FLOW_FAMILY,
1496 .version = OVS_FLOW_VERSION,
1497 .maxattr = OVS_FLOW_ATTR_MAX,
3b0f31f2 1498 .policy = flow_policy,
3a4e0d6a
PS
1499 .netnsok = true,
1500 .parallel_ops = true,
66a9b928
JK
1501 .small_ops = dp_flow_genl_ops,
1502 .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
0c200ef9
PS
1503 .mcgrps = &ovs_dp_flow_multicast_group,
1504 .n_mcgrps = 1,
489111e5 1505 .module = THIS_MODULE,
ccb1352e
JG
1506};
1507
c3ff8cfe
TG
1508static size_t ovs_dp_cmd_msg_size(void)
1509{
1510 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1511
1512 msgsize += nla_total_size(IFNAMSIZ);
66c7a5ee
ND
1513 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1514 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
45fb9c35 1515 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
9bf24f59 1516 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
c3ff8cfe
TG
1517
1518 return msgsize;
1519}
1520
8ec609d8 1521/* Called with ovs_mutex. */
ccb1352e 1522static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
15e47304 1523 u32 portid, u32 seq, u32 flags, u8 cmd)
ccb1352e
JG
1524{
1525 struct ovs_header *ovs_header;
1526 struct ovs_dp_stats dp_stats;
1bd7116f 1527 struct ovs_dp_megaflow_stats dp_megaflow_stats;
ccb1352e
JG
1528 int err;
1529
15e47304 1530 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
cf3266ad 1531 flags, cmd);
ccb1352e
JG
1532 if (!ovs_header)
1533 goto error;
1534
1535 ovs_header->dp_ifindex = get_dpifindex(dp);
1536
ccb1352e 1537 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
ccb1352e
JG
1538 if (err)
1539 goto nla_put_failure;
1540
1bd7116f 1541 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
66c7a5ee
ND
1542 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1543 &dp_stats, OVS_DP_ATTR_PAD))
1bd7116f
AZ
1544 goto nla_put_failure;
1545
66c7a5ee
ND
1546 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1547 sizeof(struct ovs_dp_megaflow_stats),
1548 &dp_megaflow_stats, OVS_DP_ATTR_PAD))
028d6a67 1549 goto nla_put_failure;
ccb1352e 1550
43d4be9c
TG
1551 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1552 goto nla_put_failure;
1553
9bf24f59
EC
1554 if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1555 ovs_flow_tbl_masks_cache_size(&dp->table)))
1556 goto nla_put_failure;
1557
053c095a
JB
1558 genlmsg_end(skb, ovs_header);
1559 return 0;
ccb1352e
JG
1560
1561nla_put_failure:
1562 genlmsg_cancel(skb, ovs_header);
1563error:
1564 return -EMSGSIZE;
1565}
1566
263ea090 1567static struct sk_buff *ovs_dp_cmd_alloc_info(void)
ccb1352e 1568{
551ddc05 1569 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
ccb1352e
JG
1570}
1571
bb6f9a70 1572/* Called with rcu_read_lock or ovs_mutex. */
46df7b81 1573static struct datapath *lookup_datapath(struct net *net,
12eb18f7 1574 const struct ovs_header *ovs_header,
ccb1352e
JG
1575 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1576{
1577 struct datapath *dp;
1578
1579 if (!a[OVS_DP_ATTR_NAME])
46df7b81 1580 dp = get_dp(net, ovs_header->dp_ifindex);
ccb1352e
JG
1581 else {
1582 struct vport *vport;
1583
46df7b81 1584 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
ccb1352e 1585 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
ccb1352e
JG
1586 }
1587 return dp ? dp : ERR_PTR(-ENODEV);
1588}
1589
cf3266ad
TZ
1590static void ovs_dp_reset_user_features(struct sk_buff *skb,
1591 struct genl_info *info)
44da5ae5
TG
1592{
1593 struct datapath *dp;
1594
cf3266ad
TZ
1595 dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
1596 info->attrs);
3c7eacfc 1597 if (IS_ERR(dp))
44da5ae5
TG
1598 return;
1599
1600 WARN(dp->user_features, "Dropping previously announced user features\n");
1601 dp->user_features = 0;
1602}
1603
95a7233c
PB
1604DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
1605
b83d23a2
MG
1606static int ovs_dp_set_upcall_portids(struct datapath *dp,
1607 const struct nlattr *ids)
1608{
1609 struct dp_nlsk_pids *old, *dp_nlsk_pids;
1610
1611 if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1612 return -EINVAL;
1613
1614 old = ovsl_dereference(dp->upcall_portids);
1615
1616 dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1617 GFP_KERNEL);
1618 if (!dp_nlsk_pids)
1619 return -ENOMEM;
1620
1621 dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1622 nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1623
1624 rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1625
1626 kfree_rcu(old, rcu);
1627
1628 return 0;
1629}
1630
1631u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1632{
1633 struct dp_nlsk_pids *dp_nlsk_pids;
1634
1635 dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1636
1637 if (dp_nlsk_pids) {
1638 if (cpu_id < dp_nlsk_pids->n_pids) {
1639 return dp_nlsk_pids->pids[cpu_id];
784dcfa5
MG
1640 } else if (dp_nlsk_pids->n_pids > 0 &&
1641 cpu_id >= dp_nlsk_pids->n_pids) {
1642 /* If the number of netlink PIDs is mismatched with
1643 * the number of CPUs as seen by the kernel, log this
1644 * and send the upcall to an arbitrary socket (0) in
1645 * order to not drop packets
b83d23a2
MG
1646 */
1647 pr_info_ratelimited("cpu_id mismatch with handler threads");
784dcfa5
MG
1648 return dp_nlsk_pids->pids[cpu_id %
1649 dp_nlsk_pids->n_pids];
b83d23a2
MG
1650 } else {
1651 return 0;
1652 }
1653 } else {
1654 return 0;
1655 }
1656}
1657
95a7233c 1658static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
43d4be9c 1659{
95a7233c 1660 u32 user_features = 0;
b83d23a2 1661 int err;
95a7233c
PB
1662
1663 if (a[OVS_DP_ATTR_USER_FEATURES]) {
1664 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1665
1666 if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1667 OVS_DP_F_UNALIGNED |
b83d23a2
MG
1668 OVS_DP_F_TC_RECIRC_SHARING |
1669 OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
95a7233c
PB
1670 return -EOPNOTSUPP;
1671
1672#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1673 if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1674 return -EOPNOTSUPP;
1675#endif
1676 }
1677
9bf24f59
EC
1678 if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1679 int err;
1680 u32 cache_size;
1681
1682 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1683 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1684 if (err)
1685 return err;
1686 }
1687
95a7233c
PB
1688 dp->user_features = user_features;
1689
b83d23a2
MG
1690 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1691 a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1692 /* Upcall Netlink Port IDs have been updated */
1693 err = ovs_dp_set_upcall_portids(dp,
1694 a[OVS_DP_ATTR_PER_CPU_PIDS]);
1695 if (err)
1696 return err;
1697 }
1698
95a7233c
PB
1699 if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1700 static_branch_enable(&tc_recirc_sharing_support);
1701 else
1702 static_branch_disable(&tc_recirc_sharing_support);
1703
1704 return 0;
43d4be9c
TG
1705}
1706
eec62ead
TZ
1707static int ovs_dp_stats_init(struct datapath *dp)
1708{
1709 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1710 if (!dp->stats_percpu)
1711 return -ENOMEM;
1712
1713 return 0;
1714}
1715
1716static int ovs_dp_vport_init(struct datapath *dp)
1717{
1718 int i;
1719
1720 dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1721 sizeof(struct hlist_head),
1722 GFP_KERNEL);
1723 if (!dp->ports)
1724 return -ENOMEM;
1725
1726 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1727 INIT_HLIST_HEAD(&dp->ports[i]);
1728
1729 return 0;
1730}
1731
ccb1352e
JG
1732static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1733{
1734 struct nlattr **a = info->attrs;
1735 struct vport_parms parms;
1736 struct sk_buff *reply;
1737 struct datapath *dp;
1738 struct vport *vport;
46df7b81 1739 struct ovs_net *ovs_net;
eec62ead 1740 int err;
ccb1352e
JG
1741
1742 err = -EINVAL;
1743 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1744 goto err;
1745
263ea090 1746 reply = ovs_dp_cmd_alloc_info();
6093ae9a
JR
1747 if (!reply)
1748 return -ENOMEM;
ccb1352e
JG
1749
1750 err = -ENOMEM;
1751 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1752 if (dp == NULL)
eec62ead 1753 goto err_destroy_reply;
46df7b81 1754
efd7ef1c 1755 ovs_dp_set_net(dp, sock_net(skb->sk));
ccb1352e
JG
1756
1757 /* Allocate table. */
b637e498
PS
1758 err = ovs_flow_tbl_init(&dp->table);
1759 if (err)
eec62ead 1760 goto err_destroy_dp;
ccb1352e 1761
eec62ead
TZ
1762 err = ovs_dp_stats_init(dp);
1763 if (err)
ccb1352e 1764 goto err_destroy_table;
ccb1352e 1765
eec62ead
TZ
1766 err = ovs_dp_vport_init(dp);
1767 if (err)
1768 goto err_destroy_stats;
15eac2a7 1769
96fbc13d
AZ
1770 err = ovs_meters_init(dp);
1771 if (err)
eec62ead 1772 goto err_destroy_ports;
96fbc13d 1773
ccb1352e
JG
1774 /* Set up our datapath device. */
1775 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1776 parms.type = OVS_VPORT_TYPE_INTERNAL;
1777 parms.options = NULL;
1778 parms.dp = dp;
1779 parms.port_no = OVSP_LOCAL;
5cd667b0 1780 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
ccb1352e 1781
6093ae9a
JR
1782 /* So far only local changes have been made, now need the lock. */
1783 ovs_lock();
1784
fea07a48
EC
1785 err = ovs_dp_change(dp, a);
1786 if (err)
1787 goto err_unlock_and_destroy_meters;
1788
ccb1352e
JG
1789 vport = new_vport(&parms);
1790 if (IS_ERR(vport)) {
1791 err = PTR_ERR(vport);
1792 if (err == -EBUSY)
1793 err = -EEXIST;
1794
44da5ae5
TG
1795 if (err == -EEXIST) {
1796 /* An outdated user space instance that does not understand
1797 * the concept of user_features has attempted to create a new
1798 * datapath and is likely to reuse it. Drop all user features.
1799 */
1800 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1801 ovs_dp_reset_user_features(skb, info);
1802 }
1803
fea07a48 1804 goto err_unlock_and_destroy_meters;
ccb1352e
JG
1805 }
1806
6093ae9a
JR
1807 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1808 info->snd_seq, 0, OVS_DP_CMD_NEW);
1809 BUG_ON(err < 0);
ccb1352e 1810
46df7b81 1811 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
59a35d60 1812 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
8e4e1713
PS
1813
1814 ovs_unlock();
ccb1352e 1815
2a94fe48 1816 ovs_notify(&dp_datapath_genl_family, reply, info);
ccb1352e
JG
1817 return 0;
1818
fea07a48
EC
1819err_unlock_and_destroy_meters:
1820 ovs_unlock();
96fbc13d 1821 ovs_meters_exit(dp);
eec62ead 1822err_destroy_ports:
15eac2a7 1823 kfree(dp->ports);
eec62ead 1824err_destroy_stats:
ccb1352e
JG
1825 free_percpu(dp->stats_percpu);
1826err_destroy_table:
9b996e54 1827 ovs_flow_tbl_destroy(&dp->table);
eec62ead 1828err_destroy_dp:
ccb1352e 1829 kfree(dp);
eec62ead 1830err_destroy_reply:
6093ae9a 1831 kfree_skb(reply);
ccb1352e
JG
1832err:
1833 return err;
1834}
1835
8e4e1713 1836/* Called with ovs_mutex. */
46df7b81 1837static void __dp_destroy(struct datapath *dp)
ccb1352e 1838{
1f3a090b 1839 struct flow_table *table = &dp->table;
15eac2a7 1840 int i;
ccb1352e 1841
15eac2a7
PS
1842 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1843 struct vport *vport;
b67bfe0d 1844 struct hlist_node *n;
15eac2a7 1845
b67bfe0d 1846 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
15eac2a7
PS
1847 if (vport->port_no != OVSP_LOCAL)
1848 ovs_dp_detach_port(vport);
1849 }
ccb1352e 1850
59a35d60 1851 list_del_rcu(&dp->list_node);
ccb1352e 1852
8e4e1713 1853 /* OVSP_LOCAL is datapath internal port. We need to make sure that
e80857cc 1854 * all ports in datapath are destroyed first before freeing datapath.
ccb1352e 1855 */
8e4e1713 1856 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
ccb1352e 1857
1f3a090b
TZ
1858 /* Flush sw_flow in the tables. RCU cb only releases resource
1859 * such as dp, ports and tables. That may avoid some issues
1860 * such as RCU usage warning.
1861 */
1862 table_instance_flow_flush(table, ovsl_dereference(table->ti),
1863 ovsl_dereference(table->ufid_ti));
1864
1865 /* RCU destroy the ports, meters and flow tables. */
ccb1352e 1866 call_rcu(&dp->rcu, destroy_dp_rcu);
46df7b81
PS
1867}
1868
1869static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1870{
1871 struct sk_buff *reply;
1872 struct datapath *dp;
1873 int err;
1874
263ea090 1875 reply = ovs_dp_cmd_alloc_info();
6093ae9a
JR
1876 if (!reply)
1877 return -ENOMEM;
1878
8e4e1713 1879 ovs_lock();
46df7b81
PS
1880 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1881 err = PTR_ERR(dp);
1882 if (IS_ERR(dp))
6093ae9a 1883 goto err_unlock_free;
46df7b81 1884
6093ae9a
JR
1885 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1886 info->snd_seq, 0, OVS_DP_CMD_DEL);
1887 BUG_ON(err < 0);
46df7b81
PS
1888
1889 __dp_destroy(dp);
8e4e1713 1890 ovs_unlock();
ccb1352e 1891
2a94fe48 1892 ovs_notify(&dp_datapath_genl_family, reply, info);
ccb1352e
JG
1893
1894 return 0;
6093ae9a
JR
1895
1896err_unlock_free:
8e4e1713 1897 ovs_unlock();
6093ae9a 1898 kfree_skb(reply);
8e4e1713 1899 return err;
ccb1352e
JG
1900}
1901
1902static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1903{
1904 struct sk_buff *reply;
1905 struct datapath *dp;
1906 int err;
1907
263ea090 1908 reply = ovs_dp_cmd_alloc_info();
6093ae9a
JR
1909 if (!reply)
1910 return -ENOMEM;
1911
8e4e1713 1912 ovs_lock();
46df7b81 1913 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
8e4e1713 1914 err = PTR_ERR(dp);
ccb1352e 1915 if (IS_ERR(dp))
6093ae9a 1916 goto err_unlock_free;
ccb1352e 1917
95a7233c
PB
1918 err = ovs_dp_change(dp, info->attrs);
1919 if (err)
1920 goto err_unlock_free;
43d4be9c 1921
6093ae9a 1922 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
804fe108 1923 info->snd_seq, 0, OVS_DP_CMD_SET);
6093ae9a 1924 BUG_ON(err < 0);
ccb1352e 1925
8e4e1713 1926 ovs_unlock();
2a94fe48 1927 ovs_notify(&dp_datapath_genl_family, reply, info);
ccb1352e
JG
1928
1929 return 0;
6093ae9a
JR
1930
1931err_unlock_free:
8e4e1713 1932 ovs_unlock();
6093ae9a 1933 kfree_skb(reply);
8e4e1713 1934 return err;
ccb1352e
JG
1935}
1936
1937static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1938{
1939 struct sk_buff *reply;
1940 struct datapath *dp;
8e4e1713 1941 int err;
ccb1352e 1942
263ea090 1943 reply = ovs_dp_cmd_alloc_info();
6093ae9a
JR
1944 if (!reply)
1945 return -ENOMEM;
1946
8ec609d8 1947 ovs_lock();
46df7b81 1948 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
8e4e1713
PS
1949 if (IS_ERR(dp)) {
1950 err = PTR_ERR(dp);
6093ae9a 1951 goto err_unlock_free;
8e4e1713 1952 }
6093ae9a 1953 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
804fe108 1954 info->snd_seq, 0, OVS_DP_CMD_GET);
6093ae9a 1955 BUG_ON(err < 0);
8ec609d8 1956 ovs_unlock();
ccb1352e
JG
1957
1958 return genlmsg_reply(reply, info);
8e4e1713 1959
6093ae9a 1960err_unlock_free:
8ec609d8 1961 ovs_unlock();
6093ae9a 1962 kfree_skb(reply);
8e4e1713 1963 return err;
ccb1352e
JG
1964}
1965
/* Genetlink dump callback for datapaths: walks the per-netns dp list and
 * emits one OVS_DP_CMD_GET record per datapath.  cb->args[0] holds the
 * resume index so a partially filled skb continues where it left off.
 */
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		/* Stop on the first dp that no longer fits in the skb;
		 * i is left pointing at it for the next invocation.
		 */
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_GET) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}
1988
/* Netlink attribute validation policy for OVS_DP_* commands.  The masks
 * cache size is bounded so the per-CPU cache allocation cannot exceed a
 * single per-CPU chunk.
 */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};
1996
/* Operation table for the datapath genetlink family.  All mutating
 * commands require CAP_NET_ADMIN in the owning user namespace
 * (GENL_UNS_ADMIN_PERM); GET is open to unprivileged users.
 */
static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};
2020
/* Genetlink family definition for datapath management.  netnsok allows
 * use from non-initial network namespaces; parallel_ops means handlers
 * rely on ovs_mutex / RCU rather than the genl serialization lock.
 */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2035
/* Called with ovs_mutex or RCU read lock.
 *
 * Serialize one vport into @skb as a genetlink message: port number,
 * type, name, ifindex, peer-netns id (when the vport's device lives in a
 * different netns than @net), stats, upcall portids and vport options.
 * @gfp is propagated to peernet2id_alloc() so callers on the RCU path
 * can pass GFP_ATOMIC.  Returns 0 or -EMSGSIZE if @skb ran out of room.
 */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	/* Identify cross-namespace devices by their peer netns id. */
	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	/* Only -EMSGSIZE from options is treated as an error; other
	 * results fall through and the message is finalized as-is.
	 */
	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
2088
6093ae9a
JR
2089static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2090{
2091 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2092}
2093
/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
/* Build a standalone notification message describing @vport.  Returns
 * the filled skb or ERR_PTR(-ENOMEM).  Fill failure is impossible by
 * construction (default-sized skb for a single vport), hence BUG_ON.
 */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}
2111
/* Called with ovs_mutex or RCU read lock. */
/* Resolve a vport from userspace-supplied attributes: by name
 * (OVS_VPORT_ATTR_NAME, optionally cross-checked against the header's
 * dp_ifindex) or by port number within the datapath identified by the
 * header.  Lookup by ifindex is not supported.  Returns the vport or an
 * ERR_PTR: -EOPNOTSUPP, -ENODEV, -EFBIG (port_no out of range), -EINVAL.
 */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		/* A nonzero dp_ifindex must match the vport's datapath. */
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);

}
2148
6b660c41 2149static unsigned int ovs_get_max_headroom(struct datapath *dp)
3a927bc7 2150{
6b660c41 2151 unsigned int dev_headroom, max_headroom = 0;
3a927bc7
PA
2152 struct net_device *dev;
2153 struct vport *vport;
2154 int i;
2155
2156 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
53742e69 2157 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
cf3266ad 2158 lockdep_ovsl_is_held()) {
3a927bc7
PA
2159 dev = vport->dev;
2160 dev_headroom = netdev_get_fwd_headroom(dev);
2161 if (dev_headroom > max_headroom)
2162 max_headroom = dev_headroom;
2163 }
2164 }
2165
6b660c41
TY
2166 return max_headroom;
2167}
2168
2169/* Called with ovs_mutex */
2170static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2171{
2172 struct vport *vport;
2173 int i;
2174
2175 dp->max_headroom = new_headroom;
cf3266ad 2176 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
53742e69 2177 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
cf3266ad 2178 lockdep_ovsl_is_held())
6b660c41 2179 netdev_set_rx_headroom(vport->dev, new_headroom);
cf3266ad 2180 }
3a927bc7
PA
2181}
2182
/* Genetlink OVS_VPORT_CMD_NEW handler: create a vport in a datapath.
 * Requires NAME, TYPE and UPCALL_PID attributes; IFINDEX is rejected.
 * If no port number is requested, the first free slot >= 1 is used.
 * new_vport() returning -EAGAIN (module autoload raced with us — see
 * vport type loading; TODO confirm against new_vport()) restarts the
 * whole lookup under the still-held ovs_mutex.
 */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;
	if (a[OVS_VPORT_ATTR_IFINDEX])
		return -EOPNOTSUPP;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	/* Allocate the reply before taking ovs_mutex. */
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		/* Explicit port number: must be currently unused. */
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		/* No port requested: find the lowest free slot >= 1. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	/* Grow the datapath-wide headroom if the new device needs more,
	 * otherwise push the current maximum down onto the new device.
	 */
	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2271
/* Genetlink OVS_VPORT_CMD_SET handler: update an existing vport's
 * options and/or upcall portids.  The vport type is immutable: a TYPE
 * attribute that does not match the current type yields -EINVAL.
 */
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}


	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2324
/* Genetlink OVS_VPORT_CMD_DEL handler: detach a vport from its
 * datapath.  The local (OVSP_LOCAL) port cannot be removed.  The reply
 * is filled BEFORE detaching, while the vport is still valid; if the
 * departing device defined the datapath's maximum headroom, the
 * maximum is recomputed afterwards.
 */
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	bool update_headroom = false;
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	unsigned int new_headroom;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
	BUG_ON(err < 0);

	/* the vport deletion may trigger dp headroom update */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		update_headroom = true;

	netdev_reset_rx_headroom(vport->dev);
	ovs_dp_detach_port(vport);

	/* Shrink (never grow) the recorded maximum after removal. */
	if (update_headroom) {
		new_headroom = ovs_get_max_headroom(dp);

		if (new_headroom < dp->max_headroom)
			ovs_update_headroom(dp, new_headroom);
	}
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2379
/* Genetlink OVS_VPORT_CMD_GET handler: read-only lookup, so it runs
 * under the RCU read lock instead of ovs_mutex; consequently the fill
 * uses GFP_ATOMIC for any allocation done while the lock is held.
 */
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}
2410
/* Genetlink dump callback for vports of one datapath.  Resume state is
 * a two-level cursor: cb->args[0] is the hash bucket, cb->args[1] the
 * entry index within that bucket.  Runs entirely under RCU.
 */
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			/* Stop when the skb fills; (i, j) mark where the
			 * next invocation resumes.
			 */
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		/* skip applies only to the first (resumed) bucket. */
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}
2451
/* Delayed-work handler: periodically rebalance the flow-mask usage
 * counters of every datapath in this netns, then re-arm itself with the
 * same DP_MASKS_REBALANCE_INTERVAL period.
 */
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}
2468
/* Netlink attribute validation policy for OVS_VPORT_* commands.
 * UPCALL_PID is NLA_UNSPEC because it may carry either a single u32 or
 * an array of portids — presumably validated in
 * ovs_vport_set_upcall_portids(); confirm against vport.c.
 */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
2479
/* Operation table for the vport genetlink family.  Mutating commands
 * require CAP_NET_ADMIN in the owning user namespace; GET is open to
 * unprivileged users.
 */
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};
2503
/* Genetlink family definition for vport management.  Non-static:
 * referenced by other openvswitch files (e.g. for notifications from
 * the dp notify path — confirm against datapath.h).
 */
struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2518
/* All genetlink families this module registers, in registration order;
 * dp_register_genl()/dp_unregister_genl() iterate over this table.
 */
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
2529
2530static void dp_unregister_genl(int n_families)
2531{
2532 int i;
2533
2534 for (i = 0; i < n_families; i++)
0c200ef9 2535 genl_unregister_family(dp_genl_families[i]);
ccb1352e
JG
2536}
2537
56989f6d 2538static int __init dp_register_genl(void)
ccb1352e 2539{
ccb1352e
JG
2540 int err;
2541 int i;
2542
ccb1352e 2543 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
ccb1352e 2544
0c200ef9 2545 err = genl_register_family(dp_genl_families[i]);
ccb1352e
JG
2546 if (err)
2547 goto error;
ccb1352e
JG
2548 }
2549
2550 return 0;
2551
2552error:
0c200ef9 2553 dp_unregister_genl(i);
ccb1352e
JG
2554 return err;
2555}
2556
/* Per-netns init: set up the datapath list, the notify work item and
 * the periodic mask-rebalance work, initialize conntrack state, then
 * kick off the first rebalance.  Returns 0 or the ovs_ct_init() error.
 */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}
2574
/* Collect, onto @head, every internal vport registered in @net whose
 * backing net_device actually lives in the dying namespace @dnet.
 * Used by ovs_exit_net() to detach cross-namespace internal devices.
 */
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				/* Only internal devices can straddle
				 * namespaces this way.
				 */
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				if (dev_net(vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}
2597
/* Per-netns teardown: destroy all datapaths of @dnet, then walk every
 * other namespace (under net_rwsem) to detach internal vports whose
 * device lived in @dnet.  Pending work items are cancelled only after
 * ovs_mutex is dropped, since they take it themselves.
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2629
/* Per-network-namespace registration: allocates a struct ovs_net per
 * netns and runs ovs_init_net()/ovs_exit_net() on create/destroy.
 */
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
2636
/* Module init: bring up each subsystem in dependency order and unwind
 * in exact reverse order via the goto chain on failure.  The
 * BUILD_BUG_ON guarantees the OVS per-skb control block fits inside
 * sk_buff::cb.
 */
static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = action_fifos_init();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error_action_fifos_exit;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	/* Genetlink registration comes last: once families are visible,
	 * userspace may start issuing commands.
	 */
	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
	action_fifos_exit();
error:
	return err;
}
2697
/* Module exit: tear down in reverse of dp_init().  rcu_barrier() waits
 * for in-flight RCU callbacks before the flow/vport caches are freed.
 */
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}
2710
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
/* Allow userspace genetlink requests to autoload this module by
 * family name.
 */
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);