/*
 * Copyright (c) 2007-2015 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "gso.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family, struct genl_multicast_group *grp,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, GROUP_ID(grp), GFP_KERNEL);
}
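
/* A minimal usage sketch (not from the original source): callers that build
 * optional replies gate the allocation on ovs_must_notify(), e.g.
 *
 *	if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
 *					GROUP_ID(&ovs_dp_flow_multicast_group)))
 *		return NULL;
 *
 * which mirrors how ovs_flow_cmd_alloc_info() below decides whether a reply
 * skb is worth building at all.
 */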

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, set miscellaneous datapath parameters, etc.), are protected
 * by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif

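/* Illustrative sketch of the locking rules above (not part of the original
 * source): a control-path writer takes ovs_lock() around datapath changes,
 * while the fast path relies only on RCU.
 *
 *	ovs_lock();
 *	dp = get_dp(net, dp_ifindex);		// valid while ovs_mutex is held
 *	// ... add/remove ports, change flow table ...
 *	ovs_unlock();
 *
 *	rcu_read_lock();
 *	dp = get_dp_rcu(net, dp_ifindex);	// valid until rcu_read_unlock()
 *	// ... read-only use, e.g. flow lookup ...
 *	rcu_read_unlock();
 *
 * If the RTNL lock is also needed, it is taken while ovs_lock() is already
 * held, since RTNL nests inside ovs_mutex.
 */
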
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

	if (dev) {
		struct vport *vport = ovs_internal_dev_get_vport(dev);
		if (vport)
			return vport->dp;
	}

	return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	rcu_read_lock();
	dp = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
	return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->dev->ifindex;
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}
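
/* Worked example for the bucket math above (illustrative only, assuming the
 * usual DP_VPORT_HASH_BUCKETS of 1024 from datapath.h): port_no is masked
 * with (DP_VPORT_HASH_BUCKETS - 1), so port 5 lands in bucket 5 and port
 * 1029 (1024 + 5) wraps around into the same bucket:
 *
 *	head = &dp->ports[1029 & 1023];		// == &dp->ports[5]
 */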

void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;
		int error;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	ovs_execute_actions(dp, skb, sf_acts, key);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned short gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	struct ovs_skb_cb ovs_cb;
	int err;

	ovs_cb = *OVS_CB(skb);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	*OVS_CB(skb) = ovs_cb;
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb = segs;
	do {
		*OVS_CB(skb) = ovs_cb;
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;

	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(upcall_info->actions_len);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			memset(skb_put(skb, plen), 0, plen);
	}
}

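/* Worked example for the Netlink sizing/padding above (illustrative only):
 * nla_total_size(n) is NLA_ALIGN(NLA_HDRLEN + n) with NLA_ALIGNTO == 4, so a
 * 2-byte OVS_PACKET_ATTR_MRU costs nla_total_size(2) == 8 bytes, and a
 * 1500-byte packet attribute costs NLA_ALIGN(4 + 1500) == 1504 bytes.
 * Likewise pad_packet() on a 61-byte skb appends NLA_ALIGN(61) - 61 == 3
 * zero bytes so that older userspace, which expects the last attribute to be
 * 4-byte aligned, can still parse the message.
 */
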
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	BUG_ON(err);

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		BUG_ON(err);
		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru) {
		if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
				upcall_info->mru)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0) {
		if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
				skb->len)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy()
	 */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}
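
/* Resulting upcall layout (illustrative summary, not from the original
 * source): the message built by queue_userspace_packet() is a
 * dp_packet_genl_family message unicast to upcall_info->portid, of the form
 *
 *	struct nlmsghdr
 *	struct genlmsghdr		(cmd = upcall_info->cmd)
 *	struct ovs_header		(dp_ifindex)
 *	OVS_PACKET_ATTR_KEY		(nested flow key)
 *	[OVS_PACKET_ATTR_USERDATA]
 *	[OVS_PACKET_ATTR_EGRESS_TUN_KEY]
 *	[OVS_PACKET_ATTR_ACTIONS]
 *	[OVS_PACKET_ATTR_MRU]
 *	[OVS_PACKET_ATTR_LEN]		(original length, if truncated)
 *	OVS_PACKET_ATTR_PACKET		(packet data, possibly zerocopied)
 *
 * with nlmsg_len fixed up by hand because skb_zerocopy() bypasses the usual
 * genlmsg_end() accounting.
 */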
561
df2c07f4 562static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 563{
df2c07f4 564 struct ovs_header *ovs_header = info->userhdr;
a94ebc39 565 struct net *net = sock_net(skb->sk);
982b8810 566 struct nlattr **a = info->attrs;
e0e57990 567 struct sw_flow_actions *acts;
982b8810 568 struct sk_buff *packet;
e0e57990 569 struct sw_flow *flow;
ad50cb60 570 struct sw_flow_actions *sf_acts;
f7cd0081 571 struct datapath *dp;
a6059080 572 struct vport *input_vport;
a94ebc39 573 u16 mru = 0;
3f19d399 574 int len;
d6569377 575 int err;
2e460098 576 bool log = !a[OVS_PACKET_ATTR_PROBE];
064af421 577
f7cd0081 578 err = -EINVAL;
df2c07f4 579 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 580 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 581 goto err;
064af421 582
df2c07f4 583 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 584 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
585 err = -ENOMEM;
586 if (!packet)
e5cad958 587 goto err;
3f19d399
BP
588 skb_reserve(packet, NET_IP_ALIGN);
589
bf3d6fce 590 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 591
a94ebc39
JS
592 /* Set packet's mru */
593 if (a[OVS_PACKET_ATTR_MRU]) {
594 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
595 packet->ignore_df = 1;
596 }
597 OVS_CB(packet)->mru = mru;
598
e0e57990 599 /* Build an sw_flow for sending this packet. */
df65fec1 600 flow = ovs_flow_alloc();
e0e57990
BP
601 err = PTR_ERR(flow);
602 if (IS_ERR(flow))
e5cad958 603 goto err_kfree_skb;
064af421 604
038e34ab
JS
605 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
606 packet, &flow->key, log);
e0e57990 607 if (err)
9321954a 608 goto err_flow_free;
e0e57990 609
a94ebc39 610 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
9233cef7 611 &flow->key, &acts, log);
9b405f1a
PS
612 if (err)
613 goto err_flow_free;
e0e57990 614
ff27161e 615 rcu_assign_pointer(flow->sf_acts, acts);
abff858b 616 packet->priority = flow->key.phy.priority;
3025a772 617 packet->mark = flow->key.phy.skb_mark;
e0e57990 618
d6569377 619 rcu_read_lock();
a94ebc39 620 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
f7cd0081 621 err = -ENODEV;
e5cad958
BP
622 if (!dp)
623 goto err_unlock;
cc4015df 624
a6059080
AZ
625 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
626 if (!input_vport)
627 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
628
629 if (!input_vport)
630 goto err_unlock;
631
e23775f2 632 packet->dev = input_vport->dev;
a6059080 633 OVS_CB(packet)->input_vport = input_vport;
ad50cb60 634 sf_acts = rcu_dereference(flow->sf_acts);
a6059080 635
e9141eec 636 local_bh_disable();
7d16c847 637 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
e9141eec 638 local_bh_enable();
d6569377 639 rcu_read_unlock();
e0e57990 640
a1c564be 641 ovs_flow_free(flow, false);
e5cad958 642 return err;
064af421 643
e5cad958
BP
644err_unlock:
645 rcu_read_unlock();
9321954a 646err_flow_free:
a1c564be 647 ovs_flow_free(flow, false);
e5cad958
BP
648err_kfree_skb:
649 kfree_skb(packet);
650err:
d6569377 651 return err;
064af421
BP
652}
653
df2c07f4 654static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc 655 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
df2c07f4
JP
656 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
657 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
2e460098 658 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
a94ebc39 659 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
982b8810
BP
660};
661
18fd3a52 662static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 663 { .cmd = OVS_PACKET_CMD_EXECUTE,
a6a8674d 664 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
982b8810 665 .policy = packet_policy,
df2c07f4 666 .doit = ovs_packet_cmd_execute
982b8810
BP
667 }
668};
669
ba63fe26 670static struct genl_family dp_packet_genl_family __ro_after_init = {
cb25142c
PS
671 .hdrsize = sizeof(struct ovs_header),
672 .name = OVS_PACKET_FAMILY,
673 .version = OVS_PACKET_VERSION,
674 .maxattr = OVS_PACKET_ATTR_MAX,
675 .netnsok = true,
676 .parallel_ops = true,
677 .ops = dp_packet_genl_ops,
678 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
ba63fe26 679 .module = THIS_MODULE,
cb25142c
PS
680};
681
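/* Illustrative request sketch (not from the original source): an
 * OVS_PACKET_CMD_EXECUTE message handled by ovs_packet_cmd_execute() above
 * must carry, per packet_policy:
 *
 *	OVS_PACKET_ATTR_PACKET	- at least an Ethernet header (ETH_HLEN)
 *	OVS_PACKET_ATTR_KEY	- nested flow key for the packet
 *	OVS_PACKET_ATTR_ACTIONS	- nested actions to execute
 *	[OVS_PACKET_ATTR_PROBE]	- flag: suppress error logging
 *	[OVS_PACKET_ATTR_MRU]	- u16 maximum received unit
 */
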
f1f60b85 682static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
4fa72a95 683 struct ovs_dp_megaflow_stats *mega_stats)
064af421 684{
d6569377 685 int i;
f180c2e2 686
4fa72a95
AZ
687 memset(mega_stats, 0, sizeof(*mega_stats));
688
994dc286 689 stats->n_flows = ovs_flow_tbl_count(&dp->table);
4fa72a95 690 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
064af421 691
7257b535 692 stats->n_hit = stats->n_missed = stats->n_lost = 0;
4fa72a95 693
d6569377
BP
694 for_each_possible_cpu(i) {
695 const struct dp_stats_percpu *percpu_stats;
696 struct dp_stats_percpu local_stats;
821cb9fa 697 unsigned int start;
44e05eca 698
d6569377 699 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 700
d6569377 701 do {
b81deb15 702 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
d6569377 703 local_stats = *percpu_stats;
b81deb15 704 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
064af421 705
d6569377
BP
706 stats->n_hit += local_stats.n_hit;
707 stats->n_missed += local_stats.n_missed;
708 stats->n_lost += local_stats.n_lost;
4fa72a95 709 mega_stats->n_mask_hit += local_stats.n_mask_hit;
d6569377
BP
710 }
711}
064af421 712
bc619e29
JS
713static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
714{
715 return ovs_identifier_is_ufid(sfid) &&
716 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
717}
718
719static bool should_fill_mask(uint32_t ufid_flags)
720{
721 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
722}
723
724static bool should_fill_actions(uint32_t ufid_flags)
0afa2373 725{
bc619e29
JS
726 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
727}
728
729static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
730 const struct sw_flow_id *sfid,
731 uint32_t ufid_flags)
732{
733 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
734
735 /* OVS_FLOW_ATTR_UFID */
736 if (sfid && ovs_identifier_is_ufid(sfid))
737 len += nla_total_size(sfid->ufid_len);
738
739 /* OVS_FLOW_ATTR_KEY */
740 if (!sfid || should_fill_key(sfid, ufid_flags))
741 len += nla_total_size(ovs_key_attr_size());
742
743 /* OVS_FLOW_ATTR_MASK */
744 if (should_fill_mask(ufid_flags))
745 len += nla_total_size(ovs_key_attr_size());
746
747 /* OVS_FLOW_ATTR_ACTIONS */
748 if (should_fill_actions(ufid_flags))
c3bb15b3 749 len += nla_total_size(acts->orig_len);
bc619e29
JS
750
751 return len
91b37647 752 + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
0afa2373 753 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
91b37647 754 + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
0afa2373
TG
755}
756
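/* Worked example (illustrative only): for a dump requested with
 * OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK | OVS_UFID_F_OMIT_ACTIONS and a
 * flow that carries a UFID, ovs_flow_cmd_msg_size() reduces to
 *
 *	NLMSG_ALIGN(sizeof(struct ovs_header))
 *	+ nla_total_size(sfid->ufid_len)			// OVS_FLOW_ATTR_UFID
 *	+ nla_total_size_64bit(sizeof(struct ovs_flow_stats))	// STATS
 *	+ nla_total_size(1)					// TCP_FLAGS
 *	+ nla_total_size_64bit(8),				// USED
 *
 * i.e. only the identifier and statistics are reserved.
 */
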
f1948bb9
JS
757/* Called with ovs_mutex or RCU read lock. */
758static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
759 struct sk_buff *skb)
760{
761 struct ovs_flow_stats stats;
762 __be16 tcp_flags;
763 unsigned long used;
764
b0f3a2fe 765 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
f71db6b1 766
b0f3a2fe 767 if (used &&
89be7da8
PS
768 nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
769 OVS_FLOW_ATTR_PAD))
f1948bb9 770 return -EMSGSIZE;
d6569377 771
b0f3a2fe 772 if (stats.n_packets &&
91b37647
PS
773 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
774 sizeof(struct ovs_flow_stats), &stats,
775 OVS_FLOW_ATTR_PAD))
f1948bb9 776 return -EMSGSIZE;
b0b906cc 777
b0f3a2fe
PS
778 if ((u8)ntohs(tcp_flags) &&
779 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
f1948bb9
JS
780 return -EMSGSIZE;
781
782 return 0;
783}
784
785/* Called with ovs_mutex or RCU read lock. */
786static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
787 struct sk_buff *skb, int skb_orig_len)
788{
789 struct nlattr *start;
790 int err;
d6569377 791
df2c07f4 792 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
793 * this is the first flow to be dumped into 'skb'. This is unusual for
794 * Netlink but individual action lists can be longer than
795 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
796 * The userspace caller can always fetch the actions separately if it
797 * really wants them. (Most userspace callers in fact don't care.)
798 *
799 * This can only fail for dump operations because the skb is always
800 * properly sized for single flows.
801 */
9b405f1a 802 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee 803 if (start) {
f44ccce1
PS
804 const struct sw_flow_actions *sf_acts;
805
780ec6ae 806 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
a097c0b2
PS
807 err = ovs_nla_put_actions(sf_acts->actions,
808 sf_acts->actions_len, skb);
f71db6b1 809
0a25b039
BP
810 if (!err)
811 nla_nest_end(skb, start);
812 else {
813 if (skb_orig_len)
f1948bb9 814 return err;
0a25b039
BP
815
816 nla_nest_cancel(skb, start);
817 }
f1948bb9
JS
818 } else if (skb_orig_len) {
819 return -EMSGSIZE;
820 }
821
822 return 0;
823}
824
825/* Called with ovs_mutex or RCU read lock. */
2c622e5a 826static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
f1948bb9 827 struct sk_buff *skb, u32 portid,
bc619e29 828 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
f1948bb9
JS
829{
830 const int skb_orig_len = skb->len;
831 struct ovs_header *ovs_header;
832 int err;
833
7d16c847
PS
834 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
835 flags, cmd);
f1948bb9
JS
836 if (!ovs_header)
837 return -EMSGSIZE;
7d16c847 838
f1948bb9
JS
839 ovs_header->dp_ifindex = dp_ifindex;
840
bc619e29 841 err = ovs_nla_put_identifier(flow, skb);
db7f2238
JS
842 if (err)
843 goto error;
844
bc619e29
JS
845 if (should_fill_key(&flow->id, ufid_flags)) {
846 err = ovs_nla_put_masked_key(flow, skb);
847 if (err)
848 goto error;
849 }
850
851 if (should_fill_mask(ufid_flags)) {
852 err = ovs_nla_put_mask(flow, skb);
853 if (err)
854 goto error;
855 }
f1948bb9
JS
856
857 err = ovs_flow_cmd_fill_stats(flow, skb);
858 if (err)
859 goto error;
860
bc619e29
JS
861 if (should_fill_actions(ufid_flags)) {
862 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
863 if (err)
864 goto error;
865 }
37a1300c 866
23b48dc1
TG
867 genlmsg_end(skb, ovs_header);
868 return 0;
d6569377 869
37a1300c 870error:
df2c07f4 871 genlmsg_cancel(skb, ovs_header);
d6569377 872 return err;
44e05eca
BP
873}
874
f71db6b1
JR
875/* May not be called with RCU read lock. */
876static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
bc619e29 877 const struct sw_flow_id *sfid,
afad3556 878 struct genl_info *info,
bc619e29
JS
879 bool always,
880 uint32_t ufid_flags)
44e05eca 881{
afad3556 882 struct sk_buff *skb;
bc619e29 883 size_t len;
d6569377 884
114fce23
SG
885 if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
886 GROUP_ID(&ovs_dp_flow_multicast_group)))
afad3556
JR
887 return NULL;
888
bc619e29 889 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
40c08cda 890 skb = genlmsg_new(len, GFP_KERNEL);
afad3556
JR
891 if (!skb)
892 return ERR_PTR(-ENOMEM);
893
894 return skb;
37a1300c 895}
8d5ebd83 896
f71db6b1 897/* Called with ovs_mutex. */
7d16c847 898static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
f71db6b1
JR
899 int dp_ifindex,
900 struct genl_info *info, u8 cmd,
bc619e29 901 bool always, u32 ufid_flags)
37a1300c
BP
902{
903 struct sk_buff *skb;
904 int retval;
d6569377 905
bc619e29
JS
906 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
907 &flow->id, info, always, ufid_flags);
a6ddcc9a 908 if (IS_ERR_OR_NULL(skb))
afad3556 909 return skb;
d6569377 910
2c622e5a 911 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
f71db6b1 912 info->snd_portid, info->snd_seq, 0,
bc619e29 913 cmd, ufid_flags);
37a1300c 914 BUG_ON(retval < 0);
d6569377 915 return skb;
064af421
BP
916}
917
0c9fd022 918static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
064af421 919{
a94ebc39 920 struct net *net = sock_net(skb->sk);
37a1300c 921 struct nlattr **a = info->attrs;
df2c07f4 922 struct ovs_header *ovs_header = info->userhdr;
bc619e29 923 struct sw_flow *flow = NULL, *new_flow;
a1c564be 924 struct sw_flow_mask mask;
37a1300c 925 struct sk_buff *reply;
9c52546b 926 struct datapath *dp;
0c9fd022 927 struct sw_flow_actions *acts;
a1c564be 928 struct sw_flow_match match;
bc619e29 929 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
bc4a05c6 930 int error;
9233cef7 931 bool log = !a[OVS_FLOW_ATTR_PROBE];
064af421 932
6740b721 933 /* Must have key and actions. */
37a1300c 934 error = -EINVAL;
a473df5b 935 if (!a[OVS_FLOW_ATTR_KEY]) {
7d16c847 936 OVS_NLERR(log, "Flow key attr not present in new flow.");
37a1300c 937 goto error;
a473df5b
JG
938 }
939 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
7d16c847 940 OVS_NLERR(log, "Flow actions attr not present in new flow.");
6740b721 941 goto error;
a473df5b 942 }
a1c564be 943
6740b721 944 /* Most of the time we need to allocate a new flow, do it before
af465b67
PS
945 * locking.
946 */
6740b721
JR
947 new_flow = ovs_flow_alloc();
948 if (IS_ERR(new_flow)) {
949 error = PTR_ERR(new_flow);
950 goto error;
951 }
952
953 /* Extract key. */
9b94fa6c 954 ovs_match_init(&match, &new_flow->key, false, &mask);
038e34ab 955 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
9233cef7 956 a[OVS_FLOW_ATTR_MASK], log);
37a1300c 957 if (error)
6740b721 958 goto err_kfree_flow;
064af421 959
bc619e29
JS
960 /* Extract flow identifier. */
961 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
1d334d4f 962 &new_flow->key, log);
bc619e29
JS
963 if (error)
964 goto err_kfree_flow;
9b405f1a 965
1d334d4f 966 /* unmasked key is needed to match when ufid is not used. */
967 if (ovs_identifier_is_key(&new_flow->id))
968 match.key = new_flow->id.unmasked_key;
969
970 ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
971
6740b721 972 /* Validate actions. */
a94ebc39
JS
973 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
974 &new_flow->key, &acts, log);
0c9fd022 975 if (error) {
7d16c847 976 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
4f67b12a 977 goto err_kfree_flow;
6740b721
JR
978 }
979
bc619e29
JS
980 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
981 ufid_flags);
6740b721
JR
982 if (IS_ERR(reply)) {
983 error = PTR_ERR(reply);
984 goto err_kfree_acts;
37a1300c
BP
985 }
986
cd2a59e9 987 ovs_lock();
a94ebc39 988 dp = get_dp(net, ovs_header->dp_ifindex);
6740b721
JR
989 if (unlikely(!dp)) {
990 error = -ENODEV;
cd2a59e9 991 goto err_unlock_ovs;
6740b721 992 }
bc619e29 993
a1c564be 994 /* Check if this is a duplicate flow */
bc619e29
JS
995 if (ovs_identifier_is_ufid(&new_flow->id))
996 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
997 if (!flow)
1d334d4f 998 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
6740b721
JR
999 if (likely(!flow)) {
1000 rcu_assign_pointer(new_flow->sf_acts, acts);
d6569377 1001
d6569377 1002 /* Put flow in bucket. */
6740b721
JR
1003 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1004 if (unlikely(error)) {
0585f7a8 1005 acts = NULL;
6740b721
JR
1006 goto err_unlock_ovs;
1007 }
1008
1009 if (unlikely(reply)) {
2c622e5a 1010 error = ovs_flow_cmd_fill_info(new_flow,
6740b721
JR
1011 ovs_header->dp_ifindex,
1012 reply, info->snd_portid,
1013 info->snd_seq, 0,
bc619e29
JS
1014 OVS_FLOW_CMD_NEW,
1015 ufid_flags);
6740b721 1016 BUG_ON(error < 0);
0585f7a8 1017 }
6740b721 1018 ovs_unlock();
d6569377 1019 } else {
0c9fd022
JR
1020 struct sw_flow_actions *old_acts;
1021
d6569377
BP
1022 /* Bail out if we're not allowed to modify an existing flow.
1023 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1024 * because Generic Netlink treats the latter as a dump
1025 * request. We also accept NLM_F_EXCL in case that bug ever
1026 * gets fixed.
1027 */
6740b721
JR
1028 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1029 | NLM_F_EXCL))) {
1030 error = -EEXIST;
cd2a59e9 1031 goto err_unlock_ovs;
6740b721 1032 }
bc619e29
JS
1033 /* The flow identifier has to be the same for flow updates.
1034 * Look for any overlapping flow.
1035 */
1036 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1037 if (ovs_identifier_is_key(&flow->id))
1038 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1039 &match);
1040 else /* UFID matches but key is different */
1041 flow = NULL;
3440e4bc
AW
1042 if (!flow) {
1043 error = -ENOENT;
1044 goto err_unlock_ovs;
1045 }
6740b721 1046 }
0c9fd022
JR
1047 /* Update actions. */
1048 old_acts = ovsl_dereference(flow->sf_acts);
1049 rcu_assign_pointer(flow->sf_acts, acts);
0c9fd022 1050
6740b721 1051 if (unlikely(reply)) {
2c622e5a 1052 error = ovs_flow_cmd_fill_info(flow,
6740b721
JR
1053 ovs_header->dp_ifindex,
1054 reply, info->snd_portid,
1055 info->snd_seq, 0,
bc619e29
JS
1056 OVS_FLOW_CMD_NEW,
1057 ufid_flags);
6740b721
JR
1058 BUG_ON(error < 0);
1059 }
1060 ovs_unlock();
0c9fd022 1061
e23775f2 1062 ovs_nla_free_flow_actions_rcu(old_acts);
6740b721 1063 ovs_flow_free(new_flow, false);
0c9fd022 1064 }
6740b721
JR
1065
1066 if (reply)
cb25142c 1067 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
0c9fd022
JR
1068 return 0;
1069
0c9fd022
JR
1070err_unlock_ovs:
1071 ovs_unlock();
6740b721
JR
1072 kfree_skb(reply);
1073err_kfree_acts:
e23775f2 1074 ovs_nla_free_flow_actions(acts);
6740b721
JR
1075err_kfree_flow:
1076 ovs_flow_free(new_flow, false);
0c9fd022
JR
1077error:
1078 return error;
1079}
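
/* Usage sketch for the command above (illustrative, not from the original
 * source): a typical OVS_FLOW_CMD_NEW request carries OVS_FLOW_ATTR_KEY,
 * optionally OVS_FLOW_ATTR_MASK and OVS_FLOW_ATTR_UFID, plus
 * OVS_FLOW_ATTR_ACTIONS, and sets NLM_F_CREATE.  Re-sending it for an
 * existing flow therefore fails with -EEXIST, while a modify without
 * NLM_F_CREATE/NLM_F_EXCL falls through to the "update actions" branch.
 * A reply is generated only when NLM_F_ECHO is set or a multicast listener
 * is present (see ovs_must_notify()).
 */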
1080
cc561abf 1081/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
a94ebc39
JS
1082static struct sw_flow_actions *get_flow_actions(struct net *net,
1083 const struct nlattr *a,
cc561abf 1084 const struct sw_flow_key *key,
9233cef7
JR
1085 const struct sw_flow_mask *mask,
1086 bool log)
cc561abf
PS
1087{
1088 struct sw_flow_actions *acts;
1089 struct sw_flow_key masked_key;
1090 int error;
1091
ad4adec2 1092 ovs_flow_mask_key(&masked_key, key, true, mask);
a94ebc39 1093 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
cc561abf 1094 if (error) {
9233cef7 1095 OVS_NLERR(log,
7d16c847 1096 "Actions may not be safe on all matching packets");
cc561abf
PS
1097 return ERR_PTR(error);
1098 }
1099
1100 return acts;
1101}
1102
0c9fd022
JR
1103static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1104{
a94ebc39 1105 struct net *net = sock_net(skb->sk);
0c9fd022
JR
1106 struct nlattr **a = info->attrs;
1107 struct ovs_header *ovs_header = info->userhdr;
1d2a1b5f 1108 struct sw_flow_key key;
0c9fd022
JR
1109 struct sw_flow *flow;
1110 struct sw_flow_mask mask;
1111 struct sk_buff *reply = NULL;
1112 struct datapath *dp;
6740b721 1113 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
0c9fd022 1114 struct sw_flow_match match;
bc619e29
JS
1115 struct sw_flow_id sfid;
1116 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
b24baa1a 1117 int error = 0;
9233cef7 1118 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1119 bool ufid_present;
0c9fd022 1120
bc619e29 1121 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
b24baa1a 1122 if (a[OVS_FLOW_ATTR_KEY]) {
9b94fa6c 1123 ovs_match_init(&match, &key, true, &mask);
b24baa1a
PS
1124 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1125 a[OVS_FLOW_ATTR_MASK], log);
1126 } else if (!ufid_present) {
1127 OVS_NLERR(log,
1128 "Flow set message rejected, Key attribute missing.");
1129 error = -EINVAL;
1130 }
0c9fd022
JR
1131 if (error)
1132 goto error;
d6569377 1133
0c9fd022
JR
1134 /* Validate actions. */
1135 if (a[OVS_FLOW_ATTR_ACTIONS]) {
b24baa1a
PS
1136 if (!a[OVS_FLOW_ATTR_KEY]) {
1137 OVS_NLERR(log,
1138 "Flow key attribute not present in set flow.");
1139 error = -EINVAL;
1140 goto error;
1141 }
1142
a94ebc39
JS
1143 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1144 &mask, log);
cc561abf
PS
1145 if (IS_ERR(acts)) {
1146 error = PTR_ERR(acts);
0c9fd022 1147 goto error;
6740b721 1148 }
6740b721 1149
ff27161e 1150 /* Can allocate before locking if have acts. */
bc619e29
JS
1151 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1152 ufid_flags);
6740b721
JR
1153 if (IS_ERR(reply)) {
1154 error = PTR_ERR(reply);
1155 goto err_kfree_acts;
90b8c2f7 1156 }
0c9fd022
JR
1157 }
1158
1159 ovs_lock();
a94ebc39 1160 dp = get_dp(net, ovs_header->dp_ifindex);
6740b721
JR
1161 if (unlikely(!dp)) {
1162 error = -ENODEV;
0c9fd022 1163 goto err_unlock_ovs;
6740b721 1164 }
0c9fd022 1165 /* Check that the flow exists. */
bc619e29
JS
1166 if (ufid_present)
1167 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1168 else
1169 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
6740b721
JR
1170 if (unlikely(!flow)) {
1171 error = -ENOENT;
0c9fd022 1172 goto err_unlock_ovs;
6740b721 1173 }
3440e4bc 1174
0c9fd022 1175 /* Update actions, if present. */
6740b721 1176 if (likely(acts)) {
0c9fd022
JR
1177 old_acts = ovsl_dereference(flow->sf_acts);
1178 rcu_assign_pointer(flow->sf_acts, acts);
6740b721
JR
1179
1180 if (unlikely(reply)) {
2c622e5a 1181 error = ovs_flow_cmd_fill_info(flow,
6740b721
JR
1182 ovs_header->dp_ifindex,
1183 reply, info->snd_portid,
1184 info->snd_seq, 0,
bc619e29
JS
1185 OVS_FLOW_CMD_NEW,
1186 ufid_flags);
6740b721
JR
1187 BUG_ON(error < 0);
1188 }
1189 } else {
1190 /* Could not alloc without acts before locking. */
7d16c847 1191 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
bc619e29
JS
1192 info, OVS_FLOW_CMD_NEW, false,
1193 ufid_flags);
1194
6740b721
JR
1195 if (unlikely(IS_ERR(reply))) {
1196 error = PTR_ERR(reply);
1197 goto err_unlock_ovs;
1198 }
9c52546b 1199 }
0c9fd022 1200
0c9fd022
JR
1201 /* Clear stats. */
1202 if (a[OVS_FLOW_ATTR_CLEAR])
1203 ovs_flow_stats_clear(flow);
cd2a59e9 1204 ovs_unlock();
37a1300c 1205
6740b721 1206 if (reply)
cb25142c 1207 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
6740b721 1208 if (old_acts)
e23775f2 1209 ovs_nla_free_flow_actions_rcu(old_acts);
7d16c847 1210
d6569377 1211 return 0;
704a1e09 1212
cd2a59e9
PS
1213err_unlock_ovs:
1214 ovs_unlock();
6740b721
JR
1215 kfree_skb(reply);
1216err_kfree_acts:
e23775f2 1217 ovs_nla_free_flow_actions(acts);
37a1300c 1218error:
9c52546b 1219 return error;
704a1e09
BP
1220}
1221
df2c07f4 1222static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 1223{
37a1300c 1224 struct nlattr **a = info->attrs;
df2c07f4 1225 struct ovs_header *ovs_header = info->userhdr;
038e34ab 1226 struct net *net = sock_net(skb->sk);
37a1300c 1227 struct sw_flow_key key;
37a1300c 1228 struct sk_buff *reply;
704a1e09 1229 struct sw_flow *flow;
9c52546b 1230 struct datapath *dp;
a1c564be 1231 struct sw_flow_match match;
bc619e29
JS
1232 struct sw_flow_id ufid;
1233 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1234 int err = 0;
9233cef7 1235 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1236 bool ufid_present;
704a1e09 1237
bc619e29
JS
1238 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1239 if (a[OVS_FLOW_ATTR_KEY]) {
9b94fa6c 1240 ovs_match_init(&match, &key, true, NULL);
038e34ab 1241 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
bc619e29
JS
1242 log);
1243 } else if (!ufid_present) {
9233cef7
JR
1244 OVS_NLERR(log,
1245 "Flow get message rejected, Key attribute missing.");
bc619e29 1246 err = -EINVAL;
1b936472 1247 }
37a1300c
BP
1248 if (err)
1249 return err;
704a1e09 1250
cd2a59e9 1251 ovs_lock();
2a4999f3 1252 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
1253 if (!dp) {
1254 err = -ENODEV;
1255 goto unlock;
1256 }
704a1e09 1257
bc619e29
JS
1258 if (ufid_present)
1259 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1260 else
1261 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
3440e4bc 1262 if (!flow) {
cd2a59e9
PS
1263 err = -ENOENT;
1264 goto unlock;
1265 }
d6569377 1266
7d16c847 1267 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
bc619e29 1268 OVS_FLOW_CMD_NEW, true, ufid_flags);
cd2a59e9
PS
1269 if (IS_ERR(reply)) {
1270 err = PTR_ERR(reply);
1271 goto unlock;
1272 }
36956a7d 1273
cd2a59e9 1274 ovs_unlock();
37a1300c 1275 return genlmsg_reply(reply, info);
cd2a59e9
PS
1276unlock:
1277 ovs_unlock();
1278 return err;
d6569377 1279}
9c52546b 1280
df2c07f4 1281static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 1282{
37a1300c 1283 struct nlattr **a = info->attrs;
df2c07f4 1284 struct ovs_header *ovs_header = info->userhdr;
038e34ab 1285 struct net *net = sock_net(skb->sk);
37a1300c 1286 struct sw_flow_key key;
37a1300c 1287 struct sk_buff *reply;
bc619e29 1288 struct sw_flow *flow = NULL;
d6569377 1289 struct datapath *dp;
a1c564be 1290 struct sw_flow_match match;
bc619e29
JS
1291 struct sw_flow_id ufid;
1292 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
d6569377 1293 int err;
9233cef7 1294 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1295 bool ufid_present;
36956a7d 1296
bc619e29
JS
1297 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1298 if (a[OVS_FLOW_ATTR_KEY]) {
9b94fa6c 1299 ovs_match_init(&match, &key, true, NULL);
038e34ab
JS
1300 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1301 NULL, log);
cde7f3ba
JR
1302 if (unlikely(err))
1303 return err;
1304 }
1305
cd2a59e9 1306 ovs_lock();
2a4999f3 1307 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cde7f3ba 1308 if (unlikely(!dp)) {
cd2a59e9
PS
1309 err = -ENODEV;
1310 goto unlock;
1311 }
7d16c847 1312
bc619e29 1313 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
994dc286 1314 err = ovs_flow_tbl_flush(&dp->table);
cd2a59e9
PS
1315 goto unlock;
1316 }
7d16c847 1317
bc619e29
JS
1318 if (ufid_present)
1319 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1320 else
1321 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
3440e4bc 1322 if (unlikely(!flow)) {
cd2a59e9
PS
1323 err = -ENOENT;
1324 goto unlock;
1325 }
d6569377 1326
994dc286 1327 ovs_flow_tbl_remove(&dp->table, flow);
cde7f3ba 1328 ovs_unlock();
37a1300c 1329
46051cf8 1330 reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
bc619e29 1331 &flow->id, info, false, ufid_flags);
cde7f3ba
JR
1332
1333 if (likely(reply)) {
1334 if (likely(!IS_ERR(reply))) {
7d16c847
PS
1335 rcu_read_lock(); /*To keep RCU checker happy. */
1336 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
cde7f3ba
JR
1337 reply, info->snd_portid,
1338 info->snd_seq, 0,
bc619e29
JS
1339 OVS_FLOW_CMD_DEL,
1340 ufid_flags);
cde7f3ba
JR
1341 rcu_read_unlock();
1342 BUG_ON(err < 0);
cb25142c 1343 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
cde7f3ba 1344 } else {
cb25142c
PS
1345 genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
1346 GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));
1347
cde7f3ba 1348 }
afad3556 1349 }
37a1300c 1350
a1c564be 1351 ovs_flow_free(flow, true);
37a1300c 1352 return 0;
cd2a59e9
PS
1353unlock:
1354 ovs_unlock();
1355 return err;
37a1300c
BP
1356}
1357
df2c07f4 1358static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1359{
bc619e29 1360 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
df2c07f4 1361 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
994dc286 1362 struct table_instance *ti;
37a1300c 1363 struct datapath *dp;
bc619e29
JS
1364 u32 ufid_flags;
1365 int err;
1366
1367 err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
15702dc9 1368 OVS_FLOW_ATTR_MAX, flow_policy, NULL);
bc619e29
JS
1369 if (err)
1370 return err;
1371 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
37a1300c 1372
f44ccce1 1373 rcu_read_lock();
01ac0970 1374 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9 1375 if (!dp) {
f44ccce1 1376 rcu_read_unlock();
37a1300c 1377 return -ENODEV;
cd2a59e9 1378 }
37a1300c 1379
994dc286 1380 ti = rcu_dereference(dp->table.ti);
37a1300c 1381 for (;;) {
37a1300c
BP
1382 struct sw_flow *flow;
1383 u32 bucket, obj;
1384
1385 bucket = cb->args[0];
1386 obj = cb->args[1];
994dc286 1387 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
3544358a 1388 if (!flow)
37a1300c
BP
1389 break;
1390
2c622e5a 1391 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
28aea917 1392 NETLINK_CB(cb->skb).portid,
37a1300c 1393 cb->nlh->nlmsg_seq, NLM_F_MULTI,
bc619e29 1394 OVS_FLOW_CMD_NEW, ufid_flags) < 0)
37a1300c
BP
1395 break;
1396
1397 cb->args[0] = bucket;
1398 cb->args[1] = obj;
1399 }
f44ccce1 1400 rcu_read_unlock();
37a1300c 1401 return skb->len;
704a1e09
BP
1402}
1403
cb25142c
PS
1404static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1405 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
9233cef7 1406 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
cb25142c
PS
1407 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1408 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
9233cef7 1409 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
bc619e29
JS
1410 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1411 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
cb25142c
PS
1412};
1413
18fd3a52 1414static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1415 { .cmd = OVS_FLOW_CMD_NEW,
a6a8674d 1416 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
37a1300c 1417 .policy = flow_policy,
0c9fd022 1418 .doit = ovs_flow_cmd_new
37a1300c 1419 },
df2c07f4 1420 { .cmd = OVS_FLOW_CMD_DEL,
a6a8674d 1421 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
37a1300c 1422 .policy = flow_policy,
df2c07f4 1423 .doit = ovs_flow_cmd_del
37a1300c 1424 },
df2c07f4 1425 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1426 .flags = 0, /* OK for unprivileged users. */
1427 .policy = flow_policy,
df2c07f4
JP
1428 .doit = ovs_flow_cmd_get,
1429 .dumpit = ovs_flow_cmd_dump
37a1300c 1430 },
df2c07f4 1431 { .cmd = OVS_FLOW_CMD_SET,
a6a8674d 1432 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
37a1300c 1433 .policy = flow_policy,
0c9fd022 1434 .doit = ovs_flow_cmd_set,
37a1300c
BP
1435 },
1436};
1437
ba63fe26 1438static struct genl_family dp_flow_genl_family __ro_after_init = {
df2c07f4 1439 .hdrsize = sizeof(struct ovs_header),
cb25142c
PS
1440 .name = OVS_FLOW_FAMILY,
1441 .version = OVS_FLOW_VERSION,
1442 .maxattr = OVS_FLOW_ATTR_MAX,
b3dcb73c 1443 .netnsok = true,
cb25142c
PS
1444 .parallel_ops = true,
1445 .ops = dp_flow_genl_ops,
1446 .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1447 .mcgrps = &ovs_dp_flow_multicast_group,
1448 .n_mcgrps = 1,
ba63fe26 1449 .module = THIS_MODULE,
aaff4b55
BP
1450};
1451
0afa2373
TG
1452static size_t ovs_dp_cmd_msg_size(void)
1453{
1454 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1455
1456 msgsize += nla_total_size(IFNAMSIZ);
91b37647
PS
1457 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1458 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
300af20a 1459 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
0afa2373
TG
1460
1461 return msgsize;
1462}
1463
d637497c 1464/* Called with ovs_mutex. */
df2c07f4 1465static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1466 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1467{
df2c07f4 1468 struct ovs_header *ovs_header;
e926dfe3 1469 struct ovs_dp_stats dp_stats;
4fa72a95 1470 struct ovs_dp_megaflow_stats dp_megaflow_stats;
064af421
BP
1471 int err;
1472
28aea917 1473 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1474 flags, cmd);
df2c07f4 1475 if (!ovs_header)
aaff4b55 1476 goto error;
064af421 1477
b063d9f0 1478 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1479
850b6b3b 1480 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
064af421 1481 if (err)
d6569377 1482 goto nla_put_failure;
064af421 1483
4fa72a95 1484 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
91b37647
PS
1485 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1486 &dp_stats, OVS_DP_ATTR_PAD))
4fa72a95
AZ
1487 goto nla_put_failure;
1488
91b37647
PS
1489 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1490 sizeof(struct ovs_dp_megaflow_stats),
1491 &dp_megaflow_stats, OVS_DP_ATTR_PAD))
c3cc8c03 1492 goto nla_put_failure;
d6569377 1493
c58cc9a4
TG
1494 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1495 goto nla_put_failure;
1496
23b48dc1
TG
1497 genlmsg_end(skb, ovs_header);
1498 return 0;
d6569377
BP
1499
1500nla_put_failure:
df2c07f4 1501 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1502error:
1503 return -EMSGSIZE;
d6569377
BP
1504}
1505
40c08cda 1506static struct sk_buff *ovs_dp_cmd_alloc_info(void)
d6569377 1507{
40c08cda 1508 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
aaff4b55 1509}
9dca7bd5 1510
aa917006 1511/* Called with rcu_read_lock or ovs_mutex. */
2a4999f3 1512static struct datapath *lookup_datapath(struct net *net,
f1f60b85 1513 const struct ovs_header *ovs_header,
6455100f 1514 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1515{
254f2dc8
BP
1516 struct datapath *dp;
1517
df2c07f4 1518 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1519 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1520 else {
d6569377 1521 struct vport *vport;
d6569377 1522
2a4999f3 1523 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1524 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
d6569377 1525 }
254f2dc8 1526 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1527}
1528
94358dcf
TG
1529static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1530{
1531 struct datapath *dp;
1532
1533 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
09350a3d 1534 if (IS_ERR(dp))
94358dcf
TG
1535 return;
1536
1537 WARN(dp->user_features, "Dropping previously announced user features\n");
1538 dp->user_features = 0;
1539}
1540
f1f60b85 1541static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
c58cc9a4
TG
1542{
1543 if (a[OVS_DP_ATTR_USER_FEATURES])
1544 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1545}
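
/* Note (illustrative, not from the original source): the feature bits stored
 * here gate behaviour elsewhere in this file, e.g.
 *
 *	if (dp->user_features & OVS_DP_F_UNALIGNED)
 *		hlen = skb_zerocopy_headlen(skb);
 *
 * in queue_userspace_packet(), so announcing OVS_DP_F_UNALIGNED at datapath
 * creation is what enables the zerocopy upcall path.
 */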
1546
df2c07f4 1547static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1548{
aaff4b55 1549 struct nlattr **a = info->attrs;
d6569377 1550 struct vport_parms parms;
aaff4b55 1551 struct sk_buff *reply;
d6569377
BP
1552 struct datapath *dp;
1553 struct vport *vport;
2a4999f3 1554 struct ovs_net *ovs_net;
95b1d73a 1555 int err, i;
d6569377 1556
d6569377 1557 err = -EINVAL;
ea36840f 1558 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1559 goto err;
1560
40c08cda 1561 reply = ovs_dp_cmd_alloc_info();
d81eef1b
JR
1562 if (!reply)
1563 return -ENOMEM;
d6569377 1564
d6569377
BP
1565 err = -ENOMEM;
1566 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1567 if (dp == NULL)
d81eef1b 1568 goto err_free_reply;
2a4999f3 1569
c0cddcec 1570 ovs_dp_set_net(dp, sock_net(skb->sk));
0ceaa66c 1571
d6569377 1572 /* Allocate table. */
994dc286
PS
1573 err = ovs_flow_tbl_init(&dp->table);
1574 if (err)
d6569377
BP
1575 goto err_free_dp;
1576
08fb1bbd 1577 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
99769a40
JG
1578 if (!dp->stats_percpu) {
1579 err = -ENOMEM;
1580 goto err_destroy_table;
1581 }
1582
95b1d73a
PS
1583 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1584 GFP_KERNEL);
1585 if (!dp->ports) {
1586 err = -ENOMEM;
1587 goto err_destroy_percpu;
1588 }
1589
1590 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1591 INIT_HLIST_HEAD(&dp->ports[i]);
1592
d6569377 1593 /* Set up our datapath device. */
df2c07f4
JP
1594 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1595 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1596 parms.options = NULL;
1597 parms.dp = dp;
df2c07f4 1598 parms.port_no = OVSP_LOCAL;
beb1c69a 1599 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
b063d9f0 1600
c58cc9a4
TG
1601 ovs_dp_change(dp, a);
1602
d81eef1b
JR
1603 /* So far only local changes have been made, now need the lock. */
1604 ovs_lock();
1605
d6569377
BP
1606 vport = new_vport(&parms);
1607 if (IS_ERR(vport)) {
1608 err = PTR_ERR(vport);
1609 if (err == -EBUSY)
1610 err = -EEXIST;
1611
94358dcf
TG
1612 if (err == -EEXIST) {
1613 /* An outdated user space instance that does not understand
1614 * the concept of user_features has attempted to create a new
1615 * datapath and is likely to reuse it. Drop all user features.
1616 */
1617 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1618 ovs_dp_reset_user_features(skb, info);
1619 }
1620
95b1d73a 1621 goto err_destroy_ports_array;
d6569377 1622 }
d6569377 1623
d81eef1b
JR
1624 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1625 info->snd_seq, 0, OVS_DP_CMD_NEW);
1626 BUG_ON(err < 0);
aaff4b55 1627
2a4999f3 1628 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
fb93e9aa 1629 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
a0fb56c1 1630
cd2a59e9 1631 ovs_unlock();
d6569377 1632
cb25142c 1633 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
d6569377
BP
1634 return 0;
1635
95b1d73a 1636err_destroy_ports_array:
d81eef1b 1637 ovs_unlock();
95b1d73a 1638 kfree(dp->ports);
99769a40
JG
1639err_destroy_percpu:
1640 free_percpu(dp->stats_percpu);
d6569377 1641err_destroy_table:
e379e4d1 1642 ovs_flow_tbl_destroy(&dp->table);
d6569377 1643err_free_dp:
d6569377 1644 kfree(dp);
d81eef1b
JR
1645err_free_reply:
1646 kfree_skb(reply);
d6569377 1647err:
064af421
BP
1648 return err;
1649}
1650
cd2a59e9 1651/* Called with ovs_mutex. */
2a4999f3 1652static void __dp_destroy(struct datapath *dp)
44e05eca 1653{
95b1d73a 1654 int i;
44e05eca 1655
95b1d73a
PS
1656 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1657 struct vport *vport;
f8dfbcb7 1658 struct hlist_node *n;
95b1d73a 1659
f8dfbcb7 1660 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1661 if (vport->port_no != OVSP_LOCAL)
1662 ovs_dp_detach_port(vport);
1663 }
ed099e92 1664
fb93e9aa 1665 list_del_rcu(&dp->list_node);
ed099e92 1666
cd2a59e9 1667 /* OVSP_LOCAL is datapath internal port. We need to make sure that
d103f479
AZ
1668 * all ports in datapath are destroyed first before freeing datapath.
1669 */
cd2a59e9 1670 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1671
d103f479 1672 /* RCU destroy the flow table */
ed099e92 1673 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1674}
1675
1676static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1677{
1678 struct sk_buff *reply;
1679 struct datapath *dp;
1680 int err;
1681
40c08cda 1682 reply = ovs_dp_cmd_alloc_info();
d81eef1b
JR
1683 if (!reply)
1684 return -ENOMEM;
1685
cd2a59e9 1686 ovs_lock();
2a4999f3
PS
1687 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1688 err = PTR_ERR(dp);
1689 if (IS_ERR(dp))
d81eef1b 1690 goto err_unlock_free;
2a4999f3 1691
d81eef1b
JR
1692 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1693 info->snd_seq, 0, OVS_DP_CMD_DEL);
1694 BUG_ON(err < 0);
2a4999f3
PS
1695
1696 __dp_destroy(dp);
d81eef1b 1697 ovs_unlock();
7d16c847 1698
cb25142c 1699 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
99620d2c 1700 return 0;
d81eef1b
JR
1701
1702err_unlock_free:
cd2a59e9 1703 ovs_unlock();
d81eef1b 1704 kfree_skb(reply);
cd2a59e9 1705 return err;
44e05eca
BP
1706}
1707
df2c07f4 1708static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1709{
aaff4b55 1710 struct sk_buff *reply;
d6569377 1711 struct datapath *dp;
d6569377 1712 int err;
064af421 1713
40c08cda 1714 reply = ovs_dp_cmd_alloc_info();
d81eef1b
JR
1715 if (!reply)
1716 return -ENOMEM;
1717
cd2a59e9 1718 ovs_lock();
2a4999f3 1719 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1720 err = PTR_ERR(dp);
d6569377 1721 if (IS_ERR(dp))
d81eef1b 1722 goto err_unlock_free;
38c6ecbc 1723
c58cc9a4
TG
1724 ovs_dp_change(dp, info->attrs);
1725
d81eef1b
JR
1726 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1727 info->snd_seq, 0, OVS_DP_CMD_NEW);
1728 BUG_ON(err < 0);
a0fb56c1 1729
cd2a59e9 1730 ovs_unlock();
7d16c847 1731
cb25142c 1732 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
aaff4b55 1733 return 0;
d81eef1b
JR
1734
1735err_unlock_free:
cd2a59e9 1736 ovs_unlock();
d81eef1b 1737 kfree_skb(reply);
cd2a59e9 1738 return err;
064af421
BP
1739}
1740
df2c07f4 1741static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1742{
aaff4b55 1743 struct sk_buff *reply;
d6569377 1744 struct datapath *dp;
d6569377 1745 int err;
1dcf111b 1746
40c08cda 1747 reply = ovs_dp_cmd_alloc_info();
d81eef1b
JR
1748 if (!reply)
1749 return -ENOMEM;
1750
d637497c 1751 ovs_lock();
2a4999f3 1752 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1753 if (IS_ERR(dp)) {
1754 err = PTR_ERR(dp);
d81eef1b 1755 goto err_unlock_free;
cd2a59e9 1756 }
d81eef1b
JR
1757 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1758 info->snd_seq, 0, OVS_DP_CMD_NEW);
1759 BUG_ON(err < 0);
d637497c 1760 ovs_unlock();
aaff4b55
BP
1761
1762 return genlmsg_reply(reply, info);
cd2a59e9 1763
d81eef1b 1764err_unlock_free:
d637497c 1765 ovs_unlock();
d81eef1b 1766 kfree_skb(reply);
cd2a59e9 1767 return err;
1dcf111b
JP
1768}
1769
df2c07f4 1770static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1771{
2a4999f3 1772 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1773 struct datapath *dp;
1774 int skip = cb->args[0];
1775 int i = 0;
a7786963 1776
d637497c
PS
1777 ovs_lock();
1778 list_for_each_entry(dp, &ovs_net->dps, list_node) {
a2bab2f0 1779 if (i >= skip &&
28aea917 1780 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1781 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1782 OVS_DP_CMD_NEW) < 0)
aaff4b55 1783 break;
254f2dc8 1784 i++;
a7786963 1785 }
d637497c 1786 ovs_unlock();
aaff4b55 1787
254f2dc8
BP
1788 cb->args[0] = i;
1789
aaff4b55 1790 return skb->len;
c19e6535
BP
1791}
1792
cb25142c
PS
1793static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1794 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1795 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1796 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1797};
1798
18fd3a52 1799static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1800 { .cmd = OVS_DP_CMD_NEW,
a6a8674d 1801	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
aaff4b55 1802 .policy = datapath_policy,
df2c07f4 1803 .doit = ovs_dp_cmd_new
aaff4b55 1804 },
df2c07f4 1805 { .cmd = OVS_DP_CMD_DEL,
a6a8674d 1806	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
aaff4b55 1807 .policy = datapath_policy,
df2c07f4 1808 .doit = ovs_dp_cmd_del
aaff4b55 1809 },
df2c07f4 1810 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1811 .flags = 0, /* OK for unprivileged users. */
1812 .policy = datapath_policy,
df2c07f4
JP
1813 .doit = ovs_dp_cmd_get,
1814 .dumpit = ovs_dp_cmd_dump
aaff4b55 1815 },
df2c07f4 1816 { .cmd = OVS_DP_CMD_SET,
a6a8674d 1817	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
aaff4b55 1818 .policy = datapath_policy,
df2c07f4 1819 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1820 },
1821};
1822
ba63fe26 1823static struct genl_family dp_datapath_genl_family __ro_after_init = {
df2c07f4 1824 .hdrsize = sizeof(struct ovs_header),
cb25142c
PS
1825 .name = OVS_DATAPATH_FAMILY,
1826 .version = OVS_DATAPATH_VERSION,
1827 .maxattr = OVS_DP_ATTR_MAX,
b3dcb73c 1828 .netnsok = true,
cb25142c
PS
1829 .parallel_ops = true,
1830 .ops = dp_datapath_genl_ops,
1831 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1832 .mcgrps = &ovs_dp_datapath_multicast_group,
1833 .n_mcgrps = 1,
ba63fe26 1834 .module = THIS_MODULE,
f0fef760
BP
1835};
1836
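The family, policy, and ops tables above are what user-space programs (ovs-vswitchd, ovs-dpctl) talk to over generic netlink. As a rough illustration only, not part of datapath.c, the following user-space sketch uses libnl-3 to resolve OVS_DATAPATH_FAMILY and drive ovs_dp_cmd_new(); the datapath name "dp-test" and the upcall PID of 0 are assumptions made for the example.

/* Illustrative user-space sketch (libnl-3), not kernel code.
 * Build with e.g.: cc sketch.c $(pkg-config --cflags --libs libnl-genl-3.0) */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	struct ovs_header *hdr;
	int family, err;

	if (!sk || genl_connect(sk))
		return 1;

	/* Resolve the numeric id assigned when dp_register_genl() ran. */
	family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	if (!msg)
		return 1;
	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
			  sizeof(*hdr), NLM_F_REQUEST | NLM_F_ACK,
			  OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
	if (!hdr)
		return 1;
	hdr->dp_ifindex = 0;	/* unused when creating a datapath */

	/* Attributes are validated against datapath_policy[] above. */
	nla_put_string(msg, OVS_DP_ATTR_NAME, "dp-test");
	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, 0);	/* 0: no upcall listener */

	err = nl_send_auto(sk, msg);
	if (err >= 0)
		err = nl_recvmsgs_default(sk);	/* wait for the kernel's ACK */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0 ? 1 : 0;
}

The reply carries the same attribute layout that ovs_dp_cmd_fill_info() writes, so a caller can read the assigned dp_ifindex back out of the returned ovs_header.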
cd2a59e9 1837/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1838static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1839 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1840{
df2c07f4 1841 struct ovs_header *ovs_header;
e926dfe3 1842 struct ovs_vport_stats vport_stats;
c19e6535
BP
1843 int err;
1844
28aea917 1845 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1846 flags, cmd);
df2c07f4 1847 if (!ovs_header)
f0fef760 1848 return -EMSGSIZE;
c19e6535 1849
99769a40 1850 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1851
c3cc8c03
DM
1852 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1853 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
e23775f2
PS
1854 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1855 ovs_vport_name(vport)))
c3cc8c03 1856 goto nla_put_failure;
c19e6535 1857
850b6b3b 1858 ovs_vport_get_stats(vport, &vport_stats);
91b37647
PS
1859 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1860 sizeof(struct ovs_vport_stats), &vport_stats,
1861 OVS_VPORT_ATTR_PAD))
c3cc8c03 1862 goto nla_put_failure;
c19e6535 1863
beb1c69a
AW
1864 if (ovs_vport_get_upcall_portids(vport, skb))
1865 goto nla_put_failure;
1866
850b6b3b 1867 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1868 if (err == -EMSGSIZE)
1869 goto error;
c19e6535 1870
23b48dc1
TG
1871 genlmsg_end(skb, ovs_header);
1872 return 0;
c19e6535
BP
1873
1874nla_put_failure:
1875 err = -EMSGSIZE;
f0fef760 1876error:
df2c07f4 1877 genlmsg_cancel(skb, ovs_header);
f0fef760 1878 return err;
064af421
BP
1879}
1880
d81eef1b
JR
1881static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1882{
1883 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1884}
1885
1886/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
28aea917 1887struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1888 u32 seq, u8 cmd)
064af421 1889{
c19e6535 1890 struct sk_buff *skb;
f0fef760 1891 int retval;
c19e6535 1892
f0fef760 1893 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1894 if (!skb)
1895 return ERR_PTR(-ENOMEM);
1896
28aea917 1897 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1898 BUG_ON(retval < 0);
1899
c19e6535 1900 return skb;
f0fef760 1901}
c19e6535 1902
cd2a59e9 1903/* Called with ovs_mutex or RCU read lock. */
2a4999f3 1904static struct vport *lookup_vport(struct net *net,
f1f60b85 1905 const struct ovs_header *ovs_header,
df2c07f4 1906 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1907{
1908 struct datapath *dp;
1909 struct vport *vport;
1910
df2c07f4 1911 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1912 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1913 if (!vport)
c19e6535 1914 return ERR_PTR(-ENODEV);
24ce832d
BP
1915 if (ovs_header->dp_ifindex &&
1916 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1917 return ERR_PTR(-ENODEV);
c19e6535 1918 return vport;
df2c07f4
JP
1919 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1920 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1921
1922 if (port_no >= DP_MAX_PORTS)
f0fef760 1923 return ERR_PTR(-EFBIG);
c19e6535 1924
2a4999f3 1925 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1926 if (!dp)
1927 return ERR_PTR(-ENODEV);
f2459fe7 1928
cd2a59e9 1929 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 1930 if (!vport)
17535c57 1931 return ERR_PTR(-ENODEV);
c19e6535
BP
1932 return vport;
1933 } else
1934 return ERR_PTR(-EINVAL);
064af421
BP
1935}
1936
8ce37339
PS
1937/* Called with ovs_mutex */
1938static void update_headroom(struct datapath *dp)
1939{
1940 unsigned dev_headroom, max_headroom = 0;
1941 struct net_device *dev;
1942 struct vport *vport;
1943 int i;
1944
1945 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1946 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1947 dev = vport->dev;
1948 dev_headroom = netdev_get_fwd_headroom(dev);
1949 if (dev_headroom > max_headroom)
1950 max_headroom = dev_headroom;
1951 }
1952 }
1953
1954 dp->max_headroom = max_headroom;
1955 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1956 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1957 netdev_set_rx_headroom(vport->dev, max_headroom);
1958}
1959
df2c07f4 1960static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1961{
f0fef760 1962 struct nlattr **a = info->attrs;
df2c07f4 1963 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1964 struct vport_parms parms;
ed099e92 1965 struct sk_buff *reply;
c19e6535 1966 struct vport *vport;
c19e6535 1967 struct datapath *dp;
b0ec0f27 1968 u32 port_no;
c19e6535 1969 int err;
b0ec0f27 1970
ea36840f
BP
1971 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1972 !a[OVS_VPORT_ATTR_UPCALL_PID])
d81eef1b
JR
1973 return -EINVAL;
1974
1975 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1976 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1977 if (port_no >= DP_MAX_PORTS)
1978 return -EFBIG;
1979
1980 reply = ovs_vport_cmd_alloc_info();
1981 if (!reply)
1982 return -ENOMEM;
f0fef760 1983
cd2a59e9 1984 ovs_lock();
5a38795f 1985restart:
2a4999f3 1986 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1987 err = -ENODEV;
1988 if (!dp)
d81eef1b 1989 goto exit_unlock_free;
c19e6535 1990
d81eef1b 1991 if (port_no) {
cd2a59e9 1992 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1993 err = -EBUSY;
1994 if (vport)
d81eef1b 1995 goto exit_unlock_free;
c19e6535
BP
1996 } else {
1997 for (port_no = 1; ; port_no++) {
1998 if (port_no >= DP_MAX_PORTS) {
1999 err = -EFBIG;
d81eef1b 2000 goto exit_unlock_free;
c19e6535 2001 }
cd2a59e9 2002 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
2003 if (!vport)
2004 break;
51d4d598 2005 }
064af421 2006 }
b0ec0f27 2007
df2c07f4
JP
2008 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2009 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2010 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
2011 parms.dp = dp;
2012 parms.port_no = port_no;
beb1c69a 2013 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
c19e6535
BP
2014
2015 vport = new_vport(&parms);
2016 err = PTR_ERR(vport);
5a38795f
TG
2017 if (IS_ERR(vport)) {
2018 if (err == -EAGAIN)
2019 goto restart;
d81eef1b 2020 goto exit_unlock_free;
5a38795f 2021 }
c19e6535 2022
d81eef1b
JR
2023 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2024 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2025 BUG_ON(err < 0);
8ce37339
PS
2026
2027 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2028 update_headroom(dp);
2029 else
2030 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2031
d81eef1b 2032 ovs_unlock();
e297c6b7 2033
cb25142c 2034 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
d81eef1b 2035 return 0;
c19e6535 2036
d81eef1b 2037exit_unlock_free:
cd2a59e9 2038 ovs_unlock();
d81eef1b 2039 kfree_skb(reply);
c19e6535 2040 return err;
44e05eca
BP
2041}
2042
df2c07f4 2043static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 2044{
f0fef760
BP
2045 struct nlattr **a = info->attrs;
2046 struct sk_buff *reply;
c19e6535 2047 struct vport *vport;
c19e6535 2048 int err;
44e05eca 2049
d81eef1b
JR
2050 reply = ovs_vport_cmd_alloc_info();
2051 if (!reply)
2052 return -ENOMEM;
2053
cd2a59e9 2054 ovs_lock();
2a4999f3 2055 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
2056 err = PTR_ERR(vport);
2057 if (IS_ERR(vport))
d81eef1b 2058 goto exit_unlock_free;
44e05eca 2059
6455100f 2060 if (a[OVS_VPORT_ATTR_TYPE] &&
17ec1d04 2061 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
4879d4c7 2062 err = -EINVAL;
d81eef1b 2063 goto exit_unlock_free;
c25ea534
JG
2064 }
2065
17ec1d04 2066 if (a[OVS_VPORT_ATTR_OPTIONS]) {
850b6b3b 2067 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
17ec1d04 2068 if (err)
d81eef1b 2069 goto exit_unlock_free;
17ec1d04 2070 }
1fc7083d 2071
beb1c69a 2072 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
7d16c847
PS
2073 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2074
2075 err = ovs_vport_set_upcall_portids(vport, ids);
beb1c69a
AW
2076 if (err)
2077 goto exit_unlock_free;
2078 }
c19e6535 2079
c25ea534
JG
2080 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2081 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2082 BUG_ON(err < 0);
cd2a59e9 2083 ovs_unlock();
d81eef1b 2084
cb25142c 2085 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
c25ea534
JG
2086 return 0;
2087
d81eef1b 2088exit_unlock_free:
cd2a59e9 2089 ovs_unlock();
d81eef1b 2090 kfree_skb(reply);
c19e6535 2091 return err;
064af421
BP
2092}
2093
df2c07f4 2094static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2095{
8ce37339 2096 bool must_update_headroom = false;
f0fef760
BP
2097 struct nlattr **a = info->attrs;
2098 struct sk_buff *reply;
8ce37339 2099 struct datapath *dp;
c19e6535 2100 struct vport *vport;
c19e6535
BP
2101 int err;
2102
d81eef1b
JR
2103 reply = ovs_vport_cmd_alloc_info();
2104 if (!reply)
2105 return -ENOMEM;
2106
cd2a59e9 2107 ovs_lock();
2a4999f3 2108 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 2109 err = PTR_ERR(vport);
f0fef760 2110 if (IS_ERR(vport))
d81eef1b 2111 goto exit_unlock_free;
c19e6535 2112
df2c07f4 2113 if (vport->port_no == OVSP_LOCAL) {
f0fef760 2114 err = -EINVAL;
d81eef1b 2115 goto exit_unlock_free;
f0fef760
BP
2116 }
2117
d81eef1b
JR
2118 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2119 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2120 BUG_ON(err < 0);
8ce37339
PS
2121
 2122	/* The vport deletion may trigger a dp headroom update. */
2123 dp = vport->dp;
2124 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2125 must_update_headroom = true;
2126 netdev_reset_rx_headroom(vport->dev);
850b6b3b 2127 ovs_dp_detach_port(vport);
8ce37339
PS
2128
2129 if (must_update_headroom)
2130 update_headroom(dp);
2131
d81eef1b 2132 ovs_unlock();
f0fef760 2133
cb25142c 2134 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
d81eef1b 2135 return 0;
f0fef760 2136
d81eef1b 2137exit_unlock_free:
cd2a59e9 2138 ovs_unlock();
d81eef1b 2139 kfree_skb(reply);
c19e6535 2140 return err;
7c40efc9
BP
2141}
2142
df2c07f4 2143static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2144{
f0fef760 2145 struct nlattr **a = info->attrs;
df2c07f4 2146 struct ovs_header *ovs_header = info->userhdr;
ed099e92 2147 struct sk_buff *reply;
c19e6535 2148 struct vport *vport;
c19e6535
BP
2149 int err;
2150
d81eef1b
JR
2151 reply = ovs_vport_cmd_alloc_info();
2152 if (!reply)
2153 return -ENOMEM;
2154
ed099e92 2155 rcu_read_lock();
2a4999f3 2156 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
2157 err = PTR_ERR(vport);
2158 if (IS_ERR(vport))
d81eef1b
JR
2159 goto exit_unlock_free;
2160 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2161 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2162 BUG_ON(err < 0);
df2fa9b5
JG
2163 rcu_read_unlock();
2164
2165 return genlmsg_reply(reply, info);
ed099e92 2166
d81eef1b 2167exit_unlock_free:
ed099e92 2168 rcu_read_unlock();
d81eef1b 2169 kfree_skb(reply);
c19e6535
BP
2170 return err;
2171}
2172
df2c07f4 2173static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 2174{
df2c07f4 2175 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 2176 struct datapath *dp;
95b1d73a
PS
2177 int bucket = cb->args[0], skip = cb->args[1];
2178 int i, j = 0;
c19e6535 2179
03fc2881 2180 rcu_read_lock();
01ac0970 2181 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
03fc2881
JR
2182 if (!dp) {
2183 rcu_read_unlock();
f0fef760 2184 return -ENODEV;
03fc2881 2185 }
95b1d73a 2186 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 2187 struct vport *vport;
95b1d73a
PS
2188
2189 j = 0;
f8dfbcb7 2190 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
2191 if (j >= skip &&
2192 ovs_vport_cmd_fill_info(vport, skb,
28aea917 2193 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
2194 cb->nlh->nlmsg_seq,
2195 NLM_F_MULTI,
2196 OVS_VPORT_CMD_NEW) < 0)
2197 goto out;
2198
2199 j++;
2200 }
2201 skip = 0;
c19e6535 2202 }
95b1d73a 2203out:
ed099e92 2204 rcu_read_unlock();
c19e6535 2205
95b1d73a
PS
2206 cb->args[0] = i;
2207 cb->args[1] = j;
f0fef760 2208
95b1d73a 2209 return skb->len;
7c40efc9
BP
2210}
2211
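The dump callback above walks the vport hash table in chunks: cb->args[0] remembers the bucket and cb->args[1] how many entries of that bucket were already emitted, so a dump interrupted by a full skb resumes exactly where it stopped. The sketch below is a stand-alone user-space illustration of that (bucket, skip) cursor; the table contents, the fill_chunk() helper, and the per-chunk budget are all invented for the example.

/* Illustrative user-space sketch of the (bucket, skip) dump cursor. */
#include <stdio.h>

#define BUCKETS 4

static const char *table[BUCKETS][4] = {
	{ "br0", "eth0" },
	{ "eth1" },
	{ 0 },
	{ "gre0", "vxlan0", "geneve0" },
};

/* Emit at most 'budget' entries, resuming from *bucket/*skip and updating
 * them so the next call continues where this one stopped. */
static int fill_chunk(int *bucket, int *skip, int budget)
{
	int emitted = 0, i, j = 0;

	for (i = *bucket; i < BUCKETS; i++) {
		for (j = 0; table[i][j]; j++) {
			if (j < *skip)
				continue;	/* already sent in a prior chunk */
			if (emitted == budget)
				goto out;	/* "skb full": stop here */
			printf("bucket %d entry %d: %s\n", i, j, table[i][j]);
			emitted++;
		}
		*skip = 0;	/* later buckets start from their first entry */
	}
out:
	*bucket = i;	/* mirrors cb->args[0] */
	*skip = j;	/* mirrors cb->args[1] */
	return emitted;
}

int main(void)
{
	int bucket = 0, skip = 0, n;

	do {	/* each iteration plays the role of one dump reply */
		n = fill_chunk(&bucket, &skip, 2);
		printf("-- chunk of %d, cursor (%d, %d)\n", n, bucket, skip);
	} while (n > 0);	/* an empty chunk ends the dump */
	return 0;
}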
cb25142c
PS
2212static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2213 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2214 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2215 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2216 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2217 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2218 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2219};
2220
18fd3a52 2221static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 2222 { .cmd = OVS_VPORT_CMD_NEW,
a6a8674d 2223	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
f0fef760 2224 .policy = vport_policy,
df2c07f4 2225 .doit = ovs_vport_cmd_new
f0fef760 2226 },
df2c07f4 2227 { .cmd = OVS_VPORT_CMD_DEL,
a6a8674d 2228	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
f0fef760 2229 .policy = vport_policy,
df2c07f4 2230 .doit = ovs_vport_cmd_del
f0fef760 2231 },
df2c07f4 2232 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
2233 .flags = 0, /* OK for unprivileged users. */
2234 .policy = vport_policy,
df2c07f4
JP
2235 .doit = ovs_vport_cmd_get,
2236 .dumpit = ovs_vport_cmd_dump
f0fef760 2237 },
df2c07f4 2238 { .cmd = OVS_VPORT_CMD_SET,
a6a8674d 2239	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN in the owning user namespace. */
f0fef760 2240 .policy = vport_policy,
df2c07f4 2241 .doit = ovs_vport_cmd_set,
f0fef760
BP
2242 },
2243};
2244
ba63fe26 2245struct genl_family dp_vport_genl_family __ro_after_init = {
cb25142c
PS
2246 .hdrsize = sizeof(struct ovs_header),
2247 .name = OVS_VPORT_FAMILY,
2248 .version = OVS_VPORT_VERSION,
2249 .maxattr = OVS_VPORT_ATTR_MAX,
2250 .netnsok = true,
2251 .parallel_ops = true,
2252 .ops = dp_vport_genl_ops,
2253 .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2254 .mcgrps = &ovs_dp_vport_multicast_group,
2255 .n_mcgrps = 1,
ba63fe26 2256 .module = THIS_MODULE,
982b8810 2257};
ed099e92 2258
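A dump against this family is what a user-space tool issues to list ports; it lands in ovs_vport_cmd_dump() above. The sketch below is again an illustration only (libnl-3, assumed datapath name "dp-test"): it addresses the datapath by the ifindex of its local internal port and prints the OVS_VPORT_ATTR_PORT_NO / OVS_VPORT_ATTR_NAME pairs from each reply.

/* Illustrative user-space sketch: dump vports via OVS_VPORT_CMD_GET + NLM_F_DUMP. */
#include <stdio.h>
#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

static int print_vport(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[OVS_VPORT_ATTR_MAX + 1];

	/* Skip the genl header and ovs_header, then index the attributes. */
	if (genlmsg_parse(nlmsg_hdr(msg), sizeof(struct ovs_header),
			  attrs, OVS_VPORT_ATTR_MAX, NULL) < 0)
		return NL_SKIP;

	if (attrs[OVS_VPORT_ATTR_PORT_NO] && attrs[OVS_VPORT_ATTR_NAME])
		printf("port %u: %s\n",
		       nla_get_u32(attrs[OVS_VPORT_ATTR_PORT_NO]),
		       nla_get_string(attrs[OVS_VPORT_ATTR_NAME]));
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	struct ovs_header *hdr;
	int family;

	if (!sk || genl_connect(sk))
		return 1;
	family = genl_ctrl_resolve(sk, OVS_VPORT_FAMILY);
	if (family < 0)
		return 1;

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, print_vport, NULL);

	msg = nlmsg_alloc();
	if (!msg)
		return 1;
	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
			  sizeof(*hdr), NLM_F_REQUEST | NLM_F_DUMP,
			  OVS_VPORT_CMD_GET, OVS_VPORT_VERSION);
	if (!hdr)
		return 1;
	/* The datapath is addressed by the ifindex of its local internal
	 * port, whose netdev carries the datapath name. */
	hdr->dp_ifindex = if_nametoindex("dp-test");

	if (nl_send_auto(sk, msg) < 0)
		return 1;
	nl_recvmsgs_default(sk);	/* print_vport() runs per reply */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}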
18fd3a52 2259static struct genl_family *dp_genl_families[] = {
cb25142c
PS
2260 &dp_datapath_genl_family,
2261 &dp_vport_genl_family,
2262 &dp_flow_genl_family,
2263 &dp_packet_genl_family,
982b8810 2264};
ed099e92 2265
982b8810
BP
2266static void dp_unregister_genl(int n_families)
2267{
2268 int i;
ed099e92 2269
b867ca75 2270 for (i = 0; i < n_families; i++)
cb25142c 2271 genl_unregister_family(dp_genl_families[i]);
ed099e92
BP
2272}
2273
ba63fe26 2274static int __init dp_register_genl(void)
064af421 2275{
982b8810
BP
2276 int err;
2277 int i;
064af421 2278
982b8810 2279 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
064af421 2280
cb25142c 2281 err = genl_register_family(dp_genl_families[i]);
982b8810
BP
2282 if (err)
2283 goto error;
982b8810 2284 }
9cc8b4e4 2285
982b8810 2286 return 0;
064af421
BP
2287
2288error:
cb25142c 2289 dp_unregister_genl(i);
982b8810 2290 return err;
064af421
BP
2291}
2292
2a4999f3
PS
2293static int __net_init ovs_init_net(struct net *net)
2294{
2295 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2296
2297 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 2298 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
038e34ab 2299 ovs_ct_init(net);
7f4a5d68 2300 ovs_netns_frags_init(net);
2301 ovs_netns_frags6_init(net);
2a4999f3
PS
2302 return 0;
2303}
2304
cabd5516
PS
2305static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2306 struct list_head *head)
2a4999f3
PS
2307{
2308 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
cabd5516
PS
2309 struct datapath *dp;
2310
2311 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2312 int i;
2313
2314 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2315 struct vport *vport;
2316
2317 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
cabd5516
PS
2318
2319 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2320 continue;
2321
e23775f2 2322 if (dev_net(vport->dev) == dnet)
cabd5516
PS
2323 list_add(&vport->detach_list, head);
2324 }
2325 }
2326 }
2327}
2328
2329static void __net_exit ovs_exit_net(struct net *dnet)
2330{
2331 struct datapath *dp, *dp_next;
2332 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2333 struct vport *vport, *vport_next;
2334 struct net *net;
2335 LIST_HEAD(head);
2a4999f3 2336
7f4a5d68 2337 ovs_netns_frags6_exit(dnet);
2338 ovs_netns_frags_exit(dnet);
038e34ab 2339 ovs_ct_exit(dnet);
cd2a59e9
PS
2340 ovs_lock();
2341 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2342 __dp_destroy(dp);
cabd5516
PS
2343
2344 rtnl_lock();
2345 for_each_net(net)
2346 list_vports_from_net(net, dnet, &head);
2347 rtnl_unlock();
2348
2349 /* Detach all vports from given namespace. */
2350 list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2351 list_del(&vport->detach_list);
2352 ovs_dp_detach_port(vport);
2353 }
2354
cd2a59e9
PS
2355 ovs_unlock();
2356
2357 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
2358}
2359
2360static struct pernet_operations ovs_net_ops = {
2361 .init = ovs_init_net,
2362 .exit = ovs_exit_net,
2363 .id = &ovs_net_id,
2364 .size = sizeof(struct ovs_net),
2365};
2366
22d24ebf
BP
2367static int __init dp_init(void)
2368{
2369 int err;
2370
f3d85db3 2371 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 2372
26bfaeaa 2373 pr_info("Open vSwitch switching datapath %s\n", VERSION);
064af421 2374
595e069a
JS
2375 err = action_fifos_init();
2376 if (err)
7f4a5d68 2377 goto error;
595e069a 2378
5282e284 2379 err = ovs_internal_dev_rtnl_link_register();
2c8c4fb7
AZ
2380 if (err)
2381 goto error_action_fifos_exit;
2382
5282e284
TG
2383 err = ovs_flow_init();
2384 if (err)
2385 goto error_unreg_rtnl_link;
2386
850b6b3b 2387 err = ovs_vport_init();
064af421
BP
2388 if (err)
2389 goto error_flow_exit;
2390
2a4999f3 2391 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
2392 if (err)
2393 goto error_vport_exit;
2394
7f4a5d68 2395 err = compat_init();
2a4999f3
PS
2396 if (err)
2397 goto error_netns_exit;
2398
7f4a5d68 2399 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2400 if (err)
2401 goto error_compat_exit;
2402
5a38795f
TG
2403 err = ovs_netdev_init();
2404 if (err)
2405 goto error_unreg_notifier;
2406
982b8810
BP
2407 err = dp_register_genl();
2408 if (err < 0)
5a38795f 2409 goto error_unreg_netdev;
982b8810 2410
064af421
BP
2411 return 0;
2412
5a38795f
TG
2413error_unreg_netdev:
2414 ovs_netdev_exit();
064af421 2415error_unreg_notifier:
850b6b3b 2416 unregister_netdevice_notifier(&ovs_dp_device_notifier);
7f4a5d68 2417error_compat_exit:
2418 compat_exit();
2a4999f3
PS
2419error_netns_exit:
2420 unregister_pernet_device(&ovs_net_ops);
f2459fe7 2421error_vport_exit:
850b6b3b 2422 ovs_vport_exit();
064af421 2423error_flow_exit:
850b6b3b 2424 ovs_flow_exit();
5282e284
TG
2425error_unreg_rtnl_link:
2426 ovs_internal_dev_rtnl_link_unregister();
2c8c4fb7
AZ
2427error_action_fifos_exit:
2428 action_fifos_exit();
064af421
BP
2429error:
2430 return err;
2431}
2432
2433static void dp_cleanup(void)
2434{
982b8810 2435 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
5a38795f 2436 ovs_netdev_exit();
850b6b3b 2437 unregister_netdevice_notifier(&ovs_dp_device_notifier);
7f4a5d68 2438 compat_exit();
2a4999f3
PS
2439 unregister_pernet_device(&ovs_net_ops);
2440 rcu_barrier();
850b6b3b
JG
2441 ovs_vport_exit();
2442 ovs_flow_exit();
5282e284 2443 ovs_internal_dev_rtnl_link_unregister();
2c8c4fb7 2444 action_fifos_exit();
064af421
BP
2445}
2446
2447module_init(dp_init);
2448module_exit(dp_cleanup);
2449
2450MODULE_DESCRIPTION("Open vSwitch switching datapath");
2451MODULE_LICENSE("GPL");
3d0666d2 2452MODULE_VERSION(VERSION);
75e2077e
TLSC
2453MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2454MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2455MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2456MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);