]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
ipfix: Export user specified virtual observation ID
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
e23775f2 2 * Copyright (c) 2007-2015 Nicira, Inc.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421 41#include <linux/wait.h>
064af421 42#include <asm/div64.h>
656a0e37 43#include <linux/highmem.h>
064af421
BP
44#include <linux/netfilter_bridge.h>
45#include <linux/netfilter_ipv4.h>
46#include <linux/inetdevice.h>
47#include <linux/list.h>
077257b8 48#include <linux/openvswitch.h>
064af421 49#include <linux/rculist.h>
064af421 50#include <linux/dmi.h>
36956a7d 51#include <net/genetlink.h>
2a4999f3
PS
52#include <net/net_namespace.h>
53#include <net/netns/generic.h>
064af421 54
064af421 55#include "datapath.h"
038e34ab 56#include "conntrack.h"
064af421 57#include "flow.h"
d103f479 58#include "flow_table.h"
a097c0b2 59#include "flow_netlink.h"
e23775f2 60#include "gso.h"
f2459fe7 61#include "vport-internal_dev.h"
d5de5b0d 62#include "vport-netdev.h"
064af421 63
2a4999f3 64int ovs_net_id __read_mostly;
5a38795f 65EXPORT_SYMBOL_GPL(ovs_net_id);
2a4999f3 66
cb25142c
PS
67static struct genl_family dp_packet_genl_family;
68static struct genl_family dp_flow_genl_family;
69static struct genl_family dp_datapath_genl_family;
70
bc619e29
JS
71static const struct nla_policy flow_policy[];
72
18fd3a52
PS
73static struct genl_multicast_group ovs_dp_flow_multicast_group = {
74 .name = OVS_FLOW_MCGROUP
cb25142c
PS
75};
76
18fd3a52
PS
77static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
78 .name = OVS_DATAPATH_MCGROUP
cb25142c
PS
79};
80
18fd3a52
PS
81struct genl_multicast_group ovs_dp_vport_multicast_group = {
82 .name = OVS_VPORT_MCGROUP
cb25142c
PS
83};
84
afad3556 85/* Check if need to build a reply message.
af465b67
PS
86 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
87 */
114fce23
SG
88static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
89 unsigned int group)
afad3556
JR
90{
91 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
6233a1bd 92 genl_has_listeners(family, genl_info_net(info), group);
afad3556
JR
93}
94
18fd3a52 95static void ovs_notify(struct genl_family *family, struct genl_multicast_group *grp,
cb25142c 96 struct sk_buff *skb, struct genl_info *info)
e297c6b7 97{
cb25142c
PS
98 genl_notify(family, skb, genl_info_net(info),
99 info->snd_portid, GROUP_ID(grp), info->nlhdr, GFP_KERNEL);
e297c6b7
TG
100}
101
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken via ovs_lock().
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
ed099e92 118
cd2a59e9
PS
119static DEFINE_MUTEX(ovs_mutex);
120
121void ovs_lock(void)
122{
123 mutex_lock(&ovs_mutex);
124}
125
126void ovs_unlock(void)
127{
128 mutex_unlock(&ovs_mutex);
129}
130
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held according to lockdep.  When debug_locks
 * is zero the answer is always 1.
 */
int lockdep_ovsl_is_held(void)
{
	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
141
/* Forward declarations for the upcall queueing helpers defined below. */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info);
064af421 148
01ac0970
AZ
149/* Must be called with rcu_read_lock. */
150static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
064af421 151{
01ac0970 152 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
ed099e92 153
254f2dc8 154 if (dev) {
850b6b3b 155 struct vport *vport = ovs_internal_dev_get_vport(dev);
254f2dc8 156 if (vport)
01ac0970 157 return vport->dp;
254f2dc8 158 }
01ac0970
AZ
159
160 return NULL;
161}
162
/* Same lookup as get_dp_rcu(), but takes the RCU read lock itself around the
 * lookup.
 *
 * The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid; a one-time warning is emitted otherwise.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *found;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());

	rcu_read_lock();
	found = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return found;
}
064af421 177
cd2a59e9 178/* Must be called with rcu_read_lock or ovs_mutex. */
850b6b3b 179const char *ovs_dp_name(const struct datapath *dp)
f2459fe7 180{
cd2a59e9 181 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
e23775f2 182 return ovs_vport_name(vport);
f2459fe7
JG
183}
184
f1f60b85 185static int get_dpifindex(const struct datapath *dp)
99769a40
JG
186{
187 struct vport *local;
188 int ifindex;
189
190 rcu_read_lock();
191
95b1d73a 192 local = ovs_vport_rcu(dp, OVSP_LOCAL);
99769a40 193 if (local)
e23775f2 194 ifindex = local->dev->ifindex;
99769a40
JG
195 else
196 ifindex = 0;
197
198 rcu_read_unlock();
199
200 return ifindex;
201}
202
46c6a11d
JG
203static void destroy_dp_rcu(struct rcu_head *rcu)
204{
205 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 206
e379e4d1 207 ovs_flow_tbl_destroy(&dp->table);
46c6a11d 208 free_percpu(dp->stats_percpu);
95b1d73a 209 kfree(dp->ports);
5ca1ba48 210 kfree(dp);
46c6a11d
JG
211}
212
95b1d73a
PS
213static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
214 u16 port_no)
215{
216 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
217}
218
aa917006 219/* Called with ovs_mutex or RCU read lock. */
95b1d73a
PS
220struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
221{
222 struct vport *vport;
95b1d73a
PS
223 struct hlist_head *head;
224
225 head = vport_hash_bucket(dp, port_no);
f8dfbcb7 226 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
95b1d73a
PS
227 if (vport->port_no == port_no)
228 return vport;
229 }
230 return NULL;
231}
232
cd2a59e9 233/* Called with ovs_mutex. */
c19e6535 234static struct vport *new_vport(const struct vport_parms *parms)
064af421 235{
f2459fe7 236 struct vport *vport;
f2459fe7 237
850b6b3b 238 vport = ovs_vport_add(parms);
c19e6535
BP
239 if (!IS_ERR(vport)) {
240 struct datapath *dp = parms->dp;
95b1d73a 241 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
064af421 242
95b1d73a 243 hlist_add_head_rcu(&vport->dp_hash_node, head);
c19e6535 244 }
c19e6535 245 return vport;
064af421
BP
246}
247
850b6b3b 248void ovs_dp_detach_port(struct vport *p)
064af421 249{
cd2a59e9 250 ASSERT_OVSL();
064af421 251
064af421 252 /* First drop references to device. */
95b1d73a 253 hlist_del_rcu(&p->dp_hash_node);
f2459fe7 254
7237e4f4 255 /* Then destroy it. */
850b6b3b 256 ovs_vport_del(p);
064af421
BP
257}
258
fb66fbd1 259/* Must be called with rcu_read_lock. */
e74d4817 260void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
064af421 261{
a6059080 262 const struct vport *p = OVS_CB(skb)->input_vport;
064af421 263 struct datapath *dp = p->dp;
3544358a 264 struct sw_flow *flow;
ad50cb60 265 struct sw_flow_actions *sf_acts;
064af421 266 struct dp_stats_percpu *stats;
e9141eec 267 u64 *stats_counter;
4fa72a95 268 u32 n_mask_hit;
064af421 269
70dbc259 270 stats = this_cpu_ptr(dp->stats_percpu);
a063b0df 271
52a23d92 272 /* Look up flow. */
e74d4817 273 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
5604935e 274 &n_mask_hit);
52a23d92
JG
275 if (unlikely(!flow)) {
276 struct dp_upcall_info upcall;
a7d607c5 277 int error;
52a23d92 278
0e469d3b 279 memset(&upcall, 0, sizeof(upcall));
52a23d92 280 upcall.cmd = OVS_PACKET_CMD_MISS;
beb1c69a 281 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
a94ebc39 282 upcall.mru = OVS_CB(skb)->mru;
e74d4817 283 error = ovs_dp_upcall(dp, skb, key, &upcall);
a7d607c5
LR
284 if (unlikely(error))
285 kfree_skb(skb);
286 else
287 consume_skb(skb);
52a23d92
JG
288 stats_counter = &stats->n_missed;
289 goto out;
290 }
291
e74d4817 292 ovs_flow_stats_update(flow, key->tp.flags, skb);
ad50cb60 293 sf_acts = rcu_dereference(flow->sf_acts);
7d16c847
PS
294 ovs_execute_actions(dp, skb, sf_acts, key);
295
b0b906cc 296 stats_counter = &stats->n_hit;
55574bb0 297
8819fac7 298out:
55574bb0 299 /* Update datapath statistics. */
b81deb15 300 u64_stats_update_begin(&stats->syncp);
e9141eec 301 (*stats_counter)++;
4fa72a95 302 stats->n_mask_hit += n_mask_hit;
b81deb15 303 u64_stats_update_end(&stats->syncp);
064af421
BP
304}
305
850b6b3b 306int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
f1f60b85 307 const struct sw_flow_key *key,
850b6b3b 308 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
309{
310 struct dp_stats_percpu *stats;
311 int err;
312
28aea917 313 if (upcall_info->portid == 0) {
b063d9f0 314 err = -ENOTCONN;
b063d9f0
JG
315 goto err;
316 }
317
7257b535 318 if (!skb_is_gso(skb))
e74d4817 319 err = queue_userspace_packet(dp, skb, key, upcall_info);
7257b535 320 else
e74d4817 321 err = queue_gso_packets(dp, skb, key, upcall_info);
d76195db
JG
322 if (err)
323 goto err;
324
325 return 0;
aa5a8fdc 326
aa5a8fdc 327err:
70dbc259 328 stats = this_cpu_ptr(dp->stats_percpu);
aa5a8fdc 329
b81deb15 330 u64_stats_update_begin(&stats->syncp);
aa5a8fdc 331 stats->n_lost++;
b81deb15 332 u64_stats_update_end(&stats->syncp);
aa5a8fdc 333
aa5a8fdc 334 return err;
982b8810
BP
335}
336
5ae440c3 337static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
f1f60b85 338 const struct sw_flow_key *key,
7257b535 339 const struct dp_upcall_info *upcall_info)
cb5087ca 340{
d4cba1f8 341 unsigned short gso_type = skb_shinfo(skb)->gso_type;
7257b535
BP
342 struct sw_flow_key later_key;
343 struct sk_buff *segs, *nskb;
b2a23c4e 344 struct ovs_skb_cb ovs_cb;
7257b535 345 int err;
cb5087ca 346
b2a23c4e 347 ovs_cb = *OVS_CB(skb);
1d04cd4e 348 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
b2a23c4e 349 *OVS_CB(skb) = ovs_cb;
79089764
PS
350 if (IS_ERR(segs))
351 return PTR_ERR(segs);
d1da7669
PS
352 if (segs == NULL)
353 return -EINVAL;
99769a40 354
9b277b39 355 if (gso_type & SKB_GSO_UDP) {
c135bba1 356 /* The initial flow key extracted by ovs_flow_key_extract()
9b277b39
PS
357 * in this case is for a first fragment, so we need to
358 * properly mark later fragments.
359 */
e74d4817 360 later_key = *key;
9b277b39
PS
361 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
362 }
363
7257b535
BP
364 /* Queue all of the segments. */
365 skb = segs;
cb5087ca 366 do {
b2a23c4e 367 *OVS_CB(skb) = ovs_cb;
9b277b39 368 if (gso_type & SKB_GSO_UDP && skb != segs)
e74d4817 369 key = &later_key;
9b277b39 370
e74d4817 371 err = queue_userspace_packet(dp, skb, key, upcall_info);
982b8810 372 if (err)
7257b535 373 break;
856081f6 374
36ce148c 375 } while ((skb = skb->next));
cb5087ca 376
7257b535
BP
377 /* Free all of the segments. */
378 skb = segs;
379 do {
380 nskb = skb->next;
381 if (err)
382 kfree_skb(skb);
383 else
384 consume_skb(skb);
385 } while ((skb = nskb));
386 return err;
387}
388
8b7ea2d4 389static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
533bea51 390 unsigned int hdrlen)
0afa2373
TG
391{
392 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
533bea51 393 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
4e25b8c1 394 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
0afa2373
TG
395
396 /* OVS_PACKET_ATTR_USERDATA */
8b7ea2d4
WZ
397 if (upcall_info->userdata)
398 size += NLA_ALIGN(upcall_info->userdata->nla_len);
399
400 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
401 if (upcall_info->egress_tun_info)
402 size += nla_total_size(ovs_tun_key_attr_size());
0afa2373 403
0e469d3b
NM
404 /* OVS_PACKET_ATTR_ACTIONS */
405 if (upcall_info->actions_len)
406 size += nla_total_size(upcall_info->actions_len);
407
a94ebc39
JS
408 /* OVS_PACKET_ATTR_MRU */
409 if (upcall_info->mru)
410 size += nla_total_size(sizeof(upcall_info->mru));
411
0afa2373
TG
412 return size;
413}
414
a94ebc39
JS
415static void pad_packet(struct datapath *dp, struct sk_buff *skb)
416{
417 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
418 size_t plen = NLA_ALIGN(skb->len) - skb->len;
419
420 if (plen > 0)
421 memset(skb_put(skb, plen), 0, plen);
422 }
423}
424
5ae440c3 425static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
f1f60b85 426 const struct sw_flow_key *key,
7257b535
BP
427 const struct dp_upcall_info *upcall_info)
428{
429 struct ovs_header *upcall;
6161d3fd 430 struct sk_buff *nskb = NULL;
82706a6f 431 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
7257b535 432 struct nlattr *nla;
68eadcf0 433 struct genl_info info = {
705e9260 434#ifdef HAVE_GENLMSG_NEW_UNICAST
5ae440c3 435 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
68eadcf0
TG
436#endif
437 .snd_portid = upcall_info->portid,
438 };
978188b2 439 size_t len;
533bea51 440 unsigned int hlen;
5ae440c3
TG
441 int err, dp_ifindex;
442
443 dp_ifindex = get_dpifindex(dp);
444 if (!dp_ifindex)
445 return -ENODEV;
7257b535 446
efd8a18e 447 if (skb_vlan_tag_present(skb)) {
6161d3fd
JG
448 nskb = skb_clone(skb, GFP_ATOMIC);
449 if (!nskb)
450 return -ENOMEM;
07ac71ea 451
8063e095 452 nskb = __vlan_hwaccel_push_inside(nskb);
07ac71ea
PS
453 if (!nskb)
454 return -ENOMEM;
455
6161d3fd
JG
456 skb = nskb;
457 }
458
459 if (nla_attr_size(skb->len) > USHRT_MAX) {
460 err = -EFBIG;
461 goto out;
462 }
7257b535 463
533bea51
TG
464 /* Complete checksum if needed */
465 if (skb->ip_summed == CHECKSUM_PARTIAL &&
466 (err = skb_checksum_help(skb)))
467 goto out;
468
469 /* Older versions of OVS user space enforce alignment of the last
470 * Netlink attribute to NLA_ALIGNTO which would require extensive
471 * padding logic. Only perform zerocopy if padding is not required.
472 */
473 if (dp->user_features & OVS_DP_F_UNALIGNED)
474 hlen = skb_zerocopy_headlen(skb);
475 else
476 hlen = skb->len;
477
8b7ea2d4 478 len = upcall_msg_size(upcall_info, hlen);
68eadcf0 479 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
6161d3fd
JG
480 if (!user_skb) {
481 err = -ENOMEM;
482 goto out;
483 }
7257b535
BP
484
485 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
486 0, upcall_info->cmd);
487 upcall->dp_ifindex = dp_ifindex;
488
db7f2238 489 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
9a621f82 490 BUG_ON(err);
7257b535
BP
491
492 if (upcall_info->userdata)
e995e3df 493 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
462a988b 494 nla_len(upcall_info->userdata),
e995e3df 495 nla_data(upcall_info->userdata));
7257b535 496
e23775f2 497
8b7ea2d4
WZ
498 if (upcall_info->egress_tun_info) {
499 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
500 err = ovs_nla_put_egress_tunnel_key(user_skb,
e23775f2
PS
501 upcall_info->egress_tun_info,
502 upcall_info->egress_tun_opts);
8b7ea2d4
WZ
503 BUG_ON(err);
504 nla_nest_end(user_skb, nla);
505 }
506
0e469d3b
NM
507 if (upcall_info->actions_len) {
508 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
509 err = ovs_nla_put_actions(upcall_info->actions,
510 upcall_info->actions_len,
511 user_skb);
512 if (!err)
513 nla_nest_end(user_skb, nla);
514 else
515 nla_nest_cancel(user_skb, nla);
516 }
517
a94ebc39
JS
518 /* Add OVS_PACKET_ATTR_MRU */
519 if (upcall_info->mru) {
520 if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
521 upcall_info->mru)) {
522 err = -ENOBUFS;
523 goto out;
524 }
525 pad_packet(dp, user_skb);
526 }
527
533bea51 528 /* Only reserve room for attribute header, packet data is added
af465b67
PS
529 * in skb_zerocopy()
530 */
533bea51
TG
531 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
532 err = -ENOBUFS;
533 goto out;
534 }
535 nla->nla_len = nla_attr_size(skb->len);
bed53bd1 536
2c272bd9
ZK
537 err = skb_zerocopy(user_skb, skb, skb->len, hlen);
538 if (err)
539 goto out;
7257b535 540
ef507cec 541 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
a94ebc39 542 pad_packet(dp, user_skb);
ef507cec 543
533bea51 544 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
6161d3fd 545
533bea51 546 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
82706a6f 547 user_skb = NULL;
6161d3fd 548out:
2c272bd9
ZK
549 if (err)
550 skb_tx_error(skb);
82706a6f 551 kfree_skb(user_skb);
6161d3fd
JG
552 kfree_skb(nskb);
553 return err;
cb5087ca
BP
554}
555
df2c07f4 556static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 557{
df2c07f4 558 struct ovs_header *ovs_header = info->userhdr;
a94ebc39 559 struct net *net = sock_net(skb->sk);
982b8810 560 struct nlattr **a = info->attrs;
e0e57990 561 struct sw_flow_actions *acts;
982b8810 562 struct sk_buff *packet;
e0e57990 563 struct sw_flow *flow;
ad50cb60 564 struct sw_flow_actions *sf_acts;
f7cd0081 565 struct datapath *dp;
d6569377 566 struct ethhdr *eth;
a6059080 567 struct vport *input_vport;
a94ebc39 568 u16 mru = 0;
3f19d399 569 int len;
d6569377 570 int err;
2e460098 571 bool log = !a[OVS_PACKET_ATTR_PROBE];
064af421 572
f7cd0081 573 err = -EINVAL;
df2c07f4 574 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 575 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 576 goto err;
064af421 577
df2c07f4 578 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 579 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
580 err = -ENOMEM;
581 if (!packet)
e5cad958 582 goto err;
3f19d399
BP
583 skb_reserve(packet, NET_IP_ALIGN);
584
bf3d6fce 585 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 586
f7cd0081
BP
587 skb_reset_mac_header(packet);
588 eth = eth_hdr(packet);
064af421 589
d6569377
BP
590 /* Normally, setting the skb 'protocol' field would be handled by a
591 * call to eth_type_trans(), but it assumes there's a sending
af465b67
PS
592 * device, which we may not have.
593 */
935fc582 594 if (eth_proto_is_802_3(eth->h_proto))
f7cd0081 595 packet->protocol = eth->h_proto;
d6569377 596 else
f7cd0081 597 packet->protocol = htons(ETH_P_802_2);
d3c54451 598
a94ebc39
JS
599 /* Set packet's mru */
600 if (a[OVS_PACKET_ATTR_MRU]) {
601 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
602 packet->ignore_df = 1;
603 }
604 OVS_CB(packet)->mru = mru;
605
e0e57990 606 /* Build an sw_flow for sending this packet. */
df65fec1 607 flow = ovs_flow_alloc();
e0e57990
BP
608 err = PTR_ERR(flow);
609 if (IS_ERR(flow))
e5cad958 610 goto err_kfree_skb;
064af421 611
038e34ab
JS
612 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
613 packet, &flow->key, log);
e0e57990 614 if (err)
9321954a 615 goto err_flow_free;
e0e57990 616
a94ebc39 617 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
9233cef7 618 &flow->key, &acts, log);
9b405f1a
PS
619 if (err)
620 goto err_flow_free;
e0e57990 621
ff27161e 622 rcu_assign_pointer(flow->sf_acts, acts);
abff858b 623 packet->priority = flow->key.phy.priority;
3025a772 624 packet->mark = flow->key.phy.skb_mark;
e0e57990 625
d6569377 626 rcu_read_lock();
a94ebc39 627 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
f7cd0081 628 err = -ENODEV;
e5cad958
BP
629 if (!dp)
630 goto err_unlock;
cc4015df 631
a6059080
AZ
632 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
633 if (!input_vport)
634 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
635
636 if (!input_vport)
637 goto err_unlock;
638
e23775f2 639 packet->dev = input_vport->dev;
a6059080 640 OVS_CB(packet)->input_vport = input_vport;
ad50cb60 641 sf_acts = rcu_dereference(flow->sf_acts);
a6059080 642
e9141eec 643 local_bh_disable();
7d16c847 644 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
e9141eec 645 local_bh_enable();
d6569377 646 rcu_read_unlock();
e0e57990 647
a1c564be 648 ovs_flow_free(flow, false);
e5cad958 649 return err;
064af421 650
e5cad958
BP
651err_unlock:
652 rcu_read_unlock();
9321954a 653err_flow_free:
a1c564be 654 ovs_flow_free(flow, false);
e5cad958
BP
655err_kfree_skb:
656 kfree_skb(packet);
657err:
d6569377 658 return err;
064af421
BP
659}
660
df2c07f4 661static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc 662 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
df2c07f4
JP
663 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
664 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
2e460098 665 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
a94ebc39 666 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
982b8810
BP
667};
668
18fd3a52 669static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 670 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
671 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
672 .policy = packet_policy,
df2c07f4 673 .doit = ovs_packet_cmd_execute
982b8810
BP
674 }
675};
676
cb25142c
PS
677static struct genl_family dp_packet_genl_family = {
678 .id = GENL_ID_GENERATE,
679 .hdrsize = sizeof(struct ovs_header),
680 .name = OVS_PACKET_FAMILY,
681 .version = OVS_PACKET_VERSION,
682 .maxattr = OVS_PACKET_ATTR_MAX,
683 .netnsok = true,
684 .parallel_ops = true,
685 .ops = dp_packet_genl_ops,
686 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
687};
688
f1f60b85 689static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
4fa72a95 690 struct ovs_dp_megaflow_stats *mega_stats)
064af421 691{
d6569377 692 int i;
f180c2e2 693
4fa72a95
AZ
694 memset(mega_stats, 0, sizeof(*mega_stats));
695
994dc286 696 stats->n_flows = ovs_flow_tbl_count(&dp->table);
4fa72a95 697 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
064af421 698
7257b535 699 stats->n_hit = stats->n_missed = stats->n_lost = 0;
4fa72a95 700
d6569377
BP
701 for_each_possible_cpu(i) {
702 const struct dp_stats_percpu *percpu_stats;
703 struct dp_stats_percpu local_stats;
821cb9fa 704 unsigned int start;
44e05eca 705
d6569377 706 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 707
d6569377 708 do {
b81deb15 709 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
d6569377 710 local_stats = *percpu_stats;
b81deb15 711 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
064af421 712
d6569377
BP
713 stats->n_hit += local_stats.n_hit;
714 stats->n_missed += local_stats.n_missed;
715 stats->n_lost += local_stats.n_lost;
4fa72a95 716 mega_stats->n_mask_hit += local_stats.n_mask_hit;
d6569377
BP
717 }
718}
064af421 719
bc619e29
JS
720static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
721{
722 return ovs_identifier_is_ufid(sfid) &&
723 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
724}
725
726static bool should_fill_mask(uint32_t ufid_flags)
727{
728 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
729}
730
731static bool should_fill_actions(uint32_t ufid_flags)
0afa2373 732{
bc619e29
JS
733 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
734}
735
736static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
737 const struct sw_flow_id *sfid,
738 uint32_t ufid_flags)
739{
740 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
741
742 /* OVS_FLOW_ATTR_UFID */
743 if (sfid && ovs_identifier_is_ufid(sfid))
744 len += nla_total_size(sfid->ufid_len);
745
746 /* OVS_FLOW_ATTR_KEY */
747 if (!sfid || should_fill_key(sfid, ufid_flags))
748 len += nla_total_size(ovs_key_attr_size());
749
750 /* OVS_FLOW_ATTR_MASK */
751 if (should_fill_mask(ufid_flags))
752 len += nla_total_size(ovs_key_attr_size());
753
754 /* OVS_FLOW_ATTR_ACTIONS */
755 if (should_fill_actions(ufid_flags))
c3bb15b3 756 len += nla_total_size(acts->orig_len);
bc619e29
JS
757
758 return len
0afa2373
TG
759 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
760 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
bc619e29 761 + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
0afa2373
TG
762}
763
f1948bb9
JS
764/* Called with ovs_mutex or RCU read lock. */
765static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
766 struct sk_buff *skb)
767{
768 struct ovs_flow_stats stats;
769 __be16 tcp_flags;
770 unsigned long used;
771
b0f3a2fe 772 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
f71db6b1 773
b0f3a2fe
PS
774 if (used &&
775 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
f1948bb9 776 return -EMSGSIZE;
d6569377 777
b0f3a2fe
PS
778 if (stats.n_packets &&
779 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
f1948bb9 780 return -EMSGSIZE;
b0b906cc 781
b0f3a2fe
PS
782 if ((u8)ntohs(tcp_flags) &&
783 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
f1948bb9
JS
784 return -EMSGSIZE;
785
786 return 0;
787}
788
789/* Called with ovs_mutex or RCU read lock. */
790static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
791 struct sk_buff *skb, int skb_orig_len)
792{
793 struct nlattr *start;
794 int err;
d6569377 795
df2c07f4 796 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
797 * this is the first flow to be dumped into 'skb'. This is unusual for
798 * Netlink but individual action lists can be longer than
799 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
800 * The userspace caller can always fetch the actions separately if it
801 * really wants them. (Most userspace callers in fact don't care.)
802 *
803 * This can only fail for dump operations because the skb is always
804 * properly sized for single flows.
805 */
9b405f1a 806 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee 807 if (start) {
f44ccce1
PS
808 const struct sw_flow_actions *sf_acts;
809
780ec6ae 810 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
a097c0b2
PS
811 err = ovs_nla_put_actions(sf_acts->actions,
812 sf_acts->actions_len, skb);
f71db6b1 813
0a25b039
BP
814 if (!err)
815 nla_nest_end(skb, start);
816 else {
817 if (skb_orig_len)
f1948bb9 818 return err;
0a25b039
BP
819
820 nla_nest_cancel(skb, start);
821 }
f1948bb9
JS
822 } else if (skb_orig_len) {
823 return -EMSGSIZE;
824 }
825
826 return 0;
827}
828
829/* Called with ovs_mutex or RCU read lock. */
2c622e5a 830static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
f1948bb9 831 struct sk_buff *skb, u32 portid,
bc619e29 832 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
f1948bb9
JS
833{
834 const int skb_orig_len = skb->len;
835 struct ovs_header *ovs_header;
836 int err;
837
7d16c847
PS
838 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
839 flags, cmd);
f1948bb9
JS
840 if (!ovs_header)
841 return -EMSGSIZE;
7d16c847 842
f1948bb9
JS
843 ovs_header->dp_ifindex = dp_ifindex;
844
bc619e29 845 err = ovs_nla_put_identifier(flow, skb);
db7f2238
JS
846 if (err)
847 goto error;
848
bc619e29
JS
849 if (should_fill_key(&flow->id, ufid_flags)) {
850 err = ovs_nla_put_masked_key(flow, skb);
851 if (err)
852 goto error;
853 }
854
855 if (should_fill_mask(ufid_flags)) {
856 err = ovs_nla_put_mask(flow, skb);
857 if (err)
858 goto error;
859 }
f1948bb9
JS
860
861 err = ovs_flow_cmd_fill_stats(flow, skb);
862 if (err)
863 goto error;
864
bc619e29
JS
865 if (should_fill_actions(ufid_flags)) {
866 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
867 if (err)
868 goto error;
869 }
37a1300c 870
23b48dc1
TG
871 genlmsg_end(skb, ovs_header);
872 return 0;
d6569377 873
37a1300c 874error:
df2c07f4 875 genlmsg_cancel(skb, ovs_header);
d6569377 876 return err;
44e05eca
BP
877}
878
f71db6b1
JR
879/* May not be called with RCU read lock. */
880static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
bc619e29 881 const struct sw_flow_id *sfid,
afad3556 882 struct genl_info *info,
bc619e29
JS
883 bool always,
884 uint32_t ufid_flags)
44e05eca 885{
afad3556 886 struct sk_buff *skb;
bc619e29 887 size_t len;
d6569377 888
114fce23
SG
889 if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
890 GROUP_ID(&ovs_dp_flow_multicast_group)))
afad3556
JR
891 return NULL;
892
bc619e29
JS
893 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
894 skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
afad3556
JR
895 if (!skb)
896 return ERR_PTR(-ENOMEM);
897
898 return skb;
37a1300c 899}
8d5ebd83 900
f71db6b1 901/* Called with ovs_mutex. */
7d16c847 902static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
f71db6b1
JR
903 int dp_ifindex,
904 struct genl_info *info, u8 cmd,
bc619e29 905 bool always, u32 ufid_flags)
37a1300c
BP
906{
907 struct sk_buff *skb;
908 int retval;
d6569377 909
bc619e29
JS
910 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
911 &flow->id, info, always, ufid_flags);
a6ddcc9a 912 if (IS_ERR_OR_NULL(skb))
afad3556 913 return skb;
d6569377 914
2c622e5a 915 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
f71db6b1 916 info->snd_portid, info->snd_seq, 0,
bc619e29 917 cmd, ufid_flags);
37a1300c 918 BUG_ON(retval < 0);
d6569377 919 return skb;
064af421
BP
920}
921
0c9fd022 922static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
064af421 923{
a94ebc39 924 struct net *net = sock_net(skb->sk);
37a1300c 925 struct nlattr **a = info->attrs;
df2c07f4 926 struct ovs_header *ovs_header = info->userhdr;
bc619e29 927 struct sw_flow *flow = NULL, *new_flow;
a1c564be 928 struct sw_flow_mask mask;
37a1300c 929 struct sk_buff *reply;
9c52546b 930 struct datapath *dp;
bc619e29 931 struct sw_flow_key key;
0c9fd022 932 struct sw_flow_actions *acts;
a1c564be 933 struct sw_flow_match match;
bc619e29 934 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
bc4a05c6 935 int error;
9233cef7 936 bool log = !a[OVS_FLOW_ATTR_PROBE];
064af421 937
6740b721 938 /* Must have key and actions. */
37a1300c 939 error = -EINVAL;
a473df5b 940 if (!a[OVS_FLOW_ATTR_KEY]) {
7d16c847 941 OVS_NLERR(log, "Flow key attr not present in new flow.");
37a1300c 942 goto error;
a473df5b
JG
943 }
944 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
7d16c847 945 OVS_NLERR(log, "Flow actions attr not present in new flow.");
6740b721 946 goto error;
a473df5b 947 }
a1c564be 948
6740b721 949 /* Most of the time we need to allocate a new flow, do it before
af465b67
PS
950 * locking.
951 */
6740b721
JR
952 new_flow = ovs_flow_alloc();
953 if (IS_ERR(new_flow)) {
954 error = PTR_ERR(new_flow);
955 goto error;
956 }
957
958 /* Extract key. */
bc619e29 959 ovs_match_init(&match, &key, &mask);
038e34ab 960 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
9233cef7 961 a[OVS_FLOW_ATTR_MASK], log);
37a1300c 962 if (error)
6740b721 963 goto err_kfree_flow;
064af421 964
ad4adec2 965 ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
bc619e29
JS
966
967 /* Extract flow identifier. */
968 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
969 &key, log);
970 if (error)
971 goto err_kfree_flow;
9b405f1a 972
6740b721 973 /* Validate actions. */
a94ebc39
JS
974 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
975 &new_flow->key, &acts, log);
0c9fd022 976 if (error) {
7d16c847 977 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
4f67b12a 978 goto err_kfree_flow;
6740b721
JR
979 }
980
bc619e29
JS
981 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
982 ufid_flags);
6740b721
JR
983 if (IS_ERR(reply)) {
984 error = PTR_ERR(reply);
985 goto err_kfree_acts;
37a1300c
BP
986 }
987
cd2a59e9 988 ovs_lock();
a94ebc39 989 dp = get_dp(net, ovs_header->dp_ifindex);
6740b721
JR
990 if (unlikely(!dp)) {
991 error = -ENODEV;
cd2a59e9 992 goto err_unlock_ovs;
6740b721 993 }
bc619e29 994
a1c564be 995 /* Check if this is a duplicate flow */
bc619e29
JS
996 if (ovs_identifier_is_ufid(&new_flow->id))
997 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
998 if (!flow)
999 flow = ovs_flow_tbl_lookup(&dp->table, &key);
6740b721
JR
1000 if (likely(!flow)) {
1001 rcu_assign_pointer(new_flow->sf_acts, acts);
d6569377 1002
d6569377 1003 /* Put flow in bucket. */
6740b721
JR
1004 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1005 if (unlikely(error)) {
0585f7a8 1006 acts = NULL;
6740b721
JR
1007 goto err_unlock_ovs;
1008 }
1009
1010 if (unlikely(reply)) {
2c622e5a 1011 error = ovs_flow_cmd_fill_info(new_flow,
6740b721
JR
1012 ovs_header->dp_ifindex,
1013 reply, info->snd_portid,
1014 info->snd_seq, 0,
bc619e29
JS
1015 OVS_FLOW_CMD_NEW,
1016 ufid_flags);
6740b721 1017 BUG_ON(error < 0);
0585f7a8 1018 }
6740b721 1019 ovs_unlock();
d6569377 1020 } else {
0c9fd022
JR
1021 struct sw_flow_actions *old_acts;
1022
d6569377
BP
1023 /* Bail out if we're not allowed to modify an existing flow.
1024 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1025 * because Generic Netlink treats the latter as a dump
1026 * request. We also accept NLM_F_EXCL in case that bug ever
1027 * gets fixed.
1028 */
6740b721
JR
1029 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1030 | NLM_F_EXCL))) {
1031 error = -EEXIST;
cd2a59e9 1032 goto err_unlock_ovs;
6740b721 1033 }
bc619e29
JS
1034 /* The flow identifier has to be the same for flow updates.
1035 * Look for any overlapping flow.
1036 */
1037 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1038 if (ovs_identifier_is_key(&flow->id))
1039 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1040 &match);
1041 else /* UFID matches but key is different */
1042 flow = NULL;
3440e4bc
AW
1043 if (!flow) {
1044 error = -ENOENT;
1045 goto err_unlock_ovs;
1046 }
6740b721 1047 }
0c9fd022
JR
1048 /* Update actions. */
1049 old_acts = ovsl_dereference(flow->sf_acts);
1050 rcu_assign_pointer(flow->sf_acts, acts);
0c9fd022 1051
6740b721 1052 if (unlikely(reply)) {
2c622e5a 1053 error = ovs_flow_cmd_fill_info(flow,
6740b721
JR
1054 ovs_header->dp_ifindex,
1055 reply, info->snd_portid,
1056 info->snd_seq, 0,
bc619e29
JS
1057 OVS_FLOW_CMD_NEW,
1058 ufid_flags);
6740b721
JR
1059 BUG_ON(error < 0);
1060 }
1061 ovs_unlock();
0c9fd022 1062
e23775f2 1063 ovs_nla_free_flow_actions_rcu(old_acts);
6740b721 1064 ovs_flow_free(new_flow, false);
0c9fd022 1065 }
6740b721
JR
1066
1067 if (reply)
cb25142c 1068 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
0c9fd022
JR
1069 return 0;
1070
0c9fd022
JR
1071err_unlock_ovs:
1072 ovs_unlock();
6740b721
JR
1073 kfree_skb(reply);
1074err_kfree_acts:
e23775f2 1075 ovs_nla_free_flow_actions(acts);
6740b721
JR
1076err_kfree_flow:
1077 ovs_flow_free(new_flow, false);
0c9fd022
JR
1078error:
1079 return error;
1080}
1081
cc561abf 1082/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
a94ebc39
JS
1083static struct sw_flow_actions *get_flow_actions(struct net *net,
1084 const struct nlattr *a,
cc561abf 1085 const struct sw_flow_key *key,
9233cef7
JR
1086 const struct sw_flow_mask *mask,
1087 bool log)
cc561abf
PS
1088{
1089 struct sw_flow_actions *acts;
1090 struct sw_flow_key masked_key;
1091 int error;
1092
ad4adec2 1093 ovs_flow_mask_key(&masked_key, key, true, mask);
a94ebc39 1094 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
cc561abf 1095 if (error) {
9233cef7 1096 OVS_NLERR(log,
7d16c847 1097 "Actions may not be safe on all matching packets");
cc561abf
PS
1098 return ERR_PTR(error);
1099 }
1100
1101 return acts;
1102}
1103
0c9fd022
JR
1104static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1105{
a94ebc39 1106 struct net *net = sock_net(skb->sk);
0c9fd022
JR
1107 struct nlattr **a = info->attrs;
1108 struct ovs_header *ovs_header = info->userhdr;
1d2a1b5f 1109 struct sw_flow_key key;
0c9fd022
JR
1110 struct sw_flow *flow;
1111 struct sw_flow_mask mask;
1112 struct sk_buff *reply = NULL;
1113 struct datapath *dp;
6740b721 1114 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
0c9fd022 1115 struct sw_flow_match match;
bc619e29
JS
1116 struct sw_flow_id sfid;
1117 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
0c9fd022 1118 int error;
9233cef7 1119 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1120 bool ufid_present;
0c9fd022
JR
1121
1122 /* Extract key. */
1123 error = -EINVAL;
a473df5b 1124 if (!a[OVS_FLOW_ATTR_KEY]) {
9233cef7 1125 OVS_NLERR(log, "Flow key attribute not present in set flow.");
0c9fd022 1126 goto error;
a473df5b 1127 }
0c9fd022 1128
bc619e29 1129 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
0c9fd022 1130 ovs_match_init(&match, &key, &mask);
038e34ab 1131 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
9233cef7 1132 a[OVS_FLOW_ATTR_MASK], log);
0c9fd022
JR
1133 if (error)
1134 goto error;
d6569377 1135
0c9fd022
JR
1136 /* Validate actions. */
1137 if (a[OVS_FLOW_ATTR_ACTIONS]) {
a94ebc39
JS
1138 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1139 &mask, log);
cc561abf
PS
1140 if (IS_ERR(acts)) {
1141 error = PTR_ERR(acts);
0c9fd022 1142 goto error;
6740b721 1143 }
6740b721 1144
ff27161e 1145 /* Can allocate before locking if have acts. */
bc619e29
JS
1146 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1147 ufid_flags);
6740b721
JR
1148 if (IS_ERR(reply)) {
1149 error = PTR_ERR(reply);
1150 goto err_kfree_acts;
90b8c2f7 1151 }
0c9fd022
JR
1152 }
1153
1154 ovs_lock();
a94ebc39 1155 dp = get_dp(net, ovs_header->dp_ifindex);
6740b721
JR
1156 if (unlikely(!dp)) {
1157 error = -ENODEV;
0c9fd022 1158 goto err_unlock_ovs;
6740b721 1159 }
0c9fd022 1160 /* Check that the flow exists. */
bc619e29
JS
1161 if (ufid_present)
1162 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1163 else
1164 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
6740b721
JR
1165 if (unlikely(!flow)) {
1166 error = -ENOENT;
0c9fd022 1167 goto err_unlock_ovs;
6740b721 1168 }
3440e4bc 1169
0c9fd022 1170 /* Update actions, if present. */
6740b721 1171 if (likely(acts)) {
0c9fd022
JR
1172 old_acts = ovsl_dereference(flow->sf_acts);
1173 rcu_assign_pointer(flow->sf_acts, acts);
6740b721
JR
1174
1175 if (unlikely(reply)) {
2c622e5a 1176 error = ovs_flow_cmd_fill_info(flow,
6740b721
JR
1177 ovs_header->dp_ifindex,
1178 reply, info->snd_portid,
1179 info->snd_seq, 0,
bc619e29
JS
1180 OVS_FLOW_CMD_NEW,
1181 ufid_flags);
6740b721
JR
1182 BUG_ON(error < 0);
1183 }
1184 } else {
1185 /* Could not alloc without acts before locking. */
7d16c847 1186 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
bc619e29
JS
1187 info, OVS_FLOW_CMD_NEW, false,
1188 ufid_flags);
1189
6740b721
JR
1190 if (unlikely(IS_ERR(reply))) {
1191 error = PTR_ERR(reply);
1192 goto err_unlock_ovs;
1193 }
9c52546b 1194 }
0c9fd022 1195
0c9fd022
JR
1196 /* Clear stats. */
1197 if (a[OVS_FLOW_ATTR_CLEAR])
1198 ovs_flow_stats_clear(flow);
cd2a59e9 1199 ovs_unlock();
37a1300c 1200
6740b721 1201 if (reply)
cb25142c 1202 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
6740b721 1203 if (old_acts)
e23775f2 1204 ovs_nla_free_flow_actions_rcu(old_acts);
7d16c847 1205
d6569377 1206 return 0;
704a1e09 1207
cd2a59e9
PS
1208err_unlock_ovs:
1209 ovs_unlock();
6740b721
JR
1210 kfree_skb(reply);
1211err_kfree_acts:
e23775f2 1212 ovs_nla_free_flow_actions(acts);
37a1300c 1213error:
9c52546b 1214 return error;
704a1e09
BP
1215}
1216
df2c07f4 1217static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 1218{
37a1300c 1219 struct nlattr **a = info->attrs;
df2c07f4 1220 struct ovs_header *ovs_header = info->userhdr;
038e34ab 1221 struct net *net = sock_net(skb->sk);
37a1300c 1222 struct sw_flow_key key;
37a1300c 1223 struct sk_buff *reply;
704a1e09 1224 struct sw_flow *flow;
9c52546b 1225 struct datapath *dp;
a1c564be 1226 struct sw_flow_match match;
bc619e29
JS
1227 struct sw_flow_id ufid;
1228 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1229 int err = 0;
9233cef7 1230 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1231 bool ufid_present;
704a1e09 1232
bc619e29
JS
1233 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1234 if (a[OVS_FLOW_ATTR_KEY]) {
1235 ovs_match_init(&match, &key, NULL);
038e34ab 1236 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
bc619e29
JS
1237 log);
1238 } else if (!ufid_present) {
9233cef7
JR
1239 OVS_NLERR(log,
1240 "Flow get message rejected, Key attribute missing.");
bc619e29 1241 err = -EINVAL;
1b936472 1242 }
37a1300c
BP
1243 if (err)
1244 return err;
704a1e09 1245
cd2a59e9 1246 ovs_lock();
2a4999f3 1247 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
1248 if (!dp) {
1249 err = -ENODEV;
1250 goto unlock;
1251 }
704a1e09 1252
bc619e29
JS
1253 if (ufid_present)
1254 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1255 else
1256 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
3440e4bc 1257 if (!flow) {
cd2a59e9
PS
1258 err = -ENOENT;
1259 goto unlock;
1260 }
d6569377 1261
7d16c847 1262 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
bc619e29 1263 OVS_FLOW_CMD_NEW, true, ufid_flags);
cd2a59e9
PS
1264 if (IS_ERR(reply)) {
1265 err = PTR_ERR(reply);
1266 goto unlock;
1267 }
36956a7d 1268
cd2a59e9 1269 ovs_unlock();
37a1300c 1270 return genlmsg_reply(reply, info);
cd2a59e9
PS
1271unlock:
1272 ovs_unlock();
1273 return err;
d6569377 1274}
9c52546b 1275
df2c07f4 1276static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 1277{
37a1300c 1278 struct nlattr **a = info->attrs;
df2c07f4 1279 struct ovs_header *ovs_header = info->userhdr;
038e34ab 1280 struct net *net = sock_net(skb->sk);
37a1300c 1281 struct sw_flow_key key;
37a1300c 1282 struct sk_buff *reply;
bc619e29 1283 struct sw_flow *flow = NULL;
d6569377 1284 struct datapath *dp;
a1c564be 1285 struct sw_flow_match match;
bc619e29
JS
1286 struct sw_flow_id ufid;
1287 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
d6569377 1288 int err;
9233cef7 1289 bool log = !a[OVS_FLOW_ATTR_PROBE];
bc619e29 1290 bool ufid_present;
36956a7d 1291
bc619e29
JS
1292 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1293 if (a[OVS_FLOW_ATTR_KEY]) {
cde7f3ba 1294 ovs_match_init(&match, &key, NULL);
038e34ab
JS
1295 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1296 NULL, log);
cde7f3ba
JR
1297 if (unlikely(err))
1298 return err;
1299 }
1300
cd2a59e9 1301 ovs_lock();
2a4999f3 1302 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cde7f3ba 1303 if (unlikely(!dp)) {
cd2a59e9
PS
1304 err = -ENODEV;
1305 goto unlock;
1306 }
7d16c847 1307
bc619e29 1308 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
994dc286 1309 err = ovs_flow_tbl_flush(&dp->table);
cd2a59e9
PS
1310 goto unlock;
1311 }
7d16c847 1312
bc619e29
JS
1313 if (ufid_present)
1314 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1315 else
1316 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
3440e4bc 1317 if (unlikely(!flow)) {
cd2a59e9
PS
1318 err = -ENOENT;
1319 goto unlock;
1320 }
d6569377 1321
994dc286 1322 ovs_flow_tbl_remove(&dp->table, flow);
cde7f3ba 1323 ovs_unlock();
37a1300c 1324
46051cf8 1325 reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
bc619e29 1326 &flow->id, info, false, ufid_flags);
cde7f3ba
JR
1327
1328 if (likely(reply)) {
1329 if (likely(!IS_ERR(reply))) {
7d16c847
PS
1330 rcu_read_lock(); /*To keep RCU checker happy. */
1331 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
cde7f3ba
JR
1332 reply, info->snd_portid,
1333 info->snd_seq, 0,
bc619e29
JS
1334 OVS_FLOW_CMD_DEL,
1335 ufid_flags);
cde7f3ba
JR
1336 rcu_read_unlock();
1337 BUG_ON(err < 0);
cb25142c 1338 ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
cde7f3ba 1339 } else {
cb25142c
PS
1340 genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
1341 GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));
1342
cde7f3ba 1343 }
afad3556 1344 }
37a1300c 1345
a1c564be 1346 ovs_flow_free(flow, true);
37a1300c 1347 return 0;
cd2a59e9
PS
1348unlock:
1349 ovs_unlock();
1350 return err;
37a1300c
BP
1351}
1352
df2c07f4 1353static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1354{
bc619e29 1355 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
df2c07f4 1356 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
994dc286 1357 struct table_instance *ti;
37a1300c 1358 struct datapath *dp;
bc619e29
JS
1359 u32 ufid_flags;
1360 int err;
1361
1362 err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1363 OVS_FLOW_ATTR_MAX, flow_policy);
1364 if (err)
1365 return err;
1366 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
37a1300c 1367
f44ccce1 1368 rcu_read_lock();
01ac0970 1369 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9 1370 if (!dp) {
f44ccce1 1371 rcu_read_unlock();
37a1300c 1372 return -ENODEV;
cd2a59e9 1373 }
37a1300c 1374
994dc286 1375 ti = rcu_dereference(dp->table.ti);
37a1300c 1376 for (;;) {
37a1300c
BP
1377 struct sw_flow *flow;
1378 u32 bucket, obj;
1379
1380 bucket = cb->args[0];
1381 obj = cb->args[1];
994dc286 1382 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
3544358a 1383 if (!flow)
37a1300c
BP
1384 break;
1385
2c622e5a 1386 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
28aea917 1387 NETLINK_CB(cb->skb).portid,
37a1300c 1388 cb->nlh->nlmsg_seq, NLM_F_MULTI,
bc619e29 1389 OVS_FLOW_CMD_NEW, ufid_flags) < 0)
37a1300c
BP
1390 break;
1391
1392 cb->args[0] = bucket;
1393 cb->args[1] = obj;
1394 }
f44ccce1 1395 rcu_read_unlock();
37a1300c 1396 return skb->len;
704a1e09
BP
1397}
1398
cb25142c
PS
1399static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1400 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
9233cef7 1401 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
cb25142c
PS
1402 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1403 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
9233cef7 1404 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
bc619e29
JS
1405 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1406 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
cb25142c
PS
1407};
1408
18fd3a52 1409static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1410 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1411 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1412 .policy = flow_policy,
0c9fd022 1413 .doit = ovs_flow_cmd_new
37a1300c 1414 },
df2c07f4 1415 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1416 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1417 .policy = flow_policy,
df2c07f4 1418 .doit = ovs_flow_cmd_del
37a1300c 1419 },
df2c07f4 1420 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1421 .flags = 0, /* OK for unprivileged users. */
1422 .policy = flow_policy,
df2c07f4
JP
1423 .doit = ovs_flow_cmd_get,
1424 .dumpit = ovs_flow_cmd_dump
37a1300c 1425 },
df2c07f4 1426 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1427 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1428 .policy = flow_policy,
0c9fd022 1429 .doit = ovs_flow_cmd_set,
37a1300c
BP
1430 },
1431};
1432
cb25142c 1433static struct genl_family dp_flow_genl_family = {
aaff4b55 1434 .id = GENL_ID_GENERATE,
df2c07f4 1435 .hdrsize = sizeof(struct ovs_header),
cb25142c
PS
1436 .name = OVS_FLOW_FAMILY,
1437 .version = OVS_FLOW_VERSION,
1438 .maxattr = OVS_FLOW_ATTR_MAX,
b3dcb73c 1439 .netnsok = true,
cb25142c
PS
1440 .parallel_ops = true,
1441 .ops = dp_flow_genl_ops,
1442 .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1443 .mcgrps = &ovs_dp_flow_multicast_group,
1444 .n_mcgrps = 1,
aaff4b55
BP
1445};
1446
0afa2373
TG
1447static size_t ovs_dp_cmd_msg_size(void)
1448{
1449 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1450
1451 msgsize += nla_total_size(IFNAMSIZ);
1452 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
4fa72a95 1453 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
300af20a 1454 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
0afa2373
TG
1455
1456 return msgsize;
1457}
1458
d637497c 1459/* Called with ovs_mutex. */
df2c07f4 1460static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1461 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1462{
df2c07f4 1463 struct ovs_header *ovs_header;
e926dfe3 1464 struct ovs_dp_stats dp_stats;
4fa72a95 1465 struct ovs_dp_megaflow_stats dp_megaflow_stats;
064af421
BP
1466 int err;
1467
28aea917 1468 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1469 flags, cmd);
df2c07f4 1470 if (!ovs_header)
aaff4b55 1471 goto error;
064af421 1472
b063d9f0 1473 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1474
850b6b3b 1475 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
064af421 1476 if (err)
d6569377 1477 goto nla_put_failure;
064af421 1478
4fa72a95
AZ
1479 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1480 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1481 &dp_stats))
1482 goto nla_put_failure;
1483
1484 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1485 sizeof(struct ovs_dp_megaflow_stats),
1486 &dp_megaflow_stats))
c3cc8c03 1487 goto nla_put_failure;
d6569377 1488
c58cc9a4
TG
1489 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1490 goto nla_put_failure;
1491
23b48dc1
TG
1492 genlmsg_end(skb, ovs_header);
1493 return 0;
d6569377
BP
1494
1495nla_put_failure:
df2c07f4 1496 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1497error:
1498 return -EMSGSIZE;
d6569377
BP
1499}
1500
d81eef1b 1501static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
d6569377 1502{
d81eef1b 1503 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
aaff4b55 1504}
9dca7bd5 1505
aa917006 1506/* Called with rcu_read_lock or ovs_mutex. */
2a4999f3 1507static struct datapath *lookup_datapath(struct net *net,
f1f60b85 1508 const struct ovs_header *ovs_header,
6455100f 1509 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1510{
254f2dc8
BP
1511 struct datapath *dp;
1512
df2c07f4 1513 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1514 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1515 else {
d6569377 1516 struct vport *vport;
d6569377 1517
2a4999f3 1518 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1519 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
d6569377 1520 }
254f2dc8 1521 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1522}
1523
94358dcf
TG
1524static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1525{
1526 struct datapath *dp;
1527
1528 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
09350a3d 1529 if (IS_ERR(dp))
94358dcf
TG
1530 return;
1531
1532 WARN(dp->user_features, "Dropping previously announced user features\n");
1533 dp->user_features = 0;
1534}
1535
f1f60b85 1536static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
c58cc9a4
TG
1537{
1538 if (a[OVS_DP_ATTR_USER_FEATURES])
1539 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1540}
1541
df2c07f4 1542static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1543{
aaff4b55 1544 struct nlattr **a = info->attrs;
d6569377 1545 struct vport_parms parms;
aaff4b55 1546 struct sk_buff *reply;
d6569377
BP
1547 struct datapath *dp;
1548 struct vport *vport;
2a4999f3 1549 struct ovs_net *ovs_net;
95b1d73a 1550 int err, i;
d6569377 1551
d6569377 1552 err = -EINVAL;
ea36840f 1553 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1554 goto err;
1555
d81eef1b
JR
1556 reply = ovs_dp_cmd_alloc_info(info);
1557 if (!reply)
1558 return -ENOMEM;
d6569377 1559
d6569377
BP
1560 err = -ENOMEM;
1561 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1562 if (dp == NULL)
d81eef1b 1563 goto err_free_reply;
2a4999f3 1564
c0cddcec 1565 ovs_dp_set_net(dp, sock_net(skb->sk));
0ceaa66c 1566
d6569377 1567 /* Allocate table. */
994dc286
PS
1568 err = ovs_flow_tbl_init(&dp->table);
1569 if (err)
d6569377
BP
1570 goto err_free_dp;
1571
08fb1bbd 1572 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
99769a40
JG
1573 if (!dp->stats_percpu) {
1574 err = -ENOMEM;
1575 goto err_destroy_table;
1576 }
1577
95b1d73a
PS
1578 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1579 GFP_KERNEL);
1580 if (!dp->ports) {
1581 err = -ENOMEM;
1582 goto err_destroy_percpu;
1583 }
1584
1585 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1586 INIT_HLIST_HEAD(&dp->ports[i]);
1587
d6569377 1588 /* Set up our datapath device. */
df2c07f4
JP
1589 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1590 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1591 parms.options = NULL;
1592 parms.dp = dp;
df2c07f4 1593 parms.port_no = OVSP_LOCAL;
beb1c69a 1594 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
b063d9f0 1595
c58cc9a4
TG
1596 ovs_dp_change(dp, a);
1597
d81eef1b
JR
1598 /* So far only local changes have been made, now need the lock. */
1599 ovs_lock();
1600
d6569377
BP
1601 vport = new_vport(&parms);
1602 if (IS_ERR(vport)) {
1603 err = PTR_ERR(vport);
1604 if (err == -EBUSY)
1605 err = -EEXIST;
1606
94358dcf
TG
1607 if (err == -EEXIST) {
1608 /* An outdated user space instance that does not understand
1609 * the concept of user_features has attempted to create a new
1610 * datapath and is likely to reuse it. Drop all user features.
1611 */
1612 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1613 ovs_dp_reset_user_features(skb, info);
1614 }
1615
95b1d73a 1616 goto err_destroy_ports_array;
d6569377 1617 }
d6569377 1618
d81eef1b
JR
1619 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1620 info->snd_seq, 0, OVS_DP_CMD_NEW);
1621 BUG_ON(err < 0);
aaff4b55 1622
2a4999f3 1623 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
fb93e9aa 1624 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
a0fb56c1 1625
cd2a59e9 1626 ovs_unlock();
d6569377 1627
cb25142c 1628 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
d6569377
BP
1629 return 0;
1630
95b1d73a 1631err_destroy_ports_array:
d81eef1b 1632 ovs_unlock();
95b1d73a 1633 kfree(dp->ports);
99769a40
JG
1634err_destroy_percpu:
1635 free_percpu(dp->stats_percpu);
d6569377 1636err_destroy_table:
e379e4d1 1637 ovs_flow_tbl_destroy(&dp->table);
d6569377 1638err_free_dp:
d6569377 1639 kfree(dp);
d81eef1b
JR
1640err_free_reply:
1641 kfree_skb(reply);
d6569377 1642err:
064af421
BP
1643 return err;
1644}
1645
cd2a59e9 1646/* Called with ovs_mutex. */
2a4999f3 1647static void __dp_destroy(struct datapath *dp)
44e05eca 1648{
95b1d73a 1649 int i;
44e05eca 1650
95b1d73a
PS
1651 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1652 struct vport *vport;
f8dfbcb7 1653 struct hlist_node *n;
95b1d73a 1654
f8dfbcb7 1655 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1656 if (vport->port_no != OVSP_LOCAL)
1657 ovs_dp_detach_port(vport);
1658 }
ed099e92 1659
fb93e9aa 1660 list_del_rcu(&dp->list_node);
ed099e92 1661
cd2a59e9 1662 /* OVSP_LOCAL is datapath internal port. We need to make sure that
d103f479
AZ
1663 * all ports in datapath are destroyed first before freeing datapath.
1664 */
cd2a59e9 1665 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1666
d103f479 1667 /* RCU destroy the flow table */
ed099e92 1668 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1669}
1670
1671static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1672{
1673 struct sk_buff *reply;
1674 struct datapath *dp;
1675 int err;
1676
d81eef1b
JR
1677 reply = ovs_dp_cmd_alloc_info(info);
1678 if (!reply)
1679 return -ENOMEM;
1680
cd2a59e9 1681 ovs_lock();
2a4999f3
PS
1682 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1683 err = PTR_ERR(dp);
1684 if (IS_ERR(dp))
d81eef1b 1685 goto err_unlock_free;
2a4999f3 1686
d81eef1b
JR
1687 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1688 info->snd_seq, 0, OVS_DP_CMD_DEL);
1689 BUG_ON(err < 0);
2a4999f3
PS
1690
1691 __dp_destroy(dp);
d81eef1b 1692 ovs_unlock();
7d16c847 1693
cb25142c 1694 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
99620d2c 1695 return 0;
d81eef1b
JR
1696
1697err_unlock_free:
cd2a59e9 1698 ovs_unlock();
d81eef1b 1699 kfree_skb(reply);
cd2a59e9 1700 return err;
44e05eca
BP
1701}
1702
df2c07f4 1703static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1704{
aaff4b55 1705 struct sk_buff *reply;
d6569377 1706 struct datapath *dp;
d6569377 1707 int err;
064af421 1708
d81eef1b
JR
1709 reply = ovs_dp_cmd_alloc_info(info);
1710 if (!reply)
1711 return -ENOMEM;
1712
cd2a59e9 1713 ovs_lock();
2a4999f3 1714 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1715 err = PTR_ERR(dp);
d6569377 1716 if (IS_ERR(dp))
d81eef1b 1717 goto err_unlock_free;
38c6ecbc 1718
c58cc9a4
TG
1719 ovs_dp_change(dp, info->attrs);
1720
d81eef1b
JR
1721 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1722 info->snd_seq, 0, OVS_DP_CMD_NEW);
1723 BUG_ON(err < 0);
a0fb56c1 1724
cd2a59e9 1725 ovs_unlock();
7d16c847 1726
cb25142c 1727 ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
aaff4b55 1728 return 0;
d81eef1b
JR
1729
1730err_unlock_free:
cd2a59e9 1731 ovs_unlock();
d81eef1b 1732 kfree_skb(reply);
cd2a59e9 1733 return err;
064af421
BP
1734}
1735
df2c07f4 1736static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1737{
aaff4b55 1738 struct sk_buff *reply;
d6569377 1739 struct datapath *dp;
d6569377 1740 int err;
1dcf111b 1741
d81eef1b
JR
1742 reply = ovs_dp_cmd_alloc_info(info);
1743 if (!reply)
1744 return -ENOMEM;
1745
d637497c 1746 ovs_lock();
2a4999f3 1747 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1748 if (IS_ERR(dp)) {
1749 err = PTR_ERR(dp);
d81eef1b 1750 goto err_unlock_free;
cd2a59e9 1751 }
d81eef1b
JR
1752 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1753 info->snd_seq, 0, OVS_DP_CMD_NEW);
1754 BUG_ON(err < 0);
d637497c 1755 ovs_unlock();
aaff4b55
BP
1756
1757 return genlmsg_reply(reply, info);
cd2a59e9 1758
d81eef1b 1759err_unlock_free:
d637497c 1760 ovs_unlock();
d81eef1b 1761 kfree_skb(reply);
cd2a59e9 1762 return err;
1dcf111b
JP
1763}
1764
df2c07f4 1765static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1766{
2a4999f3 1767 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1768 struct datapath *dp;
1769 int skip = cb->args[0];
1770 int i = 0;
a7786963 1771
d637497c
PS
1772 ovs_lock();
1773 list_for_each_entry(dp, &ovs_net->dps, list_node) {
a2bab2f0 1774 if (i >= skip &&
28aea917 1775 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1776 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1777 OVS_DP_CMD_NEW) < 0)
aaff4b55 1778 break;
254f2dc8 1779 i++;
a7786963 1780 }
d637497c 1781 ovs_unlock();
aaff4b55 1782
254f2dc8
BP
1783 cb->args[0] = i;
1784
aaff4b55 1785 return skb->len;
c19e6535
BP
1786}
1787
cb25142c
PS
1788static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1789 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1790 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1791 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1792};
1793
18fd3a52 1794static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1795 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1796 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1797 .policy = datapath_policy,
df2c07f4 1798 .doit = ovs_dp_cmd_new
aaff4b55 1799 },
df2c07f4 1800 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1801 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1802 .policy = datapath_policy,
df2c07f4 1803 .doit = ovs_dp_cmd_del
aaff4b55 1804 },
df2c07f4 1805 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1806 .flags = 0, /* OK for unprivileged users. */
1807 .policy = datapath_policy,
df2c07f4
JP
1808 .doit = ovs_dp_cmd_get,
1809 .dumpit = ovs_dp_cmd_dump
aaff4b55 1810 },
df2c07f4 1811 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1812 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1813 .policy = datapath_policy,
df2c07f4 1814 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1815 },
1816};
1817
cb25142c 1818static struct genl_family dp_datapath_genl_family = {
f0fef760 1819 .id = GENL_ID_GENERATE,
df2c07f4 1820 .hdrsize = sizeof(struct ovs_header),
cb25142c
PS
1821 .name = OVS_DATAPATH_FAMILY,
1822 .version = OVS_DATAPATH_VERSION,
1823 .maxattr = OVS_DP_ATTR_MAX,
b3dcb73c 1824 .netnsok = true,
cb25142c
PS
1825 .parallel_ops = true,
1826 .ops = dp_datapath_genl_ops,
1827 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1828 .mcgrps = &ovs_dp_datapath_multicast_group,
1829 .n_mcgrps = 1,
f0fef760
BP
1830};
1831
cd2a59e9 1832/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1833static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1834 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1835{
df2c07f4 1836 struct ovs_header *ovs_header;
e926dfe3 1837 struct ovs_vport_stats vport_stats;
c19e6535
BP
1838 int err;
1839
28aea917 1840 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1841 flags, cmd);
df2c07f4 1842 if (!ovs_header)
f0fef760 1843 return -EMSGSIZE;
c19e6535 1844
99769a40 1845 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1846
c3cc8c03
DM
1847 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1848 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
e23775f2
PS
1849 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1850 ovs_vport_name(vport)))
c3cc8c03 1851 goto nla_put_failure;
c19e6535 1852
850b6b3b 1853 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1854 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1855 &vport_stats))
1856 goto nla_put_failure;
c19e6535 1857
beb1c69a
AW
1858 if (ovs_vport_get_upcall_portids(vport, skb))
1859 goto nla_put_failure;
1860
850b6b3b 1861 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1862 if (err == -EMSGSIZE)
1863 goto error;
c19e6535 1864
23b48dc1
TG
1865 genlmsg_end(skb, ovs_header);
1866 return 0;
c19e6535
BP
1867
1868nla_put_failure:
1869 err = -EMSGSIZE;
f0fef760 1870error:
df2c07f4 1871 genlmsg_cancel(skb, ovs_header);
f0fef760 1872 return err;
064af421
BP
1873}
1874
d81eef1b
JR
1875static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1876{
1877 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1878}
1879
1880/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
28aea917 1881struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1882 u32 seq, u8 cmd)
064af421 1883{
c19e6535 1884 struct sk_buff *skb;
f0fef760 1885 int retval;
c19e6535 1886
f0fef760 1887 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1888 if (!skb)
1889 return ERR_PTR(-ENOMEM);
1890
28aea917 1891 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1892 BUG_ON(retval < 0);
1893
c19e6535 1894 return skb;
f0fef760 1895}
c19e6535 1896
cd2a59e9 1897/* Called with ovs_mutex or RCU read lock. */
2a4999f3 1898static struct vport *lookup_vport(struct net *net,
f1f60b85 1899 const struct ovs_header *ovs_header,
df2c07f4 1900 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1901{
1902 struct datapath *dp;
1903 struct vport *vport;
1904
df2c07f4 1905 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1906 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1907 if (!vport)
c19e6535 1908 return ERR_PTR(-ENODEV);
24ce832d
BP
1909 if (ovs_header->dp_ifindex &&
1910 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1911 return ERR_PTR(-ENODEV);
c19e6535 1912 return vport;
df2c07f4
JP
1913 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1914 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1915
1916 if (port_no >= DP_MAX_PORTS)
f0fef760 1917 return ERR_PTR(-EFBIG);
c19e6535 1918
2a4999f3 1919 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1920 if (!dp)
1921 return ERR_PTR(-ENODEV);
f2459fe7 1922
cd2a59e9 1923 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 1924 if (!vport)
17535c57 1925 return ERR_PTR(-ENODEV);
c19e6535
BP
1926 return vport;
1927 } else
1928 return ERR_PTR(-EINVAL);
064af421
BP
1929}
1930
df2c07f4 1931static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1932{
f0fef760 1933 struct nlattr **a = info->attrs;
df2c07f4 1934 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1935 struct vport_parms parms;
ed099e92 1936 struct sk_buff *reply;
c19e6535 1937 struct vport *vport;
c19e6535 1938 struct datapath *dp;
b0ec0f27 1939 u32 port_no;
c19e6535 1940 int err;
b0ec0f27 1941
ea36840f
BP
1942 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1943 !a[OVS_VPORT_ATTR_UPCALL_PID])
d81eef1b
JR
1944 return -EINVAL;
1945
1946 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1947 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1948 if (port_no >= DP_MAX_PORTS)
1949 return -EFBIG;
1950
1951 reply = ovs_vport_cmd_alloc_info();
1952 if (!reply)
1953 return -ENOMEM;
f0fef760 1954
cd2a59e9 1955 ovs_lock();
5a38795f 1956restart:
2a4999f3 1957 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1958 err = -ENODEV;
1959 if (!dp)
d81eef1b 1960 goto exit_unlock_free;
c19e6535 1961
d81eef1b 1962 if (port_no) {
cd2a59e9 1963 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1964 err = -EBUSY;
1965 if (vport)
d81eef1b 1966 goto exit_unlock_free;
c19e6535
BP
1967 } else {
1968 for (port_no = 1; ; port_no++) {
1969 if (port_no >= DP_MAX_PORTS) {
1970 err = -EFBIG;
d81eef1b 1971 goto exit_unlock_free;
c19e6535 1972 }
cd2a59e9 1973 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1974 if (!vport)
1975 break;
51d4d598 1976 }
064af421 1977 }
b0ec0f27 1978
df2c07f4
JP
1979 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1980 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1981 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1982 parms.dp = dp;
1983 parms.port_no = port_no;
beb1c69a 1984 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
c19e6535
BP
1985
1986 vport = new_vport(&parms);
1987 err = PTR_ERR(vport);
5a38795f
TG
1988 if (IS_ERR(vport)) {
1989 if (err == -EAGAIN)
1990 goto restart;
d81eef1b 1991 goto exit_unlock_free;
5a38795f 1992 }
c19e6535 1993
d81eef1b
JR
1994 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1995 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1996 BUG_ON(err < 0);
1997 ovs_unlock();
e297c6b7 1998
cb25142c 1999 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
d81eef1b 2000 return 0;
c19e6535 2001
d81eef1b 2002exit_unlock_free:
cd2a59e9 2003 ovs_unlock();
d81eef1b 2004 kfree_skb(reply);
c19e6535 2005 return err;
44e05eca
BP
2006}
2007
df2c07f4 2008static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 2009{
f0fef760
BP
2010 struct nlattr **a = info->attrs;
2011 struct sk_buff *reply;
c19e6535 2012 struct vport *vport;
c19e6535 2013 int err;
44e05eca 2014
d81eef1b
JR
2015 reply = ovs_vport_cmd_alloc_info();
2016 if (!reply)
2017 return -ENOMEM;
2018
cd2a59e9 2019 ovs_lock();
2a4999f3 2020 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
2021 err = PTR_ERR(vport);
2022 if (IS_ERR(vport))
d81eef1b 2023 goto exit_unlock_free;
44e05eca 2024
6455100f 2025 if (a[OVS_VPORT_ATTR_TYPE] &&
17ec1d04 2026 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
4879d4c7 2027 err = -EINVAL;
d81eef1b 2028 goto exit_unlock_free;
c25ea534
JG
2029 }
2030
17ec1d04 2031 if (a[OVS_VPORT_ATTR_OPTIONS]) {
850b6b3b 2032 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
17ec1d04 2033 if (err)
d81eef1b 2034 goto exit_unlock_free;
17ec1d04 2035 }
1fc7083d 2036
beb1c69a 2037 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
7d16c847
PS
2038 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2039
2040 err = ovs_vport_set_upcall_portids(vport, ids);
beb1c69a
AW
2041 if (err)
2042 goto exit_unlock_free;
2043 }
c19e6535 2044
c25ea534
JG
2045 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2046 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2047 BUG_ON(err < 0);
cd2a59e9 2048 ovs_unlock();
d81eef1b 2049
cb25142c 2050 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
c25ea534
JG
2051 return 0;
2052
d81eef1b 2053exit_unlock_free:
cd2a59e9 2054 ovs_unlock();
d81eef1b 2055 kfree_skb(reply);
c19e6535 2056 return err;
064af421
BP
2057}
2058
df2c07f4 2059static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2060{
f0fef760
BP
2061 struct nlattr **a = info->attrs;
2062 struct sk_buff *reply;
c19e6535 2063 struct vport *vport;
c19e6535
BP
2064 int err;
2065
d81eef1b
JR
2066 reply = ovs_vport_cmd_alloc_info();
2067 if (!reply)
2068 return -ENOMEM;
2069
cd2a59e9 2070 ovs_lock();
2a4999f3 2071 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 2072 err = PTR_ERR(vport);
f0fef760 2073 if (IS_ERR(vport))
d81eef1b 2074 goto exit_unlock_free;
c19e6535 2075
df2c07f4 2076 if (vport->port_no == OVSP_LOCAL) {
f0fef760 2077 err = -EINVAL;
d81eef1b 2078 goto exit_unlock_free;
f0fef760
BP
2079 }
2080
d81eef1b
JR
2081 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2082 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2083 BUG_ON(err < 0);
850b6b3b 2084 ovs_dp_detach_port(vport);
d81eef1b 2085 ovs_unlock();
f0fef760 2086
cb25142c 2087 ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
d81eef1b 2088 return 0;
f0fef760 2089
d81eef1b 2090exit_unlock_free:
cd2a59e9 2091 ovs_unlock();
d81eef1b 2092 kfree_skb(reply);
c19e6535 2093 return err;
7c40efc9
BP
2094}
2095
df2c07f4 2096static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2097{
f0fef760 2098 struct nlattr **a = info->attrs;
df2c07f4 2099 struct ovs_header *ovs_header = info->userhdr;
ed099e92 2100 struct sk_buff *reply;
c19e6535 2101 struct vport *vport;
c19e6535
BP
2102 int err;
2103
d81eef1b
JR
2104 reply = ovs_vport_cmd_alloc_info();
2105 if (!reply)
2106 return -ENOMEM;
2107
ed099e92 2108 rcu_read_lock();
2a4999f3 2109 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
2110 err = PTR_ERR(vport);
2111 if (IS_ERR(vport))
d81eef1b
JR
2112 goto exit_unlock_free;
2113 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2114 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2115 BUG_ON(err < 0);
df2fa9b5
JG
2116 rcu_read_unlock();
2117
2118 return genlmsg_reply(reply, info);
ed099e92 2119
d81eef1b 2120exit_unlock_free:
ed099e92 2121 rcu_read_unlock();
d81eef1b 2122 kfree_skb(reply);
c19e6535
BP
2123 return err;
2124}
2125
df2c07f4 2126static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 2127{
df2c07f4 2128 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 2129 struct datapath *dp;
95b1d73a
PS
2130 int bucket = cb->args[0], skip = cb->args[1];
2131 int i, j = 0;
c19e6535 2132
03fc2881 2133 rcu_read_lock();
01ac0970 2134 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
03fc2881
JR
2135 if (!dp) {
2136 rcu_read_unlock();
f0fef760 2137 return -ENODEV;
03fc2881 2138 }
95b1d73a 2139 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 2140 struct vport *vport;
95b1d73a
PS
2141
2142 j = 0;
f8dfbcb7 2143 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
2144 if (j >= skip &&
2145 ovs_vport_cmd_fill_info(vport, skb,
28aea917 2146 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
2147 cb->nlh->nlmsg_seq,
2148 NLM_F_MULTI,
2149 OVS_VPORT_CMD_NEW) < 0)
2150 goto out;
2151
2152 j++;
2153 }
2154 skip = 0;
c19e6535 2155 }
95b1d73a 2156out:
ed099e92 2157 rcu_read_unlock();
c19e6535 2158
95b1d73a
PS
2159 cb->args[0] = i;
2160 cb->args[1] = j;
f0fef760 2161
95b1d73a 2162 return skb->len;
7c40efc9
BP
2163}
2164
cb25142c
PS
2165static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2166 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2167 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2168 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2169 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2170 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2171 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2172};
2173
18fd3a52 2174static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 2175 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
2176 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2177 .policy = vport_policy,
df2c07f4 2178 .doit = ovs_vport_cmd_new
f0fef760 2179 },
df2c07f4 2180 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
2181 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2182 .policy = vport_policy,
df2c07f4 2183 .doit = ovs_vport_cmd_del
f0fef760 2184 },
df2c07f4 2185 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
2186 .flags = 0, /* OK for unprivileged users. */
2187 .policy = vport_policy,
df2c07f4
JP
2188 .doit = ovs_vport_cmd_get,
2189 .dumpit = ovs_vport_cmd_dump
f0fef760 2190 },
df2c07f4 2191 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
2192 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2193 .policy = vport_policy,
df2c07f4 2194 .doit = ovs_vport_cmd_set,
f0fef760
BP
2195 },
2196};
2197
cb25142c
PS
2198struct genl_family dp_vport_genl_family = {
2199 .id = GENL_ID_GENERATE,
2200 .hdrsize = sizeof(struct ovs_header),
2201 .name = OVS_VPORT_FAMILY,
2202 .version = OVS_VPORT_VERSION,
2203 .maxattr = OVS_VPORT_ATTR_MAX,
2204 .netnsok = true,
2205 .parallel_ops = true,
2206 .ops = dp_vport_genl_ops,
2207 .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2208 .mcgrps = &ovs_dp_vport_multicast_group,
2209 .n_mcgrps = 1,
982b8810 2210};
ed099e92 2211
18fd3a52 2212static struct genl_family *dp_genl_families[] = {
cb25142c
PS
2213 &dp_datapath_genl_family,
2214 &dp_vport_genl_family,
2215 &dp_flow_genl_family,
2216 &dp_packet_genl_family,
982b8810 2217};
ed099e92 2218
982b8810
BP
2219static void dp_unregister_genl(int n_families)
2220{
2221 int i;
ed099e92 2222
b867ca75 2223 for (i = 0; i < n_families; i++)
cb25142c 2224 genl_unregister_family(dp_genl_families[i]);
ed099e92
BP
2225}
2226
982b8810 2227static int dp_register_genl(void)
064af421 2228{
982b8810
BP
2229 int err;
2230 int i;
064af421 2231
982b8810 2232 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
064af421 2233
cb25142c 2234 err = genl_register_family(dp_genl_families[i]);
982b8810
BP
2235 if (err)
2236 goto error;
982b8810 2237 }
9cc8b4e4 2238
982b8810 2239 return 0;
064af421
BP
2240
2241error:
cb25142c 2242 dp_unregister_genl(i);
982b8810 2243 return err;
064af421
BP
2244}
2245
2a4999f3
PS
2246static int __net_init ovs_init_net(struct net *net)
2247{
2248 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2249
2250 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 2251 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
038e34ab 2252 ovs_ct_init(net);
2a4999f3
PS
2253 return 0;
2254}
2255
cabd5516
PS
2256static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2257 struct list_head *head)
2a4999f3
PS
2258{
2259 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
cabd5516
PS
2260 struct datapath *dp;
2261
2262 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2263 int i;
2264
2265 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2266 struct vport *vport;
2267
2268 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
cabd5516
PS
2269
2270 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2271 continue;
2272
e23775f2 2273 if (dev_net(vport->dev) == dnet)
cabd5516
PS
2274 list_add(&vport->detach_list, head);
2275 }
2276 }
2277 }
2278}
2279
2280static void __net_exit ovs_exit_net(struct net *dnet)
2281{
2282 struct datapath *dp, *dp_next;
2283 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2284 struct vport *vport, *vport_next;
2285 struct net *net;
2286 LIST_HEAD(head);
2a4999f3 2287
038e34ab 2288 ovs_ct_exit(dnet);
cd2a59e9
PS
2289 ovs_lock();
2290 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2291 __dp_destroy(dp);
cabd5516
PS
2292
2293 rtnl_lock();
2294 for_each_net(net)
2295 list_vports_from_net(net, dnet, &head);
2296 rtnl_unlock();
2297
2298 /* Detach all vports from given namespace. */
2299 list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2300 list_del(&vport->detach_list);
2301 ovs_dp_detach_port(vport);
2302 }
2303
cd2a59e9
PS
2304 ovs_unlock();
2305
2306 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
2307}
2308
2309static struct pernet_operations ovs_net_ops = {
2310 .init = ovs_init_net,
2311 .exit = ovs_exit_net,
2312 .id = &ovs_net_id,
2313 .size = sizeof(struct ovs_net),
2314};
2315
22d24ebf
BP
2316static int __init dp_init(void)
2317{
2318 int err;
2319
f3d85db3 2320 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 2321
26bfaeaa 2322 pr_info("Open vSwitch switching datapath %s\n", VERSION);
064af421 2323
595e069a 2324 err = compat_init();
3544358a 2325 if (err)
533e96e7 2326 goto error;
3544358a 2327
595e069a
JS
2328 err = action_fifos_init();
2329 if (err)
2330 goto error_compat_exit;
2331
5282e284 2332 err = ovs_internal_dev_rtnl_link_register();
2c8c4fb7
AZ
2333 if (err)
2334 goto error_action_fifos_exit;
2335
5282e284
TG
2336 err = ovs_flow_init();
2337 if (err)
2338 goto error_unreg_rtnl_link;
2339
850b6b3b 2340 err = ovs_vport_init();
064af421
BP
2341 if (err)
2342 goto error_flow_exit;
2343
2a4999f3 2344 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
2345 if (err)
2346 goto error_vport_exit;
2347
2a4999f3
PS
2348 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2349 if (err)
2350 goto error_netns_exit;
2351
5a38795f
TG
2352 err = ovs_netdev_init();
2353 if (err)
2354 goto error_unreg_notifier;
2355
982b8810
BP
2356 err = dp_register_genl();
2357 if (err < 0)
5a38795f 2358 goto error_unreg_netdev;
982b8810 2359
064af421
BP
2360 return 0;
2361
5a38795f
TG
2362error_unreg_netdev:
2363 ovs_netdev_exit();
064af421 2364error_unreg_notifier:
850b6b3b 2365 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2366error_netns_exit:
2367 unregister_pernet_device(&ovs_net_ops);
f2459fe7 2368error_vport_exit:
850b6b3b 2369 ovs_vport_exit();
064af421 2370error_flow_exit:
850b6b3b 2371 ovs_flow_exit();
5282e284
TG
2372error_unreg_rtnl_link:
2373 ovs_internal_dev_rtnl_link_unregister();
2c8c4fb7
AZ
2374error_action_fifos_exit:
2375 action_fifos_exit();
595e069a
JS
2376error_compat_exit:
2377 compat_exit();
064af421
BP
2378error:
2379 return err;
2380}
2381
2382static void dp_cleanup(void)
2383{
982b8810 2384 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
5a38795f 2385 ovs_netdev_exit();
850b6b3b 2386 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2387 unregister_pernet_device(&ovs_net_ops);
2388 rcu_barrier();
850b6b3b
JG
2389 ovs_vport_exit();
2390 ovs_flow_exit();
5282e284 2391 ovs_internal_dev_rtnl_link_unregister();
2c8c4fb7 2392 action_fifos_exit();
595e069a 2393 compat_exit();
064af421
BP
2394}
2395
2396module_init(dp_init);
2397module_exit(dp_cleanup);
2398
2399MODULE_DESCRIPTION("Open vSwitch switching datapath");
2400MODULE_LICENSE("GPL");
3d0666d2 2401MODULE_VERSION(VERSION);