]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
datapath: Reduce locking requirements.
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
a1c564be 2 * Copyright (c) 2007-2013 Nicira, Inc.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421 41#include <linux/wait.h>
064af421 42#include <asm/div64.h>
656a0e37 43#include <linux/highmem.h>
064af421
BP
44#include <linux/netfilter_bridge.h>
45#include <linux/netfilter_ipv4.h>
46#include <linux/inetdevice.h>
47#include <linux/list.h>
077257b8 48#include <linux/openvswitch.h>
064af421 49#include <linux/rculist.h>
064af421 50#include <linux/dmi.h>
cd2a59e9
PS
51#include <linux/genetlink.h>
52#include <net/genetlink.h>
36956a7d 53#include <net/genetlink.h>
2a4999f3
PS
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
064af421 56
064af421 57#include "datapath.h"
064af421 58#include "flow.h"
d103f479 59#include "flow_table.h"
a097c0b2 60#include "flow_netlink.h"
303708cc 61#include "vlan.h"
f2459fe7 62#include "vport-internal_dev.h"
d5de5b0d 63#include "vport-netdev.h"
064af421 64
2a4999f3
PS
65int ovs_net_id __read_mostly;
66
afad3556
JR
67/* Check if need to build a reply message.
68 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
69static bool ovs_must_notify(struct genl_info *info,
70 const struct genl_multicast_group *grp)
71{
72 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
73 netlink_has_listeners(genl_info_net(info)->genl_sock, grp->id);
74}
75
e297c6b7
TG
76static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
77 struct genl_multicast_group *grp)
78{
79 genl_notify(skb, genl_info_net(info), info->snd_portid,
80 grp->id, info->nlhdr, GFP_KERNEL);
81}
82
ed099e92
BP
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken with ovs_lock().
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
ed099e92 99
cd2a59e9
PS
100static DEFINE_MUTEX(ovs_mutex);
101
102void ovs_lock(void)
103{
104 mutex_lock(&ovs_mutex);
105}
106
107void ovs_unlock(void)
108{
109 mutex_unlock(&ovs_mutex);
110}
111
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held, for lockdep-based assertions.
 * When 'debug_locks' is off the answer cannot be checked, so 1 ("held") is
 * returned unconditionally. */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
121
/* Forward declarations for helpers defined further down in this file. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info);
064af421 127
cd2a59e9 128/* Must be called with rcu_read_lock or ovs_mutex. */
2a4999f3 129static struct datapath *get_dp(struct net *net, int dp_ifindex)
064af421 130{
254f2dc8
BP
131 struct datapath *dp = NULL;
132 struct net_device *dev;
ed099e92 133
254f2dc8 134 rcu_read_lock();
2a4999f3 135 dev = dev_get_by_index_rcu(net, dp_ifindex);
254f2dc8 136 if (dev) {
850b6b3b 137 struct vport *vport = ovs_internal_dev_get_vport(dev);
254f2dc8
BP
138 if (vport)
139 dp = vport->dp;
140 }
141 rcu_read_unlock();
142
143 return dp;
064af421 144}
064af421 145
cd2a59e9 146/* Must be called with rcu_read_lock or ovs_mutex. */
850b6b3b 147const char *ovs_dp_name(const struct datapath *dp)
f2459fe7 148{
cd2a59e9 149 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
16b82e84 150 return vport->ops->get_name(vport);
f2459fe7
JG
151}
152
99769a40
JG
153static int get_dpifindex(struct datapath *dp)
154{
155 struct vport *local;
156 int ifindex;
157
158 rcu_read_lock();
159
95b1d73a 160 local = ovs_vport_rcu(dp, OVSP_LOCAL);
99769a40 161 if (local)
d5de5b0d 162 ifindex = netdev_vport_priv(local)->dev->ifindex;
99769a40
JG
163 else
164 ifindex = 0;
165
166 rcu_read_unlock();
167
168 return ifindex;
169}
170
46c6a11d
JG
171static void destroy_dp_rcu(struct rcu_head *rcu)
172{
173 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 174
46c6a11d 175 free_percpu(dp->stats_percpu);
2a4999f3 176 release_net(ovs_dp_get_net(dp));
95b1d73a 177 kfree(dp->ports);
5ca1ba48 178 kfree(dp);
46c6a11d
JG
179}
180
95b1d73a
PS
181static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
182 u16 port_no)
183{
184 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
185}
186
aa917006 187/* Called with ovs_mutex or RCU read lock. */
95b1d73a
PS
188struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
189{
190 struct vport *vport;
95b1d73a
PS
191 struct hlist_head *head;
192
193 head = vport_hash_bucket(dp, port_no);
f8dfbcb7 194 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
95b1d73a
PS
195 if (vport->port_no == port_no)
196 return vport;
197 }
198 return NULL;
199}
200
cd2a59e9 201/* Called with ovs_mutex. */
c19e6535 202static struct vport *new_vport(const struct vport_parms *parms)
064af421 203{
f2459fe7 204 struct vport *vport;
f2459fe7 205
850b6b3b 206 vport = ovs_vport_add(parms);
c19e6535
BP
207 if (!IS_ERR(vport)) {
208 struct datapath *dp = parms->dp;
95b1d73a 209 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
064af421 210
95b1d73a 211 hlist_add_head_rcu(&vport->dp_hash_node, head);
c19e6535 212 }
c19e6535 213 return vport;
064af421
BP
214}
215
850b6b3b 216void ovs_dp_detach_port(struct vport *p)
064af421 217{
cd2a59e9 218 ASSERT_OVSL();
064af421 219
064af421 220 /* First drop references to device. */
95b1d73a 221 hlist_del_rcu(&p->dp_hash_node);
f2459fe7 222
7237e4f4 223 /* Then destroy it. */
850b6b3b 224 ovs_vport_del(p);
064af421
BP
225}
226
8819fac7 227/* Must be called with rcu_read_lock. */
850b6b3b 228void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
229{
230 struct datapath *dp = p->dp;
3544358a 231 struct sw_flow *flow;
064af421 232 struct dp_stats_percpu *stats;
52a23d92 233 struct sw_flow_key key;
e9141eec 234 u64 *stats_counter;
4fa72a95 235 u32 n_mask_hit;
4c1ad233 236 int error;
064af421 237
70dbc259 238 stats = this_cpu_ptr(dp->stats_percpu);
a063b0df 239
52a23d92 240 /* Extract flow from 'skb' into 'key'. */
a1c564be 241 error = ovs_flow_extract(skb, p->port_no, &key);
52a23d92
JG
242 if (unlikely(error)) {
243 kfree_skb(skb);
244 return;
55574bb0
BP
245 }
246
52a23d92 247 /* Look up flow. */
4f88b5e5 248 flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
52a23d92
JG
249 if (unlikely(!flow)) {
250 struct dp_upcall_info upcall;
251
252 upcall.cmd = OVS_PACKET_CMD_MISS;
253 upcall.key = &key;
254 upcall.userdata = NULL;
255 upcall.portid = p->upcall_portid;
256 ovs_dp_upcall(dp, skb, &upcall);
257 consume_skb(skb);
258 stats_counter = &stats->n_missed;
259 goto out;
260 }
261
262 OVS_CB(skb)->flow = flow;
d1d71a36 263 OVS_CB(skb)->pkt_key = &key;
52a23d92 264
b0b906cc 265 ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
850b6b3b 266 ovs_execute_actions(dp, skb);
b0b906cc 267 stats_counter = &stats->n_hit;
55574bb0 268
8819fac7 269out:
55574bb0 270 /* Update datapath statistics. */
821cb9fa 271 u64_stats_update_begin(&stats->sync);
e9141eec 272 (*stats_counter)++;
4fa72a95 273 stats->n_mask_hit += n_mask_hit;
821cb9fa 274 u64_stats_update_end(&stats->sync);
064af421
BP
275}
276
aa5a8fdc
JG
277static struct genl_family dp_packet_genl_family = {
278 .id = GENL_ID_GENERATE,
df2c07f4
JP
279 .hdrsize = sizeof(struct ovs_header),
280 .name = OVS_PACKET_FAMILY,
69685a88 281 .version = OVS_PACKET_VERSION,
2a4999f3 282 .maxattr = OVS_PACKET_ATTR_MAX,
b3dcb73c 283 .netnsok = true,
14002a59 284 SET_PARALLEL_OPS
aa5a8fdc
JG
285};
286
850b6b3b
JG
287int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
288 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
289{
290 struct dp_stats_percpu *stats;
291 int err;
292
28aea917 293 if (upcall_info->portid == 0) {
b063d9f0 294 err = -ENOTCONN;
b063d9f0
JG
295 goto err;
296 }
297
7257b535 298 if (!skb_is_gso(skb))
5ae440c3 299 err = queue_userspace_packet(dp, skb, upcall_info);
7257b535 300 else
5ae440c3 301 err = queue_gso_packets(dp, skb, upcall_info);
d76195db
JG
302 if (err)
303 goto err;
304
305 return 0;
aa5a8fdc 306
aa5a8fdc 307err:
70dbc259 308 stats = this_cpu_ptr(dp->stats_percpu);
aa5a8fdc 309
821cb9fa 310 u64_stats_update_begin(&stats->sync);
aa5a8fdc 311 stats->n_lost++;
821cb9fa 312 u64_stats_update_end(&stats->sync);
aa5a8fdc 313
aa5a8fdc 314 return err;
982b8810
BP
315}
316
5ae440c3 317static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
7257b535 318 const struct dp_upcall_info *upcall_info)
cb5087ca 319{
d4cba1f8 320 unsigned short gso_type = skb_shinfo(skb)->gso_type;
7257b535
BP
321 struct dp_upcall_info later_info;
322 struct sw_flow_key later_key;
323 struct sk_buff *segs, *nskb;
324 int err;
cb5087ca 325
1d04cd4e 326 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
79089764
PS
327 if (IS_ERR(segs))
328 return PTR_ERR(segs);
99769a40 329
7257b535
BP
330 /* Queue all of the segments. */
331 skb = segs;
cb5087ca 332 do {
5ae440c3 333 err = queue_userspace_packet(dp, skb, upcall_info);
982b8810 334 if (err)
7257b535 335 break;
856081f6 336
d4cba1f8 337 if (skb == segs && gso_type & SKB_GSO_UDP) {
e1cf87ff
JG
338 /* The initial flow key extracted by ovs_flow_extract()
339 * in this case is for a first fragment, so we need to
7257b535
BP
340 * properly mark later fragments.
341 */
342 later_key = *upcall_info->key;
9e44d715 343 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
7257b535
BP
344
345 later_info = *upcall_info;
346 later_info.key = &later_key;
347 upcall_info = &later_info;
348 }
36ce148c 349 } while ((skb = skb->next));
cb5087ca 350
7257b535
BP
351 /* Free all of the segments. */
352 skb = segs;
353 do {
354 nskb = skb->next;
355 if (err)
356 kfree_skb(skb);
357 else
358 consume_skb(skb);
359 } while ((skb = nskb));
360 return err;
361}
362
0afa2373
TG
363static size_t key_attr_size(void)
364{
365 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
366 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
367 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
368 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
369 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
370 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
371 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
373 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
374 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
375 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
376 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
377 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
378 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
379 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
380 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
381 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
382 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
383 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
384}
385
533bea51
TG
386static size_t upcall_msg_size(const struct nlattr *userdata,
387 unsigned int hdrlen)
0afa2373
TG
388{
389 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
533bea51 390 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
0afa2373
TG
391 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
392
393 /* OVS_PACKET_ATTR_USERDATA */
394 if (userdata)
395 size += NLA_ALIGN(userdata->nla_len);
396
397 return size;
398}
399
5ae440c3 400static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
7257b535
BP
401 const struct dp_upcall_info *upcall_info)
402{
403 struct ovs_header *upcall;
6161d3fd 404 struct sk_buff *nskb = NULL;
7257b535
BP
405 struct sk_buff *user_skb; /* to be queued to userspace */
406 struct nlattr *nla;
68eadcf0
TG
407 struct genl_info info = {
408#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
5ae440c3 409 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
68eadcf0
TG
410#endif
411 .snd_portid = upcall_info->portid,
412 };
978188b2 413 size_t len;
533bea51 414 unsigned int hlen;
5ae440c3
TG
415 int err, dp_ifindex;
416
417 dp_ifindex = get_dpifindex(dp);
418 if (!dp_ifindex)
419 return -ENODEV;
7257b535 420
6161d3fd
JG
421 if (vlan_tx_tag_present(skb)) {
422 nskb = skb_clone(skb, GFP_ATOMIC);
423 if (!nskb)
424 return -ENOMEM;
07ac71ea
PS
425
426 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
427 if (!nskb)
428 return -ENOMEM;
429
430 vlan_set_tci(nskb, 0);
7257b535 431
6161d3fd
JG
432 skb = nskb;
433 }
434
435 if (nla_attr_size(skb->len) > USHRT_MAX) {
436 err = -EFBIG;
437 goto out;
438 }
7257b535 439
533bea51
TG
440 /* Complete checksum if needed */
441 if (skb->ip_summed == CHECKSUM_PARTIAL &&
442 (err = skb_checksum_help(skb)))
443 goto out;
444
445 /* Older versions of OVS user space enforce alignment of the last
446 * Netlink attribute to NLA_ALIGNTO which would require extensive
447 * padding logic. Only perform zerocopy if padding is not required.
448 */
449 if (dp->user_features & OVS_DP_F_UNALIGNED)
450 hlen = skb_zerocopy_headlen(skb);
451 else
452 hlen = skb->len;
453
454 len = upcall_msg_size(upcall_info->userdata, hlen);
68eadcf0 455 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
6161d3fd
JG
456 if (!user_skb) {
457 err = -ENOMEM;
458 goto out;
459 }
7257b535
BP
460
461 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
462 0, upcall_info->cmd);
463 upcall->dp_ifindex = dp_ifindex;
464
465 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
a097c0b2 466 ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
7257b535
BP
467 nla_nest_end(user_skb, nla);
468
469 if (upcall_info->userdata)
e995e3df 470 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
462a988b 471 nla_len(upcall_info->userdata),
e995e3df 472 nla_data(upcall_info->userdata));
7257b535 473
533bea51
TG
474 /* Only reserve room for attribute header, packet data is added
475 * in skb_zerocopy() */
476 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
477 err = -ENOBUFS;
478 goto out;
479 }
480 nla->nla_len = nla_attr_size(skb->len);
bed53bd1 481
533bea51 482 skb_zerocopy(user_skb, skb, skb->len, hlen);
7257b535 483
ef507cec 484 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
978188b2
JG
485 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
486 size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
487
488 if (plen > 0)
489 memset(skb_put(user_skb, plen), 0, plen);
490 }
ef507cec 491
533bea51 492 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
6161d3fd 493
533bea51 494 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
6161d3fd
JG
495out:
496 kfree_skb(nskb);
497 return err;
cb5087ca
BP
498}
499
df2c07f4 500static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 501{
df2c07f4 502 struct ovs_header *ovs_header = info->userhdr;
982b8810 503 struct nlattr **a = info->attrs;
e0e57990 504 struct sw_flow_actions *acts;
982b8810 505 struct sk_buff *packet;
e0e57990 506 struct sw_flow *flow;
f7cd0081 507 struct datapath *dp;
d6569377 508 struct ethhdr *eth;
3f19d399 509 int len;
d6569377 510 int err;
064af421 511
f7cd0081 512 err = -EINVAL;
df2c07f4 513 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 514 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 515 goto err;
064af421 516
df2c07f4 517 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 518 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
519 err = -ENOMEM;
520 if (!packet)
e5cad958 521 goto err;
3f19d399
BP
522 skb_reserve(packet, NET_IP_ALIGN);
523
bf3d6fce 524 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 525
f7cd0081
BP
526 skb_reset_mac_header(packet);
527 eth = eth_hdr(packet);
064af421 528
d6569377
BP
529 /* Normally, setting the skb 'protocol' field would be handled by a
530 * call to eth_type_trans(), but it assumes there's a sending
531 * device, which we may not have. */
7cd46155 532 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
f7cd0081 533 packet->protocol = eth->h_proto;
d6569377 534 else
f7cd0081 535 packet->protocol = htons(ETH_P_802_2);
d3c54451 536
e0e57990 537 /* Build an sw_flow for sending this packet. */
df65fec1 538 flow = ovs_flow_alloc();
e0e57990
BP
539 err = PTR_ERR(flow);
540 if (IS_ERR(flow))
e5cad958 541 goto err_kfree_skb;
064af421 542
a1c564be 543 err = ovs_flow_extract(packet, -1, &flow->key);
e0e57990 544 if (err)
9321954a 545 goto err_flow_free;
e0e57990 546
a097c0b2 547 err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
80e5eed9 548 if (err)
9321954a 549 goto err_flow_free;
a097c0b2 550 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
e0e57990
BP
551 err = PTR_ERR(acts);
552 if (IS_ERR(acts))
9321954a 553 goto err_flow_free;
9b405f1a 554
a097c0b2
PS
555 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
556 &flow->key, 0, &acts);
e0e57990 557 rcu_assign_pointer(flow->sf_acts, acts);
9b405f1a
PS
558 if (err)
559 goto err_flow_free;
e0e57990
BP
560
561 OVS_CB(packet)->flow = flow;
d1d71a36 562 OVS_CB(packet)->pkt_key = &flow->key;
abff858b 563 packet->priority = flow->key.phy.priority;
3025a772 564 packet->mark = flow->key.phy.skb_mark;
e0e57990 565
d6569377 566 rcu_read_lock();
2a4999f3 567 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
f7cd0081 568 err = -ENODEV;
e5cad958
BP
569 if (!dp)
570 goto err_unlock;
cc4015df 571
e9141eec 572 local_bh_disable();
850b6b3b 573 err = ovs_execute_actions(dp, packet);
e9141eec 574 local_bh_enable();
d6569377 575 rcu_read_unlock();
e0e57990 576
a1c564be 577 ovs_flow_free(flow, false);
e5cad958 578 return err;
064af421 579
e5cad958
BP
580err_unlock:
581 rcu_read_unlock();
9321954a 582err_flow_free:
a1c564be 583 ovs_flow_free(flow, false);
e5cad958
BP
584err_kfree_skb:
585 kfree_skb(packet);
586err:
d6569377 587 return err;
064af421
BP
588}
589
df2c07f4 590static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc 591 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
df2c07f4
JP
592 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
593 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
982b8810
BP
594};
595
596static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 597 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
598 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
599 .policy = packet_policy,
df2c07f4 600 .doit = ovs_packet_cmd_execute
982b8810
BP
601 }
602};
603
4fa72a95
AZ
604static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
605 struct ovs_dp_megaflow_stats *mega_stats)
064af421 606{
d6569377 607 int i;
f180c2e2 608
4fa72a95
AZ
609 memset(mega_stats, 0, sizeof(*mega_stats));
610
994dc286 611 stats->n_flows = ovs_flow_tbl_count(&dp->table);
4fa72a95 612 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
064af421 613
7257b535 614 stats->n_hit = stats->n_missed = stats->n_lost = 0;
4fa72a95 615
d6569377
BP
616 for_each_possible_cpu(i) {
617 const struct dp_stats_percpu *percpu_stats;
618 struct dp_stats_percpu local_stats;
821cb9fa 619 unsigned int start;
44e05eca 620
d6569377 621 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 622
d6569377 623 do {
821cb9fa 624 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
d6569377 625 local_stats = *percpu_stats;
821cb9fa 626 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
064af421 627
d6569377
BP
628 stats->n_hit += local_stats.n_hit;
629 stats->n_missed += local_stats.n_missed;
630 stats->n_lost += local_stats.n_lost;
4fa72a95 631 mega_stats->n_mask_hit += local_stats.n_mask_hit;
d6569377
BP
632 }
633}
064af421 634
df2c07f4
JP
635static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
636 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
637 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
638 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 639};
36956a7d 640
37a1300c
BP
641static struct genl_family dp_flow_genl_family = {
642 .id = GENL_ID_GENERATE,
df2c07f4
JP
643 .hdrsize = sizeof(struct ovs_header),
644 .name = OVS_FLOW_FAMILY,
69685a88 645 .version = OVS_FLOW_VERSION,
2a4999f3 646 .maxattr = OVS_FLOW_ATTR_MAX,
b3dcb73c 647 .netnsok = true,
14002a59 648 SET_PARALLEL_OPS
37a1300c 649};
ed099e92 650
850b6b3b 651static struct genl_multicast_group ovs_dp_flow_multicast_group = {
df2c07f4 652 .name = OVS_FLOW_MCGROUP
37a1300c
BP
653};
654
0afa2373
TG
655static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
656{
657 return NLMSG_ALIGN(sizeof(struct ovs_header))
658 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
a1c564be 659 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
0afa2373
TG
660 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
661 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
662 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
663 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
664}
665
aa917006 666/* Called with ovs_mutex or RCU read lock. */
f71db6b1 667static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
28aea917 668 struct sk_buff *skb, u32 portid,
6455100f 669 u32 seq, u32 flags, u8 cmd)
d6569377 670{
37a1300c 671 const int skb_orig_len = skb->len;
9b405f1a 672 struct nlattr *start;
b0f3a2fe
PS
673 struct ovs_flow_stats stats;
674 __be16 tcp_flags;
675 unsigned long used;
df2c07f4 676 struct ovs_header *ovs_header;
d6569377 677 struct nlattr *nla;
d6569377 678 int err;
064af421 679
28aea917 680 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
df2c07f4 681 if (!ovs_header)
37a1300c 682 return -EMSGSIZE;
d6569377 683
f71db6b1 684 ovs_header->dp_ifindex = dp_ifindex;
d6569377 685
a1c564be 686 /* Fill flow key. */
df2c07f4 687 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
d6569377
BP
688 if (!nla)
689 goto nla_put_failure;
a1c564be 690
a097c0b2 691 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
d6569377 692 if (err)
37a1300c 693 goto error;
d6569377
BP
694 nla_nest_end(skb, nla);
695
a1c564be
AZ
696 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
697 if (!nla)
698 goto nla_put_failure;
699
a097c0b2 700 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
a1c564be
AZ
701 if (err)
702 goto error;
703
704 nla_nest_end(skb, nla);
705
b0f3a2fe 706 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
f71db6b1 707
b0f3a2fe
PS
708 if (used &&
709 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
c3cc8c03 710 goto nla_put_failure;
d6569377 711
b0f3a2fe
PS
712 if (stats.n_packets &&
713 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
714 goto nla_put_failure;
b0b906cc 715
b0f3a2fe
PS
716 if ((u8)ntohs(tcp_flags) &&
717 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
c3cc8c03 718 goto nla_put_failure;
d6569377 719
df2c07f4 720 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
721 * this is the first flow to be dumped into 'skb'. This is unusual for
722 * Netlink but individual action lists can be longer than
723 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
724 * The userspace caller can always fetch the actions separately if it
725 * really wants them. (Most userspace callers in fact don't care.)
726 *
727 * This can only fail for dump operations because the skb is always
728 * properly sized for single flows.
729 */
9b405f1a 730 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee 731 if (start) {
f44ccce1
PS
732 const struct sw_flow_actions *sf_acts;
733
780ec6ae 734 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
a097c0b2
PS
735 err = ovs_nla_put_actions(sf_acts->actions,
736 sf_acts->actions_len, skb);
f71db6b1 737
0a25b039
BP
738 if (!err)
739 nla_nest_end(skb, start);
740 else {
741 if (skb_orig_len)
742 goto error;
743
744 nla_nest_cancel(skb, start);
745 }
7aac03bd
JG
746 } else if (skb_orig_len)
747 goto nla_put_failure;
37a1300c 748
df2c07f4 749 return genlmsg_end(skb, ovs_header);
d6569377
BP
750
751nla_put_failure:
752 err = -EMSGSIZE;
37a1300c 753error:
df2c07f4 754 genlmsg_cancel(skb, ovs_header);
d6569377 755 return err;
44e05eca
BP
756}
757
f71db6b1
JR
758/* May not be called with RCU read lock. */
759static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
afad3556
JR
760 struct genl_info *info,
761 bool always)
44e05eca 762{
afad3556 763 struct sk_buff *skb;
d6569377 764
afad3556
JR
765 if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
766 return NULL;
767
f71db6b1 768 skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
afad3556
JR
769
770 if (!skb)
771 return ERR_PTR(-ENOMEM);
772
773 return skb;
37a1300c 774}
8d5ebd83 775
f71db6b1
JR
776/* Called with ovs_mutex. */
777static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
778 int dp_ifindex,
779 struct genl_info *info, u8 cmd,
780 bool always)
37a1300c
BP
781{
782 struct sk_buff *skb;
783 int retval;
d6569377 784
f71db6b1
JR
785 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
786 always);
afad3556
JR
787 if (!skb || IS_ERR(skb))
788 return skb;
d6569377 789
f71db6b1
JR
790 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
791 info->snd_portid, info->snd_seq, 0,
792 cmd);
37a1300c 793 BUG_ON(retval < 0);
d6569377 794 return skb;
064af421
BP
795}
796
df2c07f4 797static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 798{
37a1300c 799 struct nlattr **a = info->attrs;
df2c07f4 800 struct ovs_header *ovs_header = info->userhdr;
529db635 801 struct sw_flow_key key, masked_key;
a1c564be
AZ
802 struct sw_flow *flow = NULL;
803 struct sw_flow_mask mask;
37a1300c 804 struct sk_buff *reply;
9c52546b 805 struct datapath *dp;
9b405f1a 806 struct sw_flow_actions *acts = NULL;
a1c564be 807 struct sw_flow_match match;
bc4a05c6 808 int error;
064af421 809
37a1300c
BP
810 /* Extract key. */
811 error = -EINVAL;
df2c07f4 812 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 813 goto error;
a1c564be
AZ
814
815 ovs_match_init(&match, &key, &mask);
df65fec1 816 error = ovs_nla_get_match(&match,
a097c0b2 817 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
37a1300c
BP
818 if (error)
819 goto error;
064af421 820
37a1300c 821 /* Validate actions. */
df2c07f4 822 if (a[OVS_FLOW_ATTR_ACTIONS]) {
a097c0b2 823 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
9b405f1a
PS
824 error = PTR_ERR(acts);
825 if (IS_ERR(acts))
37a1300c 826 goto error;
9b405f1a 827
a097c0b2
PS
828 ovs_flow_mask_key(&masked_key, &key, &mask);
829 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
830 &masked_key, 0, &acts);
529db635
JG
831 if (error) {
832 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
9b405f1a 833 goto err_kfree;
529db635 834 }
df2c07f4 835 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
90b8c2f7 836 /* OVS_FLOW_CMD_NEW must have actions. */
37a1300c
BP
837 error = -EINVAL;
838 goto error;
839 }
840
cd2a59e9 841 ovs_lock();
2a4999f3 842 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
d6569377 843 error = -ENODEV;
9c52546b 844 if (!dp)
cd2a59e9 845 goto err_unlock_ovs;
704a1e09 846
a1c564be 847 /* Check if this is a duplicate flow */
4f88b5e5 848 flow = ovs_flow_tbl_lookup(&dp->table, &key);
3544358a 849 if (!flow) {
d6569377
BP
850 /* Bail out if we're not allowed to create a new flow. */
851 error = -ENOENT;
df2c07f4 852 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
cd2a59e9 853 goto err_unlock_ovs;
d6569377 854
d6569377 855 /* Allocate flow. */
df65fec1 856 flow = ovs_flow_alloc();
d6569377
BP
857 if (IS_ERR(flow)) {
858 error = PTR_ERR(flow);
cd2a59e9 859 goto err_unlock_ovs;
d6569377 860 }
d6569377 861
529db635
JG
862 flow->key = masked_key;
863 flow->unmasked_key = key;
d6569377
BP
864 rcu_assign_pointer(flow->sf_acts, acts);
865
d6569377 866 /* Put flow in bucket. */
0585f7a8
PS
867 error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
868 if (error) {
869 acts = NULL;
870 goto err_flow_free;
871 }
37a1300c 872
f71db6b1
JR
873 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
874 info, OVS_FLOW_CMD_NEW, false);
d6569377
BP
875 } else {
876 /* We found a matching flow. */
d6569377
BP
877 /* Bail out if we're not allowed to modify an existing flow.
878 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
879 * because Generic Netlink treats the latter as a dump
880 * request. We also accept NLM_F_EXCL in case that bug ever
881 * gets fixed.
882 */
883 error = -EEXIST;
df2c07f4 884 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
37a1300c 885 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
cd2a59e9 886 goto err_unlock_ovs;
d6569377 887
b21e5b6a 888 /* The unmasked key has to be the same for flow updates. */
a6603481 889 if (!ovs_flow_cmp_unmasked_key(flow, &match))
b21e5b6a
AZ
890 goto err_unlock_ovs;
891
90b8c2f7
JR
892 /* Update actions, if present. */
893 if (acts) {
894 struct sw_flow_actions *old_acts;
d6569377 895
90b8c2f7
JR
896 old_acts = ovsl_dereference(flow->sf_acts);
897 rcu_assign_pointer(flow->sf_acts, acts);
898 ovs_nla_free_flow_actions(old_acts);
899 }
f71db6b1
JR
900
901 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
902 info, OVS_FLOW_CMD_NEW, false);
d6569377
BP
903
904 /* Clear stats. */
b0b906cc
PS
905 if (a[OVS_FLOW_ATTR_CLEAR])
906 ovs_flow_stats_clear(flow);
9c52546b 907 }
cd2a59e9 908 ovs_unlock();
37a1300c 909
afad3556
JR
910 if (reply) {
911 if (!IS_ERR(reply))
912 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
913 else
914 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
915 ovs_dp_flow_multicast_group.id,
916 PTR_ERR(reply));
917 }
d6569377 918 return 0;
704a1e09 919
a1c564be
AZ
920err_flow_free:
921 ovs_flow_free(flow, false);
cd2a59e9
PS
922err_unlock_ovs:
923 ovs_unlock();
9b405f1a 924err_kfree:
ba400435 925 kfree(acts);
37a1300c 926error:
9c52546b 927 return error;
704a1e09
BP
928}
929
df2c07f4 930static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 931{
37a1300c 932 struct nlattr **a = info->attrs;
df2c07f4 933 struct ovs_header *ovs_header = info->userhdr;
37a1300c 934 struct sw_flow_key key;
37a1300c 935 struct sk_buff *reply;
704a1e09 936 struct sw_flow *flow;
9c52546b 937 struct datapath *dp;
a1c564be 938 struct sw_flow_match match;
9c52546b 939 int err;
704a1e09 940
1b936472
AZ
941 if (!a[OVS_FLOW_ATTR_KEY]) {
942 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
37a1300c 943 return -EINVAL;
1b936472 944 }
a1c564be
AZ
945
946 ovs_match_init(&match, &key, NULL);
df65fec1 947 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c
BP
948 if (err)
949 return err;
704a1e09 950
cd2a59e9 951 ovs_lock();
2a4999f3 952 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
953 if (!dp) {
954 err = -ENODEV;
955 goto unlock;
956 }
704a1e09 957
4f88b5e5 958 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 959 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
960 err = -ENOENT;
961 goto unlock;
962 }
d6569377 963
f71db6b1
JR
964 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
965 OVS_FLOW_CMD_NEW, true);
cd2a59e9
PS
966 if (IS_ERR(reply)) {
967 err = PTR_ERR(reply);
968 goto unlock;
969 }
36956a7d 970
cd2a59e9 971 ovs_unlock();
37a1300c 972 return genlmsg_reply(reply, info);
cd2a59e9
PS
973unlock:
974 ovs_unlock();
975 return err;
d6569377 976}
9c52546b 977
df2c07f4 978static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 979{
37a1300c 980 struct nlattr **a = info->attrs;
df2c07f4 981 struct ovs_header *ovs_header = info->userhdr;
37a1300c 982 struct sw_flow_key key;
37a1300c 983 struct sk_buff *reply;
d6569377 984 struct sw_flow *flow;
d6569377 985 struct datapath *dp;
a1c564be 986 struct sw_flow_match match;
d6569377 987 int err;
36956a7d 988
cd2a59e9 989 ovs_lock();
2a4999f3 990 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
991 if (!dp) {
992 err = -ENODEV;
993 goto unlock;
994 }
2a4999f3 995
cd2a59e9 996 if (!a[OVS_FLOW_ATTR_KEY]) {
994dc286 997 err = ovs_flow_tbl_flush(&dp->table);
cd2a59e9
PS
998 goto unlock;
999 }
a1c564be
AZ
1000
1001 ovs_match_init(&match, &key, NULL);
df65fec1 1002 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c 1003 if (err)
cd2a59e9 1004 goto unlock;
d6569377 1005
4f88b5e5 1006 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 1007 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
1008 err = -ENOENT;
1009 goto unlock;
1010 }
d6569377 1011
f71db6b1
JR
1012 reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
1013 false);
afad3556
JR
1014 if (IS_ERR(reply)) {
1015 err = PTR_ERR(reply);
cd2a59e9
PS
1016 goto unlock;
1017 }
37a1300c 1018
994dc286 1019 ovs_flow_tbl_remove(&dp->table, flow);
37a1300c 1020
afad3556 1021 if (reply) {
f71db6b1
JR
1022 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1023 reply, info->snd_portid,
afad3556
JR
1024 info->snd_seq, 0,
1025 OVS_FLOW_CMD_DEL);
1026 BUG_ON(err < 0);
1027 }
37a1300c 1028
a1c564be 1029 ovs_flow_free(flow, true);
cd2a59e9 1030 ovs_unlock();
37a1300c 1031
afad3556
JR
1032 if (reply)
1033 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 1034 return 0;
cd2a59e9
PS
1035unlock:
1036 ovs_unlock();
1037 return err;
37a1300c
BP
1038}
1039
df2c07f4 1040static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1041{
df2c07f4 1042 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
994dc286 1043 struct table_instance *ti;
37a1300c
BP
1044 struct datapath *dp;
1045
f44ccce1 1046 rcu_read_lock();
2a4999f3 1047 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9 1048 if (!dp) {
f44ccce1 1049 rcu_read_unlock();
37a1300c 1050 return -ENODEV;
cd2a59e9 1051 }
37a1300c 1052
994dc286 1053 ti = rcu_dereference(dp->table.ti);
37a1300c 1054 for (;;) {
37a1300c
BP
1055 struct sw_flow *flow;
1056 u32 bucket, obj;
1057
1058 bucket = cb->args[0];
1059 obj = cb->args[1];
994dc286 1060 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
3544358a 1061 if (!flow)
37a1300c
BP
1062 break;
1063
f71db6b1 1064 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
28aea917 1065 NETLINK_CB(cb->skb).portid,
37a1300c 1066 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1067 OVS_FLOW_CMD_NEW) < 0)
37a1300c
BP
1068 break;
1069
1070 cb->args[0] = bucket;
1071 cb->args[1] = obj;
1072 }
f44ccce1 1073 rcu_read_unlock();
37a1300c 1074 return skb->len;
704a1e09
BP
1075}
1076
37a1300c 1077static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1078 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1079 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1080 .policy = flow_policy,
df2c07f4 1081 .doit = ovs_flow_cmd_new_or_set
37a1300c 1082 },
df2c07f4 1083 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1084 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1085 .policy = flow_policy,
df2c07f4 1086 .doit = ovs_flow_cmd_del
37a1300c 1087 },
df2c07f4 1088 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1089 .flags = 0, /* OK for unprivileged users. */
1090 .policy = flow_policy,
df2c07f4
JP
1091 .doit = ovs_flow_cmd_get,
1092 .dumpit = ovs_flow_cmd_dump
37a1300c 1093 },
df2c07f4 1094 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1095 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1096 .policy = flow_policy,
df2c07f4 1097 .doit = ovs_flow_cmd_new_or_set,
37a1300c
BP
1098 },
1099};
1100
df2c07f4 1101static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
df2c07f4 1102 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
b063d9f0 1103 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
c58cc9a4 1104 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
d6569377
BP
1105};
1106
aaff4b55
BP
1107static struct genl_family dp_datapath_genl_family = {
1108 .id = GENL_ID_GENERATE,
df2c07f4
JP
1109 .hdrsize = sizeof(struct ovs_header),
1110 .name = OVS_DATAPATH_FAMILY,
69685a88 1111 .version = OVS_DATAPATH_VERSION,
2a4999f3 1112 .maxattr = OVS_DP_ATTR_MAX,
b3dcb73c 1113 .netnsok = true,
14002a59 1114 SET_PARALLEL_OPS
aaff4b55
BP
1115};
1116
850b6b3b 1117static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
df2c07f4 1118 .name = OVS_DATAPATH_MCGROUP
aaff4b55
BP
1119};
1120
0afa2373
TG
1121static size_t ovs_dp_cmd_msg_size(void)
1122{
1123 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1124
1125 msgsize += nla_total_size(IFNAMSIZ);
1126 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
4fa72a95 1127 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
300af20a 1128 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
0afa2373
TG
1129
1130 return msgsize;
1131}
1132
aa917006 1133/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1134static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1135 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1136{
df2c07f4 1137 struct ovs_header *ovs_header;
e926dfe3 1138 struct ovs_dp_stats dp_stats;
4fa72a95 1139 struct ovs_dp_megaflow_stats dp_megaflow_stats;
064af421
BP
1140 int err;
1141
28aea917 1142 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1143 flags, cmd);
df2c07f4 1144 if (!ovs_header)
aaff4b55 1145 goto error;
064af421 1146
b063d9f0 1147 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1148
850b6b3b 1149 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
064af421 1150 if (err)
d6569377 1151 goto nla_put_failure;
064af421 1152
4fa72a95
AZ
1153 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1154 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1155 &dp_stats))
1156 goto nla_put_failure;
1157
1158 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1159 sizeof(struct ovs_dp_megaflow_stats),
1160 &dp_megaflow_stats))
c3cc8c03 1161 goto nla_put_failure;
d6569377 1162
c58cc9a4
TG
1163 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1164 goto nla_put_failure;
1165
df2c07f4 1166 return genlmsg_end(skb, ovs_header);
d6569377
BP
1167
1168nla_put_failure:
df2c07f4 1169 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1170error:
1171 return -EMSGSIZE;
d6569377
BP
1172}
1173
d81eef1b 1174static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
d6569377 1175{
d81eef1b 1176 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
aaff4b55 1177}
9dca7bd5 1178
aa917006 1179/* Called with rcu_read_lock or ovs_mutex. */
2a4999f3
PS
1180static struct datapath *lookup_datapath(struct net *net,
1181 struct ovs_header *ovs_header,
6455100f 1182 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1183{
254f2dc8
BP
1184 struct datapath *dp;
1185
df2c07f4 1186 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1187 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1188 else {
d6569377 1189 struct vport *vport;
d6569377 1190
2a4999f3 1191 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1192 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
d6569377 1193 }
254f2dc8 1194 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1195}
1196
94358dcf
TG
1197static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1198{
1199 struct datapath *dp;
1200
1201 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
09350a3d 1202 if (IS_ERR(dp))
94358dcf
TG
1203 return;
1204
1205 WARN(dp->user_features, "Dropping previously announced user features\n");
1206 dp->user_features = 0;
1207}
1208
c58cc9a4
TG
1209static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
1210{
1211 if (a[OVS_DP_ATTR_USER_FEATURES])
1212 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1213}
1214
df2c07f4 1215static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1216{
aaff4b55 1217 struct nlattr **a = info->attrs;
d6569377 1218 struct vport_parms parms;
aaff4b55 1219 struct sk_buff *reply;
d6569377
BP
1220 struct datapath *dp;
1221 struct vport *vport;
2a4999f3 1222 struct ovs_net *ovs_net;
95b1d73a 1223 int err, i;
d6569377 1224
d6569377 1225 err = -EINVAL;
ea36840f 1226 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1227 goto err;
1228
d81eef1b
JR
1229 reply = ovs_dp_cmd_alloc_info(info);
1230 if (!reply)
1231 return -ENOMEM;
d6569377 1232
d6569377
BP
1233 err = -ENOMEM;
1234 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1235 if (dp == NULL)
d81eef1b 1236 goto err_free_reply;
2a4999f3 1237
0ceaa66c
JG
1238 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1239
d6569377 1240 /* Allocate table. */
994dc286
PS
1241 err = ovs_flow_tbl_init(&dp->table);
1242 if (err)
d6569377
BP
1243 goto err_free_dp;
1244
99769a40
JG
1245 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1246 if (!dp->stats_percpu) {
1247 err = -ENOMEM;
1248 goto err_destroy_table;
1249 }
1250
95b1d73a
PS
1251 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1252 GFP_KERNEL);
1253 if (!dp->ports) {
1254 err = -ENOMEM;
1255 goto err_destroy_percpu;
1256 }
1257
1258 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1259 INIT_HLIST_HEAD(&dp->ports[i]);
1260
d6569377 1261 /* Set up our datapath device. */
df2c07f4
JP
1262 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1263 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1264 parms.options = NULL;
1265 parms.dp = dp;
df2c07f4 1266 parms.port_no = OVSP_LOCAL;
28aea917 1267 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
b063d9f0 1268
c58cc9a4
TG
1269 ovs_dp_change(dp, a);
1270
d81eef1b
JR
1271 /* So far only local changes have been made, now need the lock. */
1272 ovs_lock();
1273
d6569377
BP
1274 vport = new_vport(&parms);
1275 if (IS_ERR(vport)) {
1276 err = PTR_ERR(vport);
1277 if (err == -EBUSY)
1278 err = -EEXIST;
1279
94358dcf
TG
1280 if (err == -EEXIST) {
1281 /* An outdated user space instance that does not understand
1282 * the concept of user_features has attempted to create a new
1283 * datapath and is likely to reuse it. Drop all user features.
1284 */
1285 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1286 ovs_dp_reset_user_features(skb, info);
1287 }
1288
95b1d73a 1289 goto err_destroy_ports_array;
d6569377 1290 }
d6569377 1291
d81eef1b
JR
1292 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1293 info->snd_seq, 0, OVS_DP_CMD_NEW);
1294 BUG_ON(err < 0);
aaff4b55 1295
2a4999f3 1296 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
fb93e9aa 1297 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
d6569377 1298
cd2a59e9 1299 ovs_unlock();
d6569377 1300
e297c6b7 1301 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
d6569377
BP
1302 return 0;
1303
95b1d73a 1304err_destroy_ports_array:
d81eef1b 1305 ovs_unlock();
95b1d73a 1306 kfree(dp->ports);
99769a40
JG
1307err_destroy_percpu:
1308 free_percpu(dp->stats_percpu);
d6569377 1309err_destroy_table:
d103f479 1310 ovs_flow_tbl_destroy(&dp->table, false);
d6569377 1311err_free_dp:
0ceaa66c 1312 release_net(ovs_dp_get_net(dp));
d6569377 1313 kfree(dp);
d81eef1b
JR
1314err_free_reply:
1315 kfree_skb(reply);
d6569377 1316err:
064af421
BP
1317 return err;
1318}
1319
cd2a59e9 1320/* Called with ovs_mutex. */
2a4999f3 1321static void __dp_destroy(struct datapath *dp)
44e05eca 1322{
95b1d73a 1323 int i;
44e05eca 1324
95b1d73a
PS
1325 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1326 struct vport *vport;
f8dfbcb7 1327 struct hlist_node *n;
95b1d73a 1328
f8dfbcb7 1329 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1330 if (vport->port_no != OVSP_LOCAL)
1331 ovs_dp_detach_port(vport);
1332 }
ed099e92 1333
fb93e9aa 1334 list_del_rcu(&dp->list_node);
ed099e92 1335
cd2a59e9 1336 /* OVSP_LOCAL is datapath internal port. We need to make sure that
d103f479
AZ
1337 * all ports in datapath are destroyed first before freeing datapath.
1338 */
cd2a59e9 1339 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1340
d103f479
AZ
1341 /* RCU destroy the flow table */
1342 ovs_flow_tbl_destroy(&dp->table, true);
1343
ed099e92 1344 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1345}
1346
1347static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1348{
1349 struct sk_buff *reply;
1350 struct datapath *dp;
1351 int err;
1352
d81eef1b
JR
1353 reply = ovs_dp_cmd_alloc_info(info);
1354 if (!reply)
1355 return -ENOMEM;
1356
cd2a59e9 1357 ovs_lock();
2a4999f3
PS
1358 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1359 err = PTR_ERR(dp);
1360 if (IS_ERR(dp))
d81eef1b 1361 goto err_unlock_free;
2a4999f3 1362
d81eef1b
JR
1363 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1364 info->snd_seq, 0, OVS_DP_CMD_DEL);
1365 BUG_ON(err < 0);
2a4999f3
PS
1366
1367 __dp_destroy(dp);
ed099e92 1368
d81eef1b 1369 ovs_unlock();
e297c6b7 1370 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
99620d2c 1371 return 0;
d81eef1b
JR
1372
1373err_unlock_free:
cd2a59e9 1374 ovs_unlock();
d81eef1b 1375 kfree_skb(reply);
cd2a59e9 1376 return err;
44e05eca
BP
1377}
1378
df2c07f4 1379static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1380{
aaff4b55 1381 struct sk_buff *reply;
d6569377 1382 struct datapath *dp;
d6569377 1383 int err;
064af421 1384
d81eef1b
JR
1385 reply = ovs_dp_cmd_alloc_info(info);
1386 if (!reply)
1387 return -ENOMEM;
1388
cd2a59e9 1389 ovs_lock();
2a4999f3 1390 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1391 err = PTR_ERR(dp);
d6569377 1392 if (IS_ERR(dp))
d81eef1b 1393 goto err_unlock_free;
38c6ecbc 1394
c58cc9a4
TG
1395 ovs_dp_change(dp, info->attrs);
1396
d81eef1b
JR
1397 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1398 info->snd_seq, 0, OVS_DP_CMD_NEW);
1399 BUG_ON(err < 0);
aaff4b55 1400
cd2a59e9 1401 ovs_unlock();
e297c6b7 1402 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
aaff4b55 1403 return 0;
d81eef1b
JR
1404
1405err_unlock_free:
cd2a59e9 1406 ovs_unlock();
d81eef1b 1407 kfree_skb(reply);
cd2a59e9 1408 return err;
064af421
BP
1409}
1410
df2c07f4 1411static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1412{
aaff4b55 1413 struct sk_buff *reply;
d6569377 1414 struct datapath *dp;
d6569377 1415 int err;
1dcf111b 1416
d81eef1b
JR
1417 reply = ovs_dp_cmd_alloc_info(info);
1418 if (!reply)
1419 return -ENOMEM;
1420
1421 rcu_read_lock();
2a4999f3 1422 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1423 if (IS_ERR(dp)) {
1424 err = PTR_ERR(dp);
d81eef1b 1425 goto err_unlock_free;
cd2a59e9 1426 }
d81eef1b
JR
1427 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1428 info->snd_seq, 0, OVS_DP_CMD_NEW);
1429 BUG_ON(err < 0);
1430 rcu_read_unlock();
aaff4b55
BP
1431
1432 return genlmsg_reply(reply, info);
cd2a59e9 1433
d81eef1b
JR
1434err_unlock_free:
1435 rcu_read_unlock();
1436 kfree_skb(reply);
cd2a59e9 1437 return err;
1dcf111b
JP
1438}
1439
df2c07f4 1440static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1441{
2a4999f3 1442 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1443 struct datapath *dp;
1444 int skip = cb->args[0];
1445 int i = 0;
a7786963 1446
fb93e9aa
PS
1447 rcu_read_lock();
1448 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
a2bab2f0 1449 if (i >= skip &&
28aea917 1450 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1451 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1452 OVS_DP_CMD_NEW) < 0)
aaff4b55 1453 break;
254f2dc8 1454 i++;
a7786963 1455 }
fb93e9aa 1456 rcu_read_unlock();
aaff4b55 1457
254f2dc8
BP
1458 cb->args[0] = i;
1459
aaff4b55 1460 return skb->len;
c19e6535
BP
1461}
1462
aaff4b55 1463static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1464 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1465 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1466 .policy = datapath_policy,
df2c07f4 1467 .doit = ovs_dp_cmd_new
aaff4b55 1468 },
df2c07f4 1469 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1470 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1471 .policy = datapath_policy,
df2c07f4 1472 .doit = ovs_dp_cmd_del
aaff4b55 1473 },
df2c07f4 1474 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1475 .flags = 0, /* OK for unprivileged users. */
1476 .policy = datapath_policy,
df2c07f4
JP
1477 .doit = ovs_dp_cmd_get,
1478 .dumpit = ovs_dp_cmd_dump
aaff4b55 1479 },
df2c07f4 1480 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1481 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1482 .policy = datapath_policy,
df2c07f4 1483 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1484 },
1485};
1486
df2c07f4 1487static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
df2c07f4 1488 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1489 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
d48c88ec
JG
1490 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1491 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1492 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1493 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1494};
1495
f0fef760
BP
1496static struct genl_family dp_vport_genl_family = {
1497 .id = GENL_ID_GENERATE,
df2c07f4
JP
1498 .hdrsize = sizeof(struct ovs_header),
1499 .name = OVS_VPORT_FAMILY,
69685a88 1500 .version = OVS_VPORT_VERSION,
2a4999f3 1501 .maxattr = OVS_VPORT_ATTR_MAX,
b3dcb73c 1502 .netnsok = true,
14002a59 1503 SET_PARALLEL_OPS
f0fef760
BP
1504};
1505
850b6b3b 1506struct genl_multicast_group ovs_dp_vport_multicast_group = {
df2c07f4 1507 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1508};
1509
cd2a59e9 1510/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1511static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1512 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1513{
df2c07f4 1514 struct ovs_header *ovs_header;
e926dfe3 1515 struct ovs_vport_stats vport_stats;
c19e6535
BP
1516 int err;
1517
28aea917 1518 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1519 flags, cmd);
df2c07f4 1520 if (!ovs_header)
f0fef760 1521 return -EMSGSIZE;
c19e6535 1522
99769a40 1523 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1524
c3cc8c03
DM
1525 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1526 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1527 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
28aea917 1528 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
c3cc8c03 1529 goto nla_put_failure;
c19e6535 1530
850b6b3b 1531 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1532 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1533 &vport_stats))
1534 goto nla_put_failure;
c19e6535 1535
850b6b3b 1536 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1537 if (err == -EMSGSIZE)
1538 goto error;
c19e6535 1539
df2c07f4 1540 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1541
1542nla_put_failure:
1543 err = -EMSGSIZE;
f0fef760 1544error:
df2c07f4 1545 genlmsg_cancel(skb, ovs_header);
f0fef760 1546 return err;
064af421
BP
1547}
1548
d81eef1b
JR
1549static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1550{
1551 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1552}
1553
1554/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
28aea917 1555struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1556 u32 seq, u8 cmd)
064af421 1557{
c19e6535 1558 struct sk_buff *skb;
f0fef760 1559 int retval;
c19e6535 1560
f0fef760 1561 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1562 if (!skb)
1563 return ERR_PTR(-ENOMEM);
1564
28aea917 1565 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1566 BUG_ON(retval < 0);
1567
c19e6535 1568 return skb;
f0fef760 1569}
c19e6535 1570
cd2a59e9 1571/* Called with ovs_mutex or RCU read lock. */
2a4999f3
PS
1572static struct vport *lookup_vport(struct net *net,
1573 struct ovs_header *ovs_header,
df2c07f4 1574 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1575{
1576 struct datapath *dp;
1577 struct vport *vport;
1578
df2c07f4 1579 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1580 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1581 if (!vport)
c19e6535 1582 return ERR_PTR(-ENODEV);
24ce832d
BP
1583 if (ovs_header->dp_ifindex &&
1584 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1585 return ERR_PTR(-ENODEV);
c19e6535 1586 return vport;
df2c07f4
JP
1587 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1588 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1589
1590 if (port_no >= DP_MAX_PORTS)
f0fef760 1591 return ERR_PTR(-EFBIG);
c19e6535 1592
2a4999f3 1593 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1594 if (!dp)
1595 return ERR_PTR(-ENODEV);
f2459fe7 1596
cd2a59e9 1597 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 1598 if (!vport)
17535c57 1599 return ERR_PTR(-ENODEV);
c19e6535
BP
1600 return vport;
1601 } else
1602 return ERR_PTR(-EINVAL);
064af421
BP
1603}
1604
df2c07f4 1605static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1606{
f0fef760 1607 struct nlattr **a = info->attrs;
df2c07f4 1608 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1609 struct vport_parms parms;
ed099e92 1610 struct sk_buff *reply;
c19e6535 1611 struct vport *vport;
c19e6535 1612 struct datapath *dp;
b0ec0f27 1613 u32 port_no;
c19e6535 1614 int err;
b0ec0f27 1615
ea36840f
BP
1616 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1617 !a[OVS_VPORT_ATTR_UPCALL_PID])
d81eef1b
JR
1618 return -EINVAL;
1619
1620 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1621 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1622 if (port_no >= DP_MAX_PORTS)
1623 return -EFBIG;
1624
1625 reply = ovs_vport_cmd_alloc_info();
1626 if (!reply)
1627 return -ENOMEM;
f0fef760 1628
cd2a59e9 1629 ovs_lock();
2a4999f3 1630 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1631 err = -ENODEV;
1632 if (!dp)
d81eef1b 1633 goto exit_unlock_free;
c19e6535 1634
d81eef1b 1635 if (port_no) {
cd2a59e9 1636 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1637 err = -EBUSY;
1638 if (vport)
d81eef1b 1639 goto exit_unlock_free;
c19e6535
BP
1640 } else {
1641 for (port_no = 1; ; port_no++) {
1642 if (port_no >= DP_MAX_PORTS) {
1643 err = -EFBIG;
d81eef1b 1644 goto exit_unlock_free;
c19e6535 1645 }
cd2a59e9 1646 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1647 if (!vport)
1648 break;
51d4d598 1649 }
064af421 1650 }
b0ec0f27 1651
df2c07f4
JP
1652 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1653 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1654 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1655 parms.dp = dp;
1656 parms.port_no = port_no;
28aea917 1657 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
1658
1659 vport = new_vport(&parms);
1660 err = PTR_ERR(vport);
1661 if (IS_ERR(vport))
d81eef1b 1662 goto exit_unlock_free;
c19e6535 1663
faef6d2d 1664 err = 0;
1fc7083d
JG
1665 if (a[OVS_VPORT_ATTR_STATS])
1666 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1667
d81eef1b
JR
1668 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1669 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1670 BUG_ON(err < 0);
1671 ovs_unlock();
e297c6b7
TG
1672
1673 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
d81eef1b 1674 return 0;
c19e6535 1675
d81eef1b 1676exit_unlock_free:
cd2a59e9 1677 ovs_unlock();
d81eef1b 1678 kfree_skb(reply);
c19e6535 1679 return err;
44e05eca
BP
1680}
1681
df2c07f4 1682static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1683{
f0fef760
BP
1684 struct nlattr **a = info->attrs;
1685 struct sk_buff *reply;
c19e6535 1686 struct vport *vport;
c19e6535 1687 int err;
44e05eca 1688
d81eef1b
JR
1689 reply = ovs_vport_cmd_alloc_info();
1690 if (!reply)
1691 return -ENOMEM;
1692
cd2a59e9 1693 ovs_lock();
2a4999f3 1694 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
1695 err = PTR_ERR(vport);
1696 if (IS_ERR(vport))
d81eef1b 1697 goto exit_unlock_free;
44e05eca 1698
6455100f 1699 if (a[OVS_VPORT_ATTR_TYPE] &&
17ec1d04 1700 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
4879d4c7 1701 err = -EINVAL;
d81eef1b 1702 goto exit_unlock_free;
c25ea534
JG
1703 }
1704
17ec1d04 1705 if (a[OVS_VPORT_ATTR_OPTIONS]) {
850b6b3b 1706 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
17ec1d04 1707 if (err)
d81eef1b 1708 goto exit_unlock_free;
17ec1d04 1709 }
1fc7083d
JG
1710
1711 if (a[OVS_VPORT_ATTR_STATS])
1712 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1713
1714 if (a[OVS_VPORT_ATTR_UPCALL_PID])
28aea917 1715 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 1716
c25ea534
JG
1717 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1718 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1719 BUG_ON(err < 0);
cd2a59e9 1720 ovs_unlock();
d81eef1b 1721
8680ae4d 1722 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c25ea534
JG
1723 return 0;
1724
d81eef1b 1725exit_unlock_free:
cd2a59e9 1726 ovs_unlock();
d81eef1b 1727 kfree_skb(reply);
c19e6535 1728 return err;
064af421
BP
1729}
1730
df2c07f4 1731static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1732{
f0fef760
BP
1733 struct nlattr **a = info->attrs;
1734 struct sk_buff *reply;
c19e6535 1735 struct vport *vport;
c19e6535
BP
1736 int err;
1737
d81eef1b
JR
1738 reply = ovs_vport_cmd_alloc_info();
1739 if (!reply)
1740 return -ENOMEM;
1741
cd2a59e9 1742 ovs_lock();
2a4999f3 1743 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 1744 err = PTR_ERR(vport);
f0fef760 1745 if (IS_ERR(vport))
d81eef1b 1746 goto exit_unlock_free;
c19e6535 1747
df2c07f4 1748 if (vport->port_no == OVSP_LOCAL) {
f0fef760 1749 err = -EINVAL;
d81eef1b 1750 goto exit_unlock_free;
f0fef760
BP
1751 }
1752
d81eef1b
JR
1753 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1754 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
1755 BUG_ON(err < 0);
850b6b3b 1756 ovs_dp_detach_port(vport);
d81eef1b 1757 ovs_unlock();
f0fef760 1758
e297c6b7 1759 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
d81eef1b 1760 return 0;
f0fef760 1761
d81eef1b 1762exit_unlock_free:
cd2a59e9 1763 ovs_unlock();
d81eef1b 1764 kfree_skb(reply);
c19e6535 1765 return err;
7c40efc9
BP
1766}
1767
df2c07f4 1768static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1769{
f0fef760 1770 struct nlattr **a = info->attrs;
df2c07f4 1771 struct ovs_header *ovs_header = info->userhdr;
ed099e92 1772 struct sk_buff *reply;
c19e6535 1773 struct vport *vport;
c19e6535
BP
1774 int err;
1775
d81eef1b
JR
1776 reply = ovs_vport_cmd_alloc_info();
1777 if (!reply)
1778 return -ENOMEM;
1779
ed099e92 1780 rcu_read_lock();
2a4999f3 1781 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
1782 err = PTR_ERR(vport);
1783 if (IS_ERR(vport))
d81eef1b
JR
1784 goto exit_unlock_free;
1785 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1786 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1787 BUG_ON(err < 0);
df2fa9b5
JG
1788 rcu_read_unlock();
1789
1790 return genlmsg_reply(reply, info);
ed099e92 1791
d81eef1b 1792exit_unlock_free:
ed099e92 1793 rcu_read_unlock();
d81eef1b 1794 kfree_skb(reply);
c19e6535
BP
1795 return err;
1796}
1797
df2c07f4 1798static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1799{
df2c07f4 1800 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 1801 struct datapath *dp;
95b1d73a
PS
1802 int bucket = cb->args[0], skip = cb->args[1];
1803 int i, j = 0;
c19e6535 1804
03fc2881 1805 rcu_read_lock();
2a4999f3 1806 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
03fc2881
JR
1807 if (!dp) {
1808 rcu_read_unlock();
f0fef760 1809 return -ENODEV;
03fc2881 1810 }
95b1d73a 1811 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 1812 struct vport *vport;
95b1d73a
PS
1813
1814 j = 0;
f8dfbcb7 1815 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
1816 if (j >= skip &&
1817 ovs_vport_cmd_fill_info(vport, skb,
28aea917 1818 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
1819 cb->nlh->nlmsg_seq,
1820 NLM_F_MULTI,
1821 OVS_VPORT_CMD_NEW) < 0)
1822 goto out;
1823
1824 j++;
1825 }
1826 skip = 0;
c19e6535 1827 }
95b1d73a 1828out:
ed099e92 1829 rcu_read_unlock();
c19e6535 1830
95b1d73a
PS
1831 cb->args[0] = i;
1832 cb->args[1] = j;
f0fef760 1833
95b1d73a 1834 return skb->len;
7c40efc9
BP
1835}
1836
f0fef760 1837static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 1838 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
1839 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1840 .policy = vport_policy,
df2c07f4 1841 .doit = ovs_vport_cmd_new
f0fef760 1842 },
df2c07f4 1843 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
1844 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1845 .policy = vport_policy,
df2c07f4 1846 .doit = ovs_vport_cmd_del
f0fef760 1847 },
df2c07f4 1848 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
1849 .flags = 0, /* OK for unprivileged users. */
1850 .policy = vport_policy,
df2c07f4
JP
1851 .doit = ovs_vport_cmd_get,
1852 .dumpit = ovs_vport_cmd_dump
f0fef760 1853 },
df2c07f4 1854 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
1855 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1856 .policy = vport_policy,
df2c07f4 1857 .doit = ovs_vport_cmd_set,
f0fef760
BP
1858 },
1859};
1860
982b8810
BP
/* Bundles everything needed to register one generic netlink family. */
struct genl_family_and_ops {
	struct genl_family *family;		/* Family to register. */
	struct genl_ops *ops;			/* Its operations table. */
	int n_ops;				/* Number of entries in ops. */
	struct genl_multicast_group *group;	/* Optional; may be NULL. */
};
ed099e92 1867
982b8810 1868static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1869 { &dp_datapath_genl_family,
1870 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
850b6b3b 1871 &ovs_dp_datapath_multicast_group },
f0fef760
BP
1872 { &dp_vport_genl_family,
1873 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
850b6b3b 1874 &ovs_dp_vport_multicast_group },
37a1300c
BP
1875 { &dp_flow_genl_family,
1876 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
850b6b3b 1877 &ovs_dp_flow_multicast_group },
982b8810
BP
1878 { &dp_packet_genl_family,
1879 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1880 NULL },
1881};
ed099e92 1882
982b8810
BP
1883static void dp_unregister_genl(int n_families)
1884{
1885 int i;
ed099e92 1886
b867ca75 1887 for (i = 0; i < n_families; i++)
982b8810 1888 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
1889}
1890
982b8810 1891static int dp_register_genl(void)
064af421 1892{
982b8810
BP
1893 int n_registered;
1894 int err;
1895 int i;
064af421 1896
982b8810
BP
1897 n_registered = 0;
1898 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1899 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 1900
982b8810
BP
1901 err = genl_register_family_with_ops(f->family, f->ops,
1902 f->n_ops);
1903 if (err)
1904 goto error;
1905 n_registered++;
e22d4953 1906
982b8810
BP
1907 if (f->group) {
1908 err = genl_register_mc_group(f->family, f->group);
1909 if (err)
1910 goto error;
1911 }
1912 }
9cc8b4e4 1913
982b8810 1914 return 0;
064af421
BP
1915
1916error:
982b8810
BP
1917 dp_unregister_genl(n_registered);
1918 return err;
064af421
BP
1919}
1920
2a4999f3
PS
1921static int __net_init ovs_init_net(struct net *net)
1922{
1923 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1924
1925 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 1926 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2a4999f3
PS
1927 return 0;
1928}
1929
1930static void __net_exit ovs_exit_net(struct net *net)
1931{
cd2a59e9 1932 struct datapath *dp, *dp_next;
2a4999f3
PS
1933 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1934
cd2a59e9
PS
1935 ovs_lock();
1936 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1937 __dp_destroy(dp);
1938 ovs_unlock();
1939
1940 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
1941}
1942
1943static struct pernet_operations ovs_net_ops = {
1944 .init = ovs_init_net,
1945 .exit = ovs_exit_net,
1946 .id = &ovs_net_id,
1947 .size = sizeof(struct ovs_net),
1948};
1949
637c8268
PS
/*
 * NOTE(review): presumably expands to compatibility helpers for the
 * pernet "device" registration calls on kernels that need them; the macro
 * is defined outside this file, so confirm against the compat headers.
 */
DEFINE_COMPAT_PNET_REG_FUNC(device);
1951
22d24ebf
BP
1952static int __init dp_init(void)
1953{
1954 int err;
1955
f3d85db3 1956 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 1957
dc5f3fef 1958 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
8a07709c 1959 VERSION);
064af421 1960
850b6b3b 1961 err = ovs_flow_init();
3544358a 1962 if (err)
533e96e7 1963 goto error;
3544358a 1964
850b6b3b 1965 err = ovs_vport_init();
064af421
BP
1966 if (err)
1967 goto error_flow_exit;
1968
2a4999f3 1969 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
1970 if (err)
1971 goto error_vport_exit;
1972
2a4999f3
PS
1973 err = register_netdevice_notifier(&ovs_dp_device_notifier);
1974 if (err)
1975 goto error_netns_exit;
1976
982b8810
BP
1977 err = dp_register_genl();
1978 if (err < 0)
37a1300c 1979 goto error_unreg_notifier;
982b8810 1980
064af421
BP
1981 return 0;
1982
1983error_unreg_notifier:
850b6b3b 1984 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1985error_netns_exit:
1986 unregister_pernet_device(&ovs_net_ops);
f2459fe7 1987error_vport_exit:
850b6b3b 1988 ovs_vport_exit();
064af421 1989error_flow_exit:
850b6b3b 1990 ovs_flow_exit();
064af421
BP
1991error:
1992 return err;
1993}
1994
1995static void dp_cleanup(void)
1996{
982b8810 1997 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
850b6b3b 1998 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1999 unregister_pernet_device(&ovs_net_ops);
2000 rcu_barrier();
850b6b3b
JG
2001 ovs_vport_exit();
2002 ovs_flow_exit();
064af421
BP
2003}
2004
2005module_init(dp_init);
2006module_exit(dp_cleanup);
2007
2008MODULE_DESCRIPTION("Open vSwitch switching datapath");
2009MODULE_LICENSE("GPL");
3d0666d2 2010MODULE_VERSION(VERSION);