]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
datapath: Allow user space to announce ability to accept unaligned Netlink messages
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
a1c564be 2 * Copyright (c) 2007-2013 Nicira, Inc.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421 41#include <linux/wait.h>
064af421 42#include <asm/div64.h>
656a0e37 43#include <linux/highmem.h>
064af421
BP
44#include <linux/netfilter_bridge.h>
45#include <linux/netfilter_ipv4.h>
46#include <linux/inetdevice.h>
47#include <linux/list.h>
077257b8 48#include <linux/openvswitch.h>
064af421 49#include <linux/rculist.h>
064af421 50#include <linux/dmi.h>
cd2a59e9
PS
51#include <linux/genetlink.h>
52#include <net/genetlink.h>
36956a7d 53#include <net/genetlink.h>
2a4999f3
PS
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
064af421 56
064af421 57#include "datapath.h"
064af421 58#include "flow.h"
a097c0b2 59#include "flow_netlink.h"
303708cc 60#include "vlan.h"
f2459fe7 61#include "vport-internal_dev.h"
d5de5b0d 62#include "vport-netdev.h"
064af421 63
2a4999f3
PS
64int ovs_net_id __read_mostly;
65
e297c6b7
TG
66static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
67 struct genl_multicast_group *grp)
68{
69 genl_notify(skb, genl_info_net(info), info->snd_portid,
70 grp->id, info->nlhdr, GFP_KERNEL);
71}
72
ed099e92
BP
73/**
74 * DOC: Locking:
064af421 75 *
cd2a59e9
PS
76 * All writes, e.g. writes to device state (add/remove datapath, port, set
77 * operations on vports, etc.) and writes to other state (flow table
78 * modifications, set miscellaneous datapath parameters, etc.), are protected
79 * by ovs_mutex, which is taken with ovs_lock().
ed099e92
BP
80 *
81 * Reads are protected by RCU.
82 *
83 * There are a few special cases (mostly stats) that have their own
84 * synchronization but they nest under all of above and don't interact with
85 * each other.
cd2a59e9
PS
86 *
87 * The RTNL lock nests inside ovs_mutex.
064af421 88 */
ed099e92 89
cd2a59e9
PS
90static DEFINE_MUTEX(ovs_mutex);
91
92void ovs_lock(void)
93{
94 mutex_lock(&ovs_mutex);
95}
96
97void ovs_unlock(void)
98{
99 mutex_unlock(&ovs_mutex);
100}
101
#ifdef CONFIG_LOCKDEP
/* Reports whether ovs_mutex is held, for use in lockdep assertions.
 * When lock debugging is switched off ('debug_locks' is zero) this always
 * returns 1. */
int lockdep_ovsl_is_held(void)
{
	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
}
#endif
111
/* Forward declarations for helpers defined later in this file. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_gso_packets(struct net *net, int dp_ifindex,
			     struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(struct net *net, int dp_ifindex,
				  struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info);
064af421 118
cd2a59e9 119/* Must be called with rcu_read_lock or ovs_mutex. */
2a4999f3 120static struct datapath *get_dp(struct net *net, int dp_ifindex)
064af421 121{
254f2dc8
BP
122 struct datapath *dp = NULL;
123 struct net_device *dev;
ed099e92 124
254f2dc8 125 rcu_read_lock();
2a4999f3 126 dev = dev_get_by_index_rcu(net, dp_ifindex);
254f2dc8 127 if (dev) {
850b6b3b 128 struct vport *vport = ovs_internal_dev_get_vport(dev);
254f2dc8
BP
129 if (vport)
130 dp = vport->dp;
131 }
132 rcu_read_unlock();
133
134 return dp;
064af421 135}
064af421 136
cd2a59e9 137/* Must be called with rcu_read_lock or ovs_mutex. */
850b6b3b 138const char *ovs_dp_name(const struct datapath *dp)
f2459fe7 139{
cd2a59e9 140 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
16b82e84 141 return vport->ops->get_name(vport);
f2459fe7
JG
142}
143
99769a40
JG
144static int get_dpifindex(struct datapath *dp)
145{
146 struct vport *local;
147 int ifindex;
148
149 rcu_read_lock();
150
95b1d73a 151 local = ovs_vport_rcu(dp, OVSP_LOCAL);
99769a40 152 if (local)
d5de5b0d 153 ifindex = netdev_vport_priv(local)->dev->ifindex;
99769a40
JG
154 else
155 ifindex = 0;
156
157 rcu_read_unlock();
158
159 return ifindex;
160}
161
46c6a11d
JG
162static void destroy_dp_rcu(struct rcu_head *rcu)
163{
164 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 165
0585f7a8 166 ovs_flow_tbl_destroy(&dp->table);
46c6a11d 167 free_percpu(dp->stats_percpu);
2a4999f3 168 release_net(ovs_dp_get_net(dp));
95b1d73a 169 kfree(dp->ports);
5ca1ba48 170 kfree(dp);
46c6a11d
JG
171}
172
95b1d73a
PS
173static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
174 u16 port_no)
175{
176 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
177}
178
179struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
180{
181 struct vport *vport;
95b1d73a
PS
182 struct hlist_head *head;
183
184 head = vport_hash_bucket(dp, port_no);
f8dfbcb7 185 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
95b1d73a
PS
186 if (vport->port_no == port_no)
187 return vport;
188 }
189 return NULL;
190}
191
cd2a59e9 192/* Called with ovs_mutex. */
c19e6535 193static struct vport *new_vport(const struct vport_parms *parms)
064af421 194{
f2459fe7 195 struct vport *vport;
f2459fe7 196
850b6b3b 197 vport = ovs_vport_add(parms);
c19e6535
BP
198 if (!IS_ERR(vport)) {
199 struct datapath *dp = parms->dp;
95b1d73a 200 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
064af421 201
95b1d73a 202 hlist_add_head_rcu(&vport->dp_hash_node, head);
c19e6535 203 }
c19e6535 204 return vport;
064af421
BP
205}
206
850b6b3b 207void ovs_dp_detach_port(struct vport *p)
064af421 208{
cd2a59e9 209 ASSERT_OVSL();
064af421 210
064af421 211 /* First drop references to device. */
95b1d73a 212 hlist_del_rcu(&p->dp_hash_node);
f2459fe7 213
7237e4f4 214 /* Then destroy it. */
850b6b3b 215 ovs_vport_del(p);
064af421
BP
216}
217
8819fac7 218/* Must be called with rcu_read_lock. */
850b6b3b 219void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
220{
221 struct datapath *dp = p->dp;
3544358a 222 struct sw_flow *flow;
064af421 223 struct dp_stats_percpu *stats;
52a23d92 224 struct sw_flow_key key;
e9141eec 225 u64 *stats_counter;
4fa72a95 226 u32 n_mask_hit;
4c1ad233 227 int error;
064af421 228
70dbc259 229 stats = this_cpu_ptr(dp->stats_percpu);
a063b0df 230
52a23d92 231 /* Extract flow from 'skb' into 'key'. */
a1c564be 232 error = ovs_flow_extract(skb, p->port_no, &key);
52a23d92
JG
233 if (unlikely(error)) {
234 kfree_skb(skb);
235 return;
55574bb0
BP
236 }
237
52a23d92 238 /* Look up flow. */
4f88b5e5 239 flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
52a23d92
JG
240 if (unlikely(!flow)) {
241 struct dp_upcall_info upcall;
242
243 upcall.cmd = OVS_PACKET_CMD_MISS;
244 upcall.key = &key;
245 upcall.userdata = NULL;
246 upcall.portid = p->upcall_portid;
247 ovs_dp_upcall(dp, skb, &upcall);
248 consume_skb(skb);
249 stats_counter = &stats->n_missed;
250 goto out;
251 }
252
253 OVS_CB(skb)->flow = flow;
d1d71a36 254 OVS_CB(skb)->pkt_key = &key;
52a23d92 255
b0b906cc 256 ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
850b6b3b 257 ovs_execute_actions(dp, skb);
b0b906cc 258 stats_counter = &stats->n_hit;
55574bb0 259
8819fac7 260out:
55574bb0 261 /* Update datapath statistics. */
821cb9fa 262 u64_stats_update_begin(&stats->sync);
e9141eec 263 (*stats_counter)++;
4fa72a95 264 stats->n_mask_hit += n_mask_hit;
821cb9fa 265 u64_stats_update_end(&stats->sync);
064af421
BP
266}
267
aa5a8fdc
JG
268static struct genl_family dp_packet_genl_family = {
269 .id = GENL_ID_GENERATE,
df2c07f4
JP
270 .hdrsize = sizeof(struct ovs_header),
271 .name = OVS_PACKET_FAMILY,
69685a88 272 .version = OVS_PACKET_VERSION,
2a4999f3 273 .maxattr = OVS_PACKET_ATTR_MAX,
b3dcb73c 274 .netnsok = true,
14002a59 275 SET_PARALLEL_OPS
aa5a8fdc
JG
276};
277
850b6b3b
JG
278int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
279 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
280{
281 struct dp_stats_percpu *stats;
7257b535 282 int dp_ifindex;
aa5a8fdc
JG
283 int err;
284
28aea917 285 if (upcall_info->portid == 0) {
b063d9f0 286 err = -ENOTCONN;
b063d9f0
JG
287 goto err;
288 }
289
7257b535
BP
290 dp_ifindex = get_dpifindex(dp);
291 if (!dp_ifindex) {
292 err = -ENODEV;
293 goto err;
aa5a8fdc
JG
294 }
295
7257b535 296 if (!skb_is_gso(skb))
2a4999f3 297 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
7257b535 298 else
2a4999f3 299 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
d76195db
JG
300 if (err)
301 goto err;
302
303 return 0;
aa5a8fdc 304
aa5a8fdc 305err:
70dbc259 306 stats = this_cpu_ptr(dp->stats_percpu);
aa5a8fdc 307
821cb9fa 308 u64_stats_update_begin(&stats->sync);
aa5a8fdc 309 stats->n_lost++;
821cb9fa 310 u64_stats_update_end(&stats->sync);
aa5a8fdc 311
aa5a8fdc 312 return err;
982b8810
BP
313}
314
2a4999f3
PS
315static int queue_gso_packets(struct net *net, int dp_ifindex,
316 struct sk_buff *skb,
7257b535 317 const struct dp_upcall_info *upcall_info)
cb5087ca 318{
d4cba1f8 319 unsigned short gso_type = skb_shinfo(skb)->gso_type;
7257b535
BP
320 struct dp_upcall_info later_info;
321 struct sw_flow_key later_key;
322 struct sk_buff *segs, *nskb;
323 int err;
cb5087ca 324
0aa52d88 325 segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
79089764
PS
326 if (IS_ERR(segs))
327 return PTR_ERR(segs);
99769a40 328
7257b535
BP
329 /* Queue all of the segments. */
330 skb = segs;
cb5087ca 331 do {
2a4999f3 332 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
982b8810 333 if (err)
7257b535 334 break;
856081f6 335
d4cba1f8 336 if (skb == segs && gso_type & SKB_GSO_UDP) {
e1cf87ff
JG
337 /* The initial flow key extracted by ovs_flow_extract()
338 * in this case is for a first fragment, so we need to
7257b535
BP
339 * properly mark later fragments.
340 */
341 later_key = *upcall_info->key;
9e44d715 342 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
7257b535
BP
343
344 later_info = *upcall_info;
345 later_info.key = &later_key;
346 upcall_info = &later_info;
347 }
36ce148c 348 } while ((skb = skb->next));
cb5087ca 349
7257b535
BP
350 /* Free all of the segments. */
351 skb = segs;
352 do {
353 nskb = skb->next;
354 if (err)
355 kfree_skb(skb);
356 else
357 consume_skb(skb);
358 } while ((skb = nskb));
359 return err;
360}
361
0afa2373
TG
362static size_t key_attr_size(void)
363{
364 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
365 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
366 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
367 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
368 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
369 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
370 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
371 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
373 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
374 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
375 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
376 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
377 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
378 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
379 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
380 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
381 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
382 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
383}
384
385static size_t upcall_msg_size(const struct sk_buff *skb,
386 const struct nlattr *userdata)
387{
388 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
389 + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
390 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
391
392 /* OVS_PACKET_ATTR_USERDATA */
393 if (userdata)
394 size += NLA_ALIGN(userdata->nla_len);
395
396 return size;
397}
398
2a4999f3
PS
399static int queue_userspace_packet(struct net *net, int dp_ifindex,
400 struct sk_buff *skb,
7257b535
BP
401 const struct dp_upcall_info *upcall_info)
402{
403 struct ovs_header *upcall;
6161d3fd 404 struct sk_buff *nskb = NULL;
7257b535
BP
405 struct sk_buff *user_skb; /* to be queued to userspace */
406 struct nlattr *nla;
68eadcf0
TG
407 struct genl_info info = {
408#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
409 .dst_sk = net->genl_sock,
410#endif
411 .snd_portid = upcall_info->portid,
412 };
413 size_t len;
7257b535
BP
414 int err;
415
6161d3fd
JG
416 if (vlan_tx_tag_present(skb)) {
417 nskb = skb_clone(skb, GFP_ATOMIC);
418 if (!nskb)
419 return -ENOMEM;
07ac71ea
PS
420
421 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
422 if (!nskb)
423 return -ENOMEM;
424
425 vlan_set_tci(nskb, 0);
7257b535 426
6161d3fd
JG
427 skb = nskb;
428 }
429
430 if (nla_attr_size(skb->len) > USHRT_MAX) {
431 err = -EFBIG;
432 goto out;
433 }
7257b535 434
68eadcf0
TG
435 len = upcall_msg_size(skb, upcall_info->userdata);
436 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
6161d3fd
JG
437 if (!user_skb) {
438 err = -ENOMEM;
439 goto out;
440 }
7257b535
BP
441
442 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
443 0, upcall_info->cmd);
444 upcall->dp_ifindex = dp_ifindex;
445
446 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
a097c0b2 447 ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
7257b535
BP
448 nla_nest_end(user_skb, nla);
449
450 if (upcall_info->userdata)
e995e3df 451 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
462a988b 452 nla_len(upcall_info->userdata),
e995e3df 453 nla_data(upcall_info->userdata));
7257b535
BP
454
455 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
bed53bd1
PS
456
457 skb_copy_and_csum_dev(skb, nla_data(nla));
7257b535 458
c39b1a5c 459 genlmsg_end(user_skb, upcall);
28aea917 460 err = genlmsg_unicast(net, user_skb, upcall_info->portid);
6161d3fd
JG
461
462out:
463 kfree_skb(nskb);
464 return err;
cb5087ca
BP
465}
466
df2c07f4 467static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 468{
df2c07f4 469 struct ovs_header *ovs_header = info->userhdr;
982b8810 470 struct nlattr **a = info->attrs;
e0e57990 471 struct sw_flow_actions *acts;
982b8810 472 struct sk_buff *packet;
e0e57990 473 struct sw_flow *flow;
f7cd0081 474 struct datapath *dp;
d6569377 475 struct ethhdr *eth;
3f19d399 476 int len;
d6569377 477 int err;
064af421 478
f7cd0081 479 err = -EINVAL;
df2c07f4 480 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 481 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 482 goto err;
064af421 483
df2c07f4 484 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 485 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
486 err = -ENOMEM;
487 if (!packet)
e5cad958 488 goto err;
3f19d399
BP
489 skb_reserve(packet, NET_IP_ALIGN);
490
bf3d6fce 491 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 492
f7cd0081
BP
493 skb_reset_mac_header(packet);
494 eth = eth_hdr(packet);
064af421 495
d6569377
BP
496 /* Normally, setting the skb 'protocol' field would be handled by a
497 * call to eth_type_trans(), but it assumes there's a sending
498 * device, which we may not have. */
7cd46155 499 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
f7cd0081 500 packet->protocol = eth->h_proto;
d6569377 501 else
f7cd0081 502 packet->protocol = htons(ETH_P_802_2);
d3c54451 503
e0e57990 504 /* Build an sw_flow for sending this packet. */
b0f3a2fe 505 flow = ovs_flow_alloc(false);
e0e57990
BP
506 err = PTR_ERR(flow);
507 if (IS_ERR(flow))
e5cad958 508 goto err_kfree_skb;
064af421 509
a1c564be 510 err = ovs_flow_extract(packet, -1, &flow->key);
e0e57990 511 if (err)
9321954a 512 goto err_flow_free;
e0e57990 513
a097c0b2 514 err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
80e5eed9 515 if (err)
9321954a 516 goto err_flow_free;
a097c0b2 517 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
e0e57990
BP
518 err = PTR_ERR(acts);
519 if (IS_ERR(acts))
9321954a 520 goto err_flow_free;
9b405f1a 521
a097c0b2
PS
522 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
523 &flow->key, 0, &acts);
e0e57990 524 rcu_assign_pointer(flow->sf_acts, acts);
9b405f1a
PS
525 if (err)
526 goto err_flow_free;
e0e57990
BP
527
528 OVS_CB(packet)->flow = flow;
d1d71a36 529 OVS_CB(packet)->pkt_key = &flow->key;
abff858b 530 packet->priority = flow->key.phy.priority;
3025a772 531 packet->mark = flow->key.phy.skb_mark;
e0e57990 532
d6569377 533 rcu_read_lock();
2a4999f3 534 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
f7cd0081 535 err = -ENODEV;
e5cad958
BP
536 if (!dp)
537 goto err_unlock;
cc4015df 538
e9141eec 539 local_bh_disable();
850b6b3b 540 err = ovs_execute_actions(dp, packet);
e9141eec 541 local_bh_enable();
d6569377 542 rcu_read_unlock();
e0e57990 543
a1c564be 544 ovs_flow_free(flow, false);
e5cad958 545 return err;
064af421 546
e5cad958
BP
547err_unlock:
548 rcu_read_unlock();
9321954a 549err_flow_free:
a1c564be 550 ovs_flow_free(flow, false);
e5cad958
BP
551err_kfree_skb:
552 kfree_skb(packet);
553err:
d6569377 554 return err;
064af421
BP
555}
556
df2c07f4 557static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc 558 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
df2c07f4
JP
559 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
560 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
982b8810
BP
561};
562
563static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 564 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
565 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
566 .policy = packet_policy,
df2c07f4 567 .doit = ovs_packet_cmd_execute
982b8810
BP
568 }
569};
570
4fa72a95
AZ
571static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
572 struct ovs_dp_megaflow_stats *mega_stats)
064af421 573{
d6569377 574 int i;
f180c2e2 575
4fa72a95
AZ
576 memset(mega_stats, 0, sizeof(*mega_stats));
577
994dc286 578 stats->n_flows = ovs_flow_tbl_count(&dp->table);
4fa72a95 579 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
064af421 580
7257b535 581 stats->n_hit = stats->n_missed = stats->n_lost = 0;
4fa72a95 582
d6569377
BP
583 for_each_possible_cpu(i) {
584 const struct dp_stats_percpu *percpu_stats;
585 struct dp_stats_percpu local_stats;
821cb9fa 586 unsigned int start;
44e05eca 587
d6569377 588 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 589
d6569377 590 do {
821cb9fa 591 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
d6569377 592 local_stats = *percpu_stats;
821cb9fa 593 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
064af421 594
d6569377
BP
595 stats->n_hit += local_stats.n_hit;
596 stats->n_missed += local_stats.n_missed;
597 stats->n_lost += local_stats.n_lost;
4fa72a95 598 mega_stats->n_mask_hit += local_stats.n_mask_hit;
d6569377
BP
599 }
600}
064af421 601
df2c07f4
JP
602static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
603 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
604 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
605 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 606};
36956a7d 607
37a1300c
BP
608static struct genl_family dp_flow_genl_family = {
609 .id = GENL_ID_GENERATE,
df2c07f4
JP
610 .hdrsize = sizeof(struct ovs_header),
611 .name = OVS_FLOW_FAMILY,
69685a88 612 .version = OVS_FLOW_VERSION,
2a4999f3 613 .maxattr = OVS_FLOW_ATTR_MAX,
b3dcb73c 614 .netnsok = true,
14002a59 615 SET_PARALLEL_OPS
37a1300c 616};
ed099e92 617
850b6b3b 618static struct genl_multicast_group ovs_dp_flow_multicast_group = {
df2c07f4 619 .name = OVS_FLOW_MCGROUP
37a1300c
BP
620};
621
0afa2373
TG
622static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
623{
624 return NLMSG_ALIGN(sizeof(struct ovs_header))
625 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
a1c564be 626 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
0afa2373
TG
627 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
628 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
629 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
630 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
631}
632
cd2a59e9 633/* Called with ovs_mutex. */
df2c07f4 634static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
28aea917 635 struct sk_buff *skb, u32 portid,
6455100f 636 u32 seq, u32 flags, u8 cmd)
d6569377 637{
37a1300c 638 const int skb_orig_len = skb->len;
9b405f1a 639 struct nlattr *start;
b0f3a2fe
PS
640 struct ovs_flow_stats stats;
641 __be16 tcp_flags;
642 unsigned long used;
df2c07f4 643 struct ovs_header *ovs_header;
d6569377 644 struct nlattr *nla;
d6569377 645 int err;
064af421 646
28aea917 647 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
df2c07f4 648 if (!ovs_header)
37a1300c 649 return -EMSGSIZE;
d6569377 650
99769a40 651 ovs_header->dp_ifindex = get_dpifindex(dp);
d6569377 652
a1c564be 653 /* Fill flow key. */
df2c07f4 654 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
d6569377
BP
655 if (!nla)
656 goto nla_put_failure;
a1c564be 657
a097c0b2 658 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
d6569377 659 if (err)
37a1300c 660 goto error;
d6569377
BP
661 nla_nest_end(skb, nla);
662
a1c564be
AZ
663 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
664 if (!nla)
665 goto nla_put_failure;
666
a097c0b2 667 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
a1c564be
AZ
668 if (err)
669 goto error;
670
671 nla_nest_end(skb, nla);
672
b0f3a2fe
PS
673 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
674 if (used &&
675 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
c3cc8c03 676 goto nla_put_failure;
d6569377 677
b0f3a2fe
PS
678 if (stats.n_packets &&
679 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
680 goto nla_put_failure;
b0b906cc 681
b0f3a2fe
PS
682 if ((u8)ntohs(tcp_flags) &&
683 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
c3cc8c03 684 goto nla_put_failure;
d6569377 685
df2c07f4 686 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
687 * this is the first flow to be dumped into 'skb'. This is unusual for
688 * Netlink but individual action lists can be longer than
689 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
690 * The userspace caller can always fetch the actions separately if it
691 * really wants them. (Most userspace callers in fact don't care.)
692 *
693 * This can only fail for dump operations because the skb is always
694 * properly sized for single flows.
695 */
9b405f1a 696 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee 697 if (start) {
f44ccce1
PS
698 const struct sw_flow_actions *sf_acts;
699
780ec6ae 700 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
f44ccce1 701
a097c0b2
PS
702 err = ovs_nla_put_actions(sf_acts->actions,
703 sf_acts->actions_len, skb);
0a25b039
BP
704 if (!err)
705 nla_nest_end(skb, start);
706 else {
707 if (skb_orig_len)
708 goto error;
709
710 nla_nest_cancel(skb, start);
711 }
7aac03bd
JG
712 } else if (skb_orig_len)
713 goto nla_put_failure;
37a1300c 714
df2c07f4 715 return genlmsg_end(skb, ovs_header);
d6569377
BP
716
717nla_put_failure:
718 err = -EMSGSIZE;
37a1300c 719error:
df2c07f4 720 genlmsg_cancel(skb, ovs_header);
d6569377 721 return err;
44e05eca
BP
722}
723
68eadcf0
TG
724static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
725 struct genl_info *info)
44e05eca 726{
68eadcf0 727 size_t len;
d6569377 728
68eadcf0 729 len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
d6569377 730
68eadcf0 731 return genlmsg_new_unicast(len, info, GFP_KERNEL);
37a1300c 732}
8d5ebd83 733
6455100f
PS
734static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
735 struct datapath *dp,
68eadcf0
TG
736 struct genl_info *info,
737 u8 cmd)
37a1300c
BP
738{
739 struct sk_buff *skb;
740 int retval;
d6569377 741
68eadcf0 742 skb = ovs_flow_cmd_alloc_info(flow, info);
37a1300c
BP
743 if (!skb)
744 return ERR_PTR(-ENOMEM);
d6569377 745
68eadcf0
TG
746 retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
747 info->snd_seq, 0, cmd);
37a1300c 748 BUG_ON(retval < 0);
d6569377 749 return skb;
064af421
BP
750}
751
df2c07f4 752static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 753{
37a1300c 754 struct nlattr **a = info->attrs;
df2c07f4 755 struct ovs_header *ovs_header = info->userhdr;
529db635 756 struct sw_flow_key key, masked_key;
a1c564be
AZ
757 struct sw_flow *flow = NULL;
758 struct sw_flow_mask mask;
37a1300c 759 struct sk_buff *reply;
9c52546b 760 struct datapath *dp;
9b405f1a 761 struct sw_flow_actions *acts = NULL;
a1c564be 762 struct sw_flow_match match;
b0f3a2fe 763 bool exact_5tuple;
bc4a05c6 764 int error;
064af421 765
37a1300c
BP
766 /* Extract key. */
767 error = -EINVAL;
df2c07f4 768 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 769 goto error;
a1c564be
AZ
770
771 ovs_match_init(&match, &key, &mask);
b0f3a2fe 772 error = ovs_nla_get_match(&match, &exact_5tuple,
a097c0b2 773 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
37a1300c
BP
774 if (error)
775 goto error;
064af421 776
37a1300c 777 /* Validate actions. */
df2c07f4 778 if (a[OVS_FLOW_ATTR_ACTIONS]) {
a097c0b2 779 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
9b405f1a
PS
780 error = PTR_ERR(acts);
781 if (IS_ERR(acts))
37a1300c 782 goto error;
9b405f1a 783
a097c0b2
PS
784 ovs_flow_mask_key(&masked_key, &key, &mask);
785 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
786 &masked_key, 0, &acts);
529db635
JG
787 if (error) {
788 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
9b405f1a 789 goto err_kfree;
529db635 790 }
df2c07f4 791 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
37a1300c
BP
792 error = -EINVAL;
793 goto error;
794 }
795
cd2a59e9 796 ovs_lock();
2a4999f3 797 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
d6569377 798 error = -ENODEV;
9c52546b 799 if (!dp)
cd2a59e9 800 goto err_unlock_ovs;
704a1e09 801
a1c564be 802 /* Check if this is a duplicate flow */
4f88b5e5 803 flow = ovs_flow_tbl_lookup(&dp->table, &key);
3544358a 804 if (!flow) {
d6569377
BP
805 /* Bail out if we're not allowed to create a new flow. */
806 error = -ENOENT;
df2c07f4 807 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
cd2a59e9 808 goto err_unlock_ovs;
d6569377 809
d6569377 810 /* Allocate flow. */
b0f3a2fe 811 flow = ovs_flow_alloc(!exact_5tuple);
d6569377
BP
812 if (IS_ERR(flow)) {
813 error = PTR_ERR(flow);
cd2a59e9 814 goto err_unlock_ovs;
d6569377 815 }
d6569377 816
529db635
JG
817 flow->key = masked_key;
818 flow->unmasked_key = key;
d6569377
BP
819 rcu_assign_pointer(flow->sf_acts, acts);
820
d6569377 821 /* Put flow in bucket. */
0585f7a8
PS
822 error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
823 if (error) {
824 acts = NULL;
825 goto err_flow_free;
826 }
37a1300c 827
68eadcf0 828 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
d6569377
BP
829 } else {
830 /* We found a matching flow. */
831 struct sw_flow_actions *old_acts;
832
833 /* Bail out if we're not allowed to modify an existing flow.
834 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
835 * because Generic Netlink treats the latter as a dump
836 * request. We also accept NLM_F_EXCL in case that bug ever
837 * gets fixed.
838 */
839 error = -EEXIST;
df2c07f4 840 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
37a1300c 841 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
cd2a59e9 842 goto err_unlock_ovs;
d6569377 843
b21e5b6a
AZ
844 /* The unmasked key has to be the same for flow updates. */
845 error = -EINVAL;
a097c0b2 846 if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
1b936472 847 OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
b21e5b6a 848 goto err_unlock_ovs;
1b936472 849 }
b21e5b6a 850
d6569377 851 /* Update actions. */
cd2a59e9 852 old_acts = ovsl_dereference(flow->sf_acts);
9b405f1a 853 rcu_assign_pointer(flow->sf_acts, acts);
a097c0b2 854 ovs_nla_free_flow_actions(old_acts);
d6569377 855
68eadcf0 856 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
d6569377
BP
857
858 /* Clear stats. */
b0b906cc
PS
859 if (a[OVS_FLOW_ATTR_CLEAR])
860 ovs_flow_stats_clear(flow);
9c52546b 861 }
cd2a59e9 862 ovs_unlock();
37a1300c
BP
863
864 if (!IS_ERR(reply))
e297c6b7 865 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 866 else
b3dcb73c 867 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
2a4999f3 868 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
d6569377 869 return 0;
704a1e09 870
a1c564be
AZ
871err_flow_free:
872 ovs_flow_free(flow, false);
cd2a59e9
PS
873err_unlock_ovs:
874 ovs_unlock();
9b405f1a 875err_kfree:
ba400435 876 kfree(acts);
37a1300c 877error:
9c52546b 878 return error;
704a1e09
BP
879}
880
df2c07f4 881static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 882{
37a1300c 883 struct nlattr **a = info->attrs;
df2c07f4 884 struct ovs_header *ovs_header = info->userhdr;
37a1300c 885 struct sw_flow_key key;
37a1300c 886 struct sk_buff *reply;
704a1e09 887 struct sw_flow *flow;
9c52546b 888 struct datapath *dp;
a1c564be 889 struct sw_flow_match match;
9c52546b 890 int err;
704a1e09 891
1b936472
AZ
892 if (!a[OVS_FLOW_ATTR_KEY]) {
893 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
37a1300c 894 return -EINVAL;
1b936472 895 }
a1c564be
AZ
896
897 ovs_match_init(&match, &key, NULL);
b0f3a2fe 898 err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c
BP
899 if (err)
900 return err;
704a1e09 901
cd2a59e9 902 ovs_lock();
2a4999f3 903 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
904 if (!dp) {
905 err = -ENODEV;
906 goto unlock;
907 }
704a1e09 908
4f88b5e5 909 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 910 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
911 err = -ENOENT;
912 goto unlock;
913 }
d6569377 914
68eadcf0 915 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
cd2a59e9
PS
916 if (IS_ERR(reply)) {
917 err = PTR_ERR(reply);
918 goto unlock;
919 }
36956a7d 920
cd2a59e9 921 ovs_unlock();
37a1300c 922 return genlmsg_reply(reply, info);
cd2a59e9
PS
923unlock:
924 ovs_unlock();
925 return err;
d6569377 926}
9c52546b 927
df2c07f4 928static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 929{
37a1300c 930 struct nlattr **a = info->attrs;
df2c07f4 931 struct ovs_header *ovs_header = info->userhdr;
37a1300c 932 struct sw_flow_key key;
37a1300c 933 struct sk_buff *reply;
d6569377 934 struct sw_flow *flow;
d6569377 935 struct datapath *dp;
a1c564be 936 struct sw_flow_match match;
d6569377 937 int err;
36956a7d 938
cd2a59e9 939 ovs_lock();
2a4999f3 940 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
941 if (!dp) {
942 err = -ENODEV;
943 goto unlock;
944 }
2a4999f3 945
cd2a59e9 946 if (!a[OVS_FLOW_ATTR_KEY]) {
994dc286 947 err = ovs_flow_tbl_flush(&dp->table);
cd2a59e9
PS
948 goto unlock;
949 }
a1c564be
AZ
950
951 ovs_match_init(&match, &key, NULL);
b0f3a2fe 952 err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c 953 if (err)
cd2a59e9 954 goto unlock;
d6569377 955
4f88b5e5 956 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 957 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
958 err = -ENOENT;
959 goto unlock;
960 }
d6569377 961
68eadcf0 962 reply = ovs_flow_cmd_alloc_info(flow, info);
cd2a59e9
PS
963 if (!reply) {
964 err = -ENOMEM;
965 goto unlock;
966 }
37a1300c 967
994dc286 968 ovs_flow_tbl_remove(&dp->table, flow);
37a1300c 969
28aea917 970 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
df2c07f4 971 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
37a1300c
BP
972 BUG_ON(err < 0);
973
a1c564be 974 ovs_flow_free(flow, true);
cd2a59e9 975 ovs_unlock();
37a1300c 976
e297c6b7 977 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 978 return 0;
cd2a59e9
PS
979unlock:
980 ovs_unlock();
981 return err;
37a1300c
BP
982}
983
df2c07f4 984static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 985{
df2c07f4 986 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
994dc286 987 struct table_instance *ti;
37a1300c
BP
988 struct datapath *dp;
989
f44ccce1 990 rcu_read_lock();
2a4999f3 991 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9 992 if (!dp) {
f44ccce1 993 rcu_read_unlock();
37a1300c 994 return -ENODEV;
cd2a59e9 995 }
37a1300c 996
994dc286 997 ti = rcu_dereference(dp->table.ti);
37a1300c 998 for (;;) {
37a1300c
BP
999 struct sw_flow *flow;
1000 u32 bucket, obj;
1001
1002 bucket = cb->args[0];
1003 obj = cb->args[1];
994dc286 1004 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
3544358a 1005 if (!flow)
37a1300c
BP
1006 break;
1007
6455100f 1008 if (ovs_flow_cmd_fill_info(flow, dp, skb,
28aea917 1009 NETLINK_CB(cb->skb).portid,
37a1300c 1010 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1011 OVS_FLOW_CMD_NEW) < 0)
37a1300c
BP
1012 break;
1013
1014 cb->args[0] = bucket;
1015 cb->args[1] = obj;
1016 }
f44ccce1 1017 rcu_read_unlock();
37a1300c 1018 return skb->len;
704a1e09
BP
1019}
1020
37a1300c 1021static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1022 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1023 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1024 .policy = flow_policy,
df2c07f4 1025 .doit = ovs_flow_cmd_new_or_set
37a1300c 1026 },
df2c07f4 1027 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1028 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1029 .policy = flow_policy,
df2c07f4 1030 .doit = ovs_flow_cmd_del
37a1300c 1031 },
df2c07f4 1032 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1033 .flags = 0, /* OK for unprivileged users. */
1034 .policy = flow_policy,
df2c07f4
JP
1035 .doit = ovs_flow_cmd_get,
1036 .dumpit = ovs_flow_cmd_dump
37a1300c 1037 },
df2c07f4 1038 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1039 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1040 .policy = flow_policy,
df2c07f4 1041 .doit = ovs_flow_cmd_new_or_set,
37a1300c
BP
1042 },
1043};
1044
df2c07f4 1045static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
df2c07f4 1046 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
b063d9f0 1047 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
c58cc9a4 1048 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
d6569377
BP
1049};
1050
aaff4b55
BP
1051static struct genl_family dp_datapath_genl_family = {
1052 .id = GENL_ID_GENERATE,
df2c07f4
JP
1053 .hdrsize = sizeof(struct ovs_header),
1054 .name = OVS_DATAPATH_FAMILY,
69685a88 1055 .version = OVS_DATAPATH_VERSION,
2a4999f3 1056 .maxattr = OVS_DP_ATTR_MAX,
b3dcb73c 1057 .netnsok = true,
14002a59 1058 SET_PARALLEL_OPS
aaff4b55
BP
1059};
1060
850b6b3b 1061static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
df2c07f4 1062 .name = OVS_DATAPATH_MCGROUP
aaff4b55
BP
1063};
1064
0afa2373
TG
1065static size_t ovs_dp_cmd_msg_size(void)
1066{
1067 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1068
1069 msgsize += nla_total_size(IFNAMSIZ);
1070 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
4fa72a95 1071 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
0afa2373
TG
1072
1073 return msgsize;
1074}
1075
df2c07f4 1076static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1077 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1078{
df2c07f4 1079 struct ovs_header *ovs_header;
e926dfe3 1080 struct ovs_dp_stats dp_stats;
4fa72a95 1081 struct ovs_dp_megaflow_stats dp_megaflow_stats;
064af421
BP
1082 int err;
1083
28aea917 1084 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1085 flags, cmd);
df2c07f4 1086 if (!ovs_header)
aaff4b55 1087 goto error;
064af421 1088
b063d9f0 1089 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1090
d6569377 1091 rcu_read_lock();
850b6b3b 1092 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
d6569377 1093 rcu_read_unlock();
064af421 1094 if (err)
d6569377 1095 goto nla_put_failure;
064af421 1096
4fa72a95
AZ
1097 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1098 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1099 &dp_stats))
1100 goto nla_put_failure;
1101
1102 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1103 sizeof(struct ovs_dp_megaflow_stats),
1104 &dp_megaflow_stats))
c3cc8c03 1105 goto nla_put_failure;
d6569377 1106
c58cc9a4
TG
1107 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1108 goto nla_put_failure;
1109
df2c07f4 1110 return genlmsg_end(skb, ovs_header);
d6569377
BP
1111
1112nla_put_failure:
df2c07f4 1113 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1114error:
1115 return -EMSGSIZE;
d6569377
BP
1116}
1117
68eadcf0
TG
1118static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
1119 struct genl_info *info, u8 cmd)
d6569377 1120{
d6569377 1121 struct sk_buff *skb;
aaff4b55 1122 int retval;
d6569377 1123
68eadcf0 1124 skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
064af421 1125 if (!skb)
d6569377 1126 return ERR_PTR(-ENOMEM);
659586ef 1127
68eadcf0 1128 retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
aaff4b55
BP
1129 if (retval < 0) {
1130 kfree_skb(skb);
1131 return ERR_PTR(retval);
1132 }
1133 return skb;
1134}
9dca7bd5 1135
cd2a59e9 1136/* Called with ovs_mutex. */
2a4999f3
PS
1137static struct datapath *lookup_datapath(struct net *net,
1138 struct ovs_header *ovs_header,
6455100f 1139 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1140{
254f2dc8
BP
1141 struct datapath *dp;
1142
df2c07f4 1143 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1144 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1145 else {
d6569377 1146 struct vport *vport;
d6569377 1147
057dd6d2 1148 rcu_read_lock();
2a4999f3 1149 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1150 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
057dd6d2 1151 rcu_read_unlock();
d6569377 1152 }
254f2dc8 1153 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1154}
1155
c58cc9a4
TG
1156static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
1157{
1158 if (a[OVS_DP_ATTR_USER_FEATURES])
1159 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1160}
1161
df2c07f4 1162static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1163{
aaff4b55 1164 struct nlattr **a = info->attrs;
d6569377 1165 struct vport_parms parms;
aaff4b55 1166 struct sk_buff *reply;
d6569377
BP
1167 struct datapath *dp;
1168 struct vport *vport;
2a4999f3 1169 struct ovs_net *ovs_net;
95b1d73a 1170 int err, i;
d6569377 1171
d6569377 1172 err = -EINVAL;
ea36840f 1173 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1174 goto err;
1175
cd2a59e9 1176 ovs_lock();
d6569377 1177
d6569377
BP
1178 err = -ENOMEM;
1179 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1180 if (dp == NULL)
cd2a59e9 1181 goto err_unlock_ovs;
2a4999f3 1182
0ceaa66c
JG
1183 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1184
d6569377 1185 /* Allocate table. */
994dc286
PS
1186 err = ovs_flow_tbl_init(&dp->table);
1187 if (err)
d6569377
BP
1188 goto err_free_dp;
1189
99769a40
JG
1190 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1191 if (!dp->stats_percpu) {
1192 err = -ENOMEM;
1193 goto err_destroy_table;
1194 }
1195
95b1d73a
PS
1196 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1197 GFP_KERNEL);
1198 if (!dp->ports) {
1199 err = -ENOMEM;
1200 goto err_destroy_percpu;
1201 }
1202
1203 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1204 INIT_HLIST_HEAD(&dp->ports[i]);
1205
d6569377 1206 /* Set up our datapath device. */
df2c07f4
JP
1207 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1208 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1209 parms.options = NULL;
1210 parms.dp = dp;
df2c07f4 1211 parms.port_no = OVSP_LOCAL;
28aea917 1212 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
b063d9f0 1213
c58cc9a4
TG
1214 ovs_dp_change(dp, a);
1215
d6569377
BP
1216 vport = new_vport(&parms);
1217 if (IS_ERR(vport)) {
1218 err = PTR_ERR(vport);
1219 if (err == -EBUSY)
1220 err = -EEXIST;
1221
95b1d73a 1222 goto err_destroy_ports_array;
d6569377 1223 }
d6569377 1224
68eadcf0 1225 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
aaff4b55
BP
1226 err = PTR_ERR(reply);
1227 if (IS_ERR(reply))
1228 goto err_destroy_local_port;
1229
2a4999f3 1230 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
fb93e9aa 1231 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
d6569377 1232
cd2a59e9 1233 ovs_unlock();
d6569377 1234
e297c6b7 1235 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
d6569377
BP
1236 return 0;
1237
1238err_destroy_local_port:
cd2a59e9 1239 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
95b1d73a
PS
1240err_destroy_ports_array:
1241 kfree(dp->ports);
99769a40
JG
1242err_destroy_percpu:
1243 free_percpu(dp->stats_percpu);
d6569377 1244err_destroy_table:
0585f7a8 1245 ovs_flow_tbl_destroy(&dp->table);
d6569377 1246err_free_dp:
0ceaa66c 1247 release_net(ovs_dp_get_net(dp));
d6569377 1248 kfree(dp);
cd2a59e9
PS
1249err_unlock_ovs:
1250 ovs_unlock();
d6569377 1251err:
064af421
BP
1252 return err;
1253}
1254
cd2a59e9 1255/* Called with ovs_mutex. */
2a4999f3 1256static void __dp_destroy(struct datapath *dp)
44e05eca 1257{
95b1d73a 1258 int i;
44e05eca 1259
95b1d73a
PS
1260 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1261 struct vport *vport;
f8dfbcb7 1262 struct hlist_node *n;
95b1d73a 1263
f8dfbcb7 1264 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1265 if (vport->port_no != OVSP_LOCAL)
1266 ovs_dp_detach_port(vport);
1267 }
ed099e92 1268
fb93e9aa 1269 list_del_rcu(&dp->list_node);
ed099e92 1270
cd2a59e9
PS
1271 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1272 * all port in datapath are destroyed first before freeing datapath.
1273 */
1274 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1275
ed099e92 1276 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1277}
1278
1279static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1280{
1281 struct sk_buff *reply;
1282 struct datapath *dp;
1283 int err;
1284
cd2a59e9 1285 ovs_lock();
2a4999f3
PS
1286 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1287 err = PTR_ERR(dp);
1288 if (IS_ERR(dp))
cd2a59e9 1289 goto unlock;
2a4999f3 1290
68eadcf0 1291 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
2a4999f3
PS
1292 err = PTR_ERR(reply);
1293 if (IS_ERR(reply))
cd2a59e9 1294 goto unlock;
2a4999f3
PS
1295
1296 __dp_destroy(dp);
cd2a59e9 1297 ovs_unlock();
ed099e92 1298
e297c6b7 1299 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
99620d2c
JG
1300
1301 return 0;
cd2a59e9
PS
1302unlock:
1303 ovs_unlock();
1304 return err;
44e05eca
BP
1305}
1306
df2c07f4 1307static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1308{
aaff4b55 1309 struct sk_buff *reply;
d6569377 1310 struct datapath *dp;
d6569377 1311 int err;
064af421 1312
cd2a59e9 1313 ovs_lock();
2a4999f3 1314 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1315 err = PTR_ERR(dp);
d6569377 1316 if (IS_ERR(dp))
cd2a59e9 1317 goto unlock;
38c6ecbc 1318
c58cc9a4
TG
1319 ovs_dp_change(dp, info->attrs);
1320
68eadcf0 1321 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
aaff4b55
BP
1322 if (IS_ERR(reply)) {
1323 err = PTR_ERR(reply);
b3dcb73c 1324 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
850b6b3b 1325 ovs_dp_datapath_multicast_group.id, err);
cd2a59e9
PS
1326 err = 0;
1327 goto unlock;
aaff4b55
BP
1328 }
1329
cd2a59e9 1330 ovs_unlock();
e297c6b7 1331 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
850b6b3b 1332
aaff4b55 1333 return 0;
cd2a59e9
PS
1334unlock:
1335 ovs_unlock();
1336 return err;
064af421
BP
1337}
1338
df2c07f4 1339static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1340{
aaff4b55 1341 struct sk_buff *reply;
d6569377 1342 struct datapath *dp;
d6569377 1343 int err;
1dcf111b 1344
cd2a59e9 1345 ovs_lock();
2a4999f3 1346 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1347 if (IS_ERR(dp)) {
1348 err = PTR_ERR(dp);
1349 goto unlock;
1350 }
1dcf111b 1351
68eadcf0 1352 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
cd2a59e9
PS
1353 if (IS_ERR(reply)) {
1354 err = PTR_ERR(reply);
1355 goto unlock;
1356 }
aaff4b55 1357
cd2a59e9 1358 ovs_unlock();
aaff4b55 1359 return genlmsg_reply(reply, info);
cd2a59e9
PS
1360
1361unlock:
1362 ovs_unlock();
1363 return err;
1dcf111b
JP
1364}
1365
df2c07f4 1366static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1367{
2a4999f3 1368 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1369 struct datapath *dp;
1370 int skip = cb->args[0];
1371 int i = 0;
a7786963 1372
fb93e9aa
PS
1373 rcu_read_lock();
1374 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
a2bab2f0 1375 if (i >= skip &&
28aea917 1376 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1377 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1378 OVS_DP_CMD_NEW) < 0)
aaff4b55 1379 break;
254f2dc8 1380 i++;
a7786963 1381 }
fb93e9aa 1382 rcu_read_unlock();
aaff4b55 1383
254f2dc8
BP
1384 cb->args[0] = i;
1385
aaff4b55 1386 return skb->len;
c19e6535
BP
1387}
1388
aaff4b55 1389static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1390 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1391 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1392 .policy = datapath_policy,
df2c07f4 1393 .doit = ovs_dp_cmd_new
aaff4b55 1394 },
df2c07f4 1395 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1396 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1397 .policy = datapath_policy,
df2c07f4 1398 .doit = ovs_dp_cmd_del
aaff4b55 1399 },
df2c07f4 1400 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1401 .flags = 0, /* OK for unprivileged users. */
1402 .policy = datapath_policy,
df2c07f4
JP
1403 .doit = ovs_dp_cmd_get,
1404 .dumpit = ovs_dp_cmd_dump
aaff4b55 1405 },
df2c07f4 1406 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1407 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1408 .policy = datapath_policy,
df2c07f4 1409 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1410 },
1411};
1412
df2c07f4 1413static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
df2c07f4 1414 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1415 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
d48c88ec
JG
1416 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1417 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1418 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1419 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1420};
1421
f0fef760
BP
1422static struct genl_family dp_vport_genl_family = {
1423 .id = GENL_ID_GENERATE,
df2c07f4
JP
1424 .hdrsize = sizeof(struct ovs_header),
1425 .name = OVS_VPORT_FAMILY,
69685a88 1426 .version = OVS_VPORT_VERSION,
2a4999f3 1427 .maxattr = OVS_VPORT_ATTR_MAX,
b3dcb73c 1428 .netnsok = true,
14002a59 1429 SET_PARALLEL_OPS
f0fef760
BP
1430};
1431
850b6b3b 1432struct genl_multicast_group ovs_dp_vport_multicast_group = {
df2c07f4 1433 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1434};
1435
cd2a59e9 1436/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1437static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1438 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1439{
df2c07f4 1440 struct ovs_header *ovs_header;
e926dfe3 1441 struct ovs_vport_stats vport_stats;
c19e6535
BP
1442 int err;
1443
28aea917 1444 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1445 flags, cmd);
df2c07f4 1446 if (!ovs_header)
f0fef760 1447 return -EMSGSIZE;
c19e6535 1448
99769a40 1449 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1450
c3cc8c03
DM
1451 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1452 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1453 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
28aea917 1454 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
c3cc8c03 1455 goto nla_put_failure;
c19e6535 1456
850b6b3b 1457 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1458 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1459 &vport_stats))
1460 goto nla_put_failure;
c19e6535 1461
850b6b3b 1462 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1463 if (err == -EMSGSIZE)
1464 goto error;
c19e6535 1465
df2c07f4 1466 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1467
1468nla_put_failure:
1469 err = -EMSGSIZE;
f0fef760 1470error:
df2c07f4 1471 genlmsg_cancel(skb, ovs_header);
f0fef760 1472 return err;
064af421
BP
1473}
1474
cd2a59e9 1475/* Called with ovs_mutex or RCU read lock. */
28aea917 1476struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1477 u32 seq, u8 cmd)
064af421 1478{
c19e6535 1479 struct sk_buff *skb;
f0fef760 1480 int retval;
c19e6535 1481
f0fef760 1482 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1483 if (!skb)
1484 return ERR_PTR(-ENOMEM);
1485
28aea917 1486 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1487 BUG_ON(retval < 0);
1488
c19e6535 1489 return skb;
f0fef760 1490}
c19e6535 1491
cd2a59e9 1492/* Called with ovs_mutex or RCU read lock. */
2a4999f3
PS
1493static struct vport *lookup_vport(struct net *net,
1494 struct ovs_header *ovs_header,
df2c07f4 1495 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1496{
1497 struct datapath *dp;
1498 struct vport *vport;
1499
df2c07f4 1500 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1501 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1502 if (!vport)
c19e6535 1503 return ERR_PTR(-ENODEV);
24ce832d
BP
1504 if (ovs_header->dp_ifindex &&
1505 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1506 return ERR_PTR(-ENODEV);
c19e6535 1507 return vport;
df2c07f4
JP
1508 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1509 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1510
1511 if (port_no >= DP_MAX_PORTS)
f0fef760 1512 return ERR_PTR(-EFBIG);
c19e6535 1513
2a4999f3 1514 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1515 if (!dp)
1516 return ERR_PTR(-ENODEV);
f2459fe7 1517
cd2a59e9 1518 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 1519 if (!vport)
17535c57 1520 return ERR_PTR(-ENODEV);
c19e6535
BP
1521 return vport;
1522 } else
1523 return ERR_PTR(-EINVAL);
064af421
BP
1524}
1525
df2c07f4 1526static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1527{
f0fef760 1528 struct nlattr **a = info->attrs;
df2c07f4 1529 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1530 struct vport_parms parms;
ed099e92 1531 struct sk_buff *reply;
c19e6535 1532 struct vport *vport;
c19e6535 1533 struct datapath *dp;
b0ec0f27 1534 u32 port_no;
c19e6535 1535 int err;
b0ec0f27 1536
c19e6535 1537 err = -EINVAL;
ea36840f
BP
1538 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1539 !a[OVS_VPORT_ATTR_UPCALL_PID])
f0fef760
BP
1540 goto exit;
1541
cd2a59e9 1542 ovs_lock();
2a4999f3 1543 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1544 err = -ENODEV;
1545 if (!dp)
ed099e92 1546 goto exit_unlock;
c19e6535 1547
df2c07f4
JP
1548 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1549 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1550
1551 err = -EFBIG;
1552 if (port_no >= DP_MAX_PORTS)
ed099e92 1553 goto exit_unlock;
c19e6535 1554
cd2a59e9 1555 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1556 err = -EBUSY;
1557 if (vport)
ed099e92 1558 goto exit_unlock;
c19e6535
BP
1559 } else {
1560 for (port_no = 1; ; port_no++) {
1561 if (port_no >= DP_MAX_PORTS) {
1562 err = -EFBIG;
ed099e92 1563 goto exit_unlock;
c19e6535 1564 }
cd2a59e9 1565 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1566 if (!vport)
1567 break;
51d4d598 1568 }
064af421 1569 }
b0ec0f27 1570
df2c07f4
JP
1571 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1572 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1573 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1574 parms.dp = dp;
1575 parms.port_no = port_no;
28aea917 1576 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
1577
1578 vport = new_vport(&parms);
1579 err = PTR_ERR(vport);
1580 if (IS_ERR(vport))
ed099e92 1581 goto exit_unlock;
c19e6535 1582
faef6d2d 1583 err = 0;
1fc7083d
JG
1584 if (a[OVS_VPORT_ATTR_STATS])
1585 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1586
1587 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1588 OVS_VPORT_CMD_NEW);
1589 if (IS_ERR(reply)) {
1590 err = PTR_ERR(reply);
850b6b3b 1591 ovs_dp_detach_port(vport);
ed099e92 1592 goto exit_unlock;
c19e6535 1593 }
e297c6b7
TG
1594
1595 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c19e6535 1596
ed099e92 1597exit_unlock:
cd2a59e9 1598 ovs_unlock();
c19e6535
BP
1599exit:
1600 return err;
44e05eca
BP
1601}
1602
df2c07f4 1603static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1604{
f0fef760
BP
1605 struct nlattr **a = info->attrs;
1606 struct sk_buff *reply;
c19e6535 1607 struct vport *vport;
c19e6535 1608 int err;
44e05eca 1609
cd2a59e9 1610 ovs_lock();
2a4999f3 1611 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
1612 err = PTR_ERR(vport);
1613 if (IS_ERR(vport))
f0fef760 1614 goto exit_unlock;
44e05eca 1615
6455100f 1616 if (a[OVS_VPORT_ATTR_TYPE] &&
17ec1d04 1617 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
4879d4c7 1618 err = -EINVAL;
17ec1d04
JG
1619 goto exit_unlock;
1620 }
6455100f 1621
c25ea534
JG
1622 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1623 if (!reply) {
1624 err = -ENOMEM;
1625 goto exit_unlock;
1626 }
1627
17ec1d04 1628 if (a[OVS_VPORT_ATTR_OPTIONS]) {
850b6b3b 1629 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
17ec1d04
JG
1630 if (err)
1631 goto exit_free;
1632 }
1fc7083d
JG
1633
1634 if (a[OVS_VPORT_ATTR_STATS])
1635 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1636
1637 if (a[OVS_VPORT_ATTR_UPCALL_PID])
28aea917 1638 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 1639
c25ea534
JG
1640 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1641 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1642 BUG_ON(err < 0);
f0fef760 1643
cd2a59e9 1644 ovs_unlock();
8680ae4d 1645 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c25ea534
JG
1646 return 0;
1647
1648exit_free:
1649 kfree_skb(reply);
f0fef760 1650exit_unlock:
cd2a59e9 1651 ovs_unlock();
c19e6535 1652 return err;
064af421
BP
1653}
1654
df2c07f4 1655static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1656{
f0fef760
BP
1657 struct nlattr **a = info->attrs;
1658 struct sk_buff *reply;
c19e6535 1659 struct vport *vport;
c19e6535
BP
1660 int err;
1661
cd2a59e9 1662 ovs_lock();
2a4999f3 1663 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 1664 err = PTR_ERR(vport);
f0fef760
BP
1665 if (IS_ERR(vport))
1666 goto exit_unlock;
c19e6535 1667
df2c07f4 1668 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
1669 err = -EINVAL;
1670 goto exit_unlock;
1671 }
1672
28aea917
IY
1673 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1674 info->snd_seq, OVS_VPORT_CMD_DEL);
f0fef760
BP
1675 err = PTR_ERR(reply);
1676 if (IS_ERR(reply))
1677 goto exit_unlock;
1678
b57d5819 1679 err = 0;
850b6b3b 1680 ovs_dp_detach_port(vport);
f0fef760 1681
e297c6b7 1682 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
f0fef760
BP
1683
1684exit_unlock:
cd2a59e9 1685 ovs_unlock();
c19e6535 1686 return err;
7c40efc9
BP
1687}
1688
df2c07f4 1689static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1690{
f0fef760 1691 struct nlattr **a = info->attrs;
df2c07f4 1692 struct ovs_header *ovs_header = info->userhdr;
ed099e92 1693 struct sk_buff *reply;
c19e6535 1694 struct vport *vport;
c19e6535
BP
1695 int err;
1696
ed099e92 1697 rcu_read_lock();
2a4999f3 1698 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
1699 err = PTR_ERR(vport);
1700 if (IS_ERR(vport))
f0fef760 1701 goto exit_unlock;
c19e6535 1702
28aea917
IY
1703 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1704 info->snd_seq, OVS_VPORT_CMD_NEW);
ed099e92
BP
1705 err = PTR_ERR(reply);
1706 if (IS_ERR(reply))
f0fef760 1707 goto exit_unlock;
ed099e92 1708
df2fa9b5
JG
1709 rcu_read_unlock();
1710
1711 return genlmsg_reply(reply, info);
ed099e92 1712
f0fef760 1713exit_unlock:
ed099e92 1714 rcu_read_unlock();
c19e6535
BP
1715 return err;
1716}
1717
df2c07f4 1718static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1719{
df2c07f4 1720 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 1721 struct datapath *dp;
95b1d73a
PS
1722 int bucket = cb->args[0], skip = cb->args[1];
1723 int i, j = 0;
c19e6535 1724
2a4999f3 1725 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535 1726 if (!dp)
f0fef760 1727 return -ENODEV;
ed099e92
BP
1728
1729 rcu_read_lock();
95b1d73a 1730 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 1731 struct vport *vport;
95b1d73a
PS
1732
1733 j = 0;
f8dfbcb7 1734 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
1735 if (j >= skip &&
1736 ovs_vport_cmd_fill_info(vport, skb,
28aea917 1737 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
1738 cb->nlh->nlmsg_seq,
1739 NLM_F_MULTI,
1740 OVS_VPORT_CMD_NEW) < 0)
1741 goto out;
1742
1743 j++;
1744 }
1745 skip = 0;
c19e6535 1746 }
95b1d73a 1747out:
ed099e92 1748 rcu_read_unlock();
c19e6535 1749
95b1d73a
PS
1750 cb->args[0] = i;
1751 cb->args[1] = j;
f0fef760 1752
95b1d73a 1753 return skb->len;
7c40efc9
BP
1754}
1755
f0fef760 1756static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 1757 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
1758 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1759 .policy = vport_policy,
df2c07f4 1760 .doit = ovs_vport_cmd_new
f0fef760 1761 },
df2c07f4 1762 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
1763 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1764 .policy = vport_policy,
df2c07f4 1765 .doit = ovs_vport_cmd_del
f0fef760 1766 },
df2c07f4 1767 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
1768 .flags = 0, /* OK for unprivileged users. */
1769 .policy = vport_policy,
df2c07f4
JP
1770 .doit = ovs_vport_cmd_get,
1771 .dumpit = ovs_vport_cmd_dump
f0fef760 1772 },
df2c07f4 1773 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
1774 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1775 .policy = vport_policy,
df2c07f4 1776 .doit = ovs_vport_cmd_set,
f0fef760
BP
1777 },
1778};
1779
982b8810
BP
/*
 * Bundles everything needed to register one Generic Netlink family: the
 * family itself, its operations table and, optionally, a multicast group.
 */
struct genl_family_and_ops {
	struct genl_family *family;		/* family to register */
	struct genl_ops *ops;			/* operations table */
	int n_ops;				/* number of entries in @ops */
	struct genl_multicast_group *group;	/* may be NULL */
};
ed099e92 1786
982b8810 1787static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1788 { &dp_datapath_genl_family,
1789 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
850b6b3b 1790 &ovs_dp_datapath_multicast_group },
f0fef760
BP
1791 { &dp_vport_genl_family,
1792 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
850b6b3b 1793 &ovs_dp_vport_multicast_group },
37a1300c
BP
1794 { &dp_flow_genl_family,
1795 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
850b6b3b 1796 &ovs_dp_flow_multicast_group },
982b8810
BP
1797 { &dp_packet_genl_family,
1798 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1799 NULL },
1800};
ed099e92 1801
982b8810
BP
1802static void dp_unregister_genl(int n_families)
1803{
1804 int i;
ed099e92 1805
b867ca75 1806 for (i = 0; i < n_families; i++)
982b8810 1807 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
1808}
1809
982b8810 1810static int dp_register_genl(void)
064af421 1811{
982b8810
BP
1812 int n_registered;
1813 int err;
1814 int i;
064af421 1815
982b8810
BP
1816 n_registered = 0;
1817 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1818 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 1819
982b8810
BP
1820 err = genl_register_family_with_ops(f->family, f->ops,
1821 f->n_ops);
1822 if (err)
1823 goto error;
1824 n_registered++;
e22d4953 1825
982b8810
BP
1826 if (f->group) {
1827 err = genl_register_mc_group(f->family, f->group);
1828 if (err)
1829 goto error;
1830 }
1831 }
9cc8b4e4 1832
982b8810 1833 return 0;
064af421
BP
1834
1835error:
982b8810
BP
1836 dp_unregister_genl(n_registered);
1837 return err;
064af421
BP
1838}
1839
2a4999f3
PS
1840static int __net_init ovs_init_net(struct net *net)
1841{
1842 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1843
1844 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 1845 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2a4999f3
PS
1846 return 0;
1847}
1848
1849static void __net_exit ovs_exit_net(struct net *net)
1850{
cd2a59e9 1851 struct datapath *dp, *dp_next;
2a4999f3
PS
1852 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1853
cd2a59e9
PS
1854 ovs_lock();
1855 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1856 __dp_destroy(dp);
1857 ovs_unlock();
1858
1859 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
1860}
1861
1862static struct pernet_operations ovs_net_ops = {
1863 .init = ovs_init_net,
1864 .exit = ovs_exit_net,
1865 .id = &ovs_net_id,
1866 .size = sizeof(struct ovs_net),
1867};
1868
637c8268
PS
/* NOTE(review): compat macro defined outside this file; presumably supplies
 * the pernet "device" registration helpers used below -- confirm.
 */
DEFINE_COMPAT_PNET_REG_FUNC(device);
1870
22d24ebf
BP
1871static int __init dp_init(void)
1872{
1873 int err;
1874
f3d85db3 1875 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 1876
dc5f3fef 1877 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
8a07709c 1878 VERSION);
064af421 1879
850b6b3b 1880 err = ovs_flow_init();
3544358a 1881 if (err)
533e96e7 1882 goto error;
3544358a 1883
850b6b3b 1884 err = ovs_vport_init();
064af421
BP
1885 if (err)
1886 goto error_flow_exit;
1887
2a4999f3 1888 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
1889 if (err)
1890 goto error_vport_exit;
1891
2a4999f3
PS
1892 err = register_netdevice_notifier(&ovs_dp_device_notifier);
1893 if (err)
1894 goto error_netns_exit;
1895
982b8810
BP
1896 err = dp_register_genl();
1897 if (err < 0)
37a1300c 1898 goto error_unreg_notifier;
982b8810 1899
064af421
BP
1900 return 0;
1901
1902error_unreg_notifier:
850b6b3b 1903 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1904error_netns_exit:
1905 unregister_pernet_device(&ovs_net_ops);
f2459fe7 1906error_vport_exit:
850b6b3b 1907 ovs_vport_exit();
064af421 1908error_flow_exit:
850b6b3b 1909 ovs_flow_exit();
064af421
BP
1910error:
1911 return err;
1912}
1913
1914static void dp_cleanup(void)
1915{
982b8810 1916 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
850b6b3b 1917 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1918 unregister_pernet_device(&ovs_net_ops);
1919 rcu_barrier();
850b6b3b
JG
1920 ovs_vport_exit();
1921 ovs_flow_exit();
064af421
BP
1922}
1923
1924module_init(dp_init);
1925module_exit(dp_cleanup);
1926
1927MODULE_DESCRIPTION("Open vSwitch switching datapath");
1928MODULE_LICENSE("GPL");
3d0666d2 1929MODULE_VERSION(VERSION);