]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
ofproto-dpif.at: Avoid races by sleeping
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
a1c564be 2 * Copyright (c) 2007-2013 Nicira, Inc.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421 41#include <linux/wait.h>
064af421 42#include <asm/div64.h>
656a0e37 43#include <linux/highmem.h>
064af421
BP
44#include <linux/netfilter_bridge.h>
45#include <linux/netfilter_ipv4.h>
46#include <linux/inetdevice.h>
47#include <linux/list.h>
077257b8 48#include <linux/openvswitch.h>
064af421 49#include <linux/rculist.h>
064af421 50#include <linux/dmi.h>
cd2a59e9
PS
51#include <linux/genetlink.h>
52#include <net/genetlink.h>
36956a7d 53#include <net/genetlink.h>
2a4999f3
PS
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
064af421 56
064af421 57#include "datapath.h"
064af421 58#include "flow.h"
d103f479 59#include "flow_table.h"
a097c0b2 60#include "flow_netlink.h"
303708cc 61#include "vlan.h"
f2459fe7 62#include "vport-internal_dev.h"
d5de5b0d 63#include "vport-netdev.h"
064af421 64
2a4999f3
PS
65int ovs_net_id __read_mostly;
66
e297c6b7
TG
67static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
68 struct genl_multicast_group *grp)
69{
70 genl_notify(skb, genl_info_net(info), info->snd_portid,
71 grp->id, info->nlhdr, GFP_KERNEL);
72}
73
ed099e92
BP
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (adding or removing a datapath or
 * port, set operations on vports, etc.) and writes to other state (flow
 * table modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
ed099e92 90
cd2a59e9
PS
91static DEFINE_MUTEX(ovs_mutex);
92
93void ovs_lock(void)
94{
95 mutex_lock(&ovs_mutex);
96}
97
98void ovs_unlock(void)
99{
100 mutex_unlock(&ovs_mutex);
101}
102
#ifdef CONFIG_LOCKDEP
/* Returns the result of lockdep_is_held() for ovs_mutex.  When 'debug_locks'
 * is zero the lock is always reported as held (returns 1). */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
112
c19e6535 113static struct vport *new_vport(const struct vport_parms *);
5ae440c3 114static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
7257b535 115 const struct dp_upcall_info *);
5ae440c3 116static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
7257b535 117 const struct dp_upcall_info *);
064af421 118
cd2a59e9 119/* Must be called with rcu_read_lock or ovs_mutex. */
2a4999f3 120static struct datapath *get_dp(struct net *net, int dp_ifindex)
064af421 121{
254f2dc8
BP
122 struct datapath *dp = NULL;
123 struct net_device *dev;
ed099e92 124
254f2dc8 125 rcu_read_lock();
2a4999f3 126 dev = dev_get_by_index_rcu(net, dp_ifindex);
254f2dc8 127 if (dev) {
850b6b3b 128 struct vport *vport = ovs_internal_dev_get_vport(dev);
254f2dc8
BP
129 if (vport)
130 dp = vport->dp;
131 }
132 rcu_read_unlock();
133
134 return dp;
064af421 135}
064af421 136
cd2a59e9 137/* Must be called with rcu_read_lock or ovs_mutex. */
850b6b3b 138const char *ovs_dp_name(const struct datapath *dp)
f2459fe7 139{
cd2a59e9 140 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
16b82e84 141 return vport->ops->get_name(vport);
f2459fe7
JG
142}
143
99769a40
JG
144static int get_dpifindex(struct datapath *dp)
145{
146 struct vport *local;
147 int ifindex;
148
149 rcu_read_lock();
150
95b1d73a 151 local = ovs_vport_rcu(dp, OVSP_LOCAL);
99769a40 152 if (local)
d5de5b0d 153 ifindex = netdev_vport_priv(local)->dev->ifindex;
99769a40
JG
154 else
155 ifindex = 0;
156
157 rcu_read_unlock();
158
159 return ifindex;
160}
161
46c6a11d
JG
162static void destroy_dp_rcu(struct rcu_head *rcu)
163{
164 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 165
46c6a11d 166 free_percpu(dp->stats_percpu);
2a4999f3 167 release_net(ovs_dp_get_net(dp));
95b1d73a 168 kfree(dp->ports);
5ca1ba48 169 kfree(dp);
46c6a11d
JG
170}
171
95b1d73a
PS
172static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
173 u16 port_no)
174{
175 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
176}
177
178struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
179{
180 struct vport *vport;
95b1d73a
PS
181 struct hlist_head *head;
182
183 head = vport_hash_bucket(dp, port_no);
f8dfbcb7 184 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
95b1d73a
PS
185 if (vport->port_no == port_no)
186 return vport;
187 }
188 return NULL;
189}
190
cd2a59e9 191/* Called with ovs_mutex. */
c19e6535 192static struct vport *new_vport(const struct vport_parms *parms)
064af421 193{
f2459fe7 194 struct vport *vport;
f2459fe7 195
850b6b3b 196 vport = ovs_vport_add(parms);
c19e6535
BP
197 if (!IS_ERR(vport)) {
198 struct datapath *dp = parms->dp;
95b1d73a 199 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
064af421 200
95b1d73a 201 hlist_add_head_rcu(&vport->dp_hash_node, head);
c19e6535 202 }
c19e6535 203 return vport;
064af421
BP
204}
205
850b6b3b 206void ovs_dp_detach_port(struct vport *p)
064af421 207{
cd2a59e9 208 ASSERT_OVSL();
064af421 209
064af421 210 /* First drop references to device. */
95b1d73a 211 hlist_del_rcu(&p->dp_hash_node);
f2459fe7 212
7237e4f4 213 /* Then destroy it. */
850b6b3b 214 ovs_vport_del(p);
064af421
BP
215}
216
8819fac7 217/* Must be called with rcu_read_lock. */
850b6b3b 218void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
219{
220 struct datapath *dp = p->dp;
3544358a 221 struct sw_flow *flow;
064af421 222 struct dp_stats_percpu *stats;
52a23d92 223 struct sw_flow_key key;
e9141eec 224 u64 *stats_counter;
4fa72a95 225 u32 n_mask_hit;
4c1ad233 226 int error;
064af421 227
70dbc259 228 stats = this_cpu_ptr(dp->stats_percpu);
a063b0df 229
52a23d92 230 /* Extract flow from 'skb' into 'key'. */
a1c564be 231 error = ovs_flow_extract(skb, p->port_no, &key);
52a23d92
JG
232 if (unlikely(error)) {
233 kfree_skb(skb);
234 return;
55574bb0
BP
235 }
236
52a23d92 237 /* Look up flow. */
4f88b5e5 238 flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
52a23d92
JG
239 if (unlikely(!flow)) {
240 struct dp_upcall_info upcall;
241
242 upcall.cmd = OVS_PACKET_CMD_MISS;
243 upcall.key = &key;
244 upcall.userdata = NULL;
245 upcall.portid = p->upcall_portid;
246 ovs_dp_upcall(dp, skb, &upcall);
247 consume_skb(skb);
248 stats_counter = &stats->n_missed;
249 goto out;
250 }
251
252 OVS_CB(skb)->flow = flow;
d1d71a36 253 OVS_CB(skb)->pkt_key = &key;
52a23d92 254
b0b906cc 255 ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
850b6b3b 256 ovs_execute_actions(dp, skb);
b0b906cc 257 stats_counter = &stats->n_hit;
55574bb0 258
8819fac7 259out:
55574bb0 260 /* Update datapath statistics. */
821cb9fa 261 u64_stats_update_begin(&stats->sync);
e9141eec 262 (*stats_counter)++;
4fa72a95 263 stats->n_mask_hit += n_mask_hit;
821cb9fa 264 u64_stats_update_end(&stats->sync);
064af421
BP
265}
266
aa5a8fdc
JG
267static struct genl_family dp_packet_genl_family = {
268 .id = GENL_ID_GENERATE,
df2c07f4
JP
269 .hdrsize = sizeof(struct ovs_header),
270 .name = OVS_PACKET_FAMILY,
69685a88 271 .version = OVS_PACKET_VERSION,
2a4999f3 272 .maxattr = OVS_PACKET_ATTR_MAX,
b3dcb73c 273 .netnsok = true,
14002a59 274 SET_PARALLEL_OPS
aa5a8fdc
JG
275};
276
850b6b3b
JG
277int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
278 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
279{
280 struct dp_stats_percpu *stats;
281 int err;
282
28aea917 283 if (upcall_info->portid == 0) {
b063d9f0 284 err = -ENOTCONN;
b063d9f0
JG
285 goto err;
286 }
287
7257b535 288 if (!skb_is_gso(skb))
5ae440c3 289 err = queue_userspace_packet(dp, skb, upcall_info);
7257b535 290 else
5ae440c3 291 err = queue_gso_packets(dp, skb, upcall_info);
d76195db
JG
292 if (err)
293 goto err;
294
295 return 0;
aa5a8fdc 296
aa5a8fdc 297err:
70dbc259 298 stats = this_cpu_ptr(dp->stats_percpu);
aa5a8fdc 299
821cb9fa 300 u64_stats_update_begin(&stats->sync);
aa5a8fdc 301 stats->n_lost++;
821cb9fa 302 u64_stats_update_end(&stats->sync);
aa5a8fdc 303
aa5a8fdc 304 return err;
982b8810
BP
305}
306
5ae440c3 307static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
7257b535 308 const struct dp_upcall_info *upcall_info)
cb5087ca 309{
d4cba1f8 310 unsigned short gso_type = skb_shinfo(skb)->gso_type;
7257b535
BP
311 struct dp_upcall_info later_info;
312 struct sw_flow_key later_key;
313 struct sk_buff *segs, *nskb;
314 int err;
cb5087ca 315
1d04cd4e 316 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
79089764
PS
317 if (IS_ERR(segs))
318 return PTR_ERR(segs);
99769a40 319
7257b535
BP
320 /* Queue all of the segments. */
321 skb = segs;
cb5087ca 322 do {
5ae440c3 323 err = queue_userspace_packet(dp, skb, upcall_info);
982b8810 324 if (err)
7257b535 325 break;
856081f6 326
d4cba1f8 327 if (skb == segs && gso_type & SKB_GSO_UDP) {
e1cf87ff
JG
328 /* The initial flow key extracted by ovs_flow_extract()
329 * in this case is for a first fragment, so we need to
7257b535
BP
330 * properly mark later fragments.
331 */
332 later_key = *upcall_info->key;
9e44d715 333 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
7257b535
BP
334
335 later_info = *upcall_info;
336 later_info.key = &later_key;
337 upcall_info = &later_info;
338 }
36ce148c 339 } while ((skb = skb->next));
cb5087ca 340
7257b535
BP
341 /* Free all of the segments. */
342 skb = segs;
343 do {
344 nskb = skb->next;
345 if (err)
346 kfree_skb(skb);
347 else
348 consume_skb(skb);
349 } while ((skb = nskb));
350 return err;
351}
352
0afa2373
TG
353static size_t key_attr_size(void)
354{
355 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
356 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
357 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
358 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
359 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
360 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
361 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
362 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
363 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
364 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
365 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
366 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
367 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
368 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
369 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
370 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
371 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
372 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
373 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
374}
375
533bea51
TG
376static size_t upcall_msg_size(const struct nlattr *userdata,
377 unsigned int hdrlen)
0afa2373
TG
378{
379 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
533bea51 380 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
0afa2373
TG
381 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
382
383 /* OVS_PACKET_ATTR_USERDATA */
384 if (userdata)
385 size += NLA_ALIGN(userdata->nla_len);
386
387 return size;
388}
389
5ae440c3 390static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
7257b535
BP
391 const struct dp_upcall_info *upcall_info)
392{
393 struct ovs_header *upcall;
6161d3fd 394 struct sk_buff *nskb = NULL;
7257b535
BP
395 struct sk_buff *user_skb; /* to be queued to userspace */
396 struct nlattr *nla;
68eadcf0
TG
397 struct genl_info info = {
398#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
5ae440c3 399 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
68eadcf0
TG
400#endif
401 .snd_portid = upcall_info->portid,
402 };
ef507cec 403 size_t len, plen;
533bea51 404 unsigned int hlen;
5ae440c3
TG
405 int err, dp_ifindex;
406
407 dp_ifindex = get_dpifindex(dp);
408 if (!dp_ifindex)
409 return -ENODEV;
7257b535 410
6161d3fd
JG
411 if (vlan_tx_tag_present(skb)) {
412 nskb = skb_clone(skb, GFP_ATOMIC);
413 if (!nskb)
414 return -ENOMEM;
07ac71ea
PS
415
416 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
417 if (!nskb)
418 return -ENOMEM;
419
420 vlan_set_tci(nskb, 0);
7257b535 421
6161d3fd
JG
422 skb = nskb;
423 }
424
425 if (nla_attr_size(skb->len) > USHRT_MAX) {
426 err = -EFBIG;
427 goto out;
428 }
7257b535 429
533bea51
TG
430 /* Complete checksum if needed */
431 if (skb->ip_summed == CHECKSUM_PARTIAL &&
432 (err = skb_checksum_help(skb)))
433 goto out;
434
435 /* Older versions of OVS user space enforce alignment of the last
436 * Netlink attribute to NLA_ALIGNTO which would require extensive
437 * padding logic. Only perform zerocopy if padding is not required.
438 */
439 if (dp->user_features & OVS_DP_F_UNALIGNED)
440 hlen = skb_zerocopy_headlen(skb);
441 else
442 hlen = skb->len;
443
444 len = upcall_msg_size(upcall_info->userdata, hlen);
68eadcf0 445 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
6161d3fd
JG
446 if (!user_skb) {
447 err = -ENOMEM;
448 goto out;
449 }
7257b535
BP
450
451 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
452 0, upcall_info->cmd);
453 upcall->dp_ifindex = dp_ifindex;
454
455 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
a097c0b2 456 ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
7257b535
BP
457 nla_nest_end(user_skb, nla);
458
459 if (upcall_info->userdata)
e995e3df 460 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
462a988b 461 nla_len(upcall_info->userdata),
e995e3df 462 nla_data(upcall_info->userdata));
7257b535 463
533bea51
TG
464 /* Only reserve room for attribute header, packet data is added
465 * in skb_zerocopy() */
466 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
467 err = -ENOBUFS;
468 goto out;
469 }
470 nla->nla_len = nla_attr_size(skb->len);
bed53bd1 471
533bea51 472 skb_zerocopy(user_skb, skb, skb->len, hlen);
7257b535 473
ef507cec
TG
474 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
475 if (!(dp->user_features & OVS_DP_F_UNALIGNED) &&
476 (plen = (ALIGN(user_skb->len, NLA_ALIGNTO) - user_skb->len)) > 0)
477 memset(skb_put(user_skb, plen), 0, plen);
478
533bea51 479 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
6161d3fd 480
533bea51 481 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
6161d3fd
JG
482out:
483 kfree_skb(nskb);
484 return err;
cb5087ca
BP
485}
486
df2c07f4 487static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 488{
df2c07f4 489 struct ovs_header *ovs_header = info->userhdr;
982b8810 490 struct nlattr **a = info->attrs;
e0e57990 491 struct sw_flow_actions *acts;
982b8810 492 struct sk_buff *packet;
e0e57990 493 struct sw_flow *flow;
f7cd0081 494 struct datapath *dp;
d6569377 495 struct ethhdr *eth;
3f19d399 496 int len;
d6569377 497 int err;
064af421 498
f7cd0081 499 err = -EINVAL;
df2c07f4 500 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 501 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 502 goto err;
064af421 503
df2c07f4 504 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 505 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
506 err = -ENOMEM;
507 if (!packet)
e5cad958 508 goto err;
3f19d399
BP
509 skb_reserve(packet, NET_IP_ALIGN);
510
bf3d6fce 511 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 512
f7cd0081
BP
513 skb_reset_mac_header(packet);
514 eth = eth_hdr(packet);
064af421 515
d6569377
BP
516 /* Normally, setting the skb 'protocol' field would be handled by a
517 * call to eth_type_trans(), but it assumes there's a sending
518 * device, which we may not have. */
7cd46155 519 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
f7cd0081 520 packet->protocol = eth->h_proto;
d6569377 521 else
f7cd0081 522 packet->protocol = htons(ETH_P_802_2);
d3c54451 523
e0e57990 524 /* Build an sw_flow for sending this packet. */
b0f3a2fe 525 flow = ovs_flow_alloc(false);
e0e57990
BP
526 err = PTR_ERR(flow);
527 if (IS_ERR(flow))
e5cad958 528 goto err_kfree_skb;
064af421 529
a1c564be 530 err = ovs_flow_extract(packet, -1, &flow->key);
e0e57990 531 if (err)
9321954a 532 goto err_flow_free;
e0e57990 533
a097c0b2 534 err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
80e5eed9 535 if (err)
9321954a 536 goto err_flow_free;
a097c0b2 537 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
e0e57990
BP
538 err = PTR_ERR(acts);
539 if (IS_ERR(acts))
9321954a 540 goto err_flow_free;
9b405f1a 541
a097c0b2
PS
542 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
543 &flow->key, 0, &acts);
e0e57990 544 rcu_assign_pointer(flow->sf_acts, acts);
9b405f1a
PS
545 if (err)
546 goto err_flow_free;
e0e57990
BP
547
548 OVS_CB(packet)->flow = flow;
d1d71a36 549 OVS_CB(packet)->pkt_key = &flow->key;
abff858b 550 packet->priority = flow->key.phy.priority;
3025a772 551 packet->mark = flow->key.phy.skb_mark;
e0e57990 552
d6569377 553 rcu_read_lock();
2a4999f3 554 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
f7cd0081 555 err = -ENODEV;
e5cad958
BP
556 if (!dp)
557 goto err_unlock;
cc4015df 558
e9141eec 559 local_bh_disable();
850b6b3b 560 err = ovs_execute_actions(dp, packet);
e9141eec 561 local_bh_enable();
d6569377 562 rcu_read_unlock();
e0e57990 563
a1c564be 564 ovs_flow_free(flow, false);
e5cad958 565 return err;
064af421 566
e5cad958
BP
567err_unlock:
568 rcu_read_unlock();
9321954a 569err_flow_free:
a1c564be 570 ovs_flow_free(flow, false);
e5cad958
BP
571err_kfree_skb:
572 kfree_skb(packet);
573err:
d6569377 574 return err;
064af421
BP
575}
576
df2c07f4 577static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc 578 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
df2c07f4
JP
579 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
580 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
982b8810
BP
581};
582
583static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 584 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
585 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
586 .policy = packet_policy,
df2c07f4 587 .doit = ovs_packet_cmd_execute
982b8810
BP
588 }
589};
590
4fa72a95
AZ
591static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
592 struct ovs_dp_megaflow_stats *mega_stats)
064af421 593{
d6569377 594 int i;
f180c2e2 595
4fa72a95
AZ
596 memset(mega_stats, 0, sizeof(*mega_stats));
597
994dc286 598 stats->n_flows = ovs_flow_tbl_count(&dp->table);
4fa72a95 599 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
064af421 600
7257b535 601 stats->n_hit = stats->n_missed = stats->n_lost = 0;
4fa72a95 602
d6569377
BP
603 for_each_possible_cpu(i) {
604 const struct dp_stats_percpu *percpu_stats;
605 struct dp_stats_percpu local_stats;
821cb9fa 606 unsigned int start;
44e05eca 607
d6569377 608 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 609
d6569377 610 do {
821cb9fa 611 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
d6569377 612 local_stats = *percpu_stats;
821cb9fa 613 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
064af421 614
d6569377
BP
615 stats->n_hit += local_stats.n_hit;
616 stats->n_missed += local_stats.n_missed;
617 stats->n_lost += local_stats.n_lost;
4fa72a95 618 mega_stats->n_mask_hit += local_stats.n_mask_hit;
d6569377
BP
619 }
620}
064af421 621
df2c07f4
JP
622static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
623 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
624 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
625 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 626};
36956a7d 627
37a1300c
BP
628static struct genl_family dp_flow_genl_family = {
629 .id = GENL_ID_GENERATE,
df2c07f4
JP
630 .hdrsize = sizeof(struct ovs_header),
631 .name = OVS_FLOW_FAMILY,
69685a88 632 .version = OVS_FLOW_VERSION,
2a4999f3 633 .maxattr = OVS_FLOW_ATTR_MAX,
b3dcb73c 634 .netnsok = true,
14002a59 635 SET_PARALLEL_OPS
37a1300c 636};
ed099e92 637
850b6b3b 638static struct genl_multicast_group ovs_dp_flow_multicast_group = {
df2c07f4 639 .name = OVS_FLOW_MCGROUP
37a1300c
BP
640};
641
0afa2373
TG
642static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
643{
644 return NLMSG_ALIGN(sizeof(struct ovs_header))
645 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
a1c564be 646 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
0afa2373
TG
647 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
648 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
649 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
650 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
651}
652
cd2a59e9 653/* Called with ovs_mutex. */
df2c07f4 654static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
28aea917 655 struct sk_buff *skb, u32 portid,
6455100f 656 u32 seq, u32 flags, u8 cmd)
d6569377 657{
37a1300c 658 const int skb_orig_len = skb->len;
9b405f1a 659 struct nlattr *start;
b0f3a2fe
PS
660 struct ovs_flow_stats stats;
661 __be16 tcp_flags;
662 unsigned long used;
df2c07f4 663 struct ovs_header *ovs_header;
d6569377 664 struct nlattr *nla;
d6569377 665 int err;
064af421 666
28aea917 667 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
df2c07f4 668 if (!ovs_header)
37a1300c 669 return -EMSGSIZE;
d6569377 670
99769a40 671 ovs_header->dp_ifindex = get_dpifindex(dp);
d6569377 672
a1c564be 673 /* Fill flow key. */
df2c07f4 674 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
d6569377
BP
675 if (!nla)
676 goto nla_put_failure;
a1c564be 677
a097c0b2 678 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
d6569377 679 if (err)
37a1300c 680 goto error;
d6569377
BP
681 nla_nest_end(skb, nla);
682
a1c564be
AZ
683 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
684 if (!nla)
685 goto nla_put_failure;
686
a097c0b2 687 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
a1c564be
AZ
688 if (err)
689 goto error;
690
691 nla_nest_end(skb, nla);
692
b0f3a2fe
PS
693 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
694 if (used &&
695 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
c3cc8c03 696 goto nla_put_failure;
d6569377 697
b0f3a2fe
PS
698 if (stats.n_packets &&
699 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
700 goto nla_put_failure;
b0b906cc 701
b0f3a2fe
PS
702 if ((u8)ntohs(tcp_flags) &&
703 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
c3cc8c03 704 goto nla_put_failure;
d6569377 705
df2c07f4 706 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
707 * this is the first flow to be dumped into 'skb'. This is unusual for
708 * Netlink but individual action lists can be longer than
709 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
710 * The userspace caller can always fetch the actions separately if it
711 * really wants them. (Most userspace callers in fact don't care.)
712 *
713 * This can only fail for dump operations because the skb is always
714 * properly sized for single flows.
715 */
9b405f1a 716 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee 717 if (start) {
f44ccce1
PS
718 const struct sw_flow_actions *sf_acts;
719
780ec6ae 720 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
f44ccce1 721
a097c0b2
PS
722 err = ovs_nla_put_actions(sf_acts->actions,
723 sf_acts->actions_len, skb);
0a25b039
BP
724 if (!err)
725 nla_nest_end(skb, start);
726 else {
727 if (skb_orig_len)
728 goto error;
729
730 nla_nest_cancel(skb, start);
731 }
7aac03bd
JG
732 } else if (skb_orig_len)
733 goto nla_put_failure;
37a1300c 734
df2c07f4 735 return genlmsg_end(skb, ovs_header);
d6569377
BP
736
737nla_put_failure:
738 err = -EMSGSIZE;
37a1300c 739error:
df2c07f4 740 genlmsg_cancel(skb, ovs_header);
d6569377 741 return err;
44e05eca
BP
742}
743
68eadcf0
TG
744static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
745 struct genl_info *info)
44e05eca 746{
68eadcf0 747 size_t len;
d6569377 748
68eadcf0 749 len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
d6569377 750
68eadcf0 751 return genlmsg_new_unicast(len, info, GFP_KERNEL);
37a1300c 752}
8d5ebd83 753
6455100f
PS
754static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
755 struct datapath *dp,
68eadcf0
TG
756 struct genl_info *info,
757 u8 cmd)
37a1300c
BP
758{
759 struct sk_buff *skb;
760 int retval;
d6569377 761
68eadcf0 762 skb = ovs_flow_cmd_alloc_info(flow, info);
37a1300c
BP
763 if (!skb)
764 return ERR_PTR(-ENOMEM);
d6569377 765
68eadcf0
TG
766 retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
767 info->snd_seq, 0, cmd);
37a1300c 768 BUG_ON(retval < 0);
d6569377 769 return skb;
064af421
BP
770}
771
df2c07f4 772static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 773{
37a1300c 774 struct nlattr **a = info->attrs;
df2c07f4 775 struct ovs_header *ovs_header = info->userhdr;
529db635 776 struct sw_flow_key key, masked_key;
a1c564be
AZ
777 struct sw_flow *flow = NULL;
778 struct sw_flow_mask mask;
37a1300c 779 struct sk_buff *reply;
9c52546b 780 struct datapath *dp;
9b405f1a 781 struct sw_flow_actions *acts = NULL;
a1c564be 782 struct sw_flow_match match;
b0f3a2fe 783 bool exact_5tuple;
bc4a05c6 784 int error;
064af421 785
37a1300c
BP
786 /* Extract key. */
787 error = -EINVAL;
df2c07f4 788 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 789 goto error;
a1c564be
AZ
790
791 ovs_match_init(&match, &key, &mask);
b0f3a2fe 792 error = ovs_nla_get_match(&match, &exact_5tuple,
a097c0b2 793 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
37a1300c
BP
794 if (error)
795 goto error;
064af421 796
37a1300c 797 /* Validate actions. */
df2c07f4 798 if (a[OVS_FLOW_ATTR_ACTIONS]) {
a097c0b2 799 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
9b405f1a
PS
800 error = PTR_ERR(acts);
801 if (IS_ERR(acts))
37a1300c 802 goto error;
9b405f1a 803
a097c0b2
PS
804 ovs_flow_mask_key(&masked_key, &key, &mask);
805 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
806 &masked_key, 0, &acts);
529db635
JG
807 if (error) {
808 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
9b405f1a 809 goto err_kfree;
529db635 810 }
df2c07f4 811 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
37a1300c
BP
812 error = -EINVAL;
813 goto error;
814 }
815
cd2a59e9 816 ovs_lock();
2a4999f3 817 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
d6569377 818 error = -ENODEV;
9c52546b 819 if (!dp)
cd2a59e9 820 goto err_unlock_ovs;
704a1e09 821
a1c564be 822 /* Check if this is a duplicate flow */
4f88b5e5 823 flow = ovs_flow_tbl_lookup(&dp->table, &key);
3544358a 824 if (!flow) {
d6569377
BP
825 /* Bail out if we're not allowed to create a new flow. */
826 error = -ENOENT;
df2c07f4 827 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
cd2a59e9 828 goto err_unlock_ovs;
d6569377 829
d6569377 830 /* Allocate flow. */
b0f3a2fe 831 flow = ovs_flow_alloc(!exact_5tuple);
d6569377
BP
832 if (IS_ERR(flow)) {
833 error = PTR_ERR(flow);
cd2a59e9 834 goto err_unlock_ovs;
d6569377 835 }
d6569377 836
529db635
JG
837 flow->key = masked_key;
838 flow->unmasked_key = key;
d6569377
BP
839 rcu_assign_pointer(flow->sf_acts, acts);
840
d6569377 841 /* Put flow in bucket. */
0585f7a8
PS
842 error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
843 if (error) {
844 acts = NULL;
845 goto err_flow_free;
846 }
37a1300c 847
68eadcf0 848 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
d6569377
BP
849 } else {
850 /* We found a matching flow. */
851 struct sw_flow_actions *old_acts;
852
853 /* Bail out if we're not allowed to modify an existing flow.
854 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
855 * because Generic Netlink treats the latter as a dump
856 * request. We also accept NLM_F_EXCL in case that bug ever
857 * gets fixed.
858 */
859 error = -EEXIST;
df2c07f4 860 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
37a1300c 861 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
cd2a59e9 862 goto err_unlock_ovs;
d6569377 863
b21e5b6a 864 /* The unmasked key has to be the same for flow updates. */
a6603481 865 if (!ovs_flow_cmp_unmasked_key(flow, &match))
b21e5b6a
AZ
866 goto err_unlock_ovs;
867
d6569377 868 /* Update actions. */
cd2a59e9 869 old_acts = ovsl_dereference(flow->sf_acts);
9b405f1a 870 rcu_assign_pointer(flow->sf_acts, acts);
a097c0b2 871 ovs_nla_free_flow_actions(old_acts);
d6569377 872
68eadcf0 873 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
d6569377
BP
874
875 /* Clear stats. */
b0b906cc
PS
876 if (a[OVS_FLOW_ATTR_CLEAR])
877 ovs_flow_stats_clear(flow);
9c52546b 878 }
cd2a59e9 879 ovs_unlock();
37a1300c
BP
880
881 if (!IS_ERR(reply))
e297c6b7 882 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 883 else
b3dcb73c 884 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
2a4999f3 885 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
d6569377 886 return 0;
704a1e09 887
a1c564be
AZ
888err_flow_free:
889 ovs_flow_free(flow, false);
cd2a59e9
PS
890err_unlock_ovs:
891 ovs_unlock();
9b405f1a 892err_kfree:
ba400435 893 kfree(acts);
37a1300c 894error:
9c52546b 895 return error;
704a1e09
BP
896}
897
df2c07f4 898static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 899{
37a1300c 900 struct nlattr **a = info->attrs;
df2c07f4 901 struct ovs_header *ovs_header = info->userhdr;
37a1300c 902 struct sw_flow_key key;
37a1300c 903 struct sk_buff *reply;
704a1e09 904 struct sw_flow *flow;
9c52546b 905 struct datapath *dp;
a1c564be 906 struct sw_flow_match match;
9c52546b 907 int err;
704a1e09 908
1b936472
AZ
909 if (!a[OVS_FLOW_ATTR_KEY]) {
910 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
37a1300c 911 return -EINVAL;
1b936472 912 }
a1c564be
AZ
913
914 ovs_match_init(&match, &key, NULL);
b0f3a2fe 915 err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c
BP
916 if (err)
917 return err;
704a1e09 918
cd2a59e9 919 ovs_lock();
2a4999f3 920 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
921 if (!dp) {
922 err = -ENODEV;
923 goto unlock;
924 }
704a1e09 925
4f88b5e5 926 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 927 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
928 err = -ENOENT;
929 goto unlock;
930 }
d6569377 931
68eadcf0 932 reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
cd2a59e9
PS
933 if (IS_ERR(reply)) {
934 err = PTR_ERR(reply);
935 goto unlock;
936 }
36956a7d 937
cd2a59e9 938 ovs_unlock();
37a1300c 939 return genlmsg_reply(reply, info);
cd2a59e9
PS
940unlock:
941 ovs_unlock();
942 return err;
d6569377 943}
9c52546b 944
df2c07f4 945static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 946{
37a1300c 947 struct nlattr **a = info->attrs;
df2c07f4 948 struct ovs_header *ovs_header = info->userhdr;
37a1300c 949 struct sw_flow_key key;
37a1300c 950 struct sk_buff *reply;
d6569377 951 struct sw_flow *flow;
d6569377 952 struct datapath *dp;
a1c564be 953 struct sw_flow_match match;
d6569377 954 int err;
36956a7d 955
cd2a59e9 956 ovs_lock();
2a4999f3 957 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
958 if (!dp) {
959 err = -ENODEV;
960 goto unlock;
961 }
2a4999f3 962
cd2a59e9 963 if (!a[OVS_FLOW_ATTR_KEY]) {
994dc286 964 err = ovs_flow_tbl_flush(&dp->table);
cd2a59e9
PS
965 goto unlock;
966 }
a1c564be
AZ
967
968 ovs_match_init(&match, &key, NULL);
b0f3a2fe 969 err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
37a1300c 970 if (err)
cd2a59e9 971 goto unlock;
d6569377 972
4f88b5e5 973 flow = ovs_flow_tbl_lookup(&dp->table, &key);
a097c0b2 974 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
cd2a59e9
PS
975 err = -ENOENT;
976 goto unlock;
977 }
d6569377 978
68eadcf0 979 reply = ovs_flow_cmd_alloc_info(flow, info);
cd2a59e9
PS
980 if (!reply) {
981 err = -ENOMEM;
982 goto unlock;
983 }
37a1300c 984
994dc286 985 ovs_flow_tbl_remove(&dp->table, flow);
37a1300c 986
28aea917 987 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
df2c07f4 988 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
37a1300c
BP
989 BUG_ON(err < 0);
990
a1c564be 991 ovs_flow_free(flow, true);
cd2a59e9 992 ovs_unlock();
37a1300c 993
e297c6b7 994 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 995 return 0;
cd2a59e9
PS
996unlock:
997 ovs_unlock();
998 return err;
37a1300c
BP
999}
1000
df2c07f4 1001static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1002{
df2c07f4 1003 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
994dc286 1004 struct table_instance *ti;
37a1300c
BP
1005 struct datapath *dp;
1006
f44ccce1 1007 rcu_read_lock();
2a4999f3 1008 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9 1009 if (!dp) {
f44ccce1 1010 rcu_read_unlock();
37a1300c 1011 return -ENODEV;
cd2a59e9 1012 }
37a1300c 1013
994dc286 1014 ti = rcu_dereference(dp->table.ti);
37a1300c 1015 for (;;) {
37a1300c
BP
1016 struct sw_flow *flow;
1017 u32 bucket, obj;
1018
1019 bucket = cb->args[0];
1020 obj = cb->args[1];
994dc286 1021 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
3544358a 1022 if (!flow)
37a1300c
BP
1023 break;
1024
6455100f 1025 if (ovs_flow_cmd_fill_info(flow, dp, skb,
28aea917 1026 NETLINK_CB(cb->skb).portid,
37a1300c 1027 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1028 OVS_FLOW_CMD_NEW) < 0)
37a1300c
BP
1029 break;
1030
1031 cb->args[0] = bucket;
1032 cb->args[1] = obj;
1033 }
f44ccce1 1034 rcu_read_unlock();
37a1300c 1035 return skb->len;
704a1e09
BP
1036}
1037
37a1300c 1038static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1039 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1040 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1041 .policy = flow_policy,
df2c07f4 1042 .doit = ovs_flow_cmd_new_or_set
37a1300c 1043 },
df2c07f4 1044 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1045 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1046 .policy = flow_policy,
df2c07f4 1047 .doit = ovs_flow_cmd_del
37a1300c 1048 },
df2c07f4 1049 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1050 .flags = 0, /* OK for unprivileged users. */
1051 .policy = flow_policy,
df2c07f4
JP
1052 .doit = ovs_flow_cmd_get,
1053 .dumpit = ovs_flow_cmd_dump
37a1300c 1054 },
df2c07f4 1055 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1056 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1057 .policy = flow_policy,
df2c07f4 1058 .doit = ovs_flow_cmd_new_or_set,
37a1300c
BP
1059 },
1060};
1061
df2c07f4 1062static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
df2c07f4 1063 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
b063d9f0 1064 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
c58cc9a4 1065 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
d6569377
BP
1066};
1067
aaff4b55
BP
1068static struct genl_family dp_datapath_genl_family = {
1069 .id = GENL_ID_GENERATE,
df2c07f4
JP
1070 .hdrsize = sizeof(struct ovs_header),
1071 .name = OVS_DATAPATH_FAMILY,
69685a88 1072 .version = OVS_DATAPATH_VERSION,
2a4999f3 1073 .maxattr = OVS_DP_ATTR_MAX,
b3dcb73c 1074 .netnsok = true,
14002a59 1075 SET_PARALLEL_OPS
aaff4b55
BP
1076};
1077
850b6b3b 1078static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
df2c07f4 1079 .name = OVS_DATAPATH_MCGROUP
aaff4b55
BP
1080};
1081
0afa2373
TG
1082static size_t ovs_dp_cmd_msg_size(void)
1083{
1084 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1085
1086 msgsize += nla_total_size(IFNAMSIZ);
1087 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
4fa72a95 1088 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
300af20a 1089 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
0afa2373
TG
1090
1091 return msgsize;
1092}
1093
df2c07f4 1094static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1095 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1096{
df2c07f4 1097 struct ovs_header *ovs_header;
e926dfe3 1098 struct ovs_dp_stats dp_stats;
4fa72a95 1099 struct ovs_dp_megaflow_stats dp_megaflow_stats;
064af421
BP
1100 int err;
1101
28aea917 1102 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1103 flags, cmd);
df2c07f4 1104 if (!ovs_header)
aaff4b55 1105 goto error;
064af421 1106
b063d9f0 1107 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1108
d6569377 1109 rcu_read_lock();
850b6b3b 1110 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
d6569377 1111 rcu_read_unlock();
064af421 1112 if (err)
d6569377 1113 goto nla_put_failure;
064af421 1114
4fa72a95
AZ
1115 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1116 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1117 &dp_stats))
1118 goto nla_put_failure;
1119
1120 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1121 sizeof(struct ovs_dp_megaflow_stats),
1122 &dp_megaflow_stats))
c3cc8c03 1123 goto nla_put_failure;
d6569377 1124
c58cc9a4
TG
1125 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1126 goto nla_put_failure;
1127
df2c07f4 1128 return genlmsg_end(skb, ovs_header);
d6569377
BP
1129
1130nla_put_failure:
df2c07f4 1131 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1132error:
1133 return -EMSGSIZE;
d6569377
BP
1134}
1135
68eadcf0
TG
1136static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
1137 struct genl_info *info, u8 cmd)
d6569377 1138{
d6569377 1139 struct sk_buff *skb;
aaff4b55 1140 int retval;
d6569377 1141
68eadcf0 1142 skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
064af421 1143 if (!skb)
d6569377 1144 return ERR_PTR(-ENOMEM);
659586ef 1145
68eadcf0 1146 retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
aaff4b55
BP
1147 if (retval < 0) {
1148 kfree_skb(skb);
1149 return ERR_PTR(retval);
1150 }
1151 return skb;
1152}
9dca7bd5 1153
cd2a59e9 1154/* Called with ovs_mutex. */
2a4999f3
PS
1155static struct datapath *lookup_datapath(struct net *net,
1156 struct ovs_header *ovs_header,
6455100f 1157 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1158{
254f2dc8
BP
1159 struct datapath *dp;
1160
df2c07f4 1161 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1162 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1163 else {
d6569377 1164 struct vport *vport;
d6569377 1165
057dd6d2 1166 rcu_read_lock();
2a4999f3 1167 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1168 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
057dd6d2 1169 rcu_read_unlock();
d6569377 1170 }
254f2dc8 1171 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1172}
1173
94358dcf
TG
1174static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1175{
1176 struct datapath *dp;
1177
1178 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1179 if (!dp)
1180 return;
1181
1182 WARN(dp->user_features, "Dropping previously announced user features\n");
1183 dp->user_features = 0;
1184}
1185
c58cc9a4
TG
1186static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
1187{
1188 if (a[OVS_DP_ATTR_USER_FEATURES])
1189 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1190}
1191
df2c07f4 1192static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1193{
aaff4b55 1194 struct nlattr **a = info->attrs;
d6569377 1195 struct vport_parms parms;
aaff4b55 1196 struct sk_buff *reply;
d6569377
BP
1197 struct datapath *dp;
1198 struct vport *vport;
2a4999f3 1199 struct ovs_net *ovs_net;
95b1d73a 1200 int err, i;
d6569377 1201
d6569377 1202 err = -EINVAL;
ea36840f 1203 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1204 goto err;
1205
cd2a59e9 1206 ovs_lock();
d6569377 1207
d6569377
BP
1208 err = -ENOMEM;
1209 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1210 if (dp == NULL)
cd2a59e9 1211 goto err_unlock_ovs;
2a4999f3 1212
0ceaa66c
JG
1213 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1214
d6569377 1215 /* Allocate table. */
994dc286
PS
1216 err = ovs_flow_tbl_init(&dp->table);
1217 if (err)
d6569377
BP
1218 goto err_free_dp;
1219
99769a40
JG
1220 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1221 if (!dp->stats_percpu) {
1222 err = -ENOMEM;
1223 goto err_destroy_table;
1224 }
1225
95b1d73a
PS
1226 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1227 GFP_KERNEL);
1228 if (!dp->ports) {
1229 err = -ENOMEM;
1230 goto err_destroy_percpu;
1231 }
1232
1233 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1234 INIT_HLIST_HEAD(&dp->ports[i]);
1235
d6569377 1236 /* Set up our datapath device. */
df2c07f4
JP
1237 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1238 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1239 parms.options = NULL;
1240 parms.dp = dp;
df2c07f4 1241 parms.port_no = OVSP_LOCAL;
28aea917 1242 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
b063d9f0 1243
c58cc9a4
TG
1244 ovs_dp_change(dp, a);
1245
d6569377
BP
1246 vport = new_vport(&parms);
1247 if (IS_ERR(vport)) {
1248 err = PTR_ERR(vport);
1249 if (err == -EBUSY)
1250 err = -EEXIST;
1251
94358dcf
TG
1252 if (err == -EEXIST) {
1253 /* An outdated user space instance that does not understand
1254 * the concept of user_features has attempted to create a new
1255 * datapath and is likely to reuse it. Drop all user features.
1256 */
1257 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1258 ovs_dp_reset_user_features(skb, info);
1259 }
1260
95b1d73a 1261 goto err_destroy_ports_array;
d6569377 1262 }
d6569377 1263
68eadcf0 1264 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
aaff4b55
BP
1265 err = PTR_ERR(reply);
1266 if (IS_ERR(reply))
1267 goto err_destroy_local_port;
1268
2a4999f3 1269 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
fb93e9aa 1270 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
d6569377 1271
cd2a59e9 1272 ovs_unlock();
d6569377 1273
e297c6b7 1274 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
d6569377
BP
1275 return 0;
1276
1277err_destroy_local_port:
cd2a59e9 1278 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
95b1d73a
PS
1279err_destroy_ports_array:
1280 kfree(dp->ports);
99769a40
JG
1281err_destroy_percpu:
1282 free_percpu(dp->stats_percpu);
d6569377 1283err_destroy_table:
d103f479 1284 ovs_flow_tbl_destroy(&dp->table, false);
d6569377 1285err_free_dp:
0ceaa66c 1286 release_net(ovs_dp_get_net(dp));
d6569377 1287 kfree(dp);
cd2a59e9
PS
1288err_unlock_ovs:
1289 ovs_unlock();
d6569377 1290err:
064af421
BP
1291 return err;
1292}
1293
cd2a59e9 1294/* Called with ovs_mutex. */
2a4999f3 1295static void __dp_destroy(struct datapath *dp)
44e05eca 1296{
95b1d73a 1297 int i;
44e05eca 1298
95b1d73a
PS
1299 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1300 struct vport *vport;
f8dfbcb7 1301 struct hlist_node *n;
95b1d73a 1302
f8dfbcb7 1303 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1304 if (vport->port_no != OVSP_LOCAL)
1305 ovs_dp_detach_port(vport);
1306 }
ed099e92 1307
fb93e9aa 1308 list_del_rcu(&dp->list_node);
ed099e92 1309
cd2a59e9 1310 /* OVSP_LOCAL is datapath internal port. We need to make sure that
d103f479
AZ
1311 * all ports in datapath are destroyed first before freeing datapath.
1312 */
cd2a59e9 1313 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1314
d103f479
AZ
1315 /* RCU destroy the flow table */
1316 ovs_flow_tbl_destroy(&dp->table, true);
1317
ed099e92 1318 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1319}
1320
1321static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1322{
1323 struct sk_buff *reply;
1324 struct datapath *dp;
1325 int err;
1326
cd2a59e9 1327 ovs_lock();
2a4999f3
PS
1328 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1329 err = PTR_ERR(dp);
1330 if (IS_ERR(dp))
cd2a59e9 1331 goto unlock;
2a4999f3 1332
68eadcf0 1333 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
2a4999f3
PS
1334 err = PTR_ERR(reply);
1335 if (IS_ERR(reply))
cd2a59e9 1336 goto unlock;
2a4999f3
PS
1337
1338 __dp_destroy(dp);
cd2a59e9 1339 ovs_unlock();
ed099e92 1340
e297c6b7 1341 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
99620d2c
JG
1342
1343 return 0;
cd2a59e9
PS
1344unlock:
1345 ovs_unlock();
1346 return err;
44e05eca
BP
1347}
1348
df2c07f4 1349static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1350{
aaff4b55 1351 struct sk_buff *reply;
d6569377 1352 struct datapath *dp;
d6569377 1353 int err;
064af421 1354
cd2a59e9 1355 ovs_lock();
2a4999f3 1356 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1357 err = PTR_ERR(dp);
d6569377 1358 if (IS_ERR(dp))
cd2a59e9 1359 goto unlock;
38c6ecbc 1360
c58cc9a4
TG
1361 ovs_dp_change(dp, info->attrs);
1362
68eadcf0 1363 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
aaff4b55
BP
1364 if (IS_ERR(reply)) {
1365 err = PTR_ERR(reply);
b3dcb73c 1366 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
850b6b3b 1367 ovs_dp_datapath_multicast_group.id, err);
cd2a59e9
PS
1368 err = 0;
1369 goto unlock;
aaff4b55
BP
1370 }
1371
cd2a59e9 1372 ovs_unlock();
e297c6b7 1373 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
850b6b3b 1374
aaff4b55 1375 return 0;
cd2a59e9
PS
1376unlock:
1377 ovs_unlock();
1378 return err;
064af421
BP
1379}
1380
df2c07f4 1381static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1382{
aaff4b55 1383 struct sk_buff *reply;
d6569377 1384 struct datapath *dp;
d6569377 1385 int err;
1dcf111b 1386
cd2a59e9 1387 ovs_lock();
2a4999f3 1388 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1389 if (IS_ERR(dp)) {
1390 err = PTR_ERR(dp);
1391 goto unlock;
1392 }
1dcf111b 1393
68eadcf0 1394 reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
cd2a59e9
PS
1395 if (IS_ERR(reply)) {
1396 err = PTR_ERR(reply);
1397 goto unlock;
1398 }
aaff4b55 1399
cd2a59e9 1400 ovs_unlock();
aaff4b55 1401 return genlmsg_reply(reply, info);
cd2a59e9
PS
1402
1403unlock:
1404 ovs_unlock();
1405 return err;
1dcf111b
JP
1406}
1407
df2c07f4 1408static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1409{
2a4999f3 1410 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1411 struct datapath *dp;
1412 int skip = cb->args[0];
1413 int i = 0;
a7786963 1414
fb93e9aa
PS
1415 rcu_read_lock();
1416 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
a2bab2f0 1417 if (i >= skip &&
28aea917 1418 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1419 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1420 OVS_DP_CMD_NEW) < 0)
aaff4b55 1421 break;
254f2dc8 1422 i++;
a7786963 1423 }
fb93e9aa 1424 rcu_read_unlock();
aaff4b55 1425
254f2dc8
BP
1426 cb->args[0] = i;
1427
aaff4b55 1428 return skb->len;
c19e6535
BP
1429}
1430
aaff4b55 1431static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1432 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1433 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1434 .policy = datapath_policy,
df2c07f4 1435 .doit = ovs_dp_cmd_new
aaff4b55 1436 },
df2c07f4 1437 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1438 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1439 .policy = datapath_policy,
df2c07f4 1440 .doit = ovs_dp_cmd_del
aaff4b55 1441 },
df2c07f4 1442 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1443 .flags = 0, /* OK for unprivileged users. */
1444 .policy = datapath_policy,
df2c07f4
JP
1445 .doit = ovs_dp_cmd_get,
1446 .dumpit = ovs_dp_cmd_dump
aaff4b55 1447 },
df2c07f4 1448 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1449 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1450 .policy = datapath_policy,
df2c07f4 1451 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1452 },
1453};
1454
df2c07f4 1455static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
df2c07f4 1456 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1457 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
d48c88ec
JG
1458 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1459 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1460 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1461 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1462};
1463
f0fef760
BP
1464static struct genl_family dp_vport_genl_family = {
1465 .id = GENL_ID_GENERATE,
df2c07f4
JP
1466 .hdrsize = sizeof(struct ovs_header),
1467 .name = OVS_VPORT_FAMILY,
69685a88 1468 .version = OVS_VPORT_VERSION,
2a4999f3 1469 .maxattr = OVS_VPORT_ATTR_MAX,
b3dcb73c 1470 .netnsok = true,
14002a59 1471 SET_PARALLEL_OPS
f0fef760
BP
1472};
1473
850b6b3b 1474struct genl_multicast_group ovs_dp_vport_multicast_group = {
df2c07f4 1475 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1476};
1477
cd2a59e9 1478/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1479static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1480 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1481{
df2c07f4 1482 struct ovs_header *ovs_header;
e926dfe3 1483 struct ovs_vport_stats vport_stats;
c19e6535
BP
1484 int err;
1485
28aea917 1486 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1487 flags, cmd);
df2c07f4 1488 if (!ovs_header)
f0fef760 1489 return -EMSGSIZE;
c19e6535 1490
99769a40 1491 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1492
c3cc8c03
DM
1493 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1494 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1495 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
28aea917 1496 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
c3cc8c03 1497 goto nla_put_failure;
c19e6535 1498
850b6b3b 1499 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1500 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1501 &vport_stats))
1502 goto nla_put_failure;
c19e6535 1503
850b6b3b 1504 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1505 if (err == -EMSGSIZE)
1506 goto error;
c19e6535 1507
df2c07f4 1508 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1509
1510nla_put_failure:
1511 err = -EMSGSIZE;
f0fef760 1512error:
df2c07f4 1513 genlmsg_cancel(skb, ovs_header);
f0fef760 1514 return err;
064af421
BP
1515}
1516
cd2a59e9 1517/* Called with ovs_mutex or RCU read lock. */
28aea917 1518struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1519 u32 seq, u8 cmd)
064af421 1520{
c19e6535 1521 struct sk_buff *skb;
f0fef760 1522 int retval;
c19e6535 1523
f0fef760 1524 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1525 if (!skb)
1526 return ERR_PTR(-ENOMEM);
1527
28aea917 1528 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1529 BUG_ON(retval < 0);
1530
c19e6535 1531 return skb;
f0fef760 1532}
c19e6535 1533
cd2a59e9 1534/* Called with ovs_mutex or RCU read lock. */
2a4999f3
PS
1535static struct vport *lookup_vport(struct net *net,
1536 struct ovs_header *ovs_header,
df2c07f4 1537 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1538{
1539 struct datapath *dp;
1540 struct vport *vport;
1541
df2c07f4 1542 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1543 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1544 if (!vport)
c19e6535 1545 return ERR_PTR(-ENODEV);
24ce832d
BP
1546 if (ovs_header->dp_ifindex &&
1547 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1548 return ERR_PTR(-ENODEV);
c19e6535 1549 return vport;
df2c07f4
JP
1550 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1551 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1552
1553 if (port_no >= DP_MAX_PORTS)
f0fef760 1554 return ERR_PTR(-EFBIG);
c19e6535 1555
2a4999f3 1556 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1557 if (!dp)
1558 return ERR_PTR(-ENODEV);
f2459fe7 1559
cd2a59e9 1560 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 1561 if (!vport)
17535c57 1562 return ERR_PTR(-ENODEV);
c19e6535
BP
1563 return vport;
1564 } else
1565 return ERR_PTR(-EINVAL);
064af421
BP
1566}
1567
df2c07f4 1568static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1569{
f0fef760 1570 struct nlattr **a = info->attrs;
df2c07f4 1571 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1572 struct vport_parms parms;
ed099e92 1573 struct sk_buff *reply;
c19e6535 1574 struct vport *vport;
c19e6535 1575 struct datapath *dp;
b0ec0f27 1576 u32 port_no;
c19e6535 1577 int err;
b0ec0f27 1578
c19e6535 1579 err = -EINVAL;
ea36840f
BP
1580 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1581 !a[OVS_VPORT_ATTR_UPCALL_PID])
f0fef760
BP
1582 goto exit;
1583
cd2a59e9 1584 ovs_lock();
2a4999f3 1585 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1586 err = -ENODEV;
1587 if (!dp)
ed099e92 1588 goto exit_unlock;
c19e6535 1589
df2c07f4
JP
1590 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1591 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1592
1593 err = -EFBIG;
1594 if (port_no >= DP_MAX_PORTS)
ed099e92 1595 goto exit_unlock;
c19e6535 1596
cd2a59e9 1597 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1598 err = -EBUSY;
1599 if (vport)
ed099e92 1600 goto exit_unlock;
c19e6535
BP
1601 } else {
1602 for (port_no = 1; ; port_no++) {
1603 if (port_no >= DP_MAX_PORTS) {
1604 err = -EFBIG;
ed099e92 1605 goto exit_unlock;
c19e6535 1606 }
cd2a59e9 1607 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
1608 if (!vport)
1609 break;
51d4d598 1610 }
064af421 1611 }
b0ec0f27 1612
df2c07f4
JP
1613 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1614 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1615 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1616 parms.dp = dp;
1617 parms.port_no = port_no;
28aea917 1618 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
1619
1620 vport = new_vport(&parms);
1621 err = PTR_ERR(vport);
1622 if (IS_ERR(vport))
ed099e92 1623 goto exit_unlock;
c19e6535 1624
faef6d2d 1625 err = 0;
1fc7083d
JG
1626 if (a[OVS_VPORT_ATTR_STATS])
1627 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1628
1629 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1630 OVS_VPORT_CMD_NEW);
1631 if (IS_ERR(reply)) {
1632 err = PTR_ERR(reply);
850b6b3b 1633 ovs_dp_detach_port(vport);
ed099e92 1634 goto exit_unlock;
c19e6535 1635 }
e297c6b7
TG
1636
1637 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c19e6535 1638
ed099e92 1639exit_unlock:
cd2a59e9 1640 ovs_unlock();
c19e6535
BP
1641exit:
1642 return err;
44e05eca
BP
1643}
1644
df2c07f4 1645static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1646{
f0fef760
BP
1647 struct nlattr **a = info->attrs;
1648 struct sk_buff *reply;
c19e6535 1649 struct vport *vport;
c19e6535 1650 int err;
44e05eca 1651
cd2a59e9 1652 ovs_lock();
2a4999f3 1653 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
1654 err = PTR_ERR(vport);
1655 if (IS_ERR(vport))
f0fef760 1656 goto exit_unlock;
44e05eca 1657
6455100f 1658 if (a[OVS_VPORT_ATTR_TYPE] &&
17ec1d04 1659 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
4879d4c7 1660 err = -EINVAL;
17ec1d04
JG
1661 goto exit_unlock;
1662 }
6455100f 1663
c25ea534
JG
1664 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1665 if (!reply) {
1666 err = -ENOMEM;
1667 goto exit_unlock;
1668 }
1669
17ec1d04 1670 if (a[OVS_VPORT_ATTR_OPTIONS]) {
850b6b3b 1671 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
17ec1d04
JG
1672 if (err)
1673 goto exit_free;
1674 }
1fc7083d
JG
1675
1676 if (a[OVS_VPORT_ATTR_STATS])
1677 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1678
1679 if (a[OVS_VPORT_ATTR_UPCALL_PID])
28aea917 1680 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 1681
c25ea534
JG
1682 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1683 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1684 BUG_ON(err < 0);
f0fef760 1685
cd2a59e9 1686 ovs_unlock();
8680ae4d 1687 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c25ea534
JG
1688 return 0;
1689
1690exit_free:
1691 kfree_skb(reply);
f0fef760 1692exit_unlock:
cd2a59e9 1693 ovs_unlock();
c19e6535 1694 return err;
064af421
BP
1695}
1696
df2c07f4 1697static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1698{
f0fef760
BP
1699 struct nlattr **a = info->attrs;
1700 struct sk_buff *reply;
c19e6535 1701 struct vport *vport;
c19e6535
BP
1702 int err;
1703
cd2a59e9 1704 ovs_lock();
2a4999f3 1705 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 1706 err = PTR_ERR(vport);
f0fef760
BP
1707 if (IS_ERR(vport))
1708 goto exit_unlock;
c19e6535 1709
df2c07f4 1710 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
1711 err = -EINVAL;
1712 goto exit_unlock;
1713 }
1714
28aea917
IY
1715 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1716 info->snd_seq, OVS_VPORT_CMD_DEL);
f0fef760
BP
1717 err = PTR_ERR(reply);
1718 if (IS_ERR(reply))
1719 goto exit_unlock;
1720
b57d5819 1721 err = 0;
850b6b3b 1722 ovs_dp_detach_port(vport);
f0fef760 1723
e297c6b7 1724 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
f0fef760
BP
1725
1726exit_unlock:
cd2a59e9 1727 ovs_unlock();
c19e6535 1728 return err;
7c40efc9
BP
1729}
1730
df2c07f4 1731static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1732{
f0fef760 1733 struct nlattr **a = info->attrs;
df2c07f4 1734 struct ovs_header *ovs_header = info->userhdr;
ed099e92 1735 struct sk_buff *reply;
c19e6535 1736 struct vport *vport;
c19e6535
BP
1737 int err;
1738
ed099e92 1739 rcu_read_lock();
2a4999f3 1740 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
1741 err = PTR_ERR(vport);
1742 if (IS_ERR(vport))
f0fef760 1743 goto exit_unlock;
c19e6535 1744
28aea917
IY
1745 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1746 info->snd_seq, OVS_VPORT_CMD_NEW);
ed099e92
BP
1747 err = PTR_ERR(reply);
1748 if (IS_ERR(reply))
f0fef760 1749 goto exit_unlock;
ed099e92 1750
df2fa9b5
JG
1751 rcu_read_unlock();
1752
1753 return genlmsg_reply(reply, info);
ed099e92 1754
f0fef760 1755exit_unlock:
ed099e92 1756 rcu_read_unlock();
c19e6535
BP
1757 return err;
1758}
1759
df2c07f4 1760static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1761{
df2c07f4 1762 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 1763 struct datapath *dp;
95b1d73a
PS
1764 int bucket = cb->args[0], skip = cb->args[1];
1765 int i, j = 0;
c19e6535 1766
2a4999f3 1767 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535 1768 if (!dp)
f0fef760 1769 return -ENODEV;
ed099e92
BP
1770
1771 rcu_read_lock();
95b1d73a 1772 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 1773 struct vport *vport;
95b1d73a
PS
1774
1775 j = 0;
f8dfbcb7 1776 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
1777 if (j >= skip &&
1778 ovs_vport_cmd_fill_info(vport, skb,
28aea917 1779 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
1780 cb->nlh->nlmsg_seq,
1781 NLM_F_MULTI,
1782 OVS_VPORT_CMD_NEW) < 0)
1783 goto out;
1784
1785 j++;
1786 }
1787 skip = 0;
c19e6535 1788 }
95b1d73a 1789out:
ed099e92 1790 rcu_read_unlock();
c19e6535 1791
95b1d73a
PS
1792 cb->args[0] = i;
1793 cb->args[1] = j;
f0fef760 1794
95b1d73a 1795 return skb->len;
7c40efc9
BP
1796}
1797
f0fef760 1798static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 1799 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
1800 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1801 .policy = vport_policy,
df2c07f4 1802 .doit = ovs_vport_cmd_new
f0fef760 1803 },
df2c07f4 1804 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
1805 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1806 .policy = vport_policy,
df2c07f4 1807 .doit = ovs_vport_cmd_del
f0fef760 1808 },
df2c07f4 1809 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
1810 .flags = 0, /* OK for unprivileged users. */
1811 .policy = vport_policy,
df2c07f4
JP
1812 .doit = ovs_vport_cmd_get,
1813 .dumpit = ovs_vport_cmd_dump
f0fef760 1814 },
df2c07f4 1815 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
1816 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1817 .policy = vport_policy,
df2c07f4 1818 .doit = ovs_vport_cmd_set,
f0fef760
BP
1819 },
1820};
1821
982b8810
BP
1822struct genl_family_and_ops {
1823 struct genl_family *family;
1824 struct genl_ops *ops;
1825 int n_ops;
1826 struct genl_multicast_group *group;
1827};
ed099e92 1828
982b8810 1829static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1830 { &dp_datapath_genl_family,
1831 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
850b6b3b 1832 &ovs_dp_datapath_multicast_group },
f0fef760
BP
1833 { &dp_vport_genl_family,
1834 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
850b6b3b 1835 &ovs_dp_vport_multicast_group },
37a1300c
BP
1836 { &dp_flow_genl_family,
1837 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
850b6b3b 1838 &ovs_dp_flow_multicast_group },
982b8810
BP
1839 { &dp_packet_genl_family,
1840 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1841 NULL },
1842};
ed099e92 1843
982b8810
BP
1844static void dp_unregister_genl(int n_families)
1845{
1846 int i;
ed099e92 1847
b867ca75 1848 for (i = 0; i < n_families; i++)
982b8810 1849 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
1850}
1851
982b8810 1852static int dp_register_genl(void)
064af421 1853{
982b8810
BP
1854 int n_registered;
1855 int err;
1856 int i;
064af421 1857
982b8810
BP
1858 n_registered = 0;
1859 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1860 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 1861
982b8810
BP
1862 err = genl_register_family_with_ops(f->family, f->ops,
1863 f->n_ops);
1864 if (err)
1865 goto error;
1866 n_registered++;
e22d4953 1867
982b8810
BP
1868 if (f->group) {
1869 err = genl_register_mc_group(f->family, f->group);
1870 if (err)
1871 goto error;
1872 }
1873 }
9cc8b4e4 1874
982b8810 1875 return 0;
064af421
BP
1876
1877error:
982b8810
BP
1878 dp_unregister_genl(n_registered);
1879 return err;
064af421
BP
1880}
1881
2a4999f3
PS
1882static int __net_init ovs_init_net(struct net *net)
1883{
1884 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1885
1886 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 1887 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2a4999f3
PS
1888 return 0;
1889}
1890
1891static void __net_exit ovs_exit_net(struct net *net)
1892{
cd2a59e9 1893 struct datapath *dp, *dp_next;
2a4999f3
PS
1894 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1895
cd2a59e9
PS
1896 ovs_lock();
1897 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1898 __dp_destroy(dp);
1899 ovs_unlock();
1900
1901 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
1902}
1903
1904static struct pernet_operations ovs_net_ops = {
1905 .init = ovs_init_net,
1906 .exit = ovs_exit_net,
1907 .id = &ovs_net_id,
1908 .size = sizeof(struct ovs_net),
1909};
1910
637c8268
PS
1911DEFINE_COMPAT_PNET_REG_FUNC(device);
1912
22d24ebf
BP
1913static int __init dp_init(void)
1914{
1915 int err;
1916
f3d85db3 1917 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 1918
dc5f3fef 1919 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
8a07709c 1920 VERSION);
064af421 1921
850b6b3b 1922 err = ovs_flow_init();
3544358a 1923 if (err)
533e96e7 1924 goto error;
3544358a 1925
850b6b3b 1926 err = ovs_vport_init();
064af421
BP
1927 if (err)
1928 goto error_flow_exit;
1929
2a4999f3 1930 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
1931 if (err)
1932 goto error_vport_exit;
1933
2a4999f3
PS
1934 err = register_netdevice_notifier(&ovs_dp_device_notifier);
1935 if (err)
1936 goto error_netns_exit;
1937
982b8810
BP
1938 err = dp_register_genl();
1939 if (err < 0)
37a1300c 1940 goto error_unreg_notifier;
982b8810 1941
064af421
BP
1942 return 0;
1943
1944error_unreg_notifier:
850b6b3b 1945 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1946error_netns_exit:
1947 unregister_pernet_device(&ovs_net_ops);
f2459fe7 1948error_vport_exit:
850b6b3b 1949 ovs_vport_exit();
064af421 1950error_flow_exit:
850b6b3b 1951 ovs_flow_exit();
064af421
BP
1952error:
1953 return err;
1954}
1955
1956static void dp_cleanup(void)
1957{
982b8810 1958 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
850b6b3b 1959 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
1960 unregister_pernet_device(&ovs_net_ops);
1961 rcu_barrier();
850b6b3b
JG
1962 ovs_vport_exit();
1963 ovs_flow_exit();
064af421
BP
1964}
1965
1966module_init(dp_init);
1967module_exit(dp_cleanup);
1968
1969MODULE_DESCRIPTION("Open vSwitch switching datapath");
1970MODULE_LICENSE("GPL");
3d0666d2 1971MODULE_VERSION(VERSION);