]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
nx-match: Fold all of its data structures into mf_field.
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
a9a29d22 2 * Copyright (c) 2007-2011 Nicira Networks.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421
BP
41#include <linux/wait.h>
42#include <asm/system.h>
43#include <asm/div64.h>
656a0e37 44#include <linux/highmem.h>
064af421
BP
45#include <linux/netfilter_bridge.h>
46#include <linux/netfilter_ipv4.h>
47#include <linux/inetdevice.h>
48#include <linux/list.h>
077257b8 49#include <linux/openvswitch.h>
064af421 50#include <linux/rculist.h>
064af421 51#include <linux/dmi.h>
36956a7d 52#include <net/genetlink.h>
064af421 53
dd8d6b8c 54#include "checksum.h"
064af421 55#include "datapath.h"
064af421 56#include "flow.h"
303708cc 57#include "vlan.h"
3544358a 58#include "tunnel.h"
f2459fe7 59#include "vport-internal_dev.h"
064af421 60
4cf41591 61#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
143af30e
PS
62 LINUX_VERSION_CODE > KERNEL_VERSION(3,2,0)
63#error Kernels before 2.6.18 or after 3.2 are not supported by this version of Open vSwitch.
4cf41591
JG
64#endif
65
064af421
BP
/* Optional ioctl hook.  As a file-scope object it starts out NULL; it is
 * exported so that code outside this file can reach it. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);
68
ed099e92
BP
69/**
70 * DOC: Locking:
064af421 71 *
ed099e92
BP
72 * Writes to device state (add/remove datapath, port, set operations on vports,
73 * etc.) are protected by RTNL.
064af421 74 *
ed099e92 75 * Writes to other state (flow table modifications, set miscellaneous datapath
7257b535
BP
76 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
77 * genl_mutex.
ed099e92
BP
78 *
79 * Reads are protected by RCU.
80 *
81 * There are a few special cases (mostly stats) that have their own
82 * synchronization but they nest under all of above and don't interact with
83 * each other.
064af421 84 */
ed099e92 85
254f2dc8
BP
/* Every datapath is linked onto this global list so that they can all be
 * dumped out.  Protected by genl_mutex.
 */
static LIST_HEAD(dps);
064af421 90
/* Forward declarations for helpers that are defined further down. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info);
064af421 96
ed099e92 97/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
df2a06ab 98static struct datapath *get_dp(int dp_ifindex)
064af421 99{
254f2dc8
BP
100 struct datapath *dp = NULL;
101 struct net_device *dev;
ed099e92 102
254f2dc8
BP
103 rcu_read_lock();
104 dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
105 if (dev) {
106 struct vport *vport = internal_dev_get_vport(dev);
107 if (vport)
108 dp = vport->dp;
109 }
110 rcu_read_unlock();
111
112 return dp;
064af421 113}
064af421 114
f2459fe7
JG
115/* Must be called with rcu_read_lock or RTNL lock. */
116const char *dp_name(const struct datapath *dp)
117{
16b82e84
JG
118 struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
119 return vport->ops->get_name(vport);
f2459fe7
JG
120}
121
99769a40
JG
122static int get_dpifindex(struct datapath *dp)
123{
124 struct vport *local;
125 int ifindex;
126
127 rcu_read_lock();
128
cbbf4e1e 129 local = rcu_dereference(dp->ports[OVSP_LOCAL]);
99769a40 130 if (local)
16b82e84 131 ifindex = local->ops->get_ifindex(local);
99769a40
JG
132 else
133 ifindex = 0;
134
135 rcu_read_unlock();
136
137 return ifindex;
138}
139
6455100f 140static size_t br_nlmsg_size(void)
064af421
BP
141{
142 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
143 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
144 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
145 + nla_total_size(4) /* IFLA_MASTER */
146 + nla_total_size(4) /* IFLA_MTU */
064af421
BP
147 + nla_total_size(1); /* IFLA_OPERSTATE */
148}
149
ed099e92 150/* Caller must hold RTNL lock. */
064af421 151static int dp_fill_ifinfo(struct sk_buff *skb,
e779d8d9 152 const struct vport *port,
064af421
BP
153 int event, unsigned int flags)
154{
027f9007 155 struct datapath *dp = port->dp;
064af421
BP
156 struct ifinfomsg *hdr;
157 struct nlmsghdr *nlh;
158
16b82e84
JG
159 if (!port->ops->get_ifindex)
160 return -ENODEV;
f2459fe7 161
064af421
BP
162 nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
163 if (nlh == NULL)
164 return -EMSGSIZE;
165
166 hdr = nlmsg_data(nlh);
167 hdr->ifi_family = AF_BRIDGE;
168 hdr->__ifi_pad = 0;
f2459fe7 169 hdr->ifi_type = ARPHRD_ETHER;
16b82e84
JG
170 hdr->ifi_index = port->ops->get_ifindex(port);
171 hdr->ifi_flags = port->ops->get_dev_flags(port);
064af421
BP
172 hdr->ifi_change = 0;
173
16b82e84 174 NLA_PUT_STRING(skb, IFLA_IFNAME, port->ops->get_name(port));
99769a40 175 NLA_PUT_U32(skb, IFLA_MASTER, get_dpifindex(dp));
16b82e84 176 NLA_PUT_U32(skb, IFLA_MTU, port->ops->get_mtu(port));
064af421
BP
177#ifdef IFLA_OPERSTATE
178 NLA_PUT_U8(skb, IFLA_OPERSTATE,
16b82e84
JG
179 port->ops->is_running(port)
180 ? port->ops->get_operstate(port)
f2459fe7 181 : IF_OPER_DOWN);
064af421
BP
182#endif
183
16b82e84 184 NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, port->ops->get_addr(port));
064af421 185
064af421
BP
186 return nlmsg_end(skb, nlh);
187
188nla_put_failure:
189 nlmsg_cancel(skb, nlh);
190 return -EMSGSIZE;
191}
192
ed099e92 193/* Caller must hold RTNL lock. */
e779d8d9 194static void dp_ifinfo_notify(int event, struct vport *port)
064af421 195{
064af421 196 struct sk_buff *skb;
16b82e84 197 int err;
064af421
BP
198
199 skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
16b82e84
JG
200 if (!skb) {
201 err = -ENOBUFS;
202 goto err;
203 }
064af421
BP
204
205 err = dp_fill_ifinfo(skb, port, event, 0);
206 if (err < 0) {
16b82e84
JG
207 if (err == -ENODEV) {
208 goto out;
209 } else {
210 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
211 WARN_ON(err == -EMSGSIZE);
212 goto err;
213 }
064af421 214 }
16b82e84 215
f2459fe7 216 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
16b82e84 217
cfe7c1f5 218 return;
16b82e84
JG
219err:
220 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
221out:
222 kfree_skb(skb);
064af421
BP
223}
224
58c342f6
BP
225static void release_dp(struct kobject *kobj)
226{
227 struct datapath *dp = container_of(kobj, struct datapath, ifobj);
228 kfree(dp);
229}
230
35f7605b 231static struct kobj_type dp_ktype = {
58c342f6
BP
232 .release = release_dp
233};
234
46c6a11d
JG
235static void destroy_dp_rcu(struct rcu_head *rcu)
236{
237 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 238
be2ba156 239 flow_tbl_destroy((__force struct flow_table *)dp->table);
46c6a11d
JG
240 free_percpu(dp->stats_percpu);
241 kobject_put(&dp->ifobj);
242}
243
ed099e92 244/* Called with RTNL lock and genl_lock. */
c19e6535 245static struct vport *new_vport(const struct vport_parms *parms)
064af421 246{
f2459fe7 247 struct vport *vport;
f2459fe7 248
c19e6535
BP
249 vport = vport_add(parms);
250 if (!IS_ERR(vport)) {
251 struct datapath *dp = parms->dp;
064af421 252
c19e6535 253 rcu_assign_pointer(dp->ports[parms->port_no], vport);
ed099e92 254 list_add(&vport->node, &dp->port_list);
064af421 255
c19e6535
BP
256 dp_ifinfo_notify(RTM_NEWLINK, vport);
257 }
064af421 258
c19e6535 259 return vport;
064af421
BP
260}
261
ed099e92 262/* Called with RTNL lock. */
3544358a 263void dp_detach_port(struct vport *p)
064af421
BP
264{
265 ASSERT_RTNL();
266
df2c07f4 267 if (p->port_no != OVSP_LOCAL)
0515ceb3 268 dp_sysfs_del_if(p);
064af421
BP
269 dp_ifinfo_notify(RTM_DELLINK, p);
270
064af421 271 /* First drop references to device. */
ed099e92 272 list_del(&p->node);
064af421 273 rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
f2459fe7 274
7237e4f4 275 /* Then destroy it. */
3544358a 276 vport_del(p);
064af421
BP
277}
278
8819fac7 279/* Must be called with rcu_read_lock. */
e779d8d9 280void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
281{
282 struct datapath *dp = p->dp;
3544358a 283 struct sw_flow *flow;
064af421 284 struct dp_stats_percpu *stats;
e9141eec 285 u64 *stats_counter;
4c1ad233 286 int error;
064af421 287
e9141eec 288 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
a063b0df 289
3976f6d5 290 if (!OVS_CB(skb)->flow) {
36956a7d 291 struct sw_flow_key key;
76abe283 292 int key_len;
4c1ad233 293
3976f6d5 294 /* Extract flow from 'skb' into 'key'. */
7257b535 295 error = flow_extract(skb, p->port_no, &key, &key_len);
3976f6d5
JG
296 if (unlikely(error)) {
297 kfree_skb(skb);
298 return;
299 }
064af421 300
3976f6d5 301 /* Look up flow. */
6455100f
PS
302 flow = flow_tbl_lookup(rcu_dereference(dp->table),
303 &key, key_len);
3544358a 304 if (unlikely(!flow)) {
856081f6
BP
305 struct dp_upcall_info upcall;
306
df2c07f4 307 upcall.cmd = OVS_PACKET_CMD_MISS;
856081f6 308 upcall.key = &key;
98403001
BP
309 upcall.userdata = NULL;
310 upcall.pid = p->upcall_pid;
856081f6 311 dp_upcall(dp, skb, &upcall);
d9e214da 312 consume_skb(skb);
e9141eec 313 stats_counter = &stats->n_missed;
3976f6d5
JG
314 goto out;
315 }
316
3544358a 317 OVS_CB(skb)->flow = flow;
55574bb0
BP
318 }
319
e9141eec 320 stats_counter = &stats->n_hit;
3976f6d5 321 flow_used(OVS_CB(skb)->flow, skb);
a4af2475 322 execute_actions(dp, skb);
55574bb0 323
8819fac7 324out:
55574bb0 325 /* Update datapath statistics. */
821cb9fa 326 u64_stats_update_begin(&stats->sync);
e9141eec 327 (*stats_counter)++;
821cb9fa 328 u64_stats_update_end(&stats->sync);
064af421
BP
329}
330
aa5a8fdc
JG
331static struct genl_family dp_packet_genl_family = {
332 .id = GENL_ID_GENERATE,
df2c07f4
JP
333 .hdrsize = sizeof(struct ovs_header),
334 .name = OVS_PACKET_FAMILY,
69685a88 335 .version = OVS_PACKET_VERSION,
df2c07f4 336 .maxattr = OVS_PACKET_ATTR_MAX
aa5a8fdc
JG
337};
338
36ce148c
PS
339int dp_upcall(struct datapath *dp, struct sk_buff *skb,
340 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
341{
342 struct dp_stats_percpu *stats;
7257b535 343 int dp_ifindex;
aa5a8fdc
JG
344 int err;
345
98403001 346 if (upcall_info->pid == 0) {
b063d9f0 347 err = -ENOTCONN;
b063d9f0
JG
348 goto err;
349 }
350
7257b535
BP
351 dp_ifindex = get_dpifindex(dp);
352 if (!dp_ifindex) {
353 err = -ENODEV;
354 goto err;
aa5a8fdc
JG
355 }
356
7257b535 357 forward_ip_summed(skb, true);
36ce148c 358
7257b535
BP
359 if (!skb_is_gso(skb))
360 err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
361 else
362 err = queue_gso_packets(dp_ifindex, skb, upcall_info);
d76195db
JG
363 if (err)
364 goto err;
365
366 return 0;
aa5a8fdc 367
aa5a8fdc 368err:
aa5a8fdc
JG
369 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
370
821cb9fa 371 u64_stats_update_begin(&stats->sync);
aa5a8fdc 372 stats->n_lost++;
821cb9fa 373 u64_stats_update_end(&stats->sync);
aa5a8fdc 374
aa5a8fdc 375 return err;
982b8810
BP
376}
377
7257b535
BP
378static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
379 const struct dp_upcall_info *upcall_info)
cb5087ca 380{
7257b535
BP
381 struct dp_upcall_info later_info;
382 struct sw_flow_key later_key;
383 struct sk_buff *segs, *nskb;
384 int err;
cb5087ca 385
7257b535
BP
386 segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
387 if (IS_ERR(skb))
388 return PTR_ERR(skb);
99769a40 389
7257b535
BP
390 /* Queue all of the segments. */
391 skb = segs;
cb5087ca 392 do {
7257b535 393 err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
982b8810 394 if (err)
7257b535 395 break;
856081f6 396
7257b535
BP
397 if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
398 /* The initial flow key extracted by flow_extract() in
399 * this case is for a first fragment, so we need to
400 * properly mark later fragments.
401 */
402 later_key = *upcall_info->key;
9e44d715 403 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
7257b535
BP
404
405 later_info = *upcall_info;
406 later_info.key = &later_key;
407 upcall_info = &later_info;
408 }
36ce148c 409 } while ((skb = skb->next));
cb5087ca 410
7257b535
BP
411 /* Free all of the segments. */
412 skb = segs;
413 do {
414 nskb = skb->next;
415 if (err)
416 kfree_skb(skb);
417 else
418 consume_skb(skb);
419 } while ((skb = nskb));
420 return err;
421}
422
423static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
424 const struct dp_upcall_info *upcall_info)
425{
426 struct ovs_header *upcall;
6161d3fd 427 struct sk_buff *nskb = NULL;
7257b535
BP
428 struct sk_buff *user_skb; /* to be queued to userspace */
429 struct nlattr *nla;
430 unsigned int len;
431 int err;
432
6161d3fd
JG
433 if (vlan_tx_tag_present(skb)) {
434 nskb = skb_clone(skb, GFP_ATOMIC);
435 if (!nskb)
436 return -ENOMEM;
437
438 err = vlan_deaccel_tag(nskb);
439 if (err)
440 return err;
7257b535 441
6161d3fd
JG
442 skb = nskb;
443 }
444
445 if (nla_attr_size(skb->len) > USHRT_MAX) {
446 err = -EFBIG;
447 goto out;
448 }
7257b535
BP
449
450 len = sizeof(struct ovs_header);
451 len += nla_total_size(skb->len);
452 len += nla_total_size(FLOW_BUFSIZE);
453 if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
454 len += nla_total_size(8);
455
456 user_skb = genlmsg_new(len, GFP_ATOMIC);
6161d3fd
JG
457 if (!user_skb) {
458 err = -ENOMEM;
459 goto out;
460 }
7257b535
BP
461
462 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
463 0, upcall_info->cmd);
464 upcall->dp_ifindex = dp_ifindex;
465
466 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
467 flow_to_nlattrs(upcall_info->key, user_skb);
468 nla_nest_end(user_skb, nla);
469
470 if (upcall_info->userdata)
471 nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
472 nla_get_u64(upcall_info->userdata));
473
474 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
bed53bd1
PS
475
476 skb_copy_and_csum_dev(skb, nla_data(nla));
7257b535 477
6161d3fd
JG
478 err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
479
480out:
481 kfree_skb(nskb);
482 return err;
cb5087ca
BP
483}
484
ed099e92 485/* Called with genl_mutex. */
254f2dc8 486static int flush_flows(int dp_ifindex)
064af421 487{
3544358a
PS
488 struct flow_table *old_table;
489 struct flow_table *new_table;
9c52546b 490 struct datapath *dp;
9c52546b 491
254f2dc8 492 dp = get_dp(dp_ifindex);
9c52546b 493 if (!dp)
ed099e92 494 return -ENODEV;
8d5ebd83 495
20d035b2 496 old_table = genl_dereference(dp->table);
3544358a 497 new_table = flow_tbl_alloc(TBL_MIN_BUCKETS);
8d5ebd83 498 if (!new_table)
ed099e92 499 return -ENOMEM;
8d5ebd83
JG
500
501 rcu_assign_pointer(dp->table, new_table);
502
3544358a 503 flow_tbl_deferred_destroy(old_table);
ed099e92 504 return 0;
064af421
BP
505}
506
4edb9ae9
PS
507static int validate_actions(const struct nlattr *attr,
508 const struct sw_flow_key *key, int depth);
6ff686f2 509
4edb9ae9
PS
510static int validate_sample(const struct nlattr *attr,
511 const struct sw_flow_key *key, int depth)
6ff686f2 512{
4be00e48
BP
513 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
514 const struct nlattr *probability, *actions;
515 const struct nlattr *a;
516 int rem;
517
518 memset(attrs, 0, sizeof(attrs));
6455100f 519 nla_for_each_nested(a, attr, rem) {
4be00e48
BP
520 int type = nla_type(a);
521 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
522 return -EINVAL;
523 attrs[type] = a;
524 }
525 if (rem)
6ff686f2 526 return -EINVAL;
4be00e48
BP
527
528 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
529 if (!probability || nla_len(probability) != sizeof(u32))
6ff686f2
PS
530 return -EINVAL;
531
4be00e48
BP
532 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
533 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
534 return -EINVAL;
535 return validate_actions(actions, key, depth + 1);
4edb9ae9
PS
536}
537
fea393b1
BP
538static int validate_set(const struct nlattr *a,
539 const struct sw_flow_key *flow_key)
4edb9ae9 540{
4edb9ae9
PS
541 const struct nlattr *ovs_key = nla_data(a);
542 int key_type = nla_type(ovs_key);
543
544 /* There can be only one key in a action */
545 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
546 return -EINVAL;
547
548 if (key_type > OVS_KEY_ATTR_MAX ||
549 nla_len(ovs_key) != ovs_key_lens[key_type])
550 return -EINVAL;
551
fea393b1 552 switch (key_type) {
4edb9ae9 553 const struct ovs_key_ipv4 *ipv4_key;
4edb9ae9 554
fea393b1
BP
555 case OVS_KEY_ATTR_PRIORITY:
556 case OVS_KEY_ATTR_TUN_ID:
557 case OVS_KEY_ATTR_ETHERNET:
4edb9ae9
PS
558 break;
559
fea393b1 560 case OVS_KEY_ATTR_IPV4:
4edb9ae9
PS
561 if (flow_key->eth.type != htons(ETH_P_IP))
562 return -EINVAL;
563
564 if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
565 return -EINVAL;
566
567 ipv4_key = nla_data(ovs_key);
568 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
569 return -EINVAL;
570
9e44d715 571 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
7257b535
BP
572 return -EINVAL;
573
4edb9ae9
PS
574 break;
575
fea393b1 576 case OVS_KEY_ATTR_TCP:
4edb9ae9
PS
577 if (flow_key->ip.proto != IPPROTO_TCP)
578 return -EINVAL;
579
580 if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
581 return -EINVAL;
582
583 break;
584
fea393b1 585 case OVS_KEY_ATTR_UDP:
4edb9ae9
PS
586 if (flow_key->ip.proto != IPPROTO_UDP)
587 return -EINVAL;
588
589 if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
590 return -EINVAL;
591 break;
592
593 default:
594 return -EINVAL;
595 }
fea393b1 596
4edb9ae9 597 return 0;
6ff686f2
PS
598}
599
98403001
BP
600static int validate_userspace(const struct nlattr *attr)
601{
6455100f 602 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
98403001
BP
603 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
604 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
605 };
606 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
607 int error;
608
6455100f
PS
609 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
610 attr, userspace_policy);
98403001
BP
611 if (error)
612 return error;
613
6455100f
PS
614 if (!a[OVS_USERSPACE_ATTR_PID] ||
615 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
98403001
BP
616 return -EINVAL;
617
618 return 0;
619}
620
4edb9ae9
PS
621static int validate_actions(const struct nlattr *attr,
622 const struct sw_flow_key *key, int depth)
064af421 623{
23cad98c 624 const struct nlattr *a;
6ff686f2
PS
625 int rem, err;
626
627 if (depth >= SAMPLE_ACTION_DEPTH)
628 return -EOVERFLOW;
23cad98c 629
37a1300c 630 nla_for_each_nested(a, attr, rem) {
98403001 631 /* Expected argument lengths, (u32)-1 for variable length. */
df2c07f4 632 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
fea393b1 633 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
98403001 634 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
fea393b1
BP
635 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
636 [OVS_ACTION_ATTR_POP_VLAN] = 0,
4edb9ae9 637 [OVS_ACTION_ATTR_SET] = (u32)-1,
98403001 638 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
23cad98c 639 };
fea393b1 640 const struct ovs_action_push_vlan *vlan;
23cad98c
BP
641 int type = nla_type(a);
642
6ff686f2 643 if (type > OVS_ACTION_ATTR_MAX ||
98403001
BP
644 (action_lens[type] != nla_len(a) &&
645 action_lens[type] != (u32)-1))
23cad98c
BP
646 return -EINVAL;
647
648 switch (type) {
df2c07f4 649 case OVS_ACTION_ATTR_UNSPEC:
cdee00fd 650 return -EINVAL;
064af421 651
98403001
BP
652 case OVS_ACTION_ATTR_USERSPACE:
653 err = validate_userspace(a);
654 if (err)
655 return err;
656 break;
657
df2c07f4 658 case OVS_ACTION_ATTR_OUTPUT:
23cad98c
BP
659 if (nla_get_u32(a) >= DP_MAX_PORTS)
660 return -EINVAL;
3b1fc5f3 661 break;
cdee00fd 662
4edb9ae9 663
fea393b1
BP
664 case OVS_ACTION_ATTR_POP_VLAN:
665 break;
666
667 case OVS_ACTION_ATTR_PUSH_VLAN:
668 vlan = nla_data(a);
669 if (vlan->vlan_tpid != htons(ETH_P_8021Q))
670 return -EINVAL;
8ddc056d 671 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
064af421 672 return -EINVAL;
23cad98c 673 break;
064af421 674
4edb9ae9 675 case OVS_ACTION_ATTR_SET:
fea393b1 676 err = validate_set(a, key);
4edb9ae9
PS
677 if (err)
678 return err;
23cad98c 679 break;
064af421 680
6ff686f2 681 case OVS_ACTION_ATTR_SAMPLE:
4edb9ae9 682 err = validate_sample(a, key, depth);
6ff686f2
PS
683 if (err)
684 return err;
685 break;
686
23cad98c 687 default:
4edb9ae9 688 return -EINVAL;
23cad98c
BP
689 }
690 }
3c5f6de3 691
23cad98c
BP
692 if (rem > 0)
693 return -EINVAL;
064af421 694
23cad98c 695 return 0;
064af421 696}
4edb9ae9 697
064af421
BP
698static void clear_stats(struct sw_flow *flow)
699{
6bfafa55 700 flow->used = 0;
064af421 701 flow->tcp_flags = 0;
064af421
BP
702 flow->packet_count = 0;
703 flow->byte_count = 0;
704}
705
df2c07f4 706static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 707{
df2c07f4 708 struct ovs_header *ovs_header = info->userhdr;
982b8810 709 struct nlattr **a = info->attrs;
e0e57990 710 struct sw_flow_actions *acts;
982b8810 711 struct sk_buff *packet;
e0e57990 712 struct sw_flow *flow;
f7cd0081 713 struct datapath *dp;
d6569377 714 struct ethhdr *eth;
3f19d399 715 int len;
d6569377 716 int err;
76abe283 717 int key_len;
064af421 718
f7cd0081 719 err = -EINVAL;
df2c07f4
JP
720 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
721 !a[OVS_PACKET_ATTR_ACTIONS] ||
722 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
e5cad958 723 goto err;
064af421 724
df2c07f4 725 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 726 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
727 err = -ENOMEM;
728 if (!packet)
e5cad958 729 goto err;
3f19d399
BP
730 skb_reserve(packet, NET_IP_ALIGN);
731
df2c07f4 732 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
8d5ebd83 733
f7cd0081
BP
734 skb_reset_mac_header(packet);
735 eth = eth_hdr(packet);
064af421 736
d6569377
BP
737 /* Normally, setting the skb 'protocol' field would be handled by a
738 * call to eth_type_trans(), but it assumes there's a sending
739 * device, which we may not have. */
740 if (ntohs(eth->h_proto) >= 1536)
f7cd0081 741 packet->protocol = eth->h_proto;
d6569377 742 else
f7cd0081 743 packet->protocol = htons(ETH_P_802_2);
d3c54451 744
e0e57990
BP
745 /* Build an sw_flow for sending this packet. */
746 flow = flow_alloc();
747 err = PTR_ERR(flow);
748 if (IS_ERR(flow))
e5cad958 749 goto err_kfree_skb;
064af421 750
7257b535 751 err = flow_extract(packet, -1, &flow->key, &key_len);
e0e57990
BP
752 if (err)
753 goto err_flow_put;
e0e57990 754
abff858b
PS
755 err = flow_metadata_from_nlattrs(&flow->key.phy.priority,
756 &flow->key.phy.in_port,
757 &flow->key.phy.tun_id,
df2c07f4 758 a[OVS_PACKET_ATTR_KEY]);
80e5eed9
BP
759 if (err)
760 goto err_flow_put;
761
4edb9ae9
PS
762 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
763 if (err)
764 goto err_flow_put;
765
3544358a 766 flow->hash = flow_hash(&flow->key, key_len);
0fe255df 767
df2c07f4 768 acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
e0e57990
BP
769 err = PTR_ERR(acts);
770 if (IS_ERR(acts))
771 goto err_flow_put;
772 rcu_assign_pointer(flow->sf_acts, acts);
773
774 OVS_CB(packet)->flow = flow;
abff858b 775 packet->priority = flow->key.phy.priority;
e0e57990 776
d6569377 777 rcu_read_lock();
df2c07f4 778 dp = get_dp(ovs_header->dp_ifindex);
f7cd0081 779 err = -ENODEV;
e5cad958
BP
780 if (!dp)
781 goto err_unlock;
cc4015df 782
e9141eec 783 local_bh_disable();
a4af2475 784 err = execute_actions(dp, packet);
e9141eec 785 local_bh_enable();
d6569377 786 rcu_read_unlock();
e0e57990
BP
787
788 flow_put(flow);
e5cad958 789 return err;
064af421 790
e5cad958
BP
791err_unlock:
792 rcu_read_unlock();
e0e57990
BP
793err_flow_put:
794 flow_put(flow);
e5cad958
BP
795err_kfree_skb:
796 kfree_skb(packet);
797err:
d6569377 798 return err;
064af421
BP
799}
800
df2c07f4
JP
801static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
802 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
803 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
804 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
982b8810
BP
805};
806
807static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 808 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
809 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
810 .policy = packet_policy,
df2c07f4 811 .doit = ovs_packet_cmd_execute
982b8810
BP
812 }
813};
814
df2c07f4 815static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
064af421 816{
d6569377 817 int i;
20d035b2 818 struct flow_table *table = genl_dereference(dp->table);
f180c2e2 819
3544358a 820 stats->n_flows = flow_tbl_count(table);
064af421 821
7257b535 822 stats->n_hit = stats->n_missed = stats->n_lost = 0;
d6569377
BP
823 for_each_possible_cpu(i) {
824 const struct dp_stats_percpu *percpu_stats;
825 struct dp_stats_percpu local_stats;
821cb9fa 826 unsigned int start;
44e05eca 827
d6569377 828 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 829
d6569377 830 do {
821cb9fa 831 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
d6569377 832 local_stats = *percpu_stats;
821cb9fa 833 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
064af421 834
d6569377
BP
835 stats->n_hit += local_stats.n_hit;
836 stats->n_missed += local_stats.n_missed;
837 stats->n_lost += local_stats.n_lost;
838 }
839}
064af421 840
df2c07f4
JP
841static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
842 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
843 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
844 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 845};
36956a7d 846
37a1300c
BP
847static struct genl_family dp_flow_genl_family = {
848 .id = GENL_ID_GENERATE,
df2c07f4
JP
849 .hdrsize = sizeof(struct ovs_header),
850 .name = OVS_FLOW_FAMILY,
69685a88 851 .version = OVS_FLOW_VERSION,
df2c07f4 852 .maxattr = OVS_FLOW_ATTR_MAX
37a1300c 853};
ed099e92 854
37a1300c 855static struct genl_multicast_group dp_flow_multicast_group = {
df2c07f4 856 .name = OVS_FLOW_MCGROUP
37a1300c
BP
857};
858
859/* Called with genl_lock. */
df2c07f4 860static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
6455100f
PS
861 struct sk_buff *skb, u32 pid,
862 u32 seq, u32 flags, u8 cmd)
d6569377 863{
37a1300c 864 const int skb_orig_len = skb->len;
d6569377 865 const struct sw_flow_actions *sf_acts;
df2c07f4
JP
866 struct ovs_flow_stats stats;
867 struct ovs_header *ovs_header;
d6569377
BP
868 struct nlattr *nla;
869 unsigned long used;
870 u8 tcp_flags;
871 int err;
064af421 872
d6569377 873 sf_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 874 lockdep_genl_is_held());
064af421 875
df2c07f4
JP
876 ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
877 if (!ovs_header)
37a1300c 878 return -EMSGSIZE;
d6569377 879
99769a40 880 ovs_header->dp_ifindex = get_dpifindex(dp);
d6569377 881
df2c07f4 882 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
d6569377
BP
883 if (!nla)
884 goto nla_put_failure;
885 err = flow_to_nlattrs(&flow->key, skb);
886 if (err)
37a1300c 887 goto error;
d6569377
BP
888 nla_nest_end(skb, nla);
889
890 spin_lock_bh(&flow->lock);
891 used = flow->used;
892 stats.n_packets = flow->packet_count;
893 stats.n_bytes = flow->byte_count;
894 tcp_flags = flow->tcp_flags;
895 spin_unlock_bh(&flow->lock);
896
897 if (used)
df2c07f4 898 NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));
d6569377
BP
899
900 if (stats.n_packets)
6455100f
PS
901 NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
902 sizeof(struct ovs_flow_stats), &stats);
d6569377
BP
903
904 if (tcp_flags)
df2c07f4 905 NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
d6569377 906
df2c07f4 907 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
908 * this is the first flow to be dumped into 'skb'. This is unusual for
909 * Netlink but individual action lists can be longer than
910 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
911 * The userspace caller can always fetch the actions separately if it
912 * really wants them. (Most userspace callers in fact don't care.)
913 *
914 * This can only fail for dump operations because the skb is always
915 * properly sized for single flows.
916 */
df2c07f4 917 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
30053024
BP
918 sf_acts->actions);
919 if (err < 0 && skb_orig_len)
920 goto error;
37a1300c 921
df2c07f4 922 return genlmsg_end(skb, ovs_header);
d6569377
BP
923
924nla_put_failure:
925 err = -EMSGSIZE;
37a1300c 926error:
df2c07f4 927 genlmsg_cancel(skb, ovs_header);
d6569377 928 return err;
44e05eca
BP
929}
930
df2c07f4 931static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
44e05eca 932{
37a1300c
BP
933 const struct sw_flow_actions *sf_acts;
934 int len;
d6569377 935
37a1300c
BP
936 sf_acts = rcu_dereference_protected(flow->sf_acts,
937 lockdep_genl_is_held());
d6569377 938
6455100f
PS
939 /* OVS_FLOW_ATTR_KEY */
940 len = nla_total_size(FLOW_BUFSIZE);
941 /* OVS_FLOW_ATTR_ACTIONS */
942 len += nla_total_size(sf_acts->actions_len);
943 /* OVS_FLOW_ATTR_STATS */
944 len += nla_total_size(sizeof(struct ovs_flow_stats));
945 /* OVS_FLOW_ATTR_TCP_FLAGS */
946 len += nla_total_size(1);
947 /* OVS_FLOW_ATTR_USED */
948 len += nla_total_size(8);
949
950 len += NLMSG_ALIGN(sizeof(struct ovs_header));
951
952 return genlmsg_new(len, GFP_KERNEL);
37a1300c 953}
8d5ebd83 954
6455100f
PS
955static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
956 struct datapath *dp,
37a1300c
BP
957 u32 pid, u32 seq, u8 cmd)
958{
959 struct sk_buff *skb;
960 int retval;
d6569377 961
df2c07f4 962 skb = ovs_flow_cmd_alloc_info(flow);
37a1300c
BP
963 if (!skb)
964 return ERR_PTR(-ENOMEM);
d6569377 965
df2c07f4 966 retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
37a1300c 967 BUG_ON(retval < 0);
d6569377 968 return skb;
064af421
BP
969}
970
df2c07f4 971static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 972{
37a1300c 973 struct nlattr **a = info->attrs;
df2c07f4 974 struct ovs_header *ovs_header = info->userhdr;
37a1300c 975 struct sw_flow_key key;
d6569377 976 struct sw_flow *flow;
37a1300c 977 struct sk_buff *reply;
9c52546b 978 struct datapath *dp;
3544358a 979 struct flow_table *table;
bc4a05c6 980 int error;
76abe283 981 int key_len;
064af421 982
37a1300c
BP
983 /* Extract key. */
984 error = -EINVAL;
df2c07f4 985 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 986 goto error;
df2c07f4 987 error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c
BP
988 if (error)
989 goto error;
064af421 990
37a1300c 991 /* Validate actions. */
df2c07f4 992 if (a[OVS_FLOW_ATTR_ACTIONS]) {
4edb9ae9 993 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
37a1300c
BP
994 if (error)
995 goto error;
df2c07f4 996 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
37a1300c
BP
997 error = -EINVAL;
998 goto error;
999 }
1000
df2c07f4 1001 dp = get_dp(ovs_header->dp_ifindex);
d6569377 1002 error = -ENODEV;
9c52546b 1003 if (!dp)
37a1300c 1004 goto error;
704a1e09 1005
20d035b2 1006 table = genl_dereference(dp->table);
3544358a
PS
1007 flow = flow_tbl_lookup(table, &key, key_len);
1008 if (!flow) {
d6569377
BP
1009 struct sw_flow_actions *acts;
1010
1011 /* Bail out if we're not allowed to create a new flow. */
1012 error = -ENOENT;
df2c07f4 1013 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
37a1300c 1014 goto error;
d6569377
BP
1015
1016 /* Expand table, if necessary, to make room. */
3544358a
PS
1017 if (flow_tbl_need_to_expand(table)) {
1018 struct flow_table *new_table;
1019
1020 new_table = flow_tbl_expand(table);
1021 if (!IS_ERR(new_table)) {
1022 rcu_assign_pointer(dp->table, new_table);
1023 flow_tbl_deferred_destroy(table);
20d035b2 1024 table = genl_dereference(dp->table);
3544358a 1025 }
d6569377
BP
1026 }
1027
1028 /* Allocate flow. */
1029 flow = flow_alloc();
1030 if (IS_ERR(flow)) {
1031 error = PTR_ERR(flow);
37a1300c 1032 goto error;
d6569377 1033 }
37a1300c 1034 flow->key = key;
d6569377
BP
1035 clear_stats(flow);
1036
1037 /* Obtain actions. */
df2c07f4 1038 acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377
BP
1039 error = PTR_ERR(acts);
1040 if (IS_ERR(acts))
1041 goto error_free_flow;
1042 rcu_assign_pointer(flow->sf_acts, acts);
1043
d6569377 1044 /* Put flow in bucket. */
3544358a
PS
1045 flow->hash = flow_hash(&key, key_len);
1046 flow_tbl_insert(table, flow);
37a1300c 1047
df2c07f4 1048 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
6455100f
PS
1049 info->snd_seq,
1050 OVS_FLOW_CMD_NEW);
d6569377
BP
1051 } else {
1052 /* We found a matching flow. */
1053 struct sw_flow_actions *old_acts;
6455100f 1054 struct nlattr *acts_attrs;
d6569377
BP
1055
1056 /* Bail out if we're not allowed to modify an existing flow.
1057 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1058 * because Generic Netlink treats the latter as a dump
1059 * request. We also accept NLM_F_EXCL in case that bug ever
1060 * gets fixed.
1061 */
1062 error = -EEXIST;
df2c07f4 1063 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
37a1300c
BP
1064 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1065 goto error;
d6569377
BP
1066
1067 /* Update actions. */
d6569377 1068 old_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 1069 lockdep_genl_is_held());
6455100f
PS
1070 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1071 if (acts_attrs &&
1072 (old_acts->actions_len != nla_len(acts_attrs) ||
1073 memcmp(old_acts->actions, nla_data(acts_attrs),
1074 old_acts->actions_len))) {
d6569377
BP
1075 struct sw_flow_actions *new_acts;
1076
6455100f 1077 new_acts = flow_actions_alloc(acts_attrs);
d6569377
BP
1078 error = PTR_ERR(new_acts);
1079 if (IS_ERR(new_acts))
37a1300c 1080 goto error;
d6569377
BP
1081
1082 rcu_assign_pointer(flow->sf_acts, new_acts);
1083 flow_deferred_free_acts(old_acts);
1084 }
1085
df2c07f4 1086 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
6455100f 1087 info->snd_seq, OVS_FLOW_CMD_NEW);
d6569377
BP
1088
1089 /* Clear stats. */
df2c07f4 1090 if (a[OVS_FLOW_ATTR_CLEAR]) {
d6569377
BP
1091 spin_lock_bh(&flow->lock);
1092 clear_stats(flow);
1093 spin_unlock_bh(&flow->lock);
1094 }
9c52546b 1095 }
37a1300c
BP
1096
1097 if (!IS_ERR(reply))
1098 genl_notify(reply, genl_info_net(info), info->snd_pid,
6455100f 1099 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
37a1300c
BP
1100 else
1101 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1102 dp_flow_multicast_group.id, PTR_ERR(reply));
d6569377 1103 return 0;
704a1e09 1104
d6569377
BP
1105error_free_flow:
1106 flow_put(flow);
37a1300c 1107error:
9c52546b 1108 return error;
704a1e09
BP
1109}
1110
df2c07f4 1111static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 1112{
37a1300c 1113 struct nlattr **a = info->attrs;
df2c07f4 1114 struct ovs_header *ovs_header = info->userhdr;
37a1300c 1115 struct sw_flow_key key;
37a1300c 1116 struct sk_buff *reply;
704a1e09 1117 struct sw_flow *flow;
9c52546b 1118 struct datapath *dp;
3544358a 1119 struct flow_table *table;
9c52546b 1120 int err;
76abe283 1121 int key_len;
704a1e09 1122
df2c07f4 1123 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 1124 return -EINVAL;
df2c07f4 1125 err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c
BP
1126 if (err)
1127 return err;
704a1e09 1128
df2c07f4 1129 dp = get_dp(ovs_header->dp_ifindex);
9c52546b 1130 if (!dp)
ed099e92 1131 return -ENODEV;
704a1e09 1132
20d035b2 1133 table = genl_dereference(dp->table);
3544358a
PS
1134 flow = flow_tbl_lookup(table, &key, key_len);
1135 if (!flow)
ed099e92 1136 return -ENOENT;
d6569377 1137
6455100f
PS
1138 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1139 info->snd_seq, OVS_FLOW_CMD_NEW);
37a1300c
BP
1140 if (IS_ERR(reply))
1141 return PTR_ERR(reply);
36956a7d 1142
37a1300c 1143 return genlmsg_reply(reply, info);
d6569377 1144}
9c52546b 1145
df2c07f4 1146static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 1147{
37a1300c 1148 struct nlattr **a = info->attrs;
df2c07f4 1149 struct ovs_header *ovs_header = info->userhdr;
37a1300c 1150 struct sw_flow_key key;
37a1300c 1151 struct sk_buff *reply;
d6569377 1152 struct sw_flow *flow;
d6569377 1153 struct datapath *dp;
3544358a 1154 struct flow_table *table;
d6569377 1155 int err;
76abe283 1156 int key_len;
36956a7d 1157
df2c07f4
JP
1158 if (!a[OVS_FLOW_ATTR_KEY])
1159 return flush_flows(ovs_header->dp_ifindex);
1160 err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c
BP
1161 if (err)
1162 return err;
d6569377 1163
df2c07f4 1164 dp = get_dp(ovs_header->dp_ifindex);
d6569377 1165 if (!dp)
6455100f 1166 return -ENODEV;
d6569377 1167
20d035b2 1168 table = genl_dereference(dp->table);
3544358a
PS
1169 flow = flow_tbl_lookup(table, &key, key_len);
1170 if (!flow)
37a1300c 1171 return -ENOENT;
d6569377 1172
df2c07f4 1173 reply = ovs_flow_cmd_alloc_info(flow);
37a1300c
BP
1174 if (!reply)
1175 return -ENOMEM;
1176
3544358a 1177 flow_tbl_remove(table, flow);
37a1300c 1178
df2c07f4
JP
1179 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
1180 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
37a1300c
BP
1181 BUG_ON(err < 0);
1182
1183 flow_deferred_free(flow);
1184
1185 genl_notify(reply, genl_info_net(info), info->snd_pid,
1186 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1187 return 0;
1188}
1189
df2c07f4 1190static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1191{
df2c07f4 1192 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
37a1300c 1193 struct datapath *dp;
20d035b2 1194 struct flow_table *table;
37a1300c 1195
df2c07f4 1196 dp = get_dp(ovs_header->dp_ifindex);
37a1300c
BP
1197 if (!dp)
1198 return -ENODEV;
1199
20d035b2
JG
1200 table = genl_dereference(dp->table);
1201
37a1300c 1202 for (;;) {
37a1300c
BP
1203 struct sw_flow *flow;
1204 u32 bucket, obj;
1205
1206 bucket = cb->args[0];
1207 obj = cb->args[1];
20d035b2 1208 flow = flow_tbl_next(table, &bucket, &obj);
3544358a 1209 if (!flow)
37a1300c
BP
1210 break;
1211
6455100f
PS
1212 if (ovs_flow_cmd_fill_info(flow, dp, skb,
1213 NETLINK_CB(cb->skb).pid,
37a1300c 1214 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1215 OVS_FLOW_CMD_NEW) < 0)
37a1300c
BP
1216 break;
1217
1218 cb->args[0] = bucket;
1219 cb->args[1] = obj;
1220 }
1221 return skb->len;
704a1e09
BP
1222}
1223
37a1300c 1224static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1225 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1226 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1227 .policy = flow_policy,
df2c07f4 1228 .doit = ovs_flow_cmd_new_or_set
37a1300c 1229 },
df2c07f4 1230 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1231 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1232 .policy = flow_policy,
df2c07f4 1233 .doit = ovs_flow_cmd_del
37a1300c 1234 },
df2c07f4 1235 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1236 .flags = 0, /* OK for unprivileged users. */
1237 .policy = flow_policy,
df2c07f4
JP
1238 .doit = ovs_flow_cmd_get,
1239 .dumpit = ovs_flow_cmd_dump
37a1300c 1240 },
df2c07f4 1241 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1242 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1243 .policy = flow_policy,
df2c07f4 1244 .doit = ovs_flow_cmd_new_or_set,
37a1300c
BP
1245 },
1246};
1247
df2c07f4 1248static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
aaff4b55 1249#ifdef HAVE_NLA_NUL_STRING
df2c07f4 1250 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
aaff4b55 1251#endif
b063d9f0 1252 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
d6569377
BP
1253};
1254
aaff4b55
BP
1255static struct genl_family dp_datapath_genl_family = {
1256 .id = GENL_ID_GENERATE,
df2c07f4
JP
1257 .hdrsize = sizeof(struct ovs_header),
1258 .name = OVS_DATAPATH_FAMILY,
69685a88 1259 .version = OVS_DATAPATH_VERSION,
df2c07f4 1260 .maxattr = OVS_DP_ATTR_MAX
aaff4b55
BP
1261};
1262
1263static struct genl_multicast_group dp_datapath_multicast_group = {
df2c07f4 1264 .name = OVS_DATAPATH_MCGROUP
aaff4b55
BP
1265};
1266
df2c07f4 1267static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
aaff4b55 1268 u32 pid, u32 seq, u32 flags, u8 cmd)
064af421 1269{
df2c07f4 1270 struct ovs_header *ovs_header;
e926dfe3 1271 struct ovs_dp_stats dp_stats;
064af421
BP
1272 int err;
1273
df2c07f4 1274 ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
aaff4b55 1275 flags, cmd);
df2c07f4 1276 if (!ovs_header)
aaff4b55 1277 goto error;
064af421 1278
b063d9f0 1279 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1280
d6569377 1281 rcu_read_lock();
df2c07f4 1282 err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
d6569377 1283 rcu_read_unlock();
064af421 1284 if (err)
d6569377 1285 goto nla_put_failure;
064af421 1286
e926dfe3
JG
1287 get_dp_stats(dp, &dp_stats);
1288 NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats);
d6569377 1289
df2c07f4 1290 return genlmsg_end(skb, ovs_header);
d6569377
BP
1291
1292nla_put_failure:
df2c07f4 1293 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1294error:
1295 return -EMSGSIZE;
d6569377
BP
1296}
1297
df2c07f4 1298static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
aaff4b55 1299 u32 seq, u8 cmd)
d6569377 1300{
d6569377 1301 struct sk_buff *skb;
aaff4b55 1302 int retval;
d6569377 1303
aaff4b55 1304 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
064af421 1305 if (!skb)
d6569377 1306 return ERR_PTR(-ENOMEM);
659586ef 1307
df2c07f4 1308 retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
aaff4b55
BP
1309 if (retval < 0) {
1310 kfree_skb(skb);
1311 return ERR_PTR(retval);
1312 }
1313 return skb;
1314}
9dca7bd5 1315
df2c07f4 1316static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
aaff4b55 1317{
df2c07f4 1318 return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
d6569377
BP
1319}
1320
ed099e92 1321/* Called with genl_mutex and optionally with RTNL lock also. */
6455100f
PS
1322static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
1323 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1324{
254f2dc8
BP
1325 struct datapath *dp;
1326
df2c07f4
JP
1327 if (!a[OVS_DP_ATTR_NAME])
1328 dp = get_dp(ovs_header->dp_ifindex);
254f2dc8 1329 else {
d6569377 1330 struct vport *vport;
d6569377 1331
057dd6d2 1332 rcu_read_lock();
df2c07f4
JP
1333 vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
1334 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
057dd6d2 1335 rcu_read_unlock();
d6569377 1336 }
254f2dc8 1337 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1338}
1339
df2c07f4 1340static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1341{
aaff4b55 1342 struct nlattr **a = info->attrs;
d6569377 1343 struct vport_parms parms;
aaff4b55 1344 struct sk_buff *reply;
d6569377
BP
1345 struct datapath *dp;
1346 struct vport *vport;
d6569377 1347 int err;
d6569377 1348
d6569377 1349 err = -EINVAL;
ea36840f 1350 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1351 goto err;
1352
df2c07f4 1353 err = ovs_dp_cmd_validate(a);
aaff4b55
BP
1354 if (err)
1355 goto err;
d6569377
BP
1356
1357 rtnl_lock();
d6569377
BP
1358 err = -ENODEV;
1359 if (!try_module_get(THIS_MODULE))
ed099e92 1360 goto err_unlock_rtnl;
d6569377 1361
d6569377
BP
1362 err = -ENOMEM;
1363 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1364 if (dp == NULL)
1365 goto err_put_module;
1366 INIT_LIST_HEAD(&dp->port_list);
d6569377
BP
1367
1368 /* Initialize kobject for bridge. This will be added as
1369 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1370 dp->ifobj.kset = NULL;
1371 kobject_init(&dp->ifobj, &dp_ktype);
1372
1373 /* Allocate table. */
1374 err = -ENOMEM;
3544358a 1375 rcu_assign_pointer(dp->table, flow_tbl_alloc(TBL_MIN_BUCKETS));
d6569377
BP
1376 if (!dp->table)
1377 goto err_free_dp;
1378
99769a40
JG
1379 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1380 if (!dp->stats_percpu) {
1381 err = -ENOMEM;
1382 goto err_destroy_table;
1383 }
1384
d6569377 1385 /* Set up our datapath device. */
df2c07f4
JP
1386 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1387 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1388 parms.options = NULL;
1389 parms.dp = dp;
df2c07f4 1390 parms.port_no = OVSP_LOCAL;
ea36840f 1391 parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
b063d9f0 1392
d6569377
BP
1393 vport = new_vport(&parms);
1394 if (IS_ERR(vport)) {
1395 err = PTR_ERR(vport);
1396 if (err == -EBUSY)
1397 err = -EEXIST;
1398
99769a40 1399 goto err_destroy_percpu;
d6569377 1400 }
d6569377 1401
6455100f
PS
1402 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1403 info->snd_seq, OVS_DP_CMD_NEW);
aaff4b55
BP
1404 err = PTR_ERR(reply);
1405 if (IS_ERR(reply))
1406 goto err_destroy_local_port;
1407
254f2dc8 1408 list_add_tail(&dp->list_node, &dps);
d6569377
BP
1409 dp_sysfs_add_dp(dp);
1410
d6569377
BP
1411 rtnl_unlock();
1412
aaff4b55
BP
1413 genl_notify(reply, genl_info_net(info), info->snd_pid,
1414 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
d6569377
BP
1415 return 0;
1416
1417err_destroy_local_port:
cbbf4e1e 1418 dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
99769a40
JG
1419err_destroy_percpu:
1420 free_percpu(dp->stats_percpu);
d6569377 1421err_destroy_table:
20d035b2 1422 flow_tbl_destroy(genl_dereference(dp->table));
d6569377 1423err_free_dp:
d6569377
BP
1424 kfree(dp);
1425err_put_module:
1426 module_put(THIS_MODULE);
ed099e92 1427err_unlock_rtnl:
d6569377 1428 rtnl_unlock();
d6569377 1429err:
064af421
BP
1430 return err;
1431}
1432
df2c07f4 1433static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
44e05eca 1434{
ed099e92 1435 struct vport *vport, *next_vport;
aaff4b55 1436 struct sk_buff *reply;
9c52546b 1437 struct datapath *dp;
d6569377 1438 int err;
44e05eca 1439
df2c07f4 1440 err = ovs_dp_cmd_validate(info->attrs);
aaff4b55 1441 if (err)
d6569377 1442 goto exit;
44e05eca 1443
d6569377 1444 rtnl_lock();
aaff4b55 1445 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377
BP
1446 err = PTR_ERR(dp);
1447 if (IS_ERR(dp))
aaff4b55
BP
1448 goto exit_unlock;
1449
6455100f
PS
1450 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1451 info->snd_seq, OVS_DP_CMD_DEL);
aaff4b55
BP
1452 err = PTR_ERR(reply);
1453 if (IS_ERR(reply))
1454 goto exit_unlock;
9c52546b 1455
6455100f 1456 list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
df2c07f4 1457 if (vport->port_no != OVSP_LOCAL)
ed099e92
BP
1458 dp_detach_port(vport);
1459
1460 dp_sysfs_del_dp(dp);
254f2dc8 1461 list_del(&dp->list_node);
cbbf4e1e 1462 dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
ed099e92 1463
99620d2c
JG
1464 /* rtnl_unlock() will wait until all the references to devices that
1465 * are pending unregistration have been dropped. We do it here to
1466 * ensure that any internal devices (which contain DP pointers) are
1467 * fully destroyed before freeing the datapath.
1468 */
1469 rtnl_unlock();
1470
ed099e92
BP
1471 call_rcu(&dp->rcu, destroy_dp_rcu);
1472 module_put(THIS_MODULE);
1473
aaff4b55
BP
1474 genl_notify(reply, genl_info_net(info), info->snd_pid,
1475 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
99620d2c
JG
1476
1477 return 0;
d6569377 1478
aaff4b55 1479exit_unlock:
d6569377
BP
1480 rtnl_unlock();
1481exit:
1482 return err;
44e05eca
BP
1483}
1484
df2c07f4 1485static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1486{
aaff4b55 1487 struct sk_buff *reply;
d6569377 1488 struct datapath *dp;
d6569377 1489 int err;
064af421 1490
df2c07f4 1491 err = ovs_dp_cmd_validate(info->attrs);
aaff4b55
BP
1492 if (err)
1493 return err;
38c6ecbc 1494
aaff4b55 1495 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1496 if (IS_ERR(dp))
aaff4b55 1497 return PTR_ERR(dp);
38c6ecbc 1498
6455100f
PS
1499 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1500 info->snd_seq, OVS_DP_CMD_NEW);
aaff4b55
BP
1501 if (IS_ERR(reply)) {
1502 err = PTR_ERR(reply);
1503 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1504 dp_datapath_multicast_group.id, err);
1505 return 0;
1506 }
1507
1508 genl_notify(reply, genl_info_net(info), info->snd_pid,
1509 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1510 return 0;
064af421
BP
1511}
1512
df2c07f4 1513static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1514{
aaff4b55 1515 struct sk_buff *reply;
d6569377 1516 struct datapath *dp;
d6569377 1517 int err;
1dcf111b 1518
df2c07f4 1519 err = ovs_dp_cmd_validate(info->attrs);
aaff4b55
BP
1520 if (err)
1521 return err;
1dcf111b 1522
aaff4b55 1523 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1524 if (IS_ERR(dp))
aaff4b55 1525 return PTR_ERR(dp);
1dcf111b 1526
6455100f
PS
1527 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1528 info->snd_seq, OVS_DP_CMD_NEW);
aaff4b55
BP
1529 if (IS_ERR(reply))
1530 return PTR_ERR(reply);
1531
1532 return genlmsg_reply(reply, info);
1dcf111b
JP
1533}
1534
df2c07f4 1535static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1536{
254f2dc8
BP
1537 struct datapath *dp;
1538 int skip = cb->args[0];
1539 int i = 0;
a7786963 1540
6455100f 1541 list_for_each_entry(dp, &dps, list_node) {
254f2dc8 1542 if (i < skip)
d6569377 1543 continue;
df2c07f4 1544 if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
aaff4b55 1545 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1546 OVS_DP_CMD_NEW) < 0)
aaff4b55 1547 break;
254f2dc8 1548 i++;
a7786963 1549 }
aaff4b55 1550
254f2dc8
BP
1551 cb->args[0] = i;
1552
aaff4b55 1553 return skb->len;
c19e6535
BP
1554}
1555
aaff4b55 1556static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1557 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1558 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1559 .policy = datapath_policy,
df2c07f4 1560 .doit = ovs_dp_cmd_new
aaff4b55 1561 },
df2c07f4 1562 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1563 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1564 .policy = datapath_policy,
df2c07f4 1565 .doit = ovs_dp_cmd_del
aaff4b55 1566 },
df2c07f4 1567 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1568 .flags = 0, /* OK for unprivileged users. */
1569 .policy = datapath_policy,
df2c07f4
JP
1570 .doit = ovs_dp_cmd_get,
1571 .dumpit = ovs_dp_cmd_dump
aaff4b55 1572 },
df2c07f4 1573 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1574 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1575 .policy = datapath_policy,
df2c07f4 1576 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1577 },
1578};
1579
df2c07f4 1580static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
f0fef760 1581#ifdef HAVE_NLA_NUL_STRING
df2c07f4 1582 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1583 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
df2c07f4 1584 [OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
f0fef760 1585#else
f613a0d7 1586 [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct ovs_vport_stats) },
df2c07f4 1587 [OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
f0fef760 1588#endif
d48c88ec
JG
1589 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1590 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1591 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1592 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1593};
1594
f0fef760
BP
1595static struct genl_family dp_vport_genl_family = {
1596 .id = GENL_ID_GENERATE,
df2c07f4
JP
1597 .hdrsize = sizeof(struct ovs_header),
1598 .name = OVS_VPORT_FAMILY,
69685a88 1599 .version = OVS_VPORT_VERSION,
df2c07f4 1600 .maxattr = OVS_VPORT_ATTR_MAX
f0fef760
BP
1601};
1602
f14d8083 1603struct genl_multicast_group dp_vport_multicast_group = {
df2c07f4 1604 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1605};
1606
1607/* Called with RTNL lock or RCU read lock. */
df2c07f4 1608static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
f0fef760 1609 u32 pid, u32 seq, u32 flags, u8 cmd)
064af421 1610{
df2c07f4 1611 struct ovs_header *ovs_header;
e926dfe3 1612 struct ovs_vport_stats vport_stats;
c19e6535
BP
1613 int err;
1614
df2c07f4 1615 ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
f0fef760 1616 flags, cmd);
df2c07f4 1617 if (!ovs_header)
f0fef760 1618 return -EMSGSIZE;
c19e6535 1619
99769a40 1620 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1621
df2c07f4 1622 NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
16b82e84
JG
1623 NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type);
1624 NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport));
b063d9f0 1625 NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
c19e6535 1626
e926dfe3
JG
1627 vport_get_stats(vport, &vport_stats);
1628 NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1629 &vport_stats);
c19e6535 1630
16b82e84
JG
1631 NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN,
1632 vport->ops->get_addr(vport));
c19e6535 1633
c19e6535 1634 err = vport_get_options(vport, skb);
f0fef760
BP
1635 if (err == -EMSGSIZE)
1636 goto error;
c19e6535 1637
df2c07f4 1638 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1639
1640nla_put_failure:
1641 err = -EMSGSIZE;
f0fef760 1642error:
df2c07f4 1643 genlmsg_cancel(skb, ovs_header);
f0fef760 1644 return err;
064af421
BP
1645}
1646
f0fef760 1647/* Called with RTNL lock or RCU read lock. */
f14d8083
EJ
1648struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1649 u32 seq, u8 cmd)
064af421 1650{
c19e6535 1651 struct sk_buff *skb;
f0fef760 1652 int retval;
c19e6535 1653
f0fef760 1654 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1655 if (!skb)
1656 return ERR_PTR(-ENOMEM);
1657
df2c07f4 1658 retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
f0fef760
BP
1659 if (retval < 0) {
1660 kfree_skb(skb);
1661 return ERR_PTR(retval);
1662 }
c19e6535 1663 return skb;
f0fef760 1664}
c19e6535 1665
df2c07f4 1666static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
f0fef760 1667{
df2c07f4 1668 return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
c19e6535 1669}
51d4d598 1670
ed099e92 1671/* Called with RTNL lock or RCU read lock. */
df2c07f4
JP
1672static struct vport *lookup_vport(struct ovs_header *ovs_header,
1673 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1674{
1675 struct datapath *dp;
1676 struct vport *vport;
1677
df2c07f4
JP
1678 if (a[OVS_VPORT_ATTR_NAME]) {
1679 vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1680 if (!vport)
c19e6535 1681 return ERR_PTR(-ENODEV);
c19e6535 1682 return vport;
df2c07f4
JP
1683 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1684 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1685
1686 if (port_no >= DP_MAX_PORTS)
f0fef760 1687 return ERR_PTR(-EFBIG);
c19e6535 1688
df2c07f4 1689 dp = get_dp(ovs_header->dp_ifindex);
c19e6535
BP
1690 if (!dp)
1691 return ERR_PTR(-ENODEV);
f2459fe7 1692
cbbf4e1e 1693 vport = rcu_dereference_rtnl(dp->ports[port_no]);
ed099e92 1694 if (!vport)
c19e6535 1695 return ERR_PTR(-ENOENT);
c19e6535
BP
1696 return vport;
1697 } else
1698 return ERR_PTR(-EINVAL);
064af421
BP
1699}
1700
ed099e92 1701/* Called with RTNL lock. */
6455100f
PS
1702static int change_vport(struct vport *vport,
1703 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
064af421 1704{
c19e6535 1705 int err = 0;
f613a0d7 1706
df2c07f4 1707 if (a[OVS_VPORT_ATTR_STATS])
f613a0d7
PS
1708 vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1709
1710 if (a[OVS_VPORT_ATTR_ADDRESS])
df2c07f4 1711 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
f613a0d7 1712
c19e6535
BP
1713 return err;
1714}
1715
df2c07f4 1716static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1717{
f0fef760 1718 struct nlattr **a = info->attrs;
df2c07f4 1719 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1720 struct vport_parms parms;
ed099e92 1721 struct sk_buff *reply;
c19e6535 1722 struct vport *vport;
c19e6535 1723 struct datapath *dp;
b0ec0f27 1724 u32 port_no;
c19e6535 1725 int err;
b0ec0f27 1726
c19e6535 1727 err = -EINVAL;
ea36840f
BP
1728 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1729 !a[OVS_VPORT_ATTR_UPCALL_PID])
f0fef760
BP
1730 goto exit;
1731
df2c07f4 1732 err = ovs_vport_cmd_validate(a);
f0fef760
BP
1733 if (err)
1734 goto exit;
51d4d598 1735
c19e6535 1736 rtnl_lock();
df2c07f4 1737 dp = get_dp(ovs_header->dp_ifindex);
c19e6535
BP
1738 err = -ENODEV;
1739 if (!dp)
ed099e92 1740 goto exit_unlock;
c19e6535 1741
df2c07f4
JP
1742 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1743 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1744
1745 err = -EFBIG;
1746 if (port_no >= DP_MAX_PORTS)
ed099e92 1747 goto exit_unlock;
c19e6535 1748
cbbf4e1e 1749 vport = rtnl_dereference(dp->ports[port_no]);
c19e6535
BP
1750 err = -EBUSY;
1751 if (vport)
ed099e92 1752 goto exit_unlock;
c19e6535
BP
1753 } else {
1754 for (port_no = 1; ; port_no++) {
1755 if (port_no >= DP_MAX_PORTS) {
1756 err = -EFBIG;
ed099e92 1757 goto exit_unlock;
c19e6535 1758 }
cbbf4e1e 1759 vport = rtnl_dereference(dp->ports[port_no]);
c19e6535
BP
1760 if (!vport)
1761 break;
51d4d598 1762 }
064af421 1763 }
b0ec0f27 1764
df2c07f4
JP
1765 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1766 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1767 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1768 parms.dp = dp;
1769 parms.port_no = port_no;
ea36840f 1770 parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
1771
1772 vport = new_vport(&parms);
1773 err = PTR_ERR(vport);
1774 if (IS_ERR(vport))
ed099e92 1775 goto exit_unlock;
c19e6535 1776
6455100f 1777 dp_sysfs_add_if(vport);
c19e6535
BP
1778
1779 err = change_vport(vport, a);
f0fef760 1780 if (!err) {
df2c07f4 1781 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
6455100f
PS
1782 info->snd_seq,
1783 OVS_VPORT_CMD_NEW);
f0fef760
BP
1784 if (IS_ERR(reply))
1785 err = PTR_ERR(reply);
1786 }
c19e6535
BP
1787 if (err) {
1788 dp_detach_port(vport);
ed099e92 1789 goto exit_unlock;
c19e6535 1790 }
f0fef760
BP
1791 genl_notify(reply, genl_info_net(info), info->snd_pid,
1792 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
c19e6535 1793
c19e6535 1794
ed099e92 1795exit_unlock:
c19e6535 1796 rtnl_unlock();
c19e6535
BP
1797exit:
1798 return err;
44e05eca
BP
1799}
1800
df2c07f4 1801static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1802{
f0fef760
BP
1803 struct nlattr **a = info->attrs;
1804 struct sk_buff *reply;
c19e6535 1805 struct vport *vport;
c19e6535 1806 int err;
44e05eca 1807
df2c07f4 1808 err = ovs_vport_cmd_validate(a);
f0fef760 1809 if (err)
c19e6535
BP
1810 goto exit;
1811
1812 rtnl_lock();
f0fef760 1813 vport = lookup_vport(info->userhdr, a);
c19e6535
BP
1814 err = PTR_ERR(vport);
1815 if (IS_ERR(vport))
f0fef760 1816 goto exit_unlock;
44e05eca 1817
c19e6535 1818 err = 0;
6455100f 1819 if (a[OVS_VPORT_ATTR_TYPE] &&
16b82e84 1820 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
4879d4c7 1821 err = -EINVAL;
6455100f 1822
4879d4c7 1823 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
df2c07f4 1824 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535
BP
1825 if (!err)
1826 err = change_vport(vport, a);
b063d9f0
JG
1827 if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
1828 vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 1829
df2c07f4
JP
1830 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1831 OVS_VPORT_CMD_NEW);
f0fef760
BP
1832 if (IS_ERR(reply)) {
1833 err = PTR_ERR(reply);
1834 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1835 dp_vport_multicast_group.id, err);
1836 return 0;
1837 }
1838
1839 genl_notify(reply, genl_info_net(info), info->snd_pid,
1840 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1841
1842exit_unlock:
c19e6535
BP
1843 rtnl_unlock();
1844exit:
1845 return err;
064af421
BP
1846}
1847
df2c07f4 1848static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1849{
f0fef760
BP
1850 struct nlattr **a = info->attrs;
1851 struct sk_buff *reply;
c19e6535 1852 struct vport *vport;
c19e6535
BP
1853 int err;
1854
df2c07f4 1855 err = ovs_vport_cmd_validate(a);
f0fef760 1856 if (err)
c19e6535
BP
1857 goto exit;
1858
1859 rtnl_lock();
f0fef760 1860 vport = lookup_vport(info->userhdr, a);
c19e6535 1861 err = PTR_ERR(vport);
f0fef760
BP
1862 if (IS_ERR(vport))
1863 goto exit_unlock;
c19e6535 1864
df2c07f4 1865 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
1866 err = -EINVAL;
1867 goto exit_unlock;
1868 }
1869
df2c07f4
JP
1870 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1871 OVS_VPORT_CMD_DEL);
f0fef760
BP
1872 err = PTR_ERR(reply);
1873 if (IS_ERR(reply))
1874 goto exit_unlock;
1875
3544358a 1876 dp_detach_port(vport);
f0fef760
BP
1877
1878 genl_notify(reply, genl_info_net(info), info->snd_pid,
1879 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1880
1881exit_unlock:
c19e6535
BP
1882 rtnl_unlock();
1883exit:
1884 return err;
7c40efc9
BP
1885}
1886
df2c07f4 1887static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1888{
f0fef760 1889 struct nlattr **a = info->attrs;
df2c07f4 1890 struct ovs_header *ovs_header = info->userhdr;
ed099e92 1891 struct sk_buff *reply;
c19e6535 1892 struct vport *vport;
c19e6535
BP
1893 int err;
1894
df2c07f4 1895 err = ovs_vport_cmd_validate(a);
f0fef760
BP
1896 if (err)
1897 goto exit;
c19e6535 1898
ed099e92 1899 rcu_read_lock();
df2c07f4 1900 vport = lookup_vport(ovs_header, a);
c19e6535
BP
1901 err = PTR_ERR(vport);
1902 if (IS_ERR(vport))
f0fef760 1903 goto exit_unlock;
c19e6535 1904
df2c07f4
JP
1905 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1906 OVS_VPORT_CMD_NEW);
ed099e92
BP
1907 err = PTR_ERR(reply);
1908 if (IS_ERR(reply))
f0fef760 1909 goto exit_unlock;
ed099e92 1910
df2fa9b5
JG
1911 rcu_read_unlock();
1912
1913 return genlmsg_reply(reply, info);
ed099e92 1914
f0fef760 1915exit_unlock:
ed099e92 1916 rcu_read_unlock();
f0fef760 1917exit:
c19e6535
BP
1918 return err;
1919}
1920
df2c07f4 1921static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1922{
df2c07f4 1923 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535
BP
1924 struct datapath *dp;
1925 u32 port_no;
f0fef760 1926 int retval;
c19e6535 1927
df2c07f4 1928 dp = get_dp(ovs_header->dp_ifindex);
c19e6535 1929 if (!dp)
f0fef760 1930 return -ENODEV;
ed099e92
BP
1931
1932 rcu_read_lock();
f0fef760 1933 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
ed099e92 1934 struct vport *vport;
ed099e92 1935
cbbf4e1e 1936 vport = rcu_dereference(dp->ports[port_no]);
ed099e92
BP
1937 if (!vport)
1938 continue;
1939
df2c07f4 1940 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
f0fef760 1941 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1942 OVS_VPORT_CMD_NEW) < 0)
f0fef760 1943 break;
c19e6535 1944 }
ed099e92 1945 rcu_read_unlock();
c19e6535 1946
f0fef760
BP
1947 cb->args[0] = port_no;
1948 retval = skb->len;
1949
1950 return retval;
7c40efc9
BP
1951}
1952
f0fef760 1953static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 1954 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
1955 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1956 .policy = vport_policy,
df2c07f4 1957 .doit = ovs_vport_cmd_new
f0fef760 1958 },
df2c07f4 1959 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
1960 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1961 .policy = vport_policy,
df2c07f4 1962 .doit = ovs_vport_cmd_del
f0fef760 1963 },
df2c07f4 1964 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
1965 .flags = 0, /* OK for unprivileged users. */
1966 .policy = vport_policy,
df2c07f4
JP
1967 .doit = ovs_vport_cmd_get,
1968 .dumpit = ovs_vport_cmd_dump
f0fef760 1969 },
df2c07f4 1970 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
1971 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1972 .policy = vport_policy,
df2c07f4 1973 .doit = ovs_vport_cmd_set,
f0fef760
BP
1974 },
1975};
1976
982b8810
BP
/*
 * Everything needed to register one Generic Netlink family in a single
 * table entry.  Field order matters: the table that uses this type may be
 * initialized positionally.
 */
struct genl_family_and_ops {
	/* Family descriptor to register. */
	struct genl_family *family;
	/* Operations array registered together with 'family'. */
	struct genl_ops *ops;
	/* Number of elements in 'ops'. */
	int n_ops;
	/* Multicast group to attach to 'family', or NULL for none. */
	struct genl_multicast_group *group;
};
ed099e92 1983
982b8810 1984static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1985 { &dp_datapath_genl_family,
1986 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1987 &dp_datapath_multicast_group },
f0fef760
BP
1988 { &dp_vport_genl_family,
1989 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1990 &dp_vport_multicast_group },
37a1300c
BP
1991 { &dp_flow_genl_family,
1992 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1993 &dp_flow_multicast_group },
982b8810
BP
1994 { &dp_packet_genl_family,
1995 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1996 NULL },
1997};
ed099e92 1998
982b8810
BP
1999static void dp_unregister_genl(int n_families)
2000{
2001 int i;
ed099e92 2002
b867ca75 2003 for (i = 0; i < n_families; i++)
982b8810 2004 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
2005}
2006
982b8810 2007static int dp_register_genl(void)
064af421 2008{
982b8810
BP
2009 int n_registered;
2010 int err;
2011 int i;
064af421 2012
982b8810
BP
2013 n_registered = 0;
2014 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2015 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 2016
982b8810
BP
2017 err = genl_register_family_with_ops(f->family, f->ops,
2018 f->n_ops);
2019 if (err)
2020 goto error;
2021 n_registered++;
e22d4953 2022
982b8810
BP
2023 if (f->group) {
2024 err = genl_register_mc_group(f->family, f->group);
2025 if (err)
2026 goto error;
2027 }
2028 }
9cc8b4e4 2029
982b8810 2030 return 0;
064af421
BP
2031
2032error:
982b8810
BP
2033 dp_unregister_genl(n_registered);
2034 return err;
064af421
BP
2035}
2036
22d24ebf
BP
2037static int __init dp_init(void)
2038{
f2459fe7 2039 struct sk_buff *dummy_skb;
22d24ebf
BP
2040 int err;
2041
f2459fe7 2042 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 2043
dc5f3fef 2044 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
6455100f 2045 VERSION BUILDNR);
064af421 2046
3544358a 2047 err = tnl_init();
064af421
BP
2048 if (err)
2049 goto error;
2050
3544358a
PS
2051 err = flow_init();
2052 if (err)
2053 goto error_tnl_exit;
2054
f2459fe7 2055 err = vport_init();
064af421
BP
2056 if (err)
2057 goto error_flow_exit;
2058
f2459fe7
JG
2059 err = register_netdevice_notifier(&dp_device_notifier);
2060 if (err)
2061 goto error_vport_exit;
2062
982b8810
BP
2063 err = dp_register_genl();
2064 if (err < 0)
37a1300c 2065 goto error_unreg_notifier;
982b8810 2066
064af421
BP
2067 return 0;
2068
2069error_unreg_notifier:
2070 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2071error_vport_exit:
2072 vport_exit();
064af421
BP
2073error_flow_exit:
2074 flow_exit();
3544358a
PS
2075error_tnl_exit:
2076 tnl_exit();
064af421
BP
2077error:
2078 return err;
2079}
2080
2081static void dp_cleanup(void)
2082{
2083 rcu_barrier();
982b8810 2084 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
064af421 2085 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7 2086 vport_exit();
064af421 2087 flow_exit();
3544358a 2088 tnl_exit();
064af421
BP
2089}
2090
/* dp_init() and dp_cleanup() are the module's init and exit handlers. */
2091module_init(dp_init);
2092module_exit(dp_cleanup);
2093
/* Module description and license declarations. */
2094MODULE_DESCRIPTION("Open vSwitch switching datapath");
2095MODULE_LICENSE("GPL");