/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "vlan.h"
#include "tunnel.h"
#include "vport-internal_dev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
#error Kernels before 2.6.18 or after 3.0 are not supported by this version of Open vSwitch.
#endif

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_userspace_packets(struct datapath *, struct sk_buff *,
				   const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
	if (dev) {
		struct vport *vport = internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct flow_table *get_table_protected(struct datapath *dp)
{
	return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
	return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
	return vport_get_name(rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	if (ifindex < 0)
		return ifindex;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	NLA_PUT_U32(skb, IFLA_MASTER,
		    vport_get_ifindex(get_vport_protected(dp, OVSP_LOCAL)));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
	kfree(dp);
}

static struct kobj_type dp_ktype = {
	.release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	flow_tbl_destroy(dp->table);
	free_percpu(dp->stats_percpu);
	kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;

		rcu_assign_pointer(dp->ports[parms->port_no], vport);
		list_add(&vport->node, &dp->port_list);

		dp_ifinfo_notify(RTM_NEWLINK, vport);
	}

	return vport;
}

/* Called with RTNL lock. */
void dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	if (p->port_no != OVSP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	list_del(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct dp_stats_percpu *stats;
	int stats_counter_off;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		int key_len;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'. */
		error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		if (is_frag && dp->drop_frags) {
			consume_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
		if (unlikely(!flow)) {
			struct dp_upcall_info upcall;

			upcall.cmd = OVS_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow;
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);
	execute_actions(dp, skb);

out:
	/* Update datapath statistics. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}

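/* Copies 'skb' into the buffer 'to', completing the checksum of a
 * CHECKSUM_PARTIAL packet along the way so that userspace receives a packet
 * with a correct checksum. */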
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
	u16 csum_start, csum_offset;
	__wsum csum;

	get_skb_csum_pointers(skb, &csum_start, &csum_offset);
	csum_start -= skb_headroom(skb);

	skb_copy_bits(skb, 0, to, csum_start);

	csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
				      skb->len - csum_start, 0);
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = 1,
	.maxattr = OVS_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during OVS_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

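/* Maps a (datapath, upcall command) pair to one of the preallocated multicast
 * groups by hashing dp_ifindex and cmd into the group array. */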
static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
	u32 idx;
	BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

	idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
	return packet_mc_groups[idx].id;
}

static int packet_register_mc_groups(void)
{
	int i;

	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
		struct genl_multicast_group *group = &packet_mc_groups[i];
		int error;

		sprintf(group->name, "packet%d", i);
		error = genl_register_mc_group(&dp_packet_genl_family, group);
		if (error)
			return error;
	}
	return 0;
}

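/* Queues 'skb' to userspace as directed by 'upcall_info', segmenting GSO
 * packets first so that each piece fits in a single Netlink attribute.
 * On failure, the datapath's n_lost counter is incremented. */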
int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	WARN_ON_ONCE(skb_shared(skb));

	forward_ip_summed(skb, true);

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

		if (IS_ERR(nskb)) {
			kfree_skb(skb);
			err = PTR_ERR(nskb);
			goto err;
		}
		consume_skb(skb);
		skb = nskb;
	}

	err = queue_userspace_packets(dp, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
				   const struct dp_upcall_info *upcall_info)
{
	u32 group = packet_mc_group(dp, upcall_info->cmd);
	struct sk_buff *nskb;
	int err;

	do {
		struct ovs_header *upcall;
		struct sk_buff *user_skb; /* to be queued to userspace */
		struct nlattr *nla;
		unsigned int len;

		nskb = skb->next;
		skb->next = NULL;

		err = vlan_deaccel_tag(skb);
		if (unlikely(err))
			goto err_kfree_skbs;

		if (nla_attr_size(skb->len) > USHRT_MAX)
			goto err_kfree_skbs;

		len = sizeof(struct ovs_header);
		len += nla_total_size(skb->len);
		len += nla_total_size(FLOW_BUFSIZE);
		if (upcall_info->userdata)
			len += nla_total_size(8);
		if (upcall_info->sample_pool)
			len += nla_total_size(4);
		if (upcall_info->actions_len)
			len += nla_total_size(upcall_info->actions_len);

		user_skb = genlmsg_new(len, GFP_ATOMIC);
		if (!user_skb) {
			netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
			goto err_kfree_skbs;
		}

		upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
		upcall->dp_ifindex = dp->dp_ifindex;

		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
		flow_to_nlattrs(upcall_info->key, user_skb);
		nla_nest_end(user_skb, nla);

		if (upcall_info->userdata)
			nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata);
		if (upcall_info->sample_pool)
			nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
		if (upcall_info->actions_len) {
			const struct nlattr *actions = upcall_info->actions;
			u32 actions_len = upcall_info->actions_len;

			nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
			memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
			nla_nest_end(user_skb, nla);
		}

		nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			copy_and_csum_skb(skb, nla_data(nla));
		else
			skb_copy_bits(skb, 0, nla_data(nla), skb->len);

		err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
		if (err)
			goto err_kfree_skbs;

		consume_skb(skb);
		skb = nskb;
	} while (skb);
	return 0;

err_kfree_skbs:
	kfree_skb(skb);
	while ((skb = nskb) != NULL) {
		nskb = skb->next;
		kfree_skb(skb);
	}
	return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
	struct flow_table *old_table;
	struct flow_table *new_table;
	struct datapath *dp;

	dp = get_dp(dp_ifindex);
	if (!dp)
		return -ENODEV;

	old_table = get_table_protected(dp);
	new_table = flow_tbl_alloc(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	flow_tbl_deferred_destroy(old_table);
	return 0;
}

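/* Checks that the nested OVS_ACTION_ATTR_* attributes in 'attr' have the
 * expected lengths and, where applicable, sane values; returns -EINVAL or
 * -EOPNOTSUPP otherwise. */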
static int validate_actions(const struct nlattr *attr)
{
	const struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = 4,
			[OVS_ACTION_ATTR_USERSPACE] = 8,
			[OVS_ACTION_ATTR_PUSH_VLAN] = 2,
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
			[OVS_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
			[OVS_ACTION_ATTR_SET_NW_SRC] = 4,
			[OVS_ACTION_ATTR_SET_NW_DST] = 4,
			[OVS_ACTION_ATTR_SET_NW_TOS] = 1,
			[OVS_ACTION_ATTR_SET_TP_SRC] = 2,
			[OVS_ACTION_ATTR_SET_TP_DST] = 2,
			[OVS_ACTION_ATTR_SET_TUNNEL] = 8,
			[OVS_ACTION_ATTR_SET_PRIORITY] = 4,
			[OVS_ACTION_ATTR_POP_PRIORITY] = 0,
		};
		int type = nla_type(a);

		if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
			return -EINVAL;

		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
		case OVS_ACTION_ATTR_POP_VLAN:
		case OVS_ACTION_ATTR_SET_DL_SRC:
		case OVS_ACTION_ATTR_SET_DL_DST:
		case OVS_ACTION_ATTR_SET_NW_SRC:
		case OVS_ACTION_ATTR_SET_NW_DST:
		case OVS_ACTION_ATTR_SET_TP_SRC:
		case OVS_ACTION_ATTR_SET_TP_DST:
		case OVS_ACTION_ATTR_SET_TUNNEL:
		case OVS_ACTION_ATTR_SET_PRIORITY:
		case OVS_ACTION_ATTR_POP_PRIORITY:
			/* No validation needed. */
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_SET_NW_TOS:
			if (nla_get_u8(a) & INET_ECN_MASK)
				return -EINVAL;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}
static void clear_stats(struct sw_flow *flow)
{
	flow->used = 0;
	flow->tcp_flags = 0;
	flow->packet_count = 0;
	flow->byte_count = 0;
}

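/* Handler for OVS_PACKET_CMD_EXECUTE: builds a temporary flow from the
 * supplied key and packet data, then executes the given actions on the packet
 * within the specified datapath. */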
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct datapath *dp;
	struct ethhdr *eth;
	bool is_frag;
	int len;
	int err;
	int key_len;

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS] ||
	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
		goto err;

	err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]);
	if (err)
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= 1536)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	/* Build an sw_flow for sending this packet. */
	flow = flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
	if (err)
		goto err_flow_put;

	err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
					 &flow->key.eth.tun_id,
					 a[OVS_PACKET_ATTR_KEY]);
	if (err)
		goto err_flow_put;

	flow->hash = flow_hash(&flow->key, key_len);

	acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
	err = PTR_ERR(acts);
	if (IS_ERR(acts))
		goto err_flow_put;
	rcu_assign_pointer(flow->sf_acts, acts);

	OVS_CB(packet)->flow = flow;

	rcu_read_lock();
	dp = get_dp(ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;
	err = execute_actions(dp, packet);
	rcu_read_unlock();

	flow_put(flow);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_put:
	flow_put(flow);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};

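/* Sums the per-CPU datapath statistics into 'stats', using each CPU's
 * seqcount to obtain a consistent snapshot, and fills in the current
 * flow count. */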
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
	int i;
	struct flow_table *table = get_table_protected(dp);

	stats->n_flows = flow_tbl_count(table);

	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned seqcount;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
			local_stats = *percpu_stats;
		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

		stats->n_frags += local_stats.n_frags;
		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
	}
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu = 0;

	ASSERT_RTNL();

	list_for_each_entry (p, &dp->port_list, node) {
		int dev_mtu;

		/* Skip any internal ports, since that's what we're trying to
		 * set. */
		if (is_internal_vport(p))
			continue;

		dev_mtu = vport_get_mtu(p);
		if (!dev_mtu)
			continue;
		if (!mtu || dev_mtu < mtu)
			mtu = dev_mtu;
	}

	return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all datapath devices to the minimum of the ports.
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu;

	ASSERT_RTNL();

	mtu = dp_min_mtu(dp);

	list_for_each_entry (p, &dp->port_list, node) {
		if (is_internal_vport(p))
			vport_set_mtu(p, mtu);
	}
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = 1,
	.maxattr = OVS_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
				  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	const struct sw_flow_actions *sf_acts;
	struct ovs_flow_stats stats;
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
	int err;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp->dp_ifindex;

	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;
	err = flow_to_nlattrs(&flow->key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	spin_lock_bh(&flow->lock);
	used = flow->used;
	stats.n_packets = flow->packet_count;
	stats.n_bytes = flow->byte_count;
	tcp_flags = flow->tcp_flags;
	spin_unlock_bh(&flow->lock);

	if (used)
		NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));

	if (stats.n_packets)
		NLA_PUT(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats);

	if (tcp_flags)
		NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
		      sf_acts->actions);
	if (err < 0 && skb_orig_len)
		goto error;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
	const struct sw_flow_actions *sf_acts;
	int len;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	len = nla_total_size(FLOW_BUFSIZE); /* OVS_FLOW_ATTR_KEY */
	len += nla_total_size(sf_acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
	len += nla_total_size(sizeof(struct ovs_flow_stats)); /* OVS_FLOW_ATTR_STATS */
	len += nla_total_size(1); /* OVS_FLOW_ATTR_TCP_FLAGS */
	len += nla_total_size(8); /* OVS_FLOW_ATTR_USED */
	return genlmsg_new(NLMSG_ALIGN(sizeof(struct ovs_header)) + len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
					       u32 pid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(flow);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
	BUG_ON(retval < 0);
	return skb;
}

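/* Shared handler for OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET: looks up the flow
 * by its key and either inserts a new flow (expanding the table if necessary)
 * or updates the actions and stats of the existing one, then notifies the
 * flow multicast group. */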
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply;
	struct datapath *dp;
	struct flow_table *table;
	int error;
	int key_len;

	/* Extract key. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY])
		goto error;
	error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]);
		if (error)
			goto error;
	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
		error = -EINVAL;
		goto error;
	}

	dp = get_dp(ovs_header->dp_ifindex);
	error = -ENODEV;
	if (!dp)
		goto error;

	table = get_table_protected(dp);
	flow = flow_tbl_lookup(table, &key, key_len);
	if (!flow) {
		struct sw_flow_actions *acts;

		/* Bail out if we're not allowed to create a new flow. */
		error = -ENOENT;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
			goto error;

		/* Expand table, if necessary, to make room. */
		if (flow_tbl_need_to_expand(table)) {
			struct flow_table *new_table;

			new_table = flow_tbl_expand(table);
			if (!IS_ERR(new_table)) {
				rcu_assign_pointer(dp->table, new_table);
				flow_tbl_deferred_destroy(table);
				table = get_table_protected(dp);
			}
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		flow->hash = flow_hash(&key, key_len);
		flow_tbl_insert(table, flow);

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, OVS_FLOW_CMD_NEW);
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		error = -EEXIST;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
			goto error;

		/* Update actions. */
		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_genl_is_held());
		if (a[OVS_FLOW_ATTR_ACTIONS] &&
		    (old_acts->actions_len != nla_len(a[OVS_FLOW_ATTR_ACTIONS]) ||
		     memcmp(old_acts->actions, nla_data(a[OVS_FLOW_ATTR_ACTIONS]),
			    old_acts->actions_len))) {
			struct sw_flow_actions *new_acts;

			new_acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
			error = PTR_ERR(new_acts);
			if (IS_ERR(new_acts))
				goto error;

			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		}

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, OVS_FLOW_CMD_NEW);

		/* Clear stats. */
		if (a[OVS_FLOW_ATTR_CLEAR]) {
			spin_lock_bh(&flow->lock);
			clear_stats(flow);
			spin_unlock_bh(&flow->lock);
		}
	}

	if (!IS_ERR(reply))
		genl_notify(reply, genl_info_net(info), info->snd_pid,
			    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	else
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_flow_multicast_group.id, PTR_ERR(reply));
	return 0;

error_free_flow:
	flow_put(flow);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct flow_table *table;
	int err;
	int key_len;

	if (!a[OVS_FLOW_ATTR_KEY])
		return -EINVAL;
	err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow = flow_tbl_lookup(table, &key, key_len);
	if (!flow)
		return -ENOENT;

	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, OVS_FLOW_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct flow_table *table;
	int err;
	int key_len;

	if (!a[OVS_FLOW_ATTR_KEY])
		return flush_flows(ovs_header->dp_ifindex);
	err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow = flow_tbl_lookup(table, &key, key_len);
	if (!flow)
		return -ENOENT;

	reply = ovs_flow_cmd_alloc_info(flow);
	if (!reply)
		return -ENOMEM;

	flow_tbl_remove(table, flow);

	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	flow_deferred_free(flow);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

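/* Dump callback for OVS_FLOW_CMD_GET: walks the flow table, resuming from the
 * (bucket, object) cursor saved in cb->args[] between calls. */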
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = flow_tbl_next(get_table_protected(dp), &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0, /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set,
	},
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
	[OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = 1,
	.maxattr = OVS_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	int err;

	ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = dp->dp_ifindex;

	rcu_read_lock();
	err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_reserve(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats));
	if (!nla)
		goto nla_put_failure;
	get_dp_stats(dp, nla_data(nla));

	NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
		    dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);

	if (dp->sflow_probability)
		NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability);

	nla = nla_nest_start(skb, OVS_DP_ATTR_MCGROUPS);
	if (!nla)
		goto nla_put_failure;
	NLA_PUT_U32(skb, OVS_PACKET_CMD_MISS, packet_mc_group(dp, OVS_PACKET_CMD_MISS));
	NLA_PUT_U32(skb, OVS_PACKET_CMD_ACTION, packet_mc_group(dp, OVS_PACKET_CMD_ACTION));
	NLA_PUT_U32(skb, OVS_PACKET_CMD_SAMPLE, packet_mc_group(dp, OVS_PACKET_CMD_SAMPLE));
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
		u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);

		if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
			return -EINVAL;
	}

	return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	if (a[OVS_DP_ATTR_IPV4_FRAGS])
		dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
	if (a[OVS_DP_ATTR_SAMPLING])
		dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]);
}

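/* Handler for OVS_DP_CMD_NEW: allocates a datapath, its flow table, and its
 * per-CPU statistics, and creates the local (OVSP_LOCAL) internal vport that
 * gives the datapath its ifindex. */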
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME])
		goto err;

	err = ovs_dp_cmd_validate(a);
	if (err)
		goto err;

	rtnl_lock();
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock_rtnl;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, flow_tbl_alloc(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}
	dp->dp_ifindex = vport_get_ifindex(vport);

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	change_datapath(dp, a);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	list_add_tail(&dp->list_node, &dps);
	dp_sysfs_add_dp(dp);

	rtnl_unlock();

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;

err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
err_destroy_table:
	flow_tbl_destroy(get_table_protected(dp));
err_free_dp:
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock_rtnl:
	rtnl_unlock();
err:
	return err;
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct vport *vport, *next_vport;
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		goto exit;

	rtnl_lock();
	dp = lookup_datapath(info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto exit_unlock;

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
		if (vport->port_no != OVSP_LOCAL)
			dp_detach_port(vport);

	dp_sysfs_del_dp(dp);
	list_del(&dp->list_node);
	dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));

	/* rtnl_unlock() will wait until all the references to devices that
	 * are pending unregistration have been dropped.  We do it here to
	 * ensure that any internal devices (which contain DP pointers) are
	 * fully destroyed before freeing the datapath.
	 */
	rtnl_unlock();

	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

	return 0;

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	change_datapath(dp, info->attrs);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_datapath_multicast_group.id, err);
		return 0;
	}

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	list_for_each_entry (dp, &dps, list_node) {
		if (i < skip)
			continue;
		if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}

	cb->args[0] = i;

	return skb->len;
}

static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0, /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
	[OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
	[OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
	[OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = 1,
	.maxattr = OVS_VPORT_ATTR_MAX
};

struct genl_multicast_group dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	int ifindex;
	int err;

	ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = vport->dp->dp_ifindex;

	NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
	NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport_get_type(vport));
	NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport_get_name(vport));

	nla = nla_reserve(skb, OVS_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
	if (!nla)
		goto nla_put_failure;
	if (vport_get_stats(vport, nla_data(nla)))
		__skb_trim(skb, skb->len - nla->nla_len);

	NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

	err = vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	ifindex = vport_get_ifindex(vport);
	if (ifindex > 0)
		NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

/* Called with RTNL lock or RCU read lock. */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
					 u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = get_vport_protected(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENOENT);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}
1667
ed099e92 1668/* Called with RTNL lock. */
df2c07f4 1669static int change_vport(struct vport *vport, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
064af421 1670{
c19e6535 1671 int err = 0;
df2c07f4
JP
1672 if (a[OVS_VPORT_ATTR_STATS])
1673 err = vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1674 if (!err && a[OVS_VPORT_ATTR_ADDRESS])
1675 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
c19e6535
BP
1676 return err;
1677}
1678
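/* OVS_VPORT_CMD_NEW handler: OVS_VPORT_ATTR_NAME and OVS_VPORT_ATTR_TYPE are
 * mandatory, OVS_VPORT_ATTR_PORT_NO is honoured when present, and the
 * resulting vport is announced on the vport multicast group. */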
df2c07f4 1679static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1680{
f0fef760 1681 struct nlattr **a = info->attrs;
df2c07f4 1682 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1683 struct vport_parms parms;
ed099e92 1684 struct sk_buff *reply;
c19e6535 1685 struct vport *vport;
c19e6535 1686 struct datapath *dp;
b0ec0f27 1687 u32 port_no;
c19e6535 1688 int err;
b0ec0f27 1689
c19e6535 1690 err = -EINVAL;
df2c07f4 1691 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE])
f0fef760
BP
1692 goto exit;
1693
df2c07f4 1694 err = ovs_vport_cmd_validate(a);
f0fef760
BP
1695 if (err)
1696 goto exit;
51d4d598 1697
c19e6535 1698 rtnl_lock();
df2c07f4 1699 dp = get_dp(ovs_header->dp_ifindex);
c19e6535
BP
1700 err = -ENODEV;
1701 if (!dp)
ed099e92 1702 goto exit_unlock;
c19e6535 1703
df2c07f4
JP
1704 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1705 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1706
1707 err = -EFBIG;
1708 if (port_no >= DP_MAX_PORTS)
ed099e92 1709 goto exit_unlock;
c19e6535
BP
1710
1711 vport = get_vport_protected(dp, port_no);
1712 err = -EBUSY;
1713 if (vport)
ed099e92 1714 goto exit_unlock;
c19e6535
BP
1715 } else {
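		/* No port number was requested: take the lowest free one,
		 * starting at 1 because port 0 (OVSP_LOCAL) belongs to the
		 * datapath itself. */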
1716 for (port_no = 1; ; port_no++) {
1717 if (port_no >= DP_MAX_PORTS) {
1718 err = -EFBIG;
ed099e92 1719 goto exit_unlock;
c19e6535
BP
1720 }
1721 vport = get_vport_protected(dp, port_no);
1722 if (!vport)
1723 break;
51d4d598 1724 }
064af421 1725 }
b0ec0f27 1726
df2c07f4
JP
1727 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1728 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1729 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1730 parms.dp = dp;
1731 parms.port_no = port_no;
1732
1733 vport = new_vport(&parms);
1734 err = PTR_ERR(vport);
1735 if (IS_ERR(vport))
ed099e92 1736 goto exit_unlock;
c19e6535
BP
1737
1738 set_internal_devs_mtu(dp);
1739 dp_sysfs_add_if(vport);
1740
1741 err = change_vport(vport, a);
f0fef760 1742 if (!err) {
df2c07f4
JP
1743 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
1744 info->snd_seq, OVS_VPORT_CMD_NEW);
f0fef760
BP
1745 if (IS_ERR(reply))
1746 err = PTR_ERR(reply);
1747 }
c19e6535
BP
1748 if (err) {
1749 dp_detach_port(vport);
ed099e92 1750 goto exit_unlock;
c19e6535 1751 }
f0fef760
BP
1752 genl_notify(reply, genl_info_net(info), info->snd_pid,
1753 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
c19e6535 1754
c19e6535 1755
ed099e92 1756exit_unlock:
c19e6535 1757 rtnl_unlock();
c19e6535
BP
1758exit:
1759 return err;
44e05eca
BP
1760}
1761
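/* OVS_VPORT_CMD_SET handler: updates options, stats and/or MAC address of an
 * existing vport under RTNL and broadcasts the new state to the vport
 * multicast group as an OVS_VPORT_CMD_NEW notification. */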
df2c07f4 1762static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1763{
f0fef760
BP
1764 struct nlattr **a = info->attrs;
1765 struct sk_buff *reply;
c19e6535 1766 struct vport *vport;
c19e6535 1767 int err;
44e05eca 1768
df2c07f4 1769 err = ovs_vport_cmd_validate(a);
f0fef760 1770 if (err)
c19e6535
BP
1771 goto exit;
1772
1773 rtnl_lock();
f0fef760 1774 vport = lookup_vport(info->userhdr, a);
c19e6535
BP
1775 err = PTR_ERR(vport);
1776 if (IS_ERR(vport))
f0fef760 1777 goto exit_unlock;
44e05eca 1778
c19e6535 1779 err = 0;
df2c07f4
JP
1780 if (a[OVS_VPORT_ATTR_OPTIONS])
1781 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535
BP
1782 if (!err)
1783 err = change_vport(vport, a);
1784
df2c07f4
JP
1785 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1786 OVS_VPORT_CMD_NEW);
f0fef760
BP
 1787 if (IS_ERR(reply)) {
 1788 /* Only the notification failed; report it to listeners and fall
 1788  * through so the RTNL lock taken above is released rather than
 1788  * returning with it still held. */
 1789 netlink_set_err(INIT_NET_GENL_SOCK, 0,
 1790 dp_vport_multicast_group.id, PTR_ERR(reply));
 1791 goto exit_unlock;
 1792 }
1793
1794 genl_notify(reply, genl_info_net(info), info->snd_pid,
1795 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1796
1797exit_unlock:
c19e6535
BP
1798 rtnl_unlock();
1799exit:
1800 return err;
064af421
BP
1801}
1802
df2c07f4 1803static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1804{
f0fef760
BP
1805 struct nlattr **a = info->attrs;
1806 struct sk_buff *reply;
c19e6535 1807 struct vport *vport;
c19e6535
BP
1808 int err;
1809
df2c07f4 1810 err = ovs_vport_cmd_validate(a);
f0fef760 1811 if (err)
c19e6535
BP
1812 goto exit;
1813
1814 rtnl_lock();
f0fef760 1815 vport = lookup_vport(info->userhdr, a);
c19e6535 1816 err = PTR_ERR(vport);
f0fef760
BP
1817 if (IS_ERR(vport))
1818 goto exit_unlock;
c19e6535 1819
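	/* The local port (OVSP_LOCAL) is created and destroyed together with
	 * its datapath, so it cannot be detached on its own. */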
df2c07f4 1820 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
1821 err = -EINVAL;
1822 goto exit_unlock;
1823 }
1824
df2c07f4
JP
1825 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1826 OVS_VPORT_CMD_DEL);
f0fef760
BP
1827 err = PTR_ERR(reply);
1828 if (IS_ERR(reply))
1829 goto exit_unlock;
1830
3544358a 1831 dp_detach_port(vport);
f0fef760
BP
1832
1833 genl_notify(reply, genl_info_net(info), info->snd_pid,
1834 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1835
1836exit_unlock:
c19e6535
BP
1837 rtnl_unlock();
1838exit:
1839 return err;
7c40efc9
BP
1840}
1841
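/* OVS_VPORT_CMD_GET handler: read-only, so the RCU read lock is enough; the
 * reply is unicast back to the requester via genlmsg_reply(). */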
df2c07f4 1842static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1843{
f0fef760 1844 struct nlattr **a = info->attrs;
df2c07f4 1845 struct ovs_header *ovs_header = info->userhdr;
ed099e92 1846 struct sk_buff *reply;
c19e6535 1847 struct vport *vport;
c19e6535
BP
1848 int err;
1849
df2c07f4 1850 err = ovs_vport_cmd_validate(a);
f0fef760
BP
1851 if (err)
1852 goto exit;
c19e6535 1853
ed099e92 1854 rcu_read_lock();
df2c07f4 1855 vport = lookup_vport(ovs_header, a);
c19e6535
BP
1856 err = PTR_ERR(vport);
1857 if (IS_ERR(vport))
f0fef760 1858 goto exit_unlock;
c19e6535 1859
df2c07f4
JP
1860 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1861 OVS_VPORT_CMD_NEW);
ed099e92
BP
1862 err = PTR_ERR(reply);
1863 if (IS_ERR(reply))
f0fef760 1864 goto exit_unlock;
ed099e92 1865
df2fa9b5
JG
1866 rcu_read_unlock();
1867
1868 return genlmsg_reply(reply, info);
ed099e92 1869
f0fef760 1870exit_unlock:
ed099e92 1871 rcu_read_unlock();
f0fef760 1872exit:
c19e6535
BP
1873 return err;
1874}
1875
df2c07f4 1876static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1877{
df2c07f4 1878 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535
BP
1879 struct datapath *dp;
1880 u32 port_no;
f0fef760 1881 int retval;
c19e6535 1882
df2c07f4 1883 dp = get_dp(ovs_header->dp_ifindex);
c19e6535 1884 if (!dp)
f0fef760 1885 return -ENODEV;
ed099e92
BP
1886
1887 rcu_read_lock();
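	/* cb->args[0] holds the port number where the previous dump pass
	 * stopped; resume from there and record the new position below. */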
f0fef760 1888 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
ed099e92 1889 struct vport *vport;
ed099e92
BP
1890
1891 vport = get_vport_protected(dp, port_no);
1892 if (!vport)
1893 continue;
1894
df2c07f4 1895 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
f0fef760 1896 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1897 OVS_VPORT_CMD_NEW) < 0)
f0fef760 1898 break;
c19e6535 1899 }
ed099e92 1900 rcu_read_unlock();
c19e6535 1901
f0fef760
BP
1902 cb->args[0] = port_no;
1903 retval = skb->len;
1904
1905 return retval;
7c40efc9
BP
1906}
1907
f0fef760 1908static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 1909 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
1910 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1911 .policy = vport_policy,
df2c07f4 1912 .doit = ovs_vport_cmd_new
f0fef760 1913 },
df2c07f4 1914 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
1915 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1916 .policy = vport_policy,
df2c07f4 1917 .doit = ovs_vport_cmd_del
f0fef760 1918 },
df2c07f4 1919 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
1920 .flags = 0, /* OK for unprivileged users. */
1921 .policy = vport_policy,
df2c07f4
JP
1922 .doit = ovs_vport_cmd_get,
1923 .dumpit = ovs_vport_cmd_dump
f0fef760 1924 },
df2c07f4 1925 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
1926 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1927 .policy = vport_policy,
df2c07f4 1928 .doit = ovs_vport_cmd_set,
f0fef760
BP
1929 },
1930};
1931
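/* Pairs each Generic Netlink family with its ops array and optional
 * multicast group so they can be (un)registered uniformly below. */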
982b8810
BP
1932struct genl_family_and_ops {
1933 struct genl_family *family;
1934 struct genl_ops *ops;
1935 int n_ops;
1936 struct genl_multicast_group *group;
1937};
ed099e92 1938
982b8810 1939static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1940 { &dp_datapath_genl_family,
1941 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1942 &dp_datapath_multicast_group },
f0fef760
BP
1943 { &dp_vport_genl_family,
1944 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1945 &dp_vport_multicast_group },
37a1300c
BP
1946 { &dp_flow_genl_family,
1947 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1948 &dp_flow_multicast_group },
982b8810
BP
1949 { &dp_packet_genl_family,
1950 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1951 NULL },
1952};
ed099e92 1953
982b8810
BP
1954static void dp_unregister_genl(int n_families)
1955{
1956 int i;
ed099e92 1957
b867ca75 1958 for (i = 0; i < n_families; i++)
982b8810 1959 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
1960}
1961
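/* Registers every family in dp_genl_families along with its multicast group,
 * if any; on failure the families registered so far are torn back down via
 * dp_unregister_genl(). */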
982b8810 1962static int dp_register_genl(void)
064af421 1963{
982b8810
BP
1964 int n_registered;
1965 int err;
1966 int i;
064af421 1967
982b8810
BP
1968 n_registered = 0;
1969 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1970 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 1971
982b8810
BP
1972 err = genl_register_family_with_ops(f->family, f->ops,
1973 f->n_ops);
1974 if (err)
1975 goto error;
1976 n_registered++;
e22d4953 1977
982b8810
BP
1978 if (f->group) {
1979 err = genl_register_mc_group(f->family, f->group);
1980 if (err)
1981 goto error;
1982 }
1983 }
9cc8b4e4 1984
982b8810
BP
1985 err = packet_register_mc_groups();
1986 if (err)
1987 goto error;
1988 return 0;
064af421
BP
1989
1990error:
982b8810
BP
1991 dp_unregister_genl(n_registered);
1992 return err;
064af421
BP
1993}
1994
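/* Module init: bring up the tunnel, flow and vport subsystems, hook the
 * netdevice notifier, then register the Generic Netlink families. Each error
 * path unwinds the steps that had already completed. */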
22d24ebf
BP
1995static int __init dp_init(void)
1996{
f2459fe7 1997 struct sk_buff *dummy_skb;
22d24ebf
BP
1998 int err;
1999
f2459fe7 2000 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 2001
f2459fe7 2002 printk(KERN_INFO "Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421 2003
3544358a 2004 err = tnl_init();
064af421
BP
2005 if (err)
2006 goto error;
2007
3544358a
PS
2008 err = flow_init();
2009 if (err)
2010 goto error_tnl_exit;
2011
f2459fe7 2012 err = vport_init();
064af421
BP
2013 if (err)
2014 goto error_flow_exit;
2015
f2459fe7
JG
2016 err = register_netdevice_notifier(&dp_device_notifier);
2017 if (err)
2018 goto error_vport_exit;
2019
982b8810
BP
2020 err = dp_register_genl();
2021 if (err < 0)
37a1300c 2022 goto error_unreg_notifier;
982b8810 2023
064af421
BP
2024 return 0;
2025
2026error_unreg_notifier:
2027 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2028error_vport_exit:
2029 vport_exit();
064af421
BP
2030error_flow_exit:
2031 flow_exit();
3544358a
PS
2032error_tnl_exit:
2033 tnl_exit();
064af421
BP
2034error:
2035 return err;
2036}
2037
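/* Module exit: rcu_barrier() waits for outstanding RCU callbacks before the
 * subsystems are torn down in the reverse order of dp_init(). */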
2038static void dp_cleanup(void)
2039{
2040 rcu_barrier();
982b8810 2041 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
064af421 2042 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7 2043 vport_exit();
064af421 2044 flow_exit();
3544358a 2045 tnl_exit();
064af421
BP
2046}
2047
2048module_init(dp_init);
2049module_exit(dp_cleanup);
2050
2051MODULE_DESCRIPTION("Open vSwitch switching datapath");
2052MODULE_LICENSE("GPL");