]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
datapath: Remove unneeded ovs_netdev_get_ifindex()
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
e0edde6f 2 * Copyright (c) 2007-2012 Nicira, Inc.
a14bc59f 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
064af421
BP
17 */
18
dfffaef1
JP
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
064af421
BP
21#include <linux/init.h>
22#include <linux/module.h>
064af421 23#include <linux/if_arp.h>
064af421
BP
24#include <linux/if_vlan.h>
25#include <linux/in.h>
26#include <linux/ip.h>
982b8810 27#include <linux/jhash.h>
064af421
BP
28#include <linux/delay.h>
29#include <linux/time.h>
30#include <linux/etherdevice.h>
ed099e92 31#include <linux/genetlink.h>
064af421
BP
32#include <linux/kernel.h>
33#include <linux/kthread.h>
064af421
BP
34#include <linux/mutex.h>
35#include <linux/percpu.h>
36#include <linux/rcupdate.h>
37#include <linux/tcp.h>
38#include <linux/udp.h>
39#include <linux/version.h>
40#include <linux/ethtool.h>
064af421 41#include <linux/wait.h>
064af421 42#include <asm/div64.h>
656a0e37 43#include <linux/highmem.h>
064af421
BP
44#include <linux/netfilter_bridge.h>
45#include <linux/netfilter_ipv4.h>
46#include <linux/inetdevice.h>
47#include <linux/list.h>
077257b8 48#include <linux/openvswitch.h>
064af421 49#include <linux/rculist.h>
064af421 50#include <linux/dmi.h>
cd2a59e9
PS
51#include <linux/genetlink.h>
52#include <net/genetlink.h>
36956a7d 53#include <net/genetlink.h>
2a4999f3
PS
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
064af421 56
dd8d6b8c 57#include "checksum.h"
064af421 58#include "datapath.h"
064af421 59#include "flow.h"
303708cc 60#include "vlan.h"
3544358a 61#include "tunnel.h"
f2459fe7 62#include "vport-internal_dev.h"
d5de5b0d 63#include "vport-netdev.h"
064af421 64
4cf41591 65#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
64807dfb
JP
66 LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
67#error Kernels before 2.6.18 or after 3.8 are not supported by this version of Open vSwitch.
4cf41591
JG
68#endif
69
acd051f1
PS
/* Ten minutes expressed in jiffies; interval for the flow-table rehash work. */
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)

/* Delayed work item whose handler is rehash_flow_table(). */
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73
2a4999f3
PS
74int ovs_net_id __read_mostly;
75
e297c6b7
TG
76static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
77 struct genl_multicast_group *grp)
78{
79 genl_notify(skb, genl_info_net(info), info->snd_portid,
80 grp->id, info->nlhdr, GFP_KERNEL);
81}
82
ed099e92
BP
83/**
84 * DOC: Locking:
064af421 85 *
cd2a59e9
PS
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, set miscellaneous datapath parameters, etc.), are protected
 * by ovs_lock.
ed099e92
BP
90 *
91 * Reads are protected by RCU.
92 *
93 * There are a few special cases (mostly stats) that have their own
94 * synchronization but they nest under all of above and don't interact with
95 * each other.
cd2a59e9
PS
96 *
97 * The RTNL lock nests inside ovs_mutex.
064af421 98 */
ed099e92 99
cd2a59e9
PS
/* The single mutex taken by ovs_lock() and released by ovs_unlock(). */
static DEFINE_MUTEX(ovs_mutex);
101
102void ovs_lock(void)
103{
104 mutex_lock(&ovs_mutex);
105}
106
107void ovs_unlock(void)
108{
109 mutex_unlock(&ovs_mutex);
110}
111
#ifdef CONFIG_LOCKDEP
/*
 * Report whether ovs_mutex is held according to lockdep.  When lock
 * debugging has been switched off (debug_locks == 0) the answer is
 * unconditionally 1.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
121
/* Forward declarations for helpers defined further down in this file. */
static struct vport *new_vport(const struct vport_parms *parms);
static int queue_gso_packets(struct net *net, int dp_ifindex,
			     struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info);
static int queue_userspace_packet(struct net *net, int dp_ifindex,
				  struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info);
064af421 128
cd2a59e9 129/* Must be called with rcu_read_lock or ovs_mutex. */
2a4999f3 130static struct datapath *get_dp(struct net *net, int dp_ifindex)
064af421 131{
254f2dc8
BP
132 struct datapath *dp = NULL;
133 struct net_device *dev;
ed099e92 134
254f2dc8 135 rcu_read_lock();
2a4999f3 136 dev = dev_get_by_index_rcu(net, dp_ifindex);
254f2dc8 137 if (dev) {
850b6b3b 138 struct vport *vport = ovs_internal_dev_get_vport(dev);
254f2dc8
BP
139 if (vport)
140 dp = vport->dp;
141 }
142 rcu_read_unlock();
143
144 return dp;
064af421 145}
064af421 146
cd2a59e9 147/* Must be called with rcu_read_lock or ovs_mutex. */
850b6b3b 148const char *ovs_dp_name(const struct datapath *dp)
f2459fe7 149{
cd2a59e9 150 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
16b82e84 151 return vport->ops->get_name(vport);
f2459fe7
JG
152}
153
99769a40
JG
154static int get_dpifindex(struct datapath *dp)
155{
156 struct vport *local;
157 int ifindex;
158
159 rcu_read_lock();
160
95b1d73a 161 local = ovs_vport_rcu(dp, OVSP_LOCAL);
99769a40 162 if (local)
d5de5b0d 163 ifindex = netdev_vport_priv(local)->dev->ifindex;
99769a40
JG
164 else
165 ifindex = 0;
166
167 rcu_read_unlock();
168
169 return ifindex;
170}
171
46c6a11d
JG
172static void destroy_dp_rcu(struct rcu_head *rcu)
173{
174 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d 175
850b6b3b 176 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
46c6a11d 177 free_percpu(dp->stats_percpu);
2a4999f3 178 release_net(ovs_dp_get_net(dp));
95b1d73a 179 kfree(dp->ports);
5ca1ba48 180 kfree(dp);
46c6a11d
JG
181}
182
95b1d73a
PS
183static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
184 u16 port_no)
185{
186 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
187}
188
189struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
190{
191 struct vport *vport;
95b1d73a
PS
192 struct hlist_head *head;
193
194 head = vport_hash_bucket(dp, port_no);
f8dfbcb7 195 hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
95b1d73a
PS
196 if (vport->port_no == port_no)
197 return vport;
198 }
199 return NULL;
200}
201
cd2a59e9 202/* Called with ovs_mutex. */
c19e6535 203static struct vport *new_vport(const struct vport_parms *parms)
064af421 204{
f2459fe7 205 struct vport *vport;
f2459fe7 206
850b6b3b 207 vport = ovs_vport_add(parms);
c19e6535
BP
208 if (!IS_ERR(vport)) {
209 struct datapath *dp = parms->dp;
95b1d73a 210 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
064af421 211
95b1d73a 212 hlist_add_head_rcu(&vport->dp_hash_node, head);
c19e6535 213 }
c19e6535 214 return vport;
064af421
BP
215}
216
850b6b3b 217void ovs_dp_detach_port(struct vport *p)
064af421 218{
cd2a59e9 219 ASSERT_OVSL();
064af421 220
064af421 221 /* First drop references to device. */
95b1d73a 222 hlist_del_rcu(&p->dp_hash_node);
f2459fe7 223
7237e4f4 224 /* Then destroy it. */
850b6b3b 225 ovs_vport_del(p);
064af421
BP
226}
227
8819fac7 228/* Must be called with rcu_read_lock. */
850b6b3b 229void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
230{
231 struct datapath *dp = p->dp;
3544358a 232 struct sw_flow *flow;
064af421 233 struct dp_stats_percpu *stats;
52a23d92 234 struct sw_flow_key key;
e9141eec 235 u64 *stats_counter;
4c1ad233 236 int error;
52a23d92 237 int key_len;
064af421 238
70dbc259 239 stats = this_cpu_ptr(dp->stats_percpu);
a063b0df 240
52a23d92
JG
241 /* Extract flow from 'skb' into 'key'. */
242 error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
243 if (unlikely(error)) {
244 kfree_skb(skb);
245 return;
55574bb0
BP
246 }
247
52a23d92
JG
248 /* Look up flow. */
249 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
250 if (unlikely(!flow)) {
251 struct dp_upcall_info upcall;
252
253 upcall.cmd = OVS_PACKET_CMD_MISS;
254 upcall.key = &key;
255 upcall.userdata = NULL;
256 upcall.portid = p->upcall_portid;
257 ovs_dp_upcall(dp, skb, &upcall);
258 consume_skb(skb);
259 stats_counter = &stats->n_missed;
260 goto out;
261 }
262
263 OVS_CB(skb)->flow = flow;
264
e9141eec 265 stats_counter = &stats->n_hit;
850b6b3b
JG
266 ovs_flow_used(OVS_CB(skb)->flow, skb);
267 ovs_execute_actions(dp, skb);
55574bb0 268
8819fac7 269out:
55574bb0 270 /* Update datapath statistics. */
821cb9fa 271 u64_stats_update_begin(&stats->sync);
e9141eec 272 (*stats_counter)++;
821cb9fa 273 u64_stats_update_end(&stats->sync);
064af421
BP
274}
275
aa5a8fdc
JG
276static struct genl_family dp_packet_genl_family = {
277 .id = GENL_ID_GENERATE,
df2c07f4
JP
278 .hdrsize = sizeof(struct ovs_header),
279 .name = OVS_PACKET_FAMILY,
69685a88 280 .version = OVS_PACKET_VERSION,
2a4999f3
PS
281 .maxattr = OVS_PACKET_ATTR_MAX,
282 SET_NETNSOK
aa5a8fdc
JG
283};
284
850b6b3b
JG
285int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
286 const struct dp_upcall_info *upcall_info)
aa5a8fdc
JG
287{
288 struct dp_stats_percpu *stats;
7257b535 289 int dp_ifindex;
aa5a8fdc
JG
290 int err;
291
28aea917 292 if (upcall_info->portid == 0) {
b063d9f0 293 err = -ENOTCONN;
b063d9f0
JG
294 goto err;
295 }
296
7257b535
BP
297 dp_ifindex = get_dpifindex(dp);
298 if (!dp_ifindex) {
299 err = -ENODEV;
300 goto err;
aa5a8fdc
JG
301 }
302
7257b535 303 forward_ip_summed(skb, true);
36ce148c 304
7257b535 305 if (!skb_is_gso(skb))
2a4999f3 306 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
7257b535 307 else
2a4999f3 308 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
d76195db
JG
309 if (err)
310 goto err;
311
312 return 0;
aa5a8fdc 313
aa5a8fdc 314err:
70dbc259 315 stats = this_cpu_ptr(dp->stats_percpu);
aa5a8fdc 316
821cb9fa 317 u64_stats_update_begin(&stats->sync);
aa5a8fdc 318 stats->n_lost++;
821cb9fa 319 u64_stats_update_end(&stats->sync);
aa5a8fdc 320
aa5a8fdc 321 return err;
982b8810
BP
322}
323
2a4999f3
PS
324static int queue_gso_packets(struct net *net, int dp_ifindex,
325 struct sk_buff *skb,
7257b535 326 const struct dp_upcall_info *upcall_info)
cb5087ca 327{
d4cba1f8 328 unsigned short gso_type = skb_shinfo(skb)->gso_type;
7257b535
BP
329 struct dp_upcall_info later_info;
330 struct sw_flow_key later_key;
331 struct sk_buff *segs, *nskb;
332 int err;
cb5087ca 333
0aa52d88 334 segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
79089764
PS
335 if (IS_ERR(segs))
336 return PTR_ERR(segs);
99769a40 337
7257b535
BP
338 /* Queue all of the segments. */
339 skb = segs;
cb5087ca 340 do {
2a4999f3 341 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
982b8810 342 if (err)
7257b535 343 break;
856081f6 344
d4cba1f8 345 if (skb == segs && gso_type & SKB_GSO_UDP) {
e1cf87ff
JG
346 /* The initial flow key extracted by ovs_flow_extract()
347 * in this case is for a first fragment, so we need to
7257b535
BP
348 * properly mark later fragments.
349 */
350 later_key = *upcall_info->key;
9e44d715 351 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
7257b535
BP
352
353 later_info = *upcall_info;
354 later_info.key = &later_key;
355 upcall_info = &later_info;
356 }
36ce148c 357 } while ((skb = skb->next));
cb5087ca 358
7257b535
BP
359 /* Free all of the segments. */
360 skb = segs;
361 do {
362 nskb = skb->next;
363 if (err)
364 kfree_skb(skb);
365 else
366 consume_skb(skb);
367 } while ((skb = nskb));
368 return err;
369}
370
0afa2373
TG
371static size_t key_attr_size(void)
372{
373 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
374 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
375 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
376 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
377 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
378 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
379 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
380 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
381 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
382 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
383 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
384 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
385 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
386 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
387 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
388 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
389 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
390 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
391 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
392}
393
394static size_t upcall_msg_size(const struct sk_buff *skb,
395 const struct nlattr *userdata)
396{
397 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
398 + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
399 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
400
401 /* OVS_PACKET_ATTR_USERDATA */
402 if (userdata)
403 size += NLA_ALIGN(userdata->nla_len);
404
405 return size;
406}
407
2a4999f3
PS
408static int queue_userspace_packet(struct net *net, int dp_ifindex,
409 struct sk_buff *skb,
7257b535
BP
410 const struct dp_upcall_info *upcall_info)
411{
412 struct ovs_header *upcall;
6161d3fd 413 struct sk_buff *nskb = NULL;
7257b535
BP
414 struct sk_buff *user_skb; /* to be queued to userspace */
415 struct nlattr *nla;
7257b535
BP
416 int err;
417
6161d3fd
JG
418 if (vlan_tx_tag_present(skb)) {
419 nskb = skb_clone(skb, GFP_ATOMIC);
420 if (!nskb)
421 return -ENOMEM;
422
423 err = vlan_deaccel_tag(nskb);
424 if (err)
425 return err;
7257b535 426
6161d3fd
JG
427 skb = nskb;
428 }
429
430 if (nla_attr_size(skb->len) > USHRT_MAX) {
431 err = -EFBIG;
432 goto out;
433 }
7257b535 434
0afa2373 435 user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
6161d3fd
JG
436 if (!user_skb) {
437 err = -ENOMEM;
438 goto out;
439 }
7257b535
BP
440
441 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
442 0, upcall_info->cmd);
443 upcall->dp_ifindex = dp_ifindex;
444
445 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
850b6b3b 446 ovs_flow_to_nlattrs(upcall_info->key, user_skb);
7257b535
BP
447 nla_nest_end(user_skb, nla);
448
449 if (upcall_info->userdata)
e995e3df 450 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
462a988b 451 nla_len(upcall_info->userdata),
e995e3df 452 nla_data(upcall_info->userdata));
7257b535
BP
453
454 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
bed53bd1
PS
455
456 skb_copy_and_csum_dev(skb, nla_data(nla));
7257b535 457
c39b1a5c 458 genlmsg_end(user_skb, upcall);
28aea917 459 err = genlmsg_unicast(net, user_skb, upcall_info->portid);
6161d3fd
JG
460
461out:
462 kfree_skb(nskb);
463 return err;
cb5087ca
BP
464}
465
cd2a59e9 466/* Called with ovs_mutex. */
2a4999f3 467static int flush_flows(struct datapath *dp)
064af421 468{
3544358a
PS
469 struct flow_table *old_table;
470 struct flow_table *new_table;
8d5ebd83 471
cd2a59e9 472 old_table = ovsl_dereference(dp->table);
850b6b3b 473 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
8d5ebd83 474 if (!new_table)
ed099e92 475 return -ENOMEM;
8d5ebd83
JG
476
477 rcu_assign_pointer(dp->table, new_table);
478
850b6b3b 479 ovs_flow_tbl_deferred_destroy(old_table);
ed099e92 480 return 0;
064af421
BP
481}
482
9b405f1a
PS
483static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
484{
485
486 struct sw_flow_actions *acts;
487 int new_acts_size;
488 int req_size = NLA_ALIGN(attr_len);
489 int next_offset = offsetof(struct sw_flow_actions, actions) +
490 (*sfa)->actions_len;
491
ba400435 492 if (req_size <= (ksize(*sfa) - next_offset))
9b405f1a
PS
493 goto out;
494
ba400435 495 new_acts_size = ksize(*sfa) * 2;
9b405f1a
PS
496
497 if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
498 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
499 return ERR_PTR(-EMSGSIZE);
500 new_acts_size = MAX_ACTIONS_BUFSIZE;
501 }
502
503 acts = ovs_flow_actions_alloc(new_acts_size);
504 if (IS_ERR(acts))
505 return (void *)acts;
506
507 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
508 acts->actions_len = (*sfa)->actions_len;
ba400435 509 kfree(*sfa);
9b405f1a
PS
510 *sfa = acts;
511
512out:
513 (*sfa)->actions_len += req_size;
514 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
515}
516
517static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
518{
519 struct nlattr *a;
520
521 a = reserve_sfa_size(sfa, nla_attr_size(len));
522 if (IS_ERR(a))
523 return PTR_ERR(a);
524
525 a->nla_type = attrtype;
526 a->nla_len = nla_attr_size(len);
527
528 if (data)
529 memcpy(nla_data(a), data, len);
530 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
6ff686f2 531
9b405f1a
PS
532 return 0;
533}
534
535static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
536{
537 int used = (*sfa)->actions_len;
538 int err;
539
540 err = add_action(sfa, attrtype, NULL, 0);
541 if (err)
542 return err;
543
544 return used;
545}
546
547static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
548{
549 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
550
551 a->nla_len = sfa->actions_len - st_offset;
552}
553
/* Declared early: validate_and_copy_sample() calls it before its definition. */
static int validate_and_copy_actions(const struct nlattr *attr,
				     const struct sw_flow_key *key,
				     int depth,
				     struct sw_flow_actions **sfa);
557
558static int validate_and_copy_sample(const struct nlattr *attr,
559 const struct sw_flow_key *key, int depth,
560 struct sw_flow_actions **sfa)
6ff686f2 561{
4be00e48
BP
562 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
563 const struct nlattr *probability, *actions;
564 const struct nlattr *a;
9b405f1a 565 int rem, start, err, st_acts;
4be00e48
BP
566
567 memset(attrs, 0, sizeof(attrs));
6455100f 568 nla_for_each_nested(a, attr, rem) {
4be00e48
BP
569 int type = nla_type(a);
570 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
571 return -EINVAL;
572 attrs[type] = a;
573 }
574 if (rem)
6ff686f2 575 return -EINVAL;
4be00e48
BP
576
577 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
578 if (!probability || nla_len(probability) != sizeof(u32))
6ff686f2
PS
579 return -EINVAL;
580
4be00e48
BP
581 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
582 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
583 return -EINVAL;
9b405f1a
PS
584
585 /* validation done, copy sample action. */
586 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
587 if (start < 0)
588 return start;
589 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
590 if (err)
591 return err;
592 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
593 if (st_acts < 0)
594 return st_acts;
595
596 err = validate_and_copy_actions(actions, key, depth + 1, sfa);
597 if (err)
598 return err;
599
600 add_nested_action_end(*sfa, st_acts);
601 add_nested_action_end(*sfa, start);
602
603 return 0;
4edb9ae9
PS
604}
605
b1323f59
PS
606static int validate_tp_port(const struct sw_flow_key *flow_key)
607{
608 if (flow_key->eth.type == htons(ETH_P_IP)) {
6e9bea4d 609 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
b1323f59
PS
610 return 0;
611 } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
6e9bea4d 612 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
b1323f59
PS
613 return 0;
614 }
615
616 return -EINVAL;
617}
618
9b405f1a
PS
619static int validate_and_copy_set_tun(const struct nlattr *attr,
620 struct sw_flow_actions **sfa)
621{
622 struct ovs_key_ipv4_tunnel tun_key;
623 int err, start;
624
625 err = ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
626 if (err)
627 return err;
628
629 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
630 if (start < 0)
631 return start;
632
633 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
634 add_nested_action_end(*sfa, start);
635
636 return err;
637}
638
fea393b1 639static int validate_set(const struct nlattr *a,
9b405f1a
PS
640 const struct sw_flow_key *flow_key,
641 struct sw_flow_actions **sfa,
642 bool *set_tun)
4edb9ae9 643{
4edb9ae9
PS
644 const struct nlattr *ovs_key = nla_data(a);
645 int key_type = nla_type(ovs_key);
646
647 /* There can be only one key in a action */
648 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
649 return -EINVAL;
650
651 if (key_type > OVS_KEY_ATTR_MAX ||
9b405f1a
PS
652 (ovs_key_lens[key_type] != nla_len(ovs_key) &&
653 ovs_key_lens[key_type] != -1))
4edb9ae9
PS
654 return -EINVAL;
655
fea393b1 656 switch (key_type) {
4edb9ae9 657 const struct ovs_key_ipv4 *ipv4_key;
bc7a5acd 658 const struct ovs_key_ipv6 *ipv6_key;
9b405f1a 659 int err;
4edb9ae9 660
fea393b1 661 case OVS_KEY_ATTR_PRIORITY:
fea393b1 662 case OVS_KEY_ATTR_ETHERNET:
4edb9ae9
PS
663 break;
664
72e8bf28
AA
665 case OVS_KEY_ATTR_SKB_MARK:
666#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
667 if (nla_get_u32(ovs_key) != 0)
668 return -EINVAL;
669#endif
670 break;
671
9b405f1a
PS
672 case OVS_KEY_ATTR_TUNNEL:
673 *set_tun = true;
674 err = validate_and_copy_set_tun(a, sfa);
675 if (err)
676 return err;
356af50b
KM
677 break;
678
fea393b1 679 case OVS_KEY_ATTR_IPV4:
4edb9ae9
PS
680 if (flow_key->eth.type != htons(ETH_P_IP))
681 return -EINVAL;
682
6e9bea4d 683 if (!flow_key->ip.proto)
4edb9ae9
PS
684 return -EINVAL;
685
686 ipv4_key = nla_data(ovs_key);
687 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
688 return -EINVAL;
689
9e44d715 690 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
7257b535
BP
691 return -EINVAL;
692
4edb9ae9
PS
693 break;
694
bc7a5acd
AA
695 case OVS_KEY_ATTR_IPV6:
696 if (flow_key->eth.type != htons(ETH_P_IPV6))
697 return -EINVAL;
698
699 if (!flow_key->ip.proto)
700 return -EINVAL;
701
702 ipv6_key = nla_data(ovs_key);
703 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
704 return -EINVAL;
705
706 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
707 return -EINVAL;
708
709 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
710 return -EINVAL;
711
712 break;
713
fea393b1 714 case OVS_KEY_ATTR_TCP:
4edb9ae9
PS
715 if (flow_key->ip.proto != IPPROTO_TCP)
716 return -EINVAL;
717
b1323f59 718 return validate_tp_port(flow_key);
4edb9ae9 719
fea393b1 720 case OVS_KEY_ATTR_UDP:
4edb9ae9
PS
721 if (flow_key->ip.proto != IPPROTO_UDP)
722 return -EINVAL;
723
b1323f59 724 return validate_tp_port(flow_key);
4edb9ae9
PS
725
726 default:
727 return -EINVAL;
728 }
fea393b1 729
4edb9ae9 730 return 0;
6ff686f2
PS
731}
732
98403001
BP
733static int validate_userspace(const struct nlattr *attr)
734{
6455100f 735 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
98403001 736 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
e995e3df 737 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
98403001
BP
738 };
739 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
740 int error;
741
6455100f
PS
742 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
743 attr, userspace_policy);
98403001
BP
744 if (error)
745 return error;
746
6455100f
PS
747 if (!a[OVS_USERSPACE_ATTR_PID] ||
748 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
98403001
BP
749 return -EINVAL;
750
751 return 0;
752}
753
9b405f1a
PS
754static int copy_action(const struct nlattr *from,
755 struct sw_flow_actions **sfa)
756{
757 int totlen = NLA_ALIGN(from->nla_len);
758 struct nlattr *to;
759
760 to = reserve_sfa_size(sfa, from->nla_len);
761 if (IS_ERR(to))
762 return PTR_ERR(to);
763
764 memcpy(to, from, totlen);
765 return 0;
766}
767
768static int validate_and_copy_actions(const struct nlattr *attr,
769 const struct sw_flow_key *key,
770 int depth,
771 struct sw_flow_actions **sfa)
064af421 772{
23cad98c 773 const struct nlattr *a;
6ff686f2
PS
774 int rem, err;
775
776 if (depth >= SAMPLE_ACTION_DEPTH)
777 return -EOVERFLOW;
23cad98c 778
37a1300c 779 nla_for_each_nested(a, attr, rem) {
98403001 780 /* Expected argument lengths, (u32)-1 for variable length. */
df2c07f4 781 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
fea393b1 782 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
98403001 783 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
fea393b1
BP
784 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
785 [OVS_ACTION_ATTR_POP_VLAN] = 0,
4edb9ae9 786 [OVS_ACTION_ATTR_SET] = (u32)-1,
98403001 787 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
23cad98c 788 };
fea393b1 789 const struct ovs_action_push_vlan *vlan;
23cad98c 790 int type = nla_type(a);
9b405f1a 791 bool skip_copy;
23cad98c 792
6ff686f2 793 if (type > OVS_ACTION_ATTR_MAX ||
98403001
BP
794 (action_lens[type] != nla_len(a) &&
795 action_lens[type] != (u32)-1))
23cad98c
BP
796 return -EINVAL;
797
9b405f1a 798 skip_copy = false;
23cad98c 799 switch (type) {
df2c07f4 800 case OVS_ACTION_ATTR_UNSPEC:
cdee00fd 801 return -EINVAL;
064af421 802
98403001
BP
803 case OVS_ACTION_ATTR_USERSPACE:
804 err = validate_userspace(a);
805 if (err)
806 return err;
807 break;
808
df2c07f4 809 case OVS_ACTION_ATTR_OUTPUT:
23cad98c
BP
810 if (nla_get_u32(a) >= DP_MAX_PORTS)
811 return -EINVAL;
3b1fc5f3 812 break;
cdee00fd 813
4edb9ae9 814
fea393b1
BP
815 case OVS_ACTION_ATTR_POP_VLAN:
816 break;
817
818 case OVS_ACTION_ATTR_PUSH_VLAN:
819 vlan = nla_data(a);
820 if (vlan->vlan_tpid != htons(ETH_P_8021Q))
821 return -EINVAL;
8ddc056d 822 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
064af421 823 return -EINVAL;
23cad98c 824 break;
064af421 825
4edb9ae9 826 case OVS_ACTION_ATTR_SET:
9b405f1a 827 err = validate_set(a, key, sfa, &skip_copy);
4edb9ae9
PS
828 if (err)
829 return err;
23cad98c 830 break;
064af421 831
6ff686f2 832 case OVS_ACTION_ATTR_SAMPLE:
9b405f1a 833 err = validate_and_copy_sample(a, key, depth, sfa);
6ff686f2
PS
834 if (err)
835 return err;
9b405f1a 836 skip_copy = true;
6ff686f2
PS
837 break;
838
23cad98c 839 default:
4edb9ae9 840 return -EINVAL;
23cad98c 841 }
9b405f1a
PS
842 if (!skip_copy) {
843 err = copy_action(a, sfa);
844 if (err)
845 return err;
846 }
23cad98c 847 }
3c5f6de3 848
23cad98c
BP
849 if (rem > 0)
850 return -EINVAL;
064af421 851
23cad98c 852 return 0;
064af421 853}
4edb9ae9 854
064af421
BP
855static void clear_stats(struct sw_flow *flow)
856{
6bfafa55 857 flow->used = 0;
064af421 858 flow->tcp_flags = 0;
064af421
BP
859 flow->packet_count = 0;
860 flow->byte_count = 0;
861}
862
df2c07f4 863static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 864{
df2c07f4 865 struct ovs_header *ovs_header = info->userhdr;
982b8810 866 struct nlattr **a = info->attrs;
e0e57990 867 struct sw_flow_actions *acts;
982b8810 868 struct sk_buff *packet;
e0e57990 869 struct sw_flow *flow;
f7cd0081 870 struct datapath *dp;
d6569377 871 struct ethhdr *eth;
3f19d399 872 int len;
d6569377 873 int err;
76abe283 874 int key_len;
064af421 875
f7cd0081 876 err = -EINVAL;
df2c07f4 877 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
7c3072cc 878 !a[OVS_PACKET_ATTR_ACTIONS])
e5cad958 879 goto err;
064af421 880
df2c07f4 881 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
3f19d399 882 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
883 err = -ENOMEM;
884 if (!packet)
e5cad958 885 goto err;
3f19d399
BP
886 skb_reserve(packet, NET_IP_ALIGN);
887
bf3d6fce 888 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
8d5ebd83 889
f7cd0081
BP
890 skb_reset_mac_header(packet);
891 eth = eth_hdr(packet);
064af421 892
d6569377
BP
893 /* Normally, setting the skb 'protocol' field would be handled by a
894 * call to eth_type_trans(), but it assumes there's a sending
895 * device, which we may not have. */
7cd46155 896 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
f7cd0081 897 packet->protocol = eth->h_proto;
d6569377 898 else
f7cd0081 899 packet->protocol = htons(ETH_P_802_2);
d3c54451 900
e0e57990 901 /* Build an sw_flow for sending this packet. */
850b6b3b 902 flow = ovs_flow_alloc();
e0e57990
BP
903 err = PTR_ERR(flow);
904 if (IS_ERR(flow))
e5cad958 905 goto err_kfree_skb;
064af421 906
850b6b3b 907 err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
e0e57990 908 if (err)
9321954a 909 goto err_flow_free;
e0e57990 910
13e24889 911 err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
80e5eed9 912 if (err)
9321954a 913 goto err_flow_free;
9b405f1a 914 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
e0e57990
BP
915 err = PTR_ERR(acts);
916 if (IS_ERR(acts))
9321954a 917 goto err_flow_free;
9b405f1a
PS
918
919 err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
e0e57990 920 rcu_assign_pointer(flow->sf_acts, acts);
9b405f1a
PS
921 if (err)
922 goto err_flow_free;
e0e57990
BP
923
924 OVS_CB(packet)->flow = flow;
abff858b 925 packet->priority = flow->key.phy.priority;
72e8bf28 926 skb_set_mark(packet, flow->key.phy.skb_mark);
e0e57990 927
d6569377 928 rcu_read_lock();
2a4999f3 929 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
f7cd0081 930 err = -ENODEV;
e5cad958
BP
931 if (!dp)
932 goto err_unlock;
cc4015df 933
e9141eec 934 local_bh_disable();
850b6b3b 935 err = ovs_execute_actions(dp, packet);
e9141eec 936 local_bh_enable();
d6569377 937 rcu_read_unlock();
e0e57990 938
9321954a 939 ovs_flow_free(flow);
e5cad958 940 return err;
064af421 941
e5cad958
BP
942err_unlock:
943 rcu_read_unlock();
9321954a
JG
944err_flow_free:
945 ovs_flow_free(flow);
e5cad958
BP
946err_kfree_skb:
947 kfree_skb(packet);
948err:
d6569377 949 return err;
064af421
BP
950}
951
df2c07f4 952static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
7c3072cc
TG
953#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
954 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
955#else
956 [OVS_PACKET_ATTR_PACKET] = { .minlen = ETH_HLEN },
957#endif
df2c07f4
JP
958 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
959 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
982b8810
BP
960};
961
962static struct genl_ops dp_packet_genl_ops[] = {
df2c07f4 963 { .cmd = OVS_PACKET_CMD_EXECUTE,
982b8810
BP
964 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
965 .policy = packet_policy,
df2c07f4 966 .doit = ovs_packet_cmd_execute
982b8810
BP
967 }
968};
969
df2c07f4 970static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
064af421 971{
d6569377 972 int i;
cd2a59e9 973 struct flow_table *table = ovsl_dereference(dp->table);
f180c2e2 974
850b6b3b 975 stats->n_flows = ovs_flow_tbl_count(table);
064af421 976
7257b535 977 stats->n_hit = stats->n_missed = stats->n_lost = 0;
d6569377
BP
978 for_each_possible_cpu(i) {
979 const struct dp_stats_percpu *percpu_stats;
980 struct dp_stats_percpu local_stats;
821cb9fa 981 unsigned int start;
44e05eca 982
d6569377 983 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 984
d6569377 985 do {
821cb9fa 986 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
d6569377 987 local_stats = *percpu_stats;
821cb9fa 988 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
064af421 989
d6569377
BP
990 stats->n_hit += local_stats.n_hit;
991 stats->n_missed += local_stats.n_missed;
992 stats->n_lost += local_stats.n_lost;
993 }
994}
064af421 995
df2c07f4
JP
996static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
997 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
998 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
999 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 1000};
36956a7d 1001
37a1300c
BP
1002static struct genl_family dp_flow_genl_family = {
1003 .id = GENL_ID_GENERATE,
df2c07f4
JP
1004 .hdrsize = sizeof(struct ovs_header),
1005 .name = OVS_FLOW_FAMILY,
69685a88 1006 .version = OVS_FLOW_VERSION,
2a4999f3
PS
1007 .maxattr = OVS_FLOW_ATTR_MAX,
1008 SET_NETNSOK
37a1300c 1009};
ed099e92 1010
850b6b3b 1011static struct genl_multicast_group ovs_dp_flow_multicast_group = {
df2c07f4 1012 .name = OVS_FLOW_MCGROUP
37a1300c
BP
1013};
1014
9b405f1a
PS
1015static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
1016static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1017{
1018 const struct nlattr *a;
1019 struct nlattr *start;
1020 int err = 0, rem;
1021
1022 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1023 if (!start)
1024 return -EMSGSIZE;
1025
1026 nla_for_each_nested(a, attr, rem) {
1027 int type = nla_type(a);
1028 struct nlattr *st_sample;
1029
1030 switch (type) {
1031 case OVS_SAMPLE_ATTR_PROBABILITY:
1032 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
1033 return -EMSGSIZE;
1034 break;
1035 case OVS_SAMPLE_ATTR_ACTIONS:
1036 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1037 if (!st_sample)
1038 return -EMSGSIZE;
1039 err = actions_to_attr(nla_data(a), nla_len(a), skb);
1040 if (err)
1041 return err;
1042 nla_nest_end(skb, st_sample);
1043 break;
1044 }
1045 }
1046
1047 nla_nest_end(skb, start);
1048 return err;
1049}
1050
1051static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1052{
1053 const struct nlattr *ovs_key = nla_data(a);
1054 int key_type = nla_type(ovs_key);
1055 struct nlattr *start;
1056 int err;
1057
1058 switch (key_type) {
1059 case OVS_KEY_ATTR_IPV4_TUNNEL:
1060 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1061 if (!start)
1062 return -EMSGSIZE;
1063
1064 err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
1065 if (err)
1066 return err;
1067 nla_nest_end(skb, start);
1068 break;
1069 default:
1070 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1071 return -EMSGSIZE;
1072 break;
1073 }
1074
1075 return 0;
1076}
1077
1078static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
1079{
1080 const struct nlattr *a;
1081 int rem, err;
1082
1083 nla_for_each_attr(a, attr, len, rem) {
1084 int type = nla_type(a);
1085
1086 switch (type) {
1087 case OVS_ACTION_ATTR_SET:
1088 err = set_action_to_attr(a, skb);
1089 if (err)
1090 return err;
1091 break;
1092
1093 case OVS_ACTION_ATTR_SAMPLE:
1094 err = sample_action_to_attr(a, skb);
1095 if (err)
1096 return err;
1097 break;
1098 default:
1099 if (nla_put(skb, type, nla_len(a), nla_data(a)))
1100 return -EMSGSIZE;
1101 break;
1102 }
1103 }
1104
1105 return 0;
1106}
1107
0afa2373
TG
1108static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
1109{
1110 return NLMSG_ALIGN(sizeof(struct ovs_header))
1111 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
1112 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
1113 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
1114 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
1115 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
1116}
1117
cd2a59e9 1118/* Called with ovs_mutex. */
df2c07f4 1119static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
28aea917 1120 struct sk_buff *skb, u32 portid,
6455100f 1121 u32 seq, u32 flags, u8 cmd)
d6569377 1122{
37a1300c 1123 const int skb_orig_len = skb->len;
d6569377 1124 const struct sw_flow_actions *sf_acts;
9b405f1a 1125 struct nlattr *start;
df2c07f4
JP
1126 struct ovs_flow_stats stats;
1127 struct ovs_header *ovs_header;
d6569377
BP
1128 struct nlattr *nla;
1129 unsigned long used;
1130 u8 tcp_flags;
1131 int err;
064af421 1132
cd2a59e9 1133 sf_acts = ovsl_dereference(flow->sf_acts);
064af421 1134
28aea917 1135 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
df2c07f4 1136 if (!ovs_header)
37a1300c 1137 return -EMSGSIZE;
d6569377 1138
99769a40 1139 ovs_header->dp_ifindex = get_dpifindex(dp);
d6569377 1140
df2c07f4 1141 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
d6569377
BP
1142 if (!nla)
1143 goto nla_put_failure;
850b6b3b 1144 err = ovs_flow_to_nlattrs(&flow->key, skb);
d6569377 1145 if (err)
37a1300c 1146 goto error;
d6569377
BP
1147 nla_nest_end(skb, nla);
1148
1149 spin_lock_bh(&flow->lock);
1150 used = flow->used;
1151 stats.n_packets = flow->packet_count;
1152 stats.n_bytes = flow->byte_count;
1153 tcp_flags = flow->tcp_flags;
1154 spin_unlock_bh(&flow->lock);
1155
c3cc8c03
DM
1156 if (used &&
1157 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
1158 goto nla_put_failure;
d6569377 1159
c3cc8c03
DM
1160 if (stats.n_packets &&
1161 nla_put(skb, OVS_FLOW_ATTR_STATS,
1162 sizeof(struct ovs_flow_stats), &stats))
1163 goto nla_put_failure;
d6569377 1164
c3cc8c03
DM
1165 if (tcp_flags &&
1166 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
1167 goto nla_put_failure;
d6569377 1168
df2c07f4 1169 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
30053024
BP
1170 * this is the first flow to be dumped into 'skb'. This is unusual for
1171 * Netlink but individual action lists can be longer than
1172 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
1173 * The userspace caller can always fetch the actions separately if it
1174 * really wants them. (Most userspace callers in fact don't care.)
1175 *
1176 * This can only fail for dump operations because the skb is always
1177 * properly sized for single flows.
1178 */
9b405f1a 1179 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
f6f481ee
PS
1180 if (start) {
1181 err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
0a25b039
BP
1182 if (!err)
1183 nla_nest_end(skb, start);
1184 else {
1185 if (skb_orig_len)
1186 goto error;
1187
1188 nla_nest_cancel(skb, start);
1189 }
7aac03bd
JG
1190 } else if (skb_orig_len)
1191 goto nla_put_failure;
37a1300c 1192
df2c07f4 1193 return genlmsg_end(skb, ovs_header);
d6569377
BP
1194
1195nla_put_failure:
1196 err = -EMSGSIZE;
37a1300c 1197error:
df2c07f4 1198 genlmsg_cancel(skb, ovs_header);
d6569377 1199 return err;
44e05eca
BP
1200}
1201
df2c07f4 1202static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
44e05eca 1203{
37a1300c 1204 const struct sw_flow_actions *sf_acts;
d6569377 1205
cd2a59e9 1206 sf_acts = ovsl_dereference(flow->sf_acts);
d6569377 1207
0afa2373 1208 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
37a1300c 1209}
8d5ebd83 1210
6455100f
PS
1211static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
1212 struct datapath *dp,
28aea917 1213 u32 portid, u32 seq, u8 cmd)
37a1300c
BP
1214{
1215 struct sk_buff *skb;
1216 int retval;
d6569377 1217
df2c07f4 1218 skb = ovs_flow_cmd_alloc_info(flow);
37a1300c
BP
1219 if (!skb)
1220 return ERR_PTR(-ENOMEM);
d6569377 1221
28aea917 1222 retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
37a1300c 1223 BUG_ON(retval < 0);
d6569377 1224 return skb;
064af421
BP
1225}
1226
df2c07f4 1227static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 1228{
37a1300c 1229 struct nlattr **a = info->attrs;
df2c07f4 1230 struct ovs_header *ovs_header = info->userhdr;
37a1300c 1231 struct sw_flow_key key;
d6569377 1232 struct sw_flow *flow;
37a1300c 1233 struct sk_buff *reply;
9c52546b 1234 struct datapath *dp;
3544358a 1235 struct flow_table *table;
9b405f1a 1236 struct sw_flow_actions *acts = NULL;
bc4a05c6 1237 int error;
76abe283 1238 int key_len;
064af421 1239
37a1300c
BP
1240 /* Extract key. */
1241 error = -EINVAL;
df2c07f4 1242 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 1243 goto error;
850b6b3b 1244 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c
BP
1245 if (error)
1246 goto error;
064af421 1247
37a1300c 1248 /* Validate actions. */
df2c07f4 1249 if (a[OVS_FLOW_ATTR_ACTIONS]) {
9b405f1a
PS
1250 acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
1251 error = PTR_ERR(acts);
1252 if (IS_ERR(acts))
37a1300c 1253 goto error;
9b405f1a
PS
1254
1255 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
1256 if (error)
1257 goto err_kfree;
df2c07f4 1258 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
37a1300c
BP
1259 error = -EINVAL;
1260 goto error;
1261 }
1262
cd2a59e9 1263 ovs_lock();
2a4999f3 1264 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
d6569377 1265 error = -ENODEV;
9c52546b 1266 if (!dp)
cd2a59e9 1267 goto err_unlock_ovs;
704a1e09 1268
cd2a59e9 1269 table = ovsl_dereference(dp->table);
850b6b3b 1270 flow = ovs_flow_tbl_lookup(table, &key, key_len);
3544358a 1271 if (!flow) {
d6569377
BP
1272 /* Bail out if we're not allowed to create a new flow. */
1273 error = -ENOENT;
df2c07f4 1274 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
cd2a59e9 1275 goto err_unlock_ovs;
d6569377
BP
1276
1277 /* Expand table, if necessary, to make room. */
850b6b3b 1278 if (ovs_flow_tbl_need_to_expand(table)) {
3544358a
PS
1279 struct flow_table *new_table;
1280
850b6b3b 1281 new_table = ovs_flow_tbl_expand(table);
3544358a
PS
1282 if (!IS_ERR(new_table)) {
1283 rcu_assign_pointer(dp->table, new_table);
850b6b3b 1284 ovs_flow_tbl_deferred_destroy(table);
cd2a59e9 1285 table = ovsl_dereference(dp->table);
3544358a 1286 }
d6569377
BP
1287 }
1288
1289 /* Allocate flow. */
850b6b3b 1290 flow = ovs_flow_alloc();
d6569377
BP
1291 if (IS_ERR(flow)) {
1292 error = PTR_ERR(flow);
cd2a59e9 1293 goto err_unlock_ovs;
d6569377 1294 }
d6569377
BP
1295 clear_stats(flow);
1296
d6569377
BP
1297 rcu_assign_pointer(flow->sf_acts, acts);
1298
d6569377 1299 /* Put flow in bucket. */
13e24889 1300 ovs_flow_tbl_insert(table, flow, &key, key_len);
37a1300c 1301
28aea917 1302 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
6455100f
PS
1303 info->snd_seq,
1304 OVS_FLOW_CMD_NEW);
d6569377
BP
1305 } else {
1306 /* We found a matching flow. */
1307 struct sw_flow_actions *old_acts;
1308
1309 /* Bail out if we're not allowed to modify an existing flow.
1310 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1311 * because Generic Netlink treats the latter as a dump
1312 * request. We also accept NLM_F_EXCL in case that bug ever
1313 * gets fixed.
1314 */
1315 error = -EEXIST;
df2c07f4 1316 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
37a1300c 1317 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
cd2a59e9 1318 goto err_unlock_ovs;
d6569377
BP
1319
1320 /* Update actions. */
cd2a59e9 1321 old_acts = ovsl_dereference(flow->sf_acts);
9b405f1a
PS
1322 rcu_assign_pointer(flow->sf_acts, acts);
1323 ovs_flow_deferred_free_acts(old_acts);
d6569377 1324
28aea917 1325 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
6455100f 1326 info->snd_seq, OVS_FLOW_CMD_NEW);
d6569377
BP
1327
1328 /* Clear stats. */
df2c07f4 1329 if (a[OVS_FLOW_ATTR_CLEAR]) {
d6569377
BP
1330 spin_lock_bh(&flow->lock);
1331 clear_stats(flow);
1332 spin_unlock_bh(&flow->lock);
1333 }
9c52546b 1334 }
cd2a59e9 1335 ovs_unlock();
37a1300c
BP
1336
1337 if (!IS_ERR(reply))
e297c6b7 1338 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 1339 else
2a4999f3
PS
1340 netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
1341 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
d6569377 1342 return 0;
704a1e09 1343
cd2a59e9
PS
1344err_unlock_ovs:
1345 ovs_unlock();
9b405f1a 1346err_kfree:
ba400435 1347 kfree(acts);
37a1300c 1348error:
9c52546b 1349 return error;
704a1e09
BP
1350}
1351
df2c07f4 1352static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 1353{
37a1300c 1354 struct nlattr **a = info->attrs;
df2c07f4 1355 struct ovs_header *ovs_header = info->userhdr;
37a1300c 1356 struct sw_flow_key key;
37a1300c 1357 struct sk_buff *reply;
704a1e09 1358 struct sw_flow *flow;
9c52546b 1359 struct datapath *dp;
3544358a 1360 struct flow_table *table;
9c52546b 1361 int err;
76abe283 1362 int key_len;
704a1e09 1363
df2c07f4 1364 if (!a[OVS_FLOW_ATTR_KEY])
37a1300c 1365 return -EINVAL;
850b6b3b 1366 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c
BP
1367 if (err)
1368 return err;
704a1e09 1369
cd2a59e9 1370 ovs_lock();
2a4999f3 1371 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
1372 if (!dp) {
1373 err = -ENODEV;
1374 goto unlock;
1375 }
704a1e09 1376
cd2a59e9 1377 table = ovsl_dereference(dp->table);
850b6b3b 1378 flow = ovs_flow_tbl_lookup(table, &key, key_len);
cd2a59e9
PS
1379 if (!flow) {
1380 err = -ENOENT;
1381 goto unlock;
1382 }
d6569377 1383
28aea917 1384 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
6455100f 1385 info->snd_seq, OVS_FLOW_CMD_NEW);
cd2a59e9
PS
1386 if (IS_ERR(reply)) {
1387 err = PTR_ERR(reply);
1388 goto unlock;
1389 }
36956a7d 1390
cd2a59e9 1391 ovs_unlock();
37a1300c 1392 return genlmsg_reply(reply, info);
cd2a59e9
PS
1393unlock:
1394 ovs_unlock();
1395 return err;
d6569377 1396}
9c52546b 1397
df2c07f4 1398static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 1399{
37a1300c 1400 struct nlattr **a = info->attrs;
df2c07f4 1401 struct ovs_header *ovs_header = info->userhdr;
37a1300c 1402 struct sw_flow_key key;
37a1300c 1403 struct sk_buff *reply;
d6569377 1404 struct sw_flow *flow;
d6569377 1405 struct datapath *dp;
3544358a 1406 struct flow_table *table;
d6569377 1407 int err;
76abe283 1408 int key_len;
36956a7d 1409
cd2a59e9 1410 ovs_lock();
2a4999f3 1411 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
1412 if (!dp) {
1413 err = -ENODEV;
1414 goto unlock;
1415 }
2a4999f3 1416
cd2a59e9
PS
1417 if (!a[OVS_FLOW_ATTR_KEY]) {
1418 err = flush_flows(dp);
1419 goto unlock;
1420 }
850b6b3b 1421 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
37a1300c 1422 if (err)
cd2a59e9 1423 goto unlock;
d6569377 1424
cd2a59e9 1425 table = ovsl_dereference(dp->table);
850b6b3b 1426 flow = ovs_flow_tbl_lookup(table, &key, key_len);
cd2a59e9
PS
1427 if (!flow) {
1428 err = -ENOENT;
1429 goto unlock;
1430 }
d6569377 1431
df2c07f4 1432 reply = ovs_flow_cmd_alloc_info(flow);
cd2a59e9
PS
1433 if (!reply) {
1434 err = -ENOMEM;
1435 goto unlock;
1436 }
37a1300c 1437
850b6b3b 1438 ovs_flow_tbl_remove(table, flow);
37a1300c 1439
28aea917 1440 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
df2c07f4 1441 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
37a1300c
BP
1442 BUG_ON(err < 0);
1443
850b6b3b 1444 ovs_flow_deferred_free(flow);
cd2a59e9 1445 ovs_unlock();
37a1300c 1446
e297c6b7 1447 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
37a1300c 1448 return 0;
cd2a59e9
PS
1449unlock:
1450 ovs_unlock();
1451 return err;
37a1300c
BP
1452}
1453
df2c07f4 1454static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
37a1300c 1455{
df2c07f4 1456 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
37a1300c 1457 struct datapath *dp;
20d035b2 1458 struct flow_table *table;
37a1300c 1459
cd2a59e9 1460 ovs_lock();
2a4999f3 1461 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
cd2a59e9
PS
1462 if (!dp) {
1463 ovs_unlock();
37a1300c 1464 return -ENODEV;
cd2a59e9 1465 }
37a1300c 1466
cd2a59e9 1467 table = ovsl_dereference(dp->table);
20d035b2 1468
37a1300c 1469 for (;;) {
37a1300c
BP
1470 struct sw_flow *flow;
1471 u32 bucket, obj;
1472
1473 bucket = cb->args[0];
1474 obj = cb->args[1];
850b6b3b 1475 flow = ovs_flow_tbl_next(table, &bucket, &obj);
3544358a 1476 if (!flow)
37a1300c
BP
1477 break;
1478
6455100f 1479 if (ovs_flow_cmd_fill_info(flow, dp, skb,
28aea917 1480 NETLINK_CB(cb->skb).portid,
37a1300c 1481 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1482 OVS_FLOW_CMD_NEW) < 0)
37a1300c
BP
1483 break;
1484
1485 cb->args[0] = bucket;
1486 cb->args[1] = obj;
1487 }
cd2a59e9 1488 ovs_unlock();
37a1300c 1489 return skb->len;
704a1e09
BP
1490}
1491
37a1300c 1492static struct genl_ops dp_flow_genl_ops[] = {
df2c07f4 1493 { .cmd = OVS_FLOW_CMD_NEW,
37a1300c
BP
1494 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1495 .policy = flow_policy,
df2c07f4 1496 .doit = ovs_flow_cmd_new_or_set
37a1300c 1497 },
df2c07f4 1498 { .cmd = OVS_FLOW_CMD_DEL,
37a1300c
BP
1499 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1500 .policy = flow_policy,
df2c07f4 1501 .doit = ovs_flow_cmd_del
37a1300c 1502 },
df2c07f4 1503 { .cmd = OVS_FLOW_CMD_GET,
37a1300c
BP
1504 .flags = 0, /* OK for unprivileged users. */
1505 .policy = flow_policy,
df2c07f4
JP
1506 .doit = ovs_flow_cmd_get,
1507 .dumpit = ovs_flow_cmd_dump
37a1300c 1508 },
df2c07f4 1509 { .cmd = OVS_FLOW_CMD_SET,
37a1300c
BP
1510 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1511 .policy = flow_policy,
df2c07f4 1512 .doit = ovs_flow_cmd_new_or_set,
37a1300c
BP
1513 },
1514};
1515
df2c07f4 1516static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
aaff4b55 1517#ifdef HAVE_NLA_NUL_STRING
df2c07f4 1518 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
aaff4b55 1519#endif
b063d9f0 1520 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
d6569377
BP
1521};
1522
aaff4b55
BP
1523static struct genl_family dp_datapath_genl_family = {
1524 .id = GENL_ID_GENERATE,
df2c07f4
JP
1525 .hdrsize = sizeof(struct ovs_header),
1526 .name = OVS_DATAPATH_FAMILY,
69685a88 1527 .version = OVS_DATAPATH_VERSION,
2a4999f3
PS
1528 .maxattr = OVS_DP_ATTR_MAX,
1529 SET_NETNSOK
aaff4b55
BP
1530};
1531
850b6b3b 1532static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
df2c07f4 1533 .name = OVS_DATAPATH_MCGROUP
aaff4b55
BP
1534};
1535
0afa2373
TG
1536static size_t ovs_dp_cmd_msg_size(void)
1537{
1538 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1539
1540 msgsize += nla_total_size(IFNAMSIZ);
1541 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1542
1543 return msgsize;
1544}
1545
df2c07f4 1546static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
28aea917 1547 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1548{
df2c07f4 1549 struct ovs_header *ovs_header;
e926dfe3 1550 struct ovs_dp_stats dp_stats;
064af421
BP
1551 int err;
1552
28aea917 1553 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
aaff4b55 1554 flags, cmd);
df2c07f4 1555 if (!ovs_header)
aaff4b55 1556 goto error;
064af421 1557
b063d9f0 1558 ovs_header->dp_ifindex = get_dpifindex(dp);
064af421 1559
d6569377 1560 rcu_read_lock();
850b6b3b 1561 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
d6569377 1562 rcu_read_unlock();
064af421 1563 if (err)
d6569377 1564 goto nla_put_failure;
064af421 1565
e926dfe3 1566 get_dp_stats(dp, &dp_stats);
c3cc8c03
DM
1567 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1568 goto nla_put_failure;
d6569377 1569
df2c07f4 1570 return genlmsg_end(skb, ovs_header);
d6569377
BP
1571
1572nla_put_failure:
df2c07f4 1573 genlmsg_cancel(skb, ovs_header);
aaff4b55
BP
1574error:
1575 return -EMSGSIZE;
d6569377
BP
1576}
1577
28aea917 1578static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
aaff4b55 1579 u32 seq, u8 cmd)
d6569377 1580{
d6569377 1581 struct sk_buff *skb;
aaff4b55 1582 int retval;
d6569377 1583
0afa2373 1584 skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
064af421 1585 if (!skb)
d6569377 1586 return ERR_PTR(-ENOMEM);
659586ef 1587
28aea917 1588 retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
aaff4b55
BP
1589 if (retval < 0) {
1590 kfree_skb(skb);
1591 return ERR_PTR(retval);
1592 }
1593 return skb;
1594}
9dca7bd5 1595
df2c07f4 1596static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
aaff4b55 1597{
df2c07f4 1598 return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
d6569377
BP
1599}
1600
cd2a59e9 1601/* Called with ovs_mutex. */
2a4999f3
PS
1602static struct datapath *lookup_datapath(struct net *net,
1603 struct ovs_header *ovs_header,
6455100f 1604 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
d6569377 1605{
254f2dc8
BP
1606 struct datapath *dp;
1607
df2c07f4 1608 if (!a[OVS_DP_ATTR_NAME])
2a4999f3 1609 dp = get_dp(net, ovs_header->dp_ifindex);
254f2dc8 1610 else {
d6569377 1611 struct vport *vport;
d6569377 1612
057dd6d2 1613 rcu_read_lock();
2a4999f3 1614 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
df2c07f4 1615 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
057dd6d2 1616 rcu_read_unlock();
d6569377 1617 }
254f2dc8 1618 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1619}
1620
df2c07f4 1621static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1622{
aaff4b55 1623 struct nlattr **a = info->attrs;
d6569377 1624 struct vport_parms parms;
aaff4b55 1625 struct sk_buff *reply;
d6569377
BP
1626 struct datapath *dp;
1627 struct vport *vport;
2a4999f3 1628 struct ovs_net *ovs_net;
95b1d73a 1629 int err, i;
d6569377 1630
d6569377 1631 err = -EINVAL;
ea36840f 1632 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
aaff4b55
BP
1633 goto err;
1634
df2c07f4 1635 err = ovs_dp_cmd_validate(a);
aaff4b55
BP
1636 if (err)
1637 goto err;
d6569377 1638
cd2a59e9 1639 ovs_lock();
d6569377 1640
d6569377
BP
1641 err = -ENOMEM;
1642 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1643 if (dp == NULL)
cd2a59e9 1644 goto err_unlock_ovs;
2a4999f3 1645
0ceaa66c
JG
1646 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1647
d6569377
BP
1648 /* Allocate table. */
1649 err = -ENOMEM;
850b6b3b 1650 rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
d6569377
BP
1651 if (!dp->table)
1652 goto err_free_dp;
1653
99769a40
JG
1654 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1655 if (!dp->stats_percpu) {
1656 err = -ENOMEM;
1657 goto err_destroy_table;
1658 }
1659
95b1d73a
PS
1660 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1661 GFP_KERNEL);
1662 if (!dp->ports) {
1663 err = -ENOMEM;
1664 goto err_destroy_percpu;
1665 }
1666
1667 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1668 INIT_HLIST_HEAD(&dp->ports[i]);
1669
d6569377 1670 /* Set up our datapath device. */
df2c07f4
JP
1671 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1672 parms.type = OVS_VPORT_TYPE_INTERNAL;
d6569377
BP
1673 parms.options = NULL;
1674 parms.dp = dp;
df2c07f4 1675 parms.port_no = OVSP_LOCAL;
28aea917 1676 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
b063d9f0 1677
d6569377
BP
1678 vport = new_vport(&parms);
1679 if (IS_ERR(vport)) {
1680 err = PTR_ERR(vport);
1681 if (err == -EBUSY)
1682 err = -EEXIST;
1683
95b1d73a 1684 goto err_destroy_ports_array;
d6569377 1685 }
d6569377 1686
28aea917 1687 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
6455100f 1688 info->snd_seq, OVS_DP_CMD_NEW);
aaff4b55
BP
1689 err = PTR_ERR(reply);
1690 if (IS_ERR(reply))
1691 goto err_destroy_local_port;
1692
2a4999f3
PS
1693 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1694 list_add_tail(&dp->list_node, &ovs_net->dps);
d6569377 1695
cd2a59e9 1696 ovs_unlock();
d6569377 1697
e297c6b7 1698 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
d6569377
BP
1699 return 0;
1700
1701err_destroy_local_port:
cd2a59e9 1702 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
95b1d73a
PS
1703err_destroy_ports_array:
1704 kfree(dp->ports);
99769a40
JG
1705err_destroy_percpu:
1706 free_percpu(dp->stats_percpu);
d6569377 1707err_destroy_table:
cd2a59e9 1708 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
d6569377 1709err_free_dp:
0ceaa66c 1710 release_net(ovs_dp_get_net(dp));
d6569377 1711 kfree(dp);
cd2a59e9
PS
1712err_unlock_ovs:
1713 ovs_unlock();
d6569377 1714err:
064af421
BP
1715 return err;
1716}
1717
cd2a59e9 1718/* Called with ovs_mutex. */
2a4999f3 1719static void __dp_destroy(struct datapath *dp)
44e05eca 1720{
95b1d73a 1721 int i;
44e05eca 1722
95b1d73a
PS
1723 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1724 struct vport *vport;
f8dfbcb7 1725 struct hlist_node *n;
95b1d73a 1726
f8dfbcb7 1727 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
95b1d73a
PS
1728 if (vport->port_no != OVSP_LOCAL)
1729 ovs_dp_detach_port(vport);
1730 }
ed099e92 1731
254f2dc8 1732 list_del(&dp->list_node);
ed099e92 1733
cd2a59e9
PS
1734 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1735 * all port in datapath are destroyed first before freeing datapath.
1736 */
1737 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
99620d2c 1738
ed099e92 1739 call_rcu(&dp->rcu, destroy_dp_rcu);
2a4999f3
PS
1740}
1741
1742static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1743{
1744 struct sk_buff *reply;
1745 struct datapath *dp;
1746 int err;
1747
1748 err = ovs_dp_cmd_validate(info->attrs);
1749 if (err)
1750 return err;
1751
cd2a59e9 1752 ovs_lock();
2a4999f3
PS
1753 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1754 err = PTR_ERR(dp);
1755 if (IS_ERR(dp))
cd2a59e9 1756 goto unlock;
2a4999f3 1757
28aea917 1758 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
2a4999f3
PS
1759 info->snd_seq, OVS_DP_CMD_DEL);
1760 err = PTR_ERR(reply);
1761 if (IS_ERR(reply))
cd2a59e9 1762 goto unlock;
2a4999f3
PS
1763
1764 __dp_destroy(dp);
cd2a59e9 1765 ovs_unlock();
ed099e92 1766
e297c6b7 1767 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
99620d2c
JG
1768
1769 return 0;
cd2a59e9
PS
1770unlock:
1771 ovs_unlock();
1772 return err;
44e05eca
BP
1773}
1774
df2c07f4 1775static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1776{
aaff4b55 1777 struct sk_buff *reply;
d6569377 1778 struct datapath *dp;
d6569377 1779 int err;
064af421 1780
df2c07f4 1781 err = ovs_dp_cmd_validate(info->attrs);
aaff4b55
BP
1782 if (err)
1783 return err;
38c6ecbc 1784
cd2a59e9 1785 ovs_lock();
2a4999f3 1786 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9 1787 err = PTR_ERR(dp);
d6569377 1788 if (IS_ERR(dp))
cd2a59e9 1789 goto unlock;
38c6ecbc 1790
28aea917 1791 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
6455100f 1792 info->snd_seq, OVS_DP_CMD_NEW);
aaff4b55
BP
1793 if (IS_ERR(reply)) {
1794 err = PTR_ERR(reply);
2a4999f3 1795 netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
850b6b3b 1796 ovs_dp_datapath_multicast_group.id, err);
cd2a59e9
PS
1797 err = 0;
1798 goto unlock;
aaff4b55
BP
1799 }
1800
cd2a59e9 1801 ovs_unlock();
e297c6b7 1802 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
850b6b3b 1803
aaff4b55 1804 return 0;
cd2a59e9
PS
1805unlock:
1806 ovs_unlock();
1807 return err;
064af421
BP
1808}
1809
df2c07f4 1810static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1811{
aaff4b55 1812 struct sk_buff *reply;
d6569377 1813 struct datapath *dp;
d6569377 1814 int err;
1dcf111b 1815
df2c07f4 1816 err = ovs_dp_cmd_validate(info->attrs);
aaff4b55
BP
1817 if (err)
1818 return err;
1dcf111b 1819
cd2a59e9 1820 ovs_lock();
2a4999f3 1821 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
cd2a59e9
PS
1822 if (IS_ERR(dp)) {
1823 err = PTR_ERR(dp);
1824 goto unlock;
1825 }
1dcf111b 1826
28aea917 1827 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
6455100f 1828 info->snd_seq, OVS_DP_CMD_NEW);
cd2a59e9
PS
1829 if (IS_ERR(reply)) {
1830 err = PTR_ERR(reply);
1831 goto unlock;
1832 }
aaff4b55 1833
cd2a59e9 1834 ovs_unlock();
aaff4b55 1835 return genlmsg_reply(reply, info);
cd2a59e9
PS
1836
1837unlock:
1838 ovs_unlock();
1839 return err;
1dcf111b
JP
1840}
1841
df2c07f4 1842static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1843{
2a4999f3 1844 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1845 struct datapath *dp;
1846 int skip = cb->args[0];
1847 int i = 0;
a7786963 1848
cd2a59e9 1849 ovs_lock();
2a4999f3 1850 list_for_each_entry(dp, &ovs_net->dps, list_node) {
a2bab2f0 1851 if (i >= skip &&
28aea917 1852 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1853 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1854 OVS_DP_CMD_NEW) < 0)
aaff4b55 1855 break;
254f2dc8 1856 i++;
a7786963 1857 }
cd2a59e9 1858 ovs_unlock();
aaff4b55 1859
254f2dc8
BP
1860 cb->args[0] = i;
1861
aaff4b55 1862 return skb->len;
c19e6535
BP
1863}
1864
aaff4b55 1865static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1866 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1867 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1868 .policy = datapath_policy,
df2c07f4 1869 .doit = ovs_dp_cmd_new
aaff4b55 1870 },
df2c07f4 1871 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1872 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1873 .policy = datapath_policy,
df2c07f4 1874 .doit = ovs_dp_cmd_del
aaff4b55 1875 },
df2c07f4 1876 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1877 .flags = 0, /* OK for unprivileged users. */
1878 .policy = datapath_policy,
df2c07f4
JP
1879 .doit = ovs_dp_cmd_get,
1880 .dumpit = ovs_dp_cmd_dump
aaff4b55 1881 },
df2c07f4 1882 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1883 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1884 .policy = datapath_policy,
df2c07f4 1885 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1886 },
1887};
1888
df2c07f4 1889static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
f0fef760 1890#ifdef HAVE_NLA_NUL_STRING
df2c07f4 1891 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1892 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
f0fef760 1893#else
f613a0d7 1894 [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct ovs_vport_stats) },
f0fef760 1895#endif
d48c88ec
JG
1896 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1897 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1898 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1899 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1900};
1901
f0fef760
BP
1902static struct genl_family dp_vport_genl_family = {
1903 .id = GENL_ID_GENERATE,
df2c07f4
JP
1904 .hdrsize = sizeof(struct ovs_header),
1905 .name = OVS_VPORT_FAMILY,
69685a88 1906 .version = OVS_VPORT_VERSION,
2a4999f3
PS
1907 .maxattr = OVS_VPORT_ATTR_MAX,
1908 SET_NETNSOK
f0fef760
BP
1909};
1910
850b6b3b 1911struct genl_multicast_group ovs_dp_vport_multicast_group = {
df2c07f4 1912 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1913};
1914
cd2a59e9 1915/* Called with ovs_mutex or RCU read lock. */
df2c07f4 1916static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1917 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1918{
df2c07f4 1919 struct ovs_header *ovs_header;
e926dfe3 1920 struct ovs_vport_stats vport_stats;
c19e6535
BP
1921 int err;
1922
28aea917 1923 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1924 flags, cmd);
df2c07f4 1925 if (!ovs_header)
f0fef760 1926 return -EMSGSIZE;
c19e6535 1927
99769a40 1928 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1929
c3cc8c03
DM
1930 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1931 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1932 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
28aea917 1933 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
c3cc8c03 1934 goto nla_put_failure;
c19e6535 1935
850b6b3b 1936 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1937 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1938 &vport_stats))
1939 goto nla_put_failure;
c19e6535 1940
850b6b3b 1941 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1942 if (err == -EMSGSIZE)
1943 goto error;
c19e6535 1944
df2c07f4 1945 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1946
1947nla_put_failure:
1948 err = -EMSGSIZE;
f0fef760 1949error:
df2c07f4 1950 genlmsg_cancel(skb, ovs_header);
f0fef760 1951 return err;
064af421
BP
1952}
1953
cd2a59e9 1954/* Called with ovs_mutex or RCU read lock. */
28aea917 1955struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1956 u32 seq, u8 cmd)
064af421 1957{
c19e6535 1958 struct sk_buff *skb;
f0fef760 1959 int retval;
c19e6535 1960
f0fef760 1961 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1962 if (!skb)
1963 return ERR_PTR(-ENOMEM);
1964
28aea917 1965 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
c25ea534
JG
1966 BUG_ON(retval < 0);
1967
c19e6535 1968 return skb;
f0fef760 1969}
c19e6535 1970
df2c07f4 1971static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
f0fef760 1972{
df2c07f4 1973 return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
c19e6535 1974}
51d4d598 1975
cd2a59e9 1976/* Called with ovs_mutex or RCU read lock. */
2a4999f3
PS
1977static struct vport *lookup_vport(struct net *net,
1978 struct ovs_header *ovs_header,
df2c07f4 1979 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1980{
1981 struct datapath *dp;
1982 struct vport *vport;
1983
df2c07f4 1984 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1985 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1986 if (!vport)
c19e6535 1987 return ERR_PTR(-ENODEV);
24ce832d
BP
1988 if (ovs_header->dp_ifindex &&
1989 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1990 return ERR_PTR(-ENODEV);
c19e6535 1991 return vport;
df2c07f4
JP
1992 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1993 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1994
1995 if (port_no >= DP_MAX_PORTS)
f0fef760 1996 return ERR_PTR(-EFBIG);
c19e6535 1997
2a4999f3 1998 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1999 if (!dp)
2000 return ERR_PTR(-ENODEV);
f2459fe7 2001
cd2a59e9 2002 vport = ovs_vport_ovsl_rcu(dp, port_no);
ed099e92 2003 if (!vport)
17535c57 2004 return ERR_PTR(-ENODEV);
c19e6535
BP
2005 return vport;
2006 } else
2007 return ERR_PTR(-EINVAL);
064af421
BP
2008}
2009
df2c07f4 2010static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 2011{
f0fef760 2012 struct nlattr **a = info->attrs;
df2c07f4 2013 struct ovs_header *ovs_header = info->userhdr;
c19e6535 2014 struct vport_parms parms;
ed099e92 2015 struct sk_buff *reply;
c19e6535 2016 struct vport *vport;
c19e6535 2017 struct datapath *dp;
b0ec0f27 2018 u32 port_no;
c19e6535 2019 int err;
b0ec0f27 2020
c19e6535 2021 err = -EINVAL;
ea36840f
BP
2022 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2023 !a[OVS_VPORT_ATTR_UPCALL_PID])
f0fef760
BP
2024 goto exit;
2025
df2c07f4 2026 err = ovs_vport_cmd_validate(a);
f0fef760
BP
2027 if (err)
2028 goto exit;
51d4d598 2029
cd2a59e9 2030 ovs_lock();
2a4999f3 2031 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
2032 err = -ENODEV;
2033 if (!dp)
ed099e92 2034 goto exit_unlock;
c19e6535 2035
df2c07f4
JP
2036 if (a[OVS_VPORT_ATTR_PORT_NO]) {
2037 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
2038
2039 err = -EFBIG;
2040 if (port_no >= DP_MAX_PORTS)
ed099e92 2041 goto exit_unlock;
c19e6535 2042
cd2a59e9 2043 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
2044 err = -EBUSY;
2045 if (vport)
ed099e92 2046 goto exit_unlock;
c19e6535
BP
2047 } else {
2048 for (port_no = 1; ; port_no++) {
2049 if (port_no >= DP_MAX_PORTS) {
2050 err = -EFBIG;
ed099e92 2051 goto exit_unlock;
c19e6535 2052 }
cd2a59e9 2053 vport = ovs_vport_ovsl(dp, port_no);
c19e6535
BP
2054 if (!vport)
2055 break;
51d4d598 2056 }
064af421 2057 }
b0ec0f27 2058
df2c07f4
JP
2059 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2060 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2061 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
2062 parms.dp = dp;
2063 parms.port_no = port_no;
28aea917 2064 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
2065
2066 vport = new_vport(&parms);
2067 err = PTR_ERR(vport);
2068 if (IS_ERR(vport))
ed099e92 2069 goto exit_unlock;
c19e6535 2070
faef6d2d 2071 err = 0;
1fc7083d
JG
2072 if (a[OVS_VPORT_ATTR_STATS])
2073 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
2074
2075 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
2076 OVS_VPORT_CMD_NEW);
2077 if (IS_ERR(reply)) {
2078 err = PTR_ERR(reply);
850b6b3b 2079 ovs_dp_detach_port(vport);
ed099e92 2080 goto exit_unlock;
c19e6535 2081 }
e297c6b7
TG
2082
2083 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c19e6535 2084
ed099e92 2085exit_unlock:
cd2a59e9 2086 ovs_unlock();
c19e6535
BP
2087exit:
2088 return err;
44e05eca
BP
2089}
2090
df2c07f4 2091static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 2092{
f0fef760
BP
2093 struct nlattr **a = info->attrs;
2094 struct sk_buff *reply;
c19e6535 2095 struct vport *vport;
c19e6535 2096 int err;
44e05eca 2097
df2c07f4 2098 err = ovs_vport_cmd_validate(a);
f0fef760 2099 if (err)
c19e6535
BP
2100 goto exit;
2101
cd2a59e9 2102 ovs_lock();
2a4999f3 2103 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
2104 err = PTR_ERR(vport);
2105 if (IS_ERR(vport))
f0fef760 2106 goto exit_unlock;
44e05eca 2107
c19e6535 2108 err = 0;
6455100f 2109 if (a[OVS_VPORT_ATTR_TYPE] &&
16b82e84 2110 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
4879d4c7 2111 err = -EINVAL;
6455100f 2112
c25ea534
JG
2113 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2114 if (!reply) {
2115 err = -ENOMEM;
2116 goto exit_unlock;
2117 }
2118
4879d4c7 2119 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
850b6b3b 2120 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1fc7083d 2121 if (err)
c25ea534 2122 goto exit_free;
1fc7083d
JG
2123
2124 if (a[OVS_VPORT_ATTR_STATS])
2125 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
2126
2127 if (a[OVS_VPORT_ATTR_UPCALL_PID])
28aea917 2128 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 2129
c25ea534
JG
2130 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2131 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2132 BUG_ON(err < 0);
f0fef760 2133
cd2a59e9 2134 ovs_unlock();
8680ae4d 2135 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
c25ea534
JG
2136 return 0;
2137
2138exit_free:
2139 kfree_skb(reply);
f0fef760 2140exit_unlock:
cd2a59e9 2141 ovs_unlock();
c19e6535
BP
2142exit:
2143 return err;
064af421
BP
2144}
2145
df2c07f4 2146static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2147{
f0fef760
BP
2148 struct nlattr **a = info->attrs;
2149 struct sk_buff *reply;
c19e6535 2150 struct vport *vport;
c19e6535
BP
2151 int err;
2152
df2c07f4 2153 err = ovs_vport_cmd_validate(a);
f0fef760 2154 if (err)
c19e6535
BP
2155 goto exit;
2156
cd2a59e9 2157 ovs_lock();
2a4999f3 2158 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 2159 err = PTR_ERR(vport);
f0fef760
BP
2160 if (IS_ERR(vport))
2161 goto exit_unlock;
c19e6535 2162
df2c07f4 2163 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
2164 err = -EINVAL;
2165 goto exit_unlock;
2166 }
2167
28aea917
IY
2168 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2169 info->snd_seq, OVS_VPORT_CMD_DEL);
f0fef760
BP
2170 err = PTR_ERR(reply);
2171 if (IS_ERR(reply))
2172 goto exit_unlock;
2173
b57d5819 2174 err = 0;
850b6b3b 2175 ovs_dp_detach_port(vport);
f0fef760 2176
e297c6b7 2177 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
f0fef760
BP
2178
2179exit_unlock:
cd2a59e9 2180 ovs_unlock();
c19e6535
BP
2181exit:
2182 return err;
7c40efc9
BP
2183}
2184
df2c07f4 2185static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2186{
f0fef760 2187 struct nlattr **a = info->attrs;
df2c07f4 2188 struct ovs_header *ovs_header = info->userhdr;
ed099e92 2189 struct sk_buff *reply;
c19e6535 2190 struct vport *vport;
c19e6535
BP
2191 int err;
2192
df2c07f4 2193 err = ovs_vport_cmd_validate(a);
f0fef760
BP
2194 if (err)
2195 goto exit;
c19e6535 2196
ed099e92 2197 rcu_read_lock();
2a4999f3 2198 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
2199 err = PTR_ERR(vport);
2200 if (IS_ERR(vport))
f0fef760 2201 goto exit_unlock;
c19e6535 2202
28aea917
IY
2203 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2204 info->snd_seq, OVS_VPORT_CMD_NEW);
ed099e92
BP
2205 err = PTR_ERR(reply);
2206 if (IS_ERR(reply))
f0fef760 2207 goto exit_unlock;
ed099e92 2208
df2fa9b5
JG
2209 rcu_read_unlock();
2210
2211 return genlmsg_reply(reply, info);
ed099e92 2212
f0fef760 2213exit_unlock:
ed099e92 2214 rcu_read_unlock();
f0fef760 2215exit:
c19e6535
BP
2216 return err;
2217}
2218
df2c07f4 2219static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 2220{
df2c07f4 2221 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 2222 struct datapath *dp;
95b1d73a
PS
2223 int bucket = cb->args[0], skip = cb->args[1];
2224 int i, j = 0;
c19e6535 2225
2a4999f3 2226 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535 2227 if (!dp)
f0fef760 2228 return -ENODEV;
ed099e92
BP
2229
2230 rcu_read_lock();
95b1d73a 2231 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 2232 struct vport *vport;
95b1d73a
PS
2233
2234 j = 0;
f8dfbcb7 2235 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
2236 if (j >= skip &&
2237 ovs_vport_cmd_fill_info(vport, skb,
28aea917 2238 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
2239 cb->nlh->nlmsg_seq,
2240 NLM_F_MULTI,
2241 OVS_VPORT_CMD_NEW) < 0)
2242 goto out;
2243
2244 j++;
2245 }
2246 skip = 0;
c19e6535 2247 }
95b1d73a 2248out:
ed099e92 2249 rcu_read_unlock();
c19e6535 2250
95b1d73a
PS
2251 cb->args[0] = i;
2252 cb->args[1] = j;
f0fef760 2253
95b1d73a 2254 return skb->len;
7c40efc9
BP
2255}
2256
f0fef760 2257static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 2258 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
2259 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2260 .policy = vport_policy,
df2c07f4 2261 .doit = ovs_vport_cmd_new
f0fef760 2262 },
df2c07f4 2263 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
2264 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2265 .policy = vport_policy,
df2c07f4 2266 .doit = ovs_vport_cmd_del
f0fef760 2267 },
df2c07f4 2268 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
2269 .flags = 0, /* OK for unprivileged users. */
2270 .policy = vport_policy,
df2c07f4
JP
2271 .doit = ovs_vport_cmd_get,
2272 .dumpit = ovs_vport_cmd_dump
f0fef760 2273 },
df2c07f4 2274 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
2275 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2276 .policy = vport_policy,
df2c07f4 2277 .doit = ovs_vport_cmd_set,
f0fef760
BP
2278 },
2279};
2280
982b8810
BP
/* Bundles everything needed to register one generic netlink family. */
struct genl_family_and_ops {
	struct genl_family *family;		/* family to register */
	struct genl_ops *ops;			/* its operations table */
	int n_ops;				/* number of entries in @ops */
	struct genl_multicast_group *group;	/* multicast group, or NULL */
};
ed099e92 2287
982b8810 2288static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
2289 { &dp_datapath_genl_family,
2290 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
850b6b3b 2291 &ovs_dp_datapath_multicast_group },
f0fef760
BP
2292 { &dp_vport_genl_family,
2293 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
850b6b3b 2294 &ovs_dp_vport_multicast_group },
37a1300c
BP
2295 { &dp_flow_genl_family,
2296 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
850b6b3b 2297 &ovs_dp_flow_multicast_group },
982b8810
BP
2298 { &dp_packet_genl_family,
2299 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
2300 NULL },
2301};
ed099e92 2302
982b8810
BP
2303static void dp_unregister_genl(int n_families)
2304{
2305 int i;
ed099e92 2306
b867ca75 2307 for (i = 0; i < n_families; i++)
982b8810 2308 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
2309}
2310
982b8810 2311static int dp_register_genl(void)
064af421 2312{
982b8810
BP
2313 int n_registered;
2314 int err;
2315 int i;
064af421 2316
982b8810
BP
2317 n_registered = 0;
2318 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2319 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 2320
982b8810
BP
2321 err = genl_register_family_with_ops(f->family, f->ops,
2322 f->n_ops);
2323 if (err)
2324 goto error;
2325 n_registered++;
e22d4953 2326
982b8810
BP
2327 if (f->group) {
2328 err = genl_register_mc_group(f->family, f->group);
2329 if (err)
2330 goto error;
2331 }
2332 }
9cc8b4e4 2333
982b8810 2334 return 0;
064af421
BP
2335
2336error:
982b8810
BP
2337 dp_unregister_genl(n_registered);
2338 return err;
064af421
BP
2339}
2340
cd2a59e9 2341static void rehash_flow_table(struct work_struct *work)
acd051f1
PS
2342{
2343 struct datapath *dp;
2a4999f3
PS
2344 struct net *net;
2345
cd2a59e9 2346 ovs_lock();
2a4999f3
PS
2347 rtnl_lock();
2348 for_each_net(net) {
2349 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
acd051f1 2350
2a4999f3 2351 list_for_each_entry(dp, &ovs_net->dps, list_node) {
cd2a59e9 2352 struct flow_table *old_table = ovsl_dereference(dp->table);
2a4999f3 2353 struct flow_table *new_table;
acd051f1 2354
2a4999f3
PS
2355 new_table = ovs_flow_tbl_rehash(old_table);
2356 if (!IS_ERR(new_table)) {
2357 rcu_assign_pointer(dp->table, new_table);
2358 ovs_flow_tbl_deferred_destroy(old_table);
2359 }
acd051f1
PS
2360 }
2361 }
2a4999f3 2362 rtnl_unlock();
cd2a59e9 2363 ovs_unlock();
acd051f1
PS
2364 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2365}
2366
2a4999f3
PS
2367static int __net_init ovs_init_net(struct net *net)
2368{
2369 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2370
2371 INIT_LIST_HEAD(&ovs_net->dps);
cd2a59e9 2372 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2a4999f3
PS
2373 return 0;
2374}
2375
2376static void __net_exit ovs_exit_net(struct net *net)
2377{
cd2a59e9 2378 struct datapath *dp, *dp_next;
2a4999f3
PS
2379 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2380
cd2a59e9
PS
2381 ovs_lock();
2382 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2383 __dp_destroy(dp);
2384 ovs_unlock();
2385
2386 cancel_work_sync(&ovs_net->dp_notify_work);
2a4999f3
PS
2387}
2388
2389static struct pernet_operations ovs_net_ops = {
2390 .init = ovs_init_net,
2391 .exit = ovs_exit_net,
2392 .id = &ovs_net_id,
2393 .size = sizeof(struct ovs_net),
2394};
2395
22d24ebf
BP
2396static int __init dp_init(void)
2397{
2398 int err;
2399
f3d85db3 2400 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 2401
dc5f3fef 2402 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
8a07709c 2403 VERSION);
064af421 2404
16d650e5 2405 err = ovs_workqueues_init();
b9c15df9 2406 if (err)
cd2a59e9 2407 goto error;
b9c15df9 2408
850b6b3b 2409 err = ovs_flow_init();
3544358a 2410 if (err)
85c9de19 2411 goto error_wq;
3544358a 2412
850b6b3b 2413 err = ovs_vport_init();
064af421
BP
2414 if (err)
2415 goto error_flow_exit;
2416
2a4999f3 2417 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
2418 if (err)
2419 goto error_vport_exit;
2420
2a4999f3
PS
2421 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2422 if (err)
2423 goto error_netns_exit;
2424
982b8810
BP
2425 err = dp_register_genl();
2426 if (err < 0)
37a1300c 2427 goto error_unreg_notifier;
982b8810 2428
acd051f1
PS
2429 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2430
064af421
BP
2431 return 0;
2432
2433error_unreg_notifier:
850b6b3b 2434 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2435error_netns_exit:
2436 unregister_pernet_device(&ovs_net_ops);
f2459fe7 2437error_vport_exit:
850b6b3b 2438 ovs_vport_exit();
064af421 2439error_flow_exit:
850b6b3b 2440 ovs_flow_exit();
16d650e5
PS
2441error_wq:
2442 ovs_workqueues_exit();
064af421
BP
2443error:
2444 return err;
2445}
2446
2447static void dp_cleanup(void)
2448{
acd051f1 2449 cancel_delayed_work_sync(&rehash_flow_wq);
982b8810 2450 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
850b6b3b 2451 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2452 unregister_pernet_device(&ovs_net_ops);
2453 rcu_barrier();
850b6b3b
JG
2454 ovs_vport_exit();
2455 ovs_flow_exit();
16d650e5 2456 ovs_workqueues_exit();
064af421
BP
2457}
2458
2459module_init(dp_init);
2460module_exit(dp_cleanup);
2461
2462MODULE_DESCRIPTION("Open vSwitch switching datapath");
2463MODULE_LICENSE("GPL");
3d0666d2 2464MODULE_VERSION(VERSION);