/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "checksum.h"
#include "datapath.h"
#include "flow.h"
#include "genl_exec.h"
#include "vlan.h"
#include "tunnel.h"
#include "vport-internal_dev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
#error Kernels before 2.6.18 or after 3.8 are not supported by this version of Open vSwitch.
#endif

#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

int ovs_net_id __read_mostly;

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
 * genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
 */

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
			     const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
				  struct sk_buff *,
				  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, dp_ifindex);
	if (dev) {
		struct vport *vport = ovs_internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
	return vport->ops->get_name(vport);
}

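/* Returns the ifindex of the local (OVSP_LOCAL) port's network device, or 0
 * if the datapath has no local port. */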
static int get_dpifindex(struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->ops->get_ifindex(local);
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

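/* RCU callback that frees a datapath's flow table, statistics, port array and
 * the datapath itself once all RCU readers are done with it. */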
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
	free_percpu(dp->stats_percpu);
	release_net(ovs_dp_get_net(dp));
	kfree(dp->ports);
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		int key_len;

		/* Extract flow from 'skb' into 'key'. */
		error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		/* Look up flow. */
		flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table),
					   &key, key_len);
		if (unlikely(!flow)) {
			struct dp_upcall_info upcall;

			upcall.cmd = OVS_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = NULL;
			upcall.portid = p->upcall_portid;
			ovs_dp_upcall(dp, skb, &upcall);
			consume_skb(skb);
			stats_counter = &stats->n_missed;
			goto out;
		}

		OVS_CB(skb)->flow = flow;
	}

	stats_counter = &stats->n_hit;
	ovs_flow_used(OVS_CB(skb)->flow, skb);
	ovs_execute_actions(dp, skb);

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->sync);
	(*stats_counter)++;
	u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	 SET_NETNSOK
};

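/* Queues 'skb' to the userspace process identified by 'upcall_info->portid',
 * segmenting GSO packets first.  The caller keeps ownership of 'skb'; on
 * failure the per-CPU n_lost counter is bumped. */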
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int dp_ifindex;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex) {
		err = -ENODEV;
		goto err;
	}

	forward_ip_summed(skb, true);

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
	else
		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->sync);
	stats->n_lost++;
	u64_stats_update_end(&stats->sync);

	return err;
}

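/* Segments a GSO packet and queues each resulting segment to userspace.
 * Later UDP fragments have their flow key marked OVS_FRAG_TYPE_LATER. */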
static int queue_gso_packets(struct net *net, int dp_ifindex,
			     struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info)
{
	unsigned short gso_type = skb_shinfo(skb)->gso_type;
	struct dp_upcall_info later_info;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	/* Queue all of the segments. */
	skb = segs;
	do {
		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
		if (err)
			break;

		if (skb == segs && gso_type & SKB_GSO_UDP) {
			/* The initial flow key extracted by ovs_flow_extract()
			 * in this case is for a first fragment, so we need to
			 * properly mark later fragments.
			 */
			later_key = *upcall_info->key;
			later_key.ip.frag = OVS_FRAG_TYPE_LATER;

			later_info = *upcall_info;
			later_info.key = &later_key;
			upcall_info = &later_info;
		}
	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}

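/* Builds an OVS_PACKET_FAMILY Netlink message carrying the flow key, any
 * userdata and the packet data, then unicasts it to 'upcall_info->portid'. */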
static int queue_userspace_packet(struct net *net, int dp_ifindex,
				  struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb; /* to be queued to userspace */
	struct nlattr *nla;
	unsigned int len;
	int err;

	if (vlan_tx_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		err = vlan_deaccel_tag(nskb);
		if (err)
			return err;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	len = sizeof(struct ovs_header);
	len += nla_total_size(skb->len);
	len += nla_total_size(FLOW_BUFSIZE);
	if (upcall_info->userdata)
		len += NLA_ALIGN(upcall_info->userdata->nla_len);

	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
	nla_nest_end(user_skb, nla);

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

	skb_copy_and_csum_dev(skb, nla_data(nla));

	genlmsg_end(user_skb, upcall);
	err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
	kfree_skb(nskb);
	return err;
}

/* Called with genl_mutex. */
static int flush_flows(struct datapath *dp)
{
	struct flow_table *old_table;
	struct flow_table *new_table;

	old_table = genl_dereference(dp->table);
	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	ovs_flow_tbl_deferred_destroy(old_table);
	return 0;
}

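/* Grows '*sfa', if necessary, to make room for 'attr_len' more bytes of
 * actions and returns a pointer to the newly reserved space. */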
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
{
	struct sw_flow_actions *acts;
	int new_acts_size;
	int req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
			  (*sfa)->actions_len;

	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	new_acts_size = ksize(*sfa) * 2;

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
			return ERR_PTR(-EMSGSIZE);
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = ovs_flow_actions_alloc(new_acts_size);
	if (IS_ERR(acts))
		return (void *)acts;

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
{
	struct nlattr *a;

	a = reserve_sfa_size(sfa, nla_attr_size(len));
	if (IS_ERR(a))
		return PTR_ERR(a);

	a->nla_type = attrtype;
	a->nla_len = nla_attr_size(len);

	if (data)
		memcpy(nla_data(a), data, len);
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

	return 0;
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
{
	int used = (*sfa)->actions_len;
	int err;

	err = add_action(sfa, attrtype, NULL, 0);
	if (err)
		return err;

	return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
{
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);

	a->nla_len = sfa->actions_len - st_offset;
}

static int validate_and_copy_actions(const struct nlattr *attr,
				     const struct sw_flow_key *key, int depth,
				     struct sw_flow_actions **sfa);

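/* Validates an OVS_ACTION_ATTR_SAMPLE action, including its nested action
 * list, and copies it into '*sfa'. */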
static int validate_and_copy_sample(const struct nlattr *attr,
				    const struct sw_flow_key *key, int depth,
				    struct sw_flow_actions **sfa)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err, st_acts;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
	if (start < 0)
		return start;
	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
	if (err)
		return err;
	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
	if (st_acts < 0)
		return st_acts;

	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
	if (err)
		return err;

	add_nested_action_end(*sfa, st_acts);
	add_nested_action_end(*sfa, start);

	return 0;
}

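/* Rewriting transport ports is only valid if the flow key shows that the
 * packet actually carries them (a nonzero source or destination port). */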
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
	if (flow_key->eth.type == htons(ETH_P_IP)) {
		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
			return 0;
	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
			return 0;
	}

	return -EINVAL;
}

static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa)
{
	struct ovs_key_ipv4_tunnel tun_key;
	int err, start;

	err = ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
	if (err)
		return err;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
	if (start < 0)
		return start;

	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
	add_nested_action_end(*sfa, start);

	return err;
}

static int validate_set(const struct nlattr *a,
			const struct sw_flow_key *flow_key,
			struct sw_flow_actions **sfa,
			bool *set_tun)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);

	/* There can be only one key in an action */
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
		return -EINVAL;

	if (key_type > OVS_KEY_ATTR_MAX ||
	    (ovs_key_lens[key_type] != nla_len(ovs_key) &&
	     ovs_key_lens[key_type] != -1))
		return -EINVAL;

	switch (key_type) {
	const struct ovs_key_ipv4 *ipv4_key;
	const struct ovs_key_ipv6 *ipv6_key;
	int err;

	case OVS_KEY_ATTR_PRIORITY:
	case OVS_KEY_ATTR_ETHERNET:
		break;

	case OVS_KEY_ATTR_SKB_MARK:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
		if (nla_get_u32(ovs_key) != 0)
			return -EINVAL;
#endif
		break;

	case OVS_KEY_ATTR_TUNNEL:
		*set_tun = true;
		err = validate_and_copy_set_tun(a, sfa);
		if (err)
			return err;
		break;

	case OVS_KEY_ATTR_IPV4:
		if (flow_key->eth.type != htons(ETH_P_IP))
			return -EINVAL;

		if (!flow_key->ip.proto)
			return -EINVAL;

		ipv4_key = nla_data(ovs_key);
		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
			return -EINVAL;

		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_IPV6:
		if (flow_key->eth.type != htons(ETH_P_IPV6))
			return -EINVAL;

		if (!flow_key->ip.proto)
			return -EINVAL;

		ipv6_key = nla_data(ovs_key);
		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
			return -EINVAL;

		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
			return -EINVAL;

		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_TCP:
		if (flow_key->ip.proto != IPPROTO_TCP)
			return -EINVAL;

		return validate_tp_port(flow_key);

	case OVS_KEY_ATTR_UDP:
		if (flow_key->ip.proto != IPPROTO_UDP)
			return -EINVAL;

		return validate_tp_port(flow_key);

	default:
		return -EINVAL;
	}

	return 0;
}

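/* An OVS_ACTION_ATTR_USERSPACE action must carry a nonzero Netlink PID. */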
static int validate_userspace(const struct nlattr *attr)
{
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
	};
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
	int error;

	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
				 attr, userspace_policy);
	if (error)
		return error;

	if (!a[OVS_USERSPACE_ATTR_PID] ||
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
		return -EINVAL;

	return 0;
}

static int copy_action(const struct nlattr *from,
		       struct sw_flow_actions **sfa)
{
	int totlen = NLA_ALIGN(from->nla_len);
	struct nlattr *to;

	to = reserve_sfa_size(sfa, from->nla_len);
	if (IS_ERR(to))
		return PTR_ERR(to);

	memcpy(to, from, totlen);
	return 0;
}

static int validate_and_copy_actions(const struct nlattr *attr,
				     const struct sw_flow_key *key,
				     int depth,
				     struct sw_flow_actions **sfa)
{
	const struct nlattr *a;
	int rem, err;

	if (depth >= SAMPLE_ACTION_DEPTH)
		return -EOVERFLOW;

	nla_for_each_nested(a, attr, rem) {
		/* Expected argument lengths, (u32)-1 for variable length. */
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET] = (u32)-1,
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
		};
		const struct ovs_action_push_vlan *vlan;
		int type = nla_type(a);
		bool skip_copy;

		if (type > OVS_ACTION_ATTR_MAX ||
		    (action_lens[type] != nla_len(a) &&
		     action_lens[type] != (u32)-1))
			return -EINVAL;

		skip_copy = false;
		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
			err = validate_userspace(a);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			vlan = nla_data(a);
			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
				return -EINVAL;
			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_SET:
			err = validate_set(a, key, sfa, &skip_copy);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = validate_and_copy_sample(a, key, depth, sfa);
			if (err)
				return err;
			skip_copy = true;
			break;

		default:
			return -EINVAL;
		}
		if (!skip_copy) {
			err = copy_action(a, sfa);
			if (err)
				return err;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}

static void clear_stats(struct sw_flow *flow)
{
	flow->used = 0;
	flow->tcp_flags = 0;
	flow->packet_count = 0;
	flow->byte_count = 0;
}

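/* Handler for OVS_PACKET_CMD_EXECUTE: rebuilds the packet and a temporary
 * flow from the Netlink attributes and executes the given actions on it. */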
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct datapath *dp;
	struct ethhdr *eth;
	int len;
	int err;
	int key_len;

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS] ||
	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= 1536)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
	if (err)
		goto err_flow_free;

	err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
	if (err)
		goto err_flow_free;
	acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
	err = PTR_ERR(acts);
	if (IS_ERR(acts))
		goto err_flow_free;

	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
	rcu_assign_pointer(flow->sf_acts, acts);
	if (err)
		goto err_flow_free;

	OVS_CB(packet)->flow = flow;
	packet->priority = flow->key.phy.priority;
	skb_set_mark(packet, flow->key.phy.skb_mark);

	rcu_read_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	local_bh_disable();
	err = ovs_execute_actions(dp, packet);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};

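/* Fills 'stats' with the datapath's flow count and the per-CPU hit, miss and
 * loss counters summed under their u64 stats sequence counters. */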
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
	int i;
	struct flow_table *table = genl_dereference(dp->table);

	stats->n_flows = ovs_flow_tbl_count(table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
	}
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	 SET_NETNSOK
};

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP
};

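/* The *_to_attr() helpers below convert the kernel's internal copy of an
 * action list back into the Netlink attribute layout expected by userspace. */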
static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
	const struct nlattr *a;
	struct nlattr *start;
	int err = 0, rem;

	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
	if (!start)
		return -EMSGSIZE;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		struct nlattr *st_sample;

		switch (type) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
				return -EMSGSIZE;
			break;
		case OVS_SAMPLE_ATTR_ACTIONS:
			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
			if (!st_sample)
				return -EMSGSIZE;
			err = actions_to_attr(nla_data(a), nla_len(a), skb);
			if (err)
				return err;
			nla_nest_end(skb, st_sample);
			break;
		}
	}

	nla_nest_end(skb, start);
	return err;
}

static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	struct nlattr *start;
	int err;

	switch (key_type) {
	case OVS_KEY_ATTR_IPV4_TUNNEL:
		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
		if (!start)
			return -EMSGSIZE;

		err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
		if (err)
			return err;
		nla_nest_end(skb, start);
		break;
	default:
		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
			return -EMSGSIZE;
		break;
	}

	return 0;
}

static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
{
	const struct nlattr *a;
	int rem, err;

	nla_for_each_attr(a, attr, len, rem) {
		int type = nla_type(a);

		switch (type) {
		case OVS_ACTION_ATTR_SET:
			err = set_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample_action_to_attr(a, skb);
			if (err)
				return err;
			break;
		default:
			if (nla_put(skb, type, nla_len(a), nla_data(a)))
				return -EMSGSIZE;
			break;
		}
	}

	return 0;
}

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	const struct sw_flow_actions *sf_acts;
	struct nlattr *start;
	struct ovs_flow_stats stats;
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
	int err;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;
	err = ovs_flow_to_nlattrs(&flow->key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	spin_lock_bh(&flow->lock);
	used = flow->used;
	stats.n_packets = flow->packet_count;
	stats.n_bytes = flow->byte_count;
	tcp_flags = flow->tcp_flags;
	spin_unlock_bh(&flow->lock);

	if (used &&
	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
		goto nla_put_failure;

	if (stats.n_packets &&
	    nla_put(skb, OVS_FLOW_ATTR_STATS,
		    sizeof(struct ovs_flow_stats), &stats))
		goto nla_put_failure;

	if (tcp_flags &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
		goto nla_put_failure;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				goto error;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len)
		goto nla_put_failure;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

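/* Allocates a reply skb large enough for a single flow: its key, actions,
 * statistics, TCP flags and last-used time. */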
static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
	const struct sw_flow_actions *sf_acts;
	int len;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	/* OVS_FLOW_ATTR_KEY */
	len = nla_total_size(FLOW_BUFSIZE);
	/* OVS_FLOW_ATTR_ACTIONS */
	len += nla_total_size(sf_acts->actions_len);
	/* OVS_FLOW_ATTR_STATS */
	len += nla_total_size(sizeof(struct ovs_flow_stats));
	/* OVS_FLOW_ATTR_TCP_FLAGS */
	len += nla_total_size(1);
	/* OVS_FLOW_ATTR_USED */
	len += nla_total_size(8);

	len += NLMSG_ALIGN(sizeof(struct ovs_header));

	return genlmsg_new(len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
					       struct datapath *dp,
					       u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(flow);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
	BUG_ON(retval < 0);
	return skb;
}

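/* Shared handler for OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET: validates the key
 * and actions, then inserts a new flow (expanding the table if needed) or
 * replaces the action list of an existing one. */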
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply;
	struct datapath *dp;
	struct flow_table *table;
	struct sw_flow_actions *acts = NULL;
	int error;
	int key_len;

	/* Extract key. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY])
		goto error;
	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error;

		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
		if (error)
			goto err_kfree;
	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
		error = -EINVAL;
		goto error;
	}

	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	error = -ENODEV;
	if (!dp)
		goto err_kfree;

	table = genl_dereference(dp->table);
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
	if (!flow) {
		/* Bail out if we're not allowed to create a new flow. */
		error = -ENOENT;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
			goto err_kfree;

		/* Expand table, if necessary, to make room. */
		if (ovs_flow_tbl_need_to_expand(table)) {
			struct flow_table *new_table;

			new_table = ovs_flow_tbl_expand(table);
			if (!IS_ERR(new_table)) {
				rcu_assign_pointer(dp->table, new_table);
				ovs_flow_tbl_deferred_destroy(table);
				table = genl_dereference(dp->table);
			}
		}

		/* Allocate flow. */
		flow = ovs_flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto err_kfree;
		}
		clear_stats(flow);

		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		ovs_flow_tbl_insert(table, flow, &key, key_len);

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
						info->snd_seq,
						OVS_FLOW_CMD_NEW);
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		error = -EEXIST;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
			goto err_kfree;

		/* Update actions. */
		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_genl_is_held());
		rcu_assign_pointer(flow->sf_acts, acts);
		ovs_flow_deferred_free_acts(old_acts);

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
						info->snd_seq, OVS_FLOW_CMD_NEW);

		/* Clear stats. */
		if (a[OVS_FLOW_ATTR_CLEAR]) {
			spin_lock_bh(&flow->lock);
			clear_stats(flow);
			spin_unlock_bh(&flow->lock);
		}
	}

	if (!IS_ERR(reply))
		genl_notify(reply, genl_info_net(info), info->snd_portid,
			    ovs_dp_flow_multicast_group.id, info->nlhdr,
			    GFP_KERNEL);
	else
		netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
	return 0;

err_kfree:
	kfree(acts);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct flow_table *table;
	int err;
	int key_len;

	if (!a[OVS_FLOW_ATTR_KEY])
		return -EINVAL;
	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = genl_dereference(dp->table);
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
	if (!flow)
		return -ENOENT;

	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
					info->snd_seq, OVS_FLOW_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct flow_table *table;
	int err;
	int key_len;

	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	if (!a[OVS_FLOW_ATTR_KEY])
		return flush_flows(dp);

	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	table = genl_dereference(dp->table);
	flow = ovs_flow_tbl_lookup(table, &key, key_len);
	if (!flow)
		return -ENOENT;

	reply = ovs_flow_cmd_alloc_info(flow);
	if (!reply)
		return -ENOMEM;

	ovs_flow_tbl_remove(table, flow);

	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	ovs_flow_deferred_free(flow);

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	struct flow_table *table;

	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = genl_dereference(dp->table);

	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_next(table, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, dp, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set,
	},
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	 SET_NETNSOK
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

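/* Fills 'skb' with a datapath reply message: the datapath's name and its
 * accumulated statistics. */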
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	rcu_read_lock();
	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats);
	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
		goto nla_put_failure;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct net *net,
					struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

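/* Handler for OVS_DP_CMD_NEW: allocates the datapath, its flow table, per-CPU
 * stats and vport hash table, then creates the internal OVSP_LOCAL port named
 * after the datapath. */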
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err, i;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	err = ovs_dp_cmd_validate(a);
	if (err)
		goto err;

	rtnl_lock();

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_unlock_rtnl;

	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_table;
	}

	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
			    GFP_KERNEL);
	if (!dp->ports) {
		err = -ENOMEM;
		goto err_destroy_percpu;
	}

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_ports_array;
	}

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail(&dp->list_node, &ovs_net->dps);

	rtnl_unlock();

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
		    GFP_KERNEL);
	return 0;

err_destroy_local_port:
	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
err_destroy_ports_array:
	kfree(dp->ports);
err_destroy_percpu:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(genl_dereference(dp->table));
err_free_dp:
	release_net(ovs_dp_get_net(dp));
	kfree(dp);
err_unlock_rtnl:
	rtnl_unlock();
err:
	return err;
}

/* Called with genl_mutex. */
static void __dp_destroy(struct datapath *dp)
{
	int i;

	rtnl_lock();

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del(&dp->list_node);
	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));

	/* rtnl_unlock() will wait until all the references to devices that
	 * are pending unregistration have been dropped.  We do it here to
	 * ensure that any internal devices (which contain DP pointers) are
	 * fully destroyed before freeing the datapath.
	 */
	rtnl_unlock();

	call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		return err;

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		return err;

	__dp_destroy(dp);

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
		    GFP_KERNEL);

	return 0;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
				ovs_dp_datapath_multicast_group.id, err);
		return 0;
	}

	genl_notify(reply, genl_info_net(info), info->snd_portid,
		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
		    GFP_KERNEL);

	return 0;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

a7786963 1742{
2a4999f3 1743 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
254f2dc8
BP
1744 struct datapath *dp;
1745 int skip = cb->args[0];
1746 int i = 0;
a7786963 1747
2a4999f3 1748 list_for_each_entry(dp, &ovs_net->dps, list_node) {
a2bab2f0 1749 if (i >= skip &&
28aea917 1750 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
aaff4b55 1751 cb->nlh->nlmsg_seq, NLM_F_MULTI,
df2c07f4 1752 OVS_DP_CMD_NEW) < 0)
aaff4b55 1753 break;
254f2dc8 1754 i++;
a7786963 1755 }
aaff4b55 1756
254f2dc8
BP
1757 cb->args[0] = i;
1758
aaff4b55 1759 return skb->len;
c19e6535
BP
1760}
1761
aaff4b55 1762static struct genl_ops dp_datapath_genl_ops[] = {
df2c07f4 1763 { .cmd = OVS_DP_CMD_NEW,
aaff4b55
BP
1764 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1765 .policy = datapath_policy,
df2c07f4 1766 .doit = ovs_dp_cmd_new
aaff4b55 1767 },
df2c07f4 1768 { .cmd = OVS_DP_CMD_DEL,
aaff4b55
BP
1769 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1770 .policy = datapath_policy,
df2c07f4 1771 .doit = ovs_dp_cmd_del
aaff4b55 1772 },
df2c07f4 1773 { .cmd = OVS_DP_CMD_GET,
aaff4b55
BP
1774 .flags = 0, /* OK for unprivileged users. */
1775 .policy = datapath_policy,
df2c07f4
JP
1776 .doit = ovs_dp_cmd_get,
1777 .dumpit = ovs_dp_cmd_dump
aaff4b55 1778 },
df2c07f4 1779 { .cmd = OVS_DP_CMD_SET,
aaff4b55
BP
1780 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1781 .policy = datapath_policy,
df2c07f4 1782 .doit = ovs_dp_cmd_set,
aaff4b55
BP
1783 },
1784};
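
/* The four operations above are the whole datapath control surface exposed
 * over generic netlink.  Purely as an illustration (this is NOT part of
 * datapath.c), the hypothetical userspace sketch below uses libnl-3 to issue
 * OVS_DP_CMD_GET as a dump; the function name dump_datapaths is invented for
 * the sketch and error handling is omitted. */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

/* Hypothetical example, not kernel code: dump every datapath.  A real
 * client would install a valid-message callback with nl_socket_modify_cb()
 * to parse the NLM_F_MULTI replies produced by ovs_dp_cmd_dump(). */
static int dump_datapaths(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	struct ovs_header *ovs_header;
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);

	ovs_header = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
				 sizeof(*ovs_header), NLM_F_DUMP,
				 OVS_DP_CMD_GET, OVS_DATAPATH_VERSION);
	ovs_header->dp_ifindex = 0;	/* ignored when dumping */

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}
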
1785
df2c07f4 1786static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
f0fef760 1787#ifdef HAVE_NLA_NUL_STRING
df2c07f4 1788 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
f613a0d7 1789 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
f0fef760 1790#else
f613a0d7 1791 [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct ovs_vport_stats) },
f0fef760 1792#endif
d48c88ec
JG
1793 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1794 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
b063d9f0 1795 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
df2c07f4 1796 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
c19e6535
BP
1797};
1798
f0fef760
BP
1799static struct genl_family dp_vport_genl_family = {
1800 .id = GENL_ID_GENERATE,
df2c07f4
JP
1801 .hdrsize = sizeof(struct ovs_header),
1802 .name = OVS_VPORT_FAMILY,
69685a88 1803 .version = OVS_VPORT_VERSION,
2a4999f3
PS
1804 .maxattr = OVS_VPORT_ATTR_MAX,
1805 SET_NETNSOK
f0fef760
BP
1806};
1807
850b6b3b 1808struct genl_multicast_group ovs_dp_vport_multicast_group = {
df2c07f4 1809 .name = OVS_VPORT_MCGROUP
f0fef760
BP
1810};
1811
1812/* Called with RTNL lock or RCU read lock. */
df2c07f4 1813static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
28aea917 1814 u32 portid, u32 seq, u32 flags, u8 cmd)
064af421 1815{
df2c07f4 1816 struct ovs_header *ovs_header;
e926dfe3 1817 struct ovs_vport_stats vport_stats;
c19e6535
BP
1818 int err;
1819
28aea917 1820 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
f0fef760 1821 flags, cmd);
df2c07f4 1822 if (!ovs_header)
f0fef760 1823 return -EMSGSIZE;
c19e6535 1824
99769a40 1825 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
c19e6535 1826
c3cc8c03
DM
1827 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1828 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1829 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
28aea917 1830 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
c3cc8c03 1831 goto nla_put_failure;
c19e6535 1832
850b6b3b 1833 ovs_vport_get_stats(vport, &vport_stats);
c3cc8c03
DM
1834 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1835 &vport_stats))
1836 goto nla_put_failure;
c19e6535 1837
850b6b3b 1838 err = ovs_vport_get_options(vport, skb);
f0fef760
BP
1839 if (err == -EMSGSIZE)
1840 goto error;
c19e6535 1841
df2c07f4 1842 return genlmsg_end(skb, ovs_header);
c19e6535
BP
1843
1844nla_put_failure:
1845 err = -EMSGSIZE;
f0fef760 1846error:
df2c07f4 1847 genlmsg_cancel(skb, ovs_header);
f0fef760 1848 return err;
064af421
BP
1849}
1850
f0fef760 1851/* Called with RTNL lock or RCU read lock. */
28aea917 1852struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
f14d8083 1853 u32 seq, u8 cmd)
064af421 1854{
c19e6535 1855 struct sk_buff *skb;
f0fef760 1856 int retval;
c19e6535 1857
f0fef760 1858 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1859 if (!skb)
1860 return ERR_PTR(-ENOMEM);
1861
28aea917 1862 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
f0fef760
BP
1863 if (retval < 0) {
1864 kfree_skb(skb);
1865 return ERR_PTR(retval);
1866 }
c19e6535 1867 return skb;
f0fef760 1868}
c19e6535 1869
df2c07f4 1870static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
f0fef760 1871{
df2c07f4 1872 return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
c19e6535 1873}
51d4d598 1874
ed099e92 1875/* Called with RTNL lock or RCU read lock. */
2a4999f3
PS
1876static struct vport *lookup_vport(struct net *net,
1877 struct ovs_header *ovs_header,
df2c07f4 1878 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
c19e6535
BP
1879{
1880 struct datapath *dp;
1881 struct vport *vport;
1882
df2c07f4 1883 if (a[OVS_VPORT_ATTR_NAME]) {
2a4999f3 1884 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
ed099e92 1885 if (!vport)
c19e6535 1886 return ERR_PTR(-ENODEV);
24ce832d
BP
1887 if (ovs_header->dp_ifindex &&
1888 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1889 return ERR_PTR(-ENODEV);
c19e6535 1890 return vport;
df2c07f4
JP
1891 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1892 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1893
1894 if (port_no >= DP_MAX_PORTS)
f0fef760 1895 return ERR_PTR(-EFBIG);
c19e6535 1896
2a4999f3 1897 dp = get_dp(net, ovs_header->dp_ifindex);
c19e6535
BP
1898 if (!dp)
1899 return ERR_PTR(-ENODEV);
f2459fe7 1900
95b1d73a 1901 vport = ovs_vport_rtnl_rcu(dp, port_no);
ed099e92 1902 if (!vport)
17535c57 1903 return ERR_PTR(-ENODEV);
c19e6535
BP
1904 return vport;
1905 } else
1906 return ERR_PTR(-EINVAL);
064af421
BP
1907}
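
/* As a hypothetical illustration of the first branch in lookup_vport()
 * above (resolution by name, with dp_ifindex left at zero), a userspace
 * client could fetch a single vport as sketched below.  The helper name
 * get_vport_by_name and the use of libnl-3 are assumptions of the sketch,
 * and error handling is omitted. */
static int get_vport_by_name(struct nl_sock *sk, int vport_family,
			     const char *name)
{
	struct nl_msg *msg = nlmsg_alloc();
	struct ovs_header *ovs_header;

	ovs_header = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, vport_family,
				 sizeof(*ovs_header), 0,
				 OVS_VPORT_CMD_GET, OVS_VPORT_VERSION);
	ovs_header->dp_ifindex = 0;	/* optional once a name is supplied */
	nla_put_string(msg, OVS_VPORT_ATTR_NAME, name);

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);	/* unicast reply built by ovs_vport_cmd_get() */

	nlmsg_free(msg);
	return 0;
}
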
1908
df2c07f4 1909static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1910{
f0fef760 1911 struct nlattr **a = info->attrs;
df2c07f4 1912 struct ovs_header *ovs_header = info->userhdr;
c19e6535 1913 struct vport_parms parms;
ed099e92 1914 struct sk_buff *reply;
c19e6535 1915 struct vport *vport;
c19e6535 1916 struct datapath *dp;
b0ec0f27 1917 u32 port_no;
c19e6535 1918 int err;
b0ec0f27 1919
c19e6535 1920 err = -EINVAL;
ea36840f
BP
1921 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1922 !a[OVS_VPORT_ATTR_UPCALL_PID])
f0fef760
BP
1923 goto exit;
1924
df2c07f4 1925 err = ovs_vport_cmd_validate(a);
f0fef760
BP
1926 if (err)
1927 goto exit;
51d4d598 1928
c19e6535 1929 rtnl_lock();
2a4999f3 1930 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535
BP
1931 err = -ENODEV;
1932 if (!dp)
ed099e92 1933 goto exit_unlock;
c19e6535 1934
df2c07f4
JP
1935 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1936 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
c19e6535
BP
1937
1938 err = -EFBIG;
1939 if (port_no >= DP_MAX_PORTS)
ed099e92 1940 goto exit_unlock;
c19e6535 1941
95b1d73a 1942 vport = ovs_vport_rtnl(dp, port_no);
c19e6535
BP
1943 err = -EBUSY;
1944 if (vport)
ed099e92 1945 goto exit_unlock;
c19e6535
BP
1946 } else {
1947 for (port_no = 1; ; port_no++) {
1948 if (port_no >= DP_MAX_PORTS) {
1949 err = -EFBIG;
ed099e92 1950 goto exit_unlock;
c19e6535 1951 }
95b1d73a 1952 vport = ovs_vport_rtnl(dp, port_no);
c19e6535
BP
1953 if (!vport)
1954 break;
51d4d598 1955 }
064af421 1956 }
b0ec0f27 1957
df2c07f4
JP
1958 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1959 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1960 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
c19e6535
BP
1961 parms.dp = dp;
1962 parms.port_no = port_no;
28aea917 1963 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535
BP
1964
1965 vport = new_vport(&parms);
1966 err = PTR_ERR(vport);
1967 if (IS_ERR(vport))
ed099e92 1968 goto exit_unlock;
c19e6535 1969
faef6d2d 1970 err = 0;
1fc7083d
JG
1971 if (a[OVS_VPORT_ATTR_STATS])
1972 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1973
1974 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1975 OVS_VPORT_CMD_NEW);
1976 if (IS_ERR(reply)) {
1977 err = PTR_ERR(reply);
850b6b3b 1978 ovs_dp_detach_port(vport);
ed099e92 1979 goto exit_unlock;
c19e6535 1980 }
28aea917 1981 genl_notify(reply, genl_info_net(info), info->snd_portid,
850b6b3b 1982 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
c19e6535 1983
ed099e92 1984exit_unlock:
c19e6535 1985 rtnl_unlock();
c19e6535
BP
1986exit:
1987 return err;
44e05eca
BP
1988}
1989
df2c07f4 1990static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1991{
f0fef760
BP
1992 struct nlattr **a = info->attrs;
1993 struct sk_buff *reply;
c19e6535 1994 struct vport *vport;
c19e6535 1995 int err;
44e05eca 1996
df2c07f4 1997 err = ovs_vport_cmd_validate(a);
f0fef760 1998 if (err)
c19e6535
BP
1999 goto exit;
2000
2001 rtnl_lock();
2a4999f3 2002 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535
BP
2003 err = PTR_ERR(vport);
2004 if (IS_ERR(vport))
f0fef760 2005 goto exit_unlock;
44e05eca 2006
c19e6535 2007 err = 0;
6455100f 2008 if (a[OVS_VPORT_ATTR_TYPE] &&
16b82e84 2009 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
4879d4c7 2010 err = -EINVAL;
6455100f 2011
4879d4c7 2012 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
850b6b3b 2013 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1fc7083d 2014 if (err)
53509ad5 2015 goto exit_unlock;
1fc7083d
JG
2016
2017 if (a[OVS_VPORT_ATTR_STATS])
2018 ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
2019
2020 if (a[OVS_VPORT_ATTR_UPCALL_PID])
28aea917 2021 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
c19e6535 2022
28aea917
IY
2023 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2024 info->snd_seq, OVS_VPORT_CMD_NEW);
f0fef760 2025 if (IS_ERR(reply)) {
2a4999f3 2026 netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
7a6c067d
AA
2027 ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
2028 goto exit_unlock;
f0fef760
BP
2029 }
2030
28aea917 2031 genl_notify(reply, genl_info_net(info), info->snd_portid,
850b6b3b 2032 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
f0fef760
BP
2033
2034exit_unlock:
c19e6535
BP
2035 rtnl_unlock();
2036exit:
2037 return err;
064af421
BP
2038}
2039
df2c07f4 2040static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2041{
f0fef760
BP
2042 struct nlattr **a = info->attrs;
2043 struct sk_buff *reply;
c19e6535 2044 struct vport *vport;
c19e6535
BP
2045 int err;
2046
df2c07f4 2047 err = ovs_vport_cmd_validate(a);
f0fef760 2048 if (err)
c19e6535
BP
2049 goto exit;
2050
2051 rtnl_lock();
2a4999f3 2052 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
c19e6535 2053 err = PTR_ERR(vport);
f0fef760
BP
2054 if (IS_ERR(vport))
2055 goto exit_unlock;
c19e6535 2056
df2c07f4 2057 if (vport->port_no == OVSP_LOCAL) {
f0fef760
BP
2058 err = -EINVAL;
2059 goto exit_unlock;
2060 }
2061
28aea917
IY
2062 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2063 info->snd_seq, OVS_VPORT_CMD_DEL);
f0fef760
BP
2064 err = PTR_ERR(reply);
2065 if (IS_ERR(reply))
2066 goto exit_unlock;
2067
b57d5819 2068 err = 0;
850b6b3b 2069 ovs_dp_detach_port(vport);
f0fef760 2070
28aea917 2071 genl_notify(reply, genl_info_net(info), info->snd_portid,
850b6b3b 2072 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
f0fef760
BP
2073
2074exit_unlock:
c19e6535
BP
2075 rtnl_unlock();
2076exit:
2077 return err;
7c40efc9
BP
2078}
2079
df2c07f4 2080static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 2081{
f0fef760 2082 struct nlattr **a = info->attrs;
df2c07f4 2083 struct ovs_header *ovs_header = info->userhdr;
ed099e92 2084 struct sk_buff *reply;
c19e6535 2085 struct vport *vport;
c19e6535
BP
2086 int err;
2087
df2c07f4 2088 err = ovs_vport_cmd_validate(a);
f0fef760
BP
2089 if (err)
2090 goto exit;
c19e6535 2091
ed099e92 2092 rcu_read_lock();
2a4999f3 2093 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
c19e6535
BP
2094 err = PTR_ERR(vport);
2095 if (IS_ERR(vport))
f0fef760 2096 goto exit_unlock;
c19e6535 2097
28aea917
IY
2098 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2099 info->snd_seq, OVS_VPORT_CMD_NEW);
ed099e92
BP
2100 err = PTR_ERR(reply);
2101 if (IS_ERR(reply))
f0fef760 2102 goto exit_unlock;
ed099e92 2103
df2fa9b5
JG
2104 rcu_read_unlock();
2105
2106 return genlmsg_reply(reply, info);
ed099e92 2107
f0fef760 2108exit_unlock:
ed099e92 2109 rcu_read_unlock();
f0fef760 2110exit:
c19e6535
BP
2111 return err;
2112}
2113
df2c07f4 2114static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 2115{
df2c07f4 2116 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535 2117 struct datapath *dp;
95b1d73a
PS
2118 int bucket = cb->args[0], skip = cb->args[1];
2119 int i, j = 0;
c19e6535 2120
2a4999f3 2121 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
c19e6535 2122 if (!dp)
f0fef760 2123 return -ENODEV;
ed099e92
BP
2124
2125 rcu_read_lock();
95b1d73a 2126 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
ed099e92 2127 struct vport *vport;
95b1d73a
PS
2128
2129 j = 0;
f8dfbcb7 2130 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
95b1d73a
PS
2131 if (j >= skip &&
2132 ovs_vport_cmd_fill_info(vport, skb,
28aea917 2133 NETLINK_CB(cb->skb).portid,
95b1d73a
PS
2134 cb->nlh->nlmsg_seq,
2135 NLM_F_MULTI,
2136 OVS_VPORT_CMD_NEW) < 0)
2137 goto out;
2138
2139 j++;
2140 }
2141 skip = 0;
c19e6535 2142 }
95b1d73a 2143out:
ed099e92 2144 rcu_read_unlock();
c19e6535 2145
95b1d73a
PS
2146 cb->args[0] = i;
2147 cb->args[1] = j;
f0fef760 2148
95b1d73a 2149 return skb->len;
7c40efc9
BP
2150}
2151
f0fef760 2152static struct genl_ops dp_vport_genl_ops[] = {
df2c07f4 2153 { .cmd = OVS_VPORT_CMD_NEW,
f0fef760
BP
2154 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2155 .policy = vport_policy,
df2c07f4 2156 .doit = ovs_vport_cmd_new
f0fef760 2157 },
df2c07f4 2158 { .cmd = OVS_VPORT_CMD_DEL,
f0fef760
BP
2159 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2160 .policy = vport_policy,
df2c07f4 2161 .doit = ovs_vport_cmd_del
f0fef760 2162 },
df2c07f4 2163 { .cmd = OVS_VPORT_CMD_GET,
f0fef760
BP
2164 .flags = 0, /* OK for unprivileged users. */
2165 .policy = vport_policy,
df2c07f4
JP
2166 .doit = ovs_vport_cmd_get,
2167 .dumpit = ovs_vport_cmd_dump
f0fef760 2168 },
df2c07f4 2169 { .cmd = OVS_VPORT_CMD_SET,
f0fef760
BP
2170 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2171 .policy = vport_policy,
df2c07f4 2172 .doit = ovs_vport_cmd_set,
f0fef760
BP
2173 },
2174};
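
/* As with the datapath ops, a hedged userspace sketch may help show how the
 * vport commands are driven.  The function add_netdev_vport below is
 * hypothetical, assumes libnl-3, and omits error handling; the three
 * attributes it sets are exactly the ones ovs_vport_cmd_new() requires. */

/* Hypothetical example, not kernel code: attach an existing netdev to a
 * datapath.  dp_ifindex is the ifindex of the datapath's local port
 * (for example obtained via if_nametoindex()). */
static int add_netdev_vport(struct nl_sock *sk, int vport_family,
			    int dp_ifindex, const char *netdev)
{
	struct nl_msg *msg = nlmsg_alloc();
	struct ovs_header *ovs_header;

	ovs_header = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, vport_family,
				 sizeof(*ovs_header), 0,
				 OVS_VPORT_CMD_NEW, OVS_VPORT_VERSION);
	ovs_header->dp_ifindex = dp_ifindex;

	nla_put_string(msg, OVS_VPORT_ATTR_NAME, netdev);
	nla_put_u32(msg, OVS_VPORT_ATTR_TYPE, OVS_VPORT_TYPE_NETDEV);
	/* Missed-flow upcalls from this port are sent to this socket. */
	nla_put_u32(msg, OVS_VPORT_ATTR_UPCALL_PID,
		    nl_socket_get_local_port(sk));

	nl_send_auto(sk, msg);
	nl_wait_for_ack(sk);	/* libnl requests a netlink ack by default */

	nlmsg_free(msg);
	return 0;
}
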
2175
982b8810
BP
2176struct genl_family_and_ops {
2177 struct genl_family *family;
2178 struct genl_ops *ops;
2179 int n_ops;
2180 struct genl_multicast_group *group;
2181};
ed099e92 2182
982b8810 2183static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
2184 { &dp_datapath_genl_family,
2185 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
850b6b3b 2186 &ovs_dp_datapath_multicast_group },
f0fef760
BP
2187 { &dp_vport_genl_family,
2188 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
850b6b3b 2189 &ovs_dp_vport_multicast_group },
37a1300c
BP
2190 { &dp_flow_genl_family,
2191 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
850b6b3b 2192 &ovs_dp_flow_multicast_group },
982b8810
BP
2193 { &dp_packet_genl_family,
2194 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
2195 NULL },
2196};
ed099e92 2197
982b8810
BP
2198static void dp_unregister_genl(int n_families)
2199{
2200 int i;
ed099e92 2201
b867ca75 2202 for (i = 0; i < n_families; i++)
982b8810 2203 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
2204}
2205
982b8810 2206static int dp_register_genl(void)
064af421 2207{
982b8810
BP
2208 int n_registered;
2209 int err;
2210 int i;
064af421 2211
982b8810
BP
2212 n_registered = 0;
2213 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2214 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 2215
982b8810
BP
2216 err = genl_register_family_with_ops(f->family, f->ops,
2217 f->n_ops);
2218 if (err)
2219 goto error;
2220 n_registered++;
e22d4953 2221
982b8810
BP
2222 if (f->group) {
2223 err = genl_register_mc_group(f->family, f->group);
2224 if (err)
2225 goto error;
2226 }
2227 }
9cc8b4e4 2228
982b8810 2229 return 0;
064af421
BP
2230
2231error:
982b8810
BP
2232 dp_unregister_genl(n_registered);
2233 return err;
064af421
BP
2234}
2235
acd051f1
PS
2236static int __rehash_flow_table(void *dummy)
2237{
2238 struct datapath *dp;
2a4999f3
PS
2239 struct net *net;
2240
2241 rtnl_lock();
2242 for_each_net(net) {
2243 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
acd051f1 2244
2a4999f3
PS
2245 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2246 struct flow_table *old_table = genl_dereference(dp->table);
2247 struct flow_table *new_table;
acd051f1 2248
2a4999f3
PS
2249 new_table = ovs_flow_tbl_rehash(old_table);
2250 if (!IS_ERR(new_table)) {
2251 rcu_assign_pointer(dp->table, new_table);
2252 ovs_flow_tbl_deferred_destroy(old_table);
2253 }
acd051f1
PS
2254 }
2255 }
2a4999f3 2256 rtnl_unlock();
acd051f1
PS
2257 return 0;
2258}
2259
2260static void rehash_flow_table(struct work_struct *work)
2261{
2262 genl_exec(__rehash_flow_table, NULL);
2263 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2264}
2265
2a4999f3
PS
2266static int dp_destroy_all(void *data)
2267{
2268 struct datapath *dp, *dp_next;
2269 struct ovs_net *ovs_net = data;
2270
2271 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2272 __dp_destroy(dp);
2273
2274 return 0;
2275}
2276
2277static int __net_init ovs_init_net(struct net *net)
2278{
2279 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2280
2281 INIT_LIST_HEAD(&ovs_net->dps);
2282 return 0;
2283}
2284
2285static void __net_exit ovs_exit_net(struct net *net)
2286{
2287 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2288
2289 genl_exec(dp_destroy_all, ovs_net);
2290}
2291
2292static struct pernet_operations ovs_net_ops = {
2293 .init = ovs_init_net,
2294 .exit = ovs_exit_net,
2295 .id = &ovs_net_id,
2296 .size = sizeof(struct ovs_net),
2297};
2298
22d24ebf
BP
2299static int __init dp_init(void)
2300{
2301 int err;
2302
f3d85db3 2303 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
22d24ebf 2304
dc5f3fef 2305 pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
8a07709c 2306 VERSION);
064af421 2307
b9c15df9 2308 err = genl_exec_init();
064af421
BP
2309 if (err)
2310 goto error;
2311
16d650e5 2312 err = ovs_workqueues_init();
b9c15df9
PS
2313 if (err)
2314 goto error_genl_exec;
2315
850b6b3b 2316 err = ovs_flow_init();
3544358a 2317 if (err)
85c9de19 2318 goto error_wq;
3544358a 2319
850b6b3b 2320 err = ovs_vport_init();
064af421
BP
2321 if (err)
2322 goto error_flow_exit;
2323
2a4999f3 2324 err = register_pernet_device(&ovs_net_ops);
f2459fe7
JG
2325 if (err)
2326 goto error_vport_exit;
2327
2a4999f3
PS
2328 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2329 if (err)
2330 goto error_netns_exit;
2331
982b8810
BP
2332 err = dp_register_genl();
2333 if (err < 0)
37a1300c 2334 goto error_unreg_notifier;
982b8810 2335
acd051f1
PS
2336 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2337
064af421
BP
2338 return 0;
2339
2340error_unreg_notifier:
850b6b3b 2341 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2342error_netns_exit:
2343 unregister_pernet_device(&ovs_net_ops);
f2459fe7 2344error_vport_exit:
850b6b3b 2345 ovs_vport_exit();
064af421 2346error_flow_exit:
850b6b3b 2347 ovs_flow_exit();
16d650e5
PS
2348error_wq:
2349 ovs_workqueues_exit();
b9c15df9
PS
2350error_genl_exec:
2351 genl_exec_exit();
064af421
BP
2352error:
2353 return err;
2354}
2355
2356static void dp_cleanup(void)
2357{
acd051f1 2358 cancel_delayed_work_sync(&rehash_flow_wq);
982b8810 2359 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
850b6b3b 2360 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2a4999f3
PS
2361 unregister_pernet_device(&ovs_net_ops);
2362 rcu_barrier();
850b6b3b
JG
2363 ovs_vport_exit();
2364 ovs_flow_exit();
16d650e5 2365 ovs_workqueues_exit();
b9c15df9 2366 genl_exec_exit();
064af421
BP
2367}
2368
2369module_init(dp_init);
2370module_exit(dp_cleanup);
2371
2372MODULE_DESCRIPTION("Open vSwitch switching datapath");
2373MODULE_LICENSE("GPL");
3d0666d2 2374MODULE_VERSION(VERSION);