]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/datapath.c
datapath: Avoid freeing wild pointer in corner case.
[mirror_ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
f632c8fc 2 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
a14bc59f
BP
3 * Distributed under the terms of the GNU GPL version 2.
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
064af421
BP
7 */
8
9/* Functions for managing the dp interface/device. */
10
dfffaef1
JP
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
064af421
BP
13#include <linux/init.h>
14#include <linux/module.h>
064af421 15#include <linux/if_arp.h>
064af421
BP
16#include <linux/if_vlan.h>
17#include <linux/in.h>
18#include <linux/ip.h>
982b8810 19#include <linux/jhash.h>
064af421
BP
20#include <linux/delay.h>
21#include <linux/time.h>
22#include <linux/etherdevice.h>
ed099e92 23#include <linux/genetlink.h>
064af421
BP
24#include <linux/kernel.h>
25#include <linux/kthread.h>
064af421
BP
26#include <linux/mutex.h>
27#include <linux/percpu.h>
28#include <linux/rcupdate.h>
29#include <linux/tcp.h>
30#include <linux/udp.h>
31#include <linux/version.h>
32#include <linux/ethtool.h>
064af421
BP
33#include <linux/wait.h>
34#include <asm/system.h>
35#include <asm/div64.h>
36#include <asm/bug.h>
656a0e37 37#include <linux/highmem.h>
064af421
BP
38#include <linux/netfilter_bridge.h>
39#include <linux/netfilter_ipv4.h>
40#include <linux/inetdevice.h>
41#include <linux/list.h>
42#include <linux/rculist.h>
064af421 43#include <linux/dmi.h>
3c5f6de3 44#include <net/inet_ecn.h>
36956a7d 45#include <net/genetlink.h>
064af421
BP
46
47#include "openvswitch/datapath-protocol.h"
dd8d6b8c 48#include "checksum.h"
064af421
BP
49#include "datapath.h"
50#include "actions.h"
064af421 51#include "flow.h"
7eaa9830 52#include "loop_counter.h"
8d5ebd83 53#include "table.h"
303708cc 54#include "vlan.h"
f2459fe7 55#include "vport-internal_dev.h"
064af421 56
/* Hook for bridge-compatibility ioctls; filled in by the brcompat module. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
 */

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
				 const struct dp_upcall_info *);
ed099e92 86/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
254f2dc8 87struct datapath *get_dp(int dp_ifindex)
064af421 88{
254f2dc8
BP
89 struct datapath *dp = NULL;
90 struct net_device *dev;
ed099e92 91
254f2dc8
BP
92 rcu_read_lock();
93 dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
94 if (dev) {
95 struct vport *vport = internal_dev_get_vport(dev);
96 if (vport)
97 dp = vport->dp;
98 }
99 rcu_read_unlock();
100
101 return dp;
064af421
BP
102}
103EXPORT_SYMBOL_GPL(get_dp);
104
ed099e92 105/* Must be called with genl_mutex. */
027f9007 106static struct tbl *get_table_protected(struct datapath *dp)
9abaf6b3 107{
ed099e92 108 return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
1452b28c
JG
109}
110
ed099e92 111/* Must be called with rcu_read_lock or RTNL lock. */
027f9007 112static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
1452b28c 113{
ed099e92 114 return rcu_dereference_rtnl(dp->ports[port_no]);
9abaf6b3
JG
115}
116
f2459fe7
JG
117/* Must be called with rcu_read_lock or RTNL lock. */
118const char *dp_name(const struct datapath *dp)
119{
ad919711 120 return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
f2459fe7
JG
121}
122
064af421
BP
123static inline size_t br_nlmsg_size(void)
124{
125 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
126 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
127 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
128 + nla_total_size(4) /* IFLA_MASTER */
129 + nla_total_size(4) /* IFLA_MTU */
130 + nla_total_size(4) /* IFLA_LINK */
131 + nla_total_size(1); /* IFLA_OPERSTATE */
132}
133
ed099e92 134/* Caller must hold RTNL lock. */
064af421 135static int dp_fill_ifinfo(struct sk_buff *skb,
e779d8d9 136 const struct vport *port,
064af421
BP
137 int event, unsigned int flags)
138{
027f9007 139 struct datapath *dp = port->dp;
e779d8d9
BP
140 int ifindex = vport_get_ifindex(port);
141 int iflink = vport_get_iflink(port);
064af421
BP
142 struct ifinfomsg *hdr;
143 struct nlmsghdr *nlh;
144
f2459fe7
JG
145 if (ifindex < 0)
146 return ifindex;
147
148 if (iflink < 0)
149 return iflink;
150
064af421
BP
151 nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
152 if (nlh == NULL)
153 return -EMSGSIZE;
154
155 hdr = nlmsg_data(nlh);
156 hdr->ifi_family = AF_BRIDGE;
157 hdr->__ifi_pad = 0;
f2459fe7
JG
158 hdr->ifi_type = ARPHRD_ETHER;
159 hdr->ifi_index = ifindex;
e779d8d9 160 hdr->ifi_flags = vport_get_flags(port);
064af421
BP
161 hdr->ifi_change = 0;
162
e779d8d9 163 NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
ad919711 164 NLA_PUT_U32(skb, IFLA_MASTER,
1452b28c 165 vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
e779d8d9 166 NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
064af421
BP
167#ifdef IFLA_OPERSTATE
168 NLA_PUT_U8(skb, IFLA_OPERSTATE,
e779d8d9
BP
169 vport_is_running(port)
170 ? vport_get_operstate(port)
f2459fe7 171 : IF_OPER_DOWN);
064af421
BP
172#endif
173
e779d8d9 174 NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
064af421 175
f2459fe7
JG
176 if (ifindex != iflink)
177 NLA_PUT_U32(skb, IFLA_LINK,iflink);
064af421
BP
178
179 return nlmsg_end(skb, nlh);
180
181nla_put_failure:
182 nlmsg_cancel(skb, nlh);
183 return -EMSGSIZE;
184}
185
ed099e92 186/* Caller must hold RTNL lock. */
e779d8d9 187static void dp_ifinfo_notify(int event, struct vport *port)
064af421 188{
064af421
BP
189 struct sk_buff *skb;
190 int err = -ENOBUFS;
191
192 skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
193 if (skb == NULL)
194 goto errout;
195
196 err = dp_fill_ifinfo(skb, port, event, 0);
197 if (err < 0) {
198 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
199 WARN_ON(err == -EMSGSIZE);
200 kfree_skb(skb);
201 goto errout;
202 }
f2459fe7 203 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
cfe7c1f5 204 return;
064af421
BP
205errout:
206 if (err < 0)
f2459fe7 207 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
064af421
BP
208}
209
58c342f6
BP
210static void release_dp(struct kobject *kobj)
211{
212 struct datapath *dp = container_of(kobj, struct datapath, ifobj);
213 kfree(dp);
214}
215
35f7605b 216static struct kobj_type dp_ktype = {
58c342f6
BP
217 .release = release_dp
218};
219
46c6a11d
JG
220static void destroy_dp_rcu(struct rcu_head *rcu)
221{
222 struct datapath *dp = container_of(rcu, struct datapath, rcu);
46c6a11d
JG
223
224 tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
225 free_percpu(dp->stats_percpu);
226 kobject_put(&dp->ifobj);
227}
228
ed099e92 229/* Called with RTNL lock and genl_lock. */
c19e6535 230static struct vport *new_vport(const struct vport_parms *parms)
064af421 231{
f2459fe7 232 struct vport *vport;
f2459fe7 233
c19e6535
BP
234 vport = vport_add(parms);
235 if (!IS_ERR(vport)) {
236 struct datapath *dp = parms->dp;
064af421 237
c19e6535 238 rcu_assign_pointer(dp->ports[parms->port_no], vport);
ed099e92 239 list_add(&vport->node, &dp->port_list);
064af421 240
c19e6535
BP
241 dp_ifinfo_notify(RTM_NEWLINK, vport);
242 }
064af421 243
c19e6535 244 return vport;
064af421
BP
245}
246
ed099e92 247/* Called with RTNL lock. */
e779d8d9 248int dp_detach_port(struct vport *p)
064af421
BP
249{
250 ASSERT_RTNL();
251
2e7dd8ec 252 if (p->port_no != ODPP_LOCAL)
0515ceb3 253 dp_sysfs_del_if(p);
064af421
BP
254 dp_ifinfo_notify(RTM_DELLINK, p);
255
064af421 256 /* First drop references to device. */
ed099e92 257 list_del(&p->node);
064af421 258 rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
f2459fe7 259
7237e4f4 260 /* Then destroy it. */
057dd6d2 261 return vport_del(p);
064af421
BP
262}
263
8819fac7 264/* Must be called with rcu_read_lock. */
e779d8d9 265void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
266{
267 struct datapath *dp = p->dp;
268 struct dp_stats_percpu *stats;
8819fac7 269 int stats_counter_off;
55574bb0
BP
270 struct sw_flow_actions *acts;
271 struct loop_counter *loop;
4c1ad233 272 int error;
064af421 273
e779d8d9 274 OVS_CB(skb)->vport = p;
a063b0df 275
3976f6d5 276 if (!OVS_CB(skb)->flow) {
36956a7d 277 struct sw_flow_key key;
3976f6d5 278 struct tbl_node *flow_node;
b7a31ec1 279 bool is_frag;
4c1ad233 280
3976f6d5 281 /* Extract flow from 'skb' into 'key'. */
c75d4dcf 282 error = flow_extract(skb, p->port_no, &key, &is_frag);
3976f6d5
JG
283 if (unlikely(error)) {
284 kfree_skb(skb);
285 return;
286 }
064af421 287
b7a31ec1 288 if (is_frag && dp->drop_frags) {
3976f6d5
JG
289 kfree_skb(skb);
290 stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
291 goto out;
292 }
293
294 /* Look up flow. */
295 flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
296 flow_hash(&key), flow_cmp);
297 if (unlikely(!flow_node)) {
856081f6
BP
298 struct dp_upcall_info upcall;
299
982b8810 300 upcall.cmd = ODP_PACKET_CMD_MISS;
856081f6
BP
301 upcall.key = &key;
302 upcall.userdata = 0;
303 upcall.sample_pool = 0;
304 upcall.actions = NULL;
305 upcall.actions_len = 0;
306 dp_upcall(dp, skb, &upcall);
3976f6d5
JG
307 stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
308 goto out;
309 }
310
311 OVS_CB(skb)->flow = flow_cast(flow_node);
55574bb0
BP
312 }
313
f267de8a 314 stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
3976f6d5 315 flow_used(OVS_CB(skb)->flow, skb);
55574bb0 316
3976f6d5 317 acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
55574bb0
BP
318
319 /* Check whether we've looped too much. */
7eaa9830
JG
320 loop = loop_get_counter();
321 if (unlikely(++loop->count > MAX_LOOPS))
55574bb0
BP
322 loop->looping = true;
323 if (unlikely(loop->looping)) {
7eaa9830 324 loop_suppress(dp, acts);
f267de8a 325 kfree_skb(skb);
55574bb0 326 goto out_loop;
064af421 327 }
8819fac7 328
55574bb0 329 /* Execute actions. */
3976f6d5 330 execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
cdee00fd 331 acts->actions_len);
55574bb0
BP
332
333 /* Check whether sub-actions looped too much. */
334 if (unlikely(loop->looping))
7eaa9830 335 loop_suppress(dp, acts);
55574bb0
BP
336
337out_loop:
338 /* Decrement loop counter. */
339 if (!--loop->count)
340 loop->looping = false;
7eaa9830 341 loop_put_counter();
55574bb0 342
8819fac7 343out:
55574bb0 344 /* Update datapath statistics. */
8819fac7
JG
345 local_bh_disable();
346 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
38c6ecbc
JG
347
348 write_seqcount_begin(&stats->seqlock);
8819fac7 349 (*(u64 *)((u8 *)stats + stats_counter_off))++;
38c6ecbc
JG
350 write_seqcount_end(&stats->seqlock);
351
8819fac7 352 local_bh_enable();
064af421
BP
353}
354
856081f6
BP
355static void copy_and_csum_skb(struct sk_buff *skb, void *to)
356{
357 u16 csum_start, csum_offset;
358 __wsum csum;
359
360 get_skb_csum_pointers(skb, &csum_start, &csum_offset);
361 csum_start -= skb_headroom(skb);
856081f6
BP
362
363 skb_copy_bits(skb, 0, to, csum_start);
364
365 csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
366 skb->len - csum_start, 0);
367 *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
368}
369
aa5a8fdc
JG
370static struct genl_family dp_packet_genl_family = {
371 .id = GENL_ID_GENERATE,
372 .hdrsize = sizeof(struct odp_header),
373 .name = ODP_PACKET_FAMILY,
374 .version = 1,
375 .maxattr = ODP_PACKET_ATTR_MAX
376};
377
378/* Generic Netlink multicast groups for upcalls.
379 *
380 * We really want three unique multicast groups per datapath, but we can't even
381 * get one, because genl_register_mc_group() takes genl_lock, which is also
382 * held during Generic Netlink message processing, so trying to acquire
383 * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we
384 * preallocate a few groups and use them round-robin for datapaths. Collision
385 * isn't fatal--multicast listeners should check that the family is the one
386 * that they want and discard others--but it wastes time and memory to receive
387 * unwanted messages.
388 */
982b8810 389#define PACKET_N_MC_GROUPS 16
aa5a8fdc 390static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
982b8810 391
aa5a8fdc 392static u32 packet_mc_group(struct datapath *dp, u8 cmd)
982b8810 393{
aa5a8fdc 394 u32 idx;
982b8810 395 BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
aa5a8fdc
JG
396
397 idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
398 return packet_mc_groups[idx].id;
399}
400
401static int packet_register_mc_groups(void)
402{
403 int i;
404
405 for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
406 struct genl_multicast_group *group = &packet_mc_groups[i];
407 int error;
408
409 sprintf(group->name, "packet%d", i);
410 error = genl_register_mc_group(&dp_packet_genl_family, group);
411 if (error)
412 return error;
413 }
414 return 0;
415}
416
417int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
418{
419 struct dp_stats_percpu *stats;
420 int err;
421
422 WARN_ON_ONCE(skb_shared(skb));
423
424 forward_ip_summed(skb);
425
426 err = vswitch_skb_checksum_setup(skb);
427 if (err)
428 goto err_kfree_skb;
429
430 /* Break apart GSO packets into their component pieces. Otherwise
431 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
432 if (skb_is_gso(skb)) {
433 struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
434
435 kfree_skb(skb);
436 skb = nskb;
437 if (IS_ERR(skb)) {
438 err = PTR_ERR(skb);
439 goto err;
440 }
441 }
442
d76195db
JG
443 err = queue_control_packets(dp, skb, upcall_info);
444 if (err)
445 goto err;
446
447 return 0;
aa5a8fdc
JG
448
449err_kfree_skb:
450 kfree_skb(skb);
451err:
452 local_bh_disable();
453 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
454
455 write_seqcount_begin(&stats->seqlock);
456 stats->n_lost++;
457 write_seqcount_end(&stats->seqlock);
458
459 local_bh_enable();
460
461 return err;
982b8810
BP
462}
463
464/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
465 * 'upcall_info'. There will be only one packet unless we broke up a GSO
466 * packet.
467 */
856081f6
BP
468static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
469 const struct dp_upcall_info *upcall_info)
cb5087ca 470{
982b8810 471 u32 group = packet_mc_group(dp, upcall_info->cmd);
cb5087ca
BP
472 struct sk_buff *nskb;
473 int port_no;
474 int err;
475
e779d8d9
BP
476 if (OVS_CB(skb)->vport)
477 port_no = OVS_CB(skb)->vport->port_no;
f2459fe7
JG
478 else
479 port_no = ODPP_LOCAL;
cb5087ca
BP
480
481 do {
982b8810 482 struct odp_header *upcall;
856081f6
BP
483 struct sk_buff *user_skb; /* to be queued to userspace */
484 struct nlattr *nla;
485 unsigned int len;
cb5087ca
BP
486
487 nskb = skb->next;
488 skb->next = NULL;
489
303708cc
JG
490 err = vlan_deaccel_tag(skb);
491 if (unlikely(err))
492 goto err_kfree_skbs;
6ce39213 493
51313015
JG
494 if (nla_attr_size(skb->len) > USHRT_MAX)
495 goto err_kfree_skbs;
496
982b8810 497 len = sizeof(struct odp_header);
856081f6
BP
498 len += nla_total_size(skb->len);
499 len += nla_total_size(FLOW_BUFSIZE);
500 if (upcall_info->userdata)
501 len += nla_total_size(8);
502 if (upcall_info->sample_pool)
503 len += nla_total_size(4);
504 if (upcall_info->actions_len)
505 len += nla_total_size(upcall_info->actions_len);
506
982b8810
BP
507 user_skb = genlmsg_new(len, GFP_ATOMIC);
508 if (!user_skb) {
509 netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
cb5087ca 510 goto err_kfree_skbs;
982b8810 511 }
cb5087ca 512
982b8810 513 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
254f2dc8 514 upcall->dp_ifindex = dp->dp_ifindex;
856081f6 515
856081f6
BP
516 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
517 flow_to_nlattrs(upcall_info->key, user_skb);
518 nla_nest_end(user_skb, nla);
cb5087ca 519
856081f6
BP
520 if (upcall_info->userdata)
521 nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
522 if (upcall_info->sample_pool)
523 nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
524 if (upcall_info->actions_len) {
525 const struct nlattr *actions = upcall_info->actions;
526 u32 actions_len = upcall_info->actions_len;
527
528 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
529 memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
530 nla_nest_end(user_skb, nla);
531 }
532
533 nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
534 if (skb->ip_summed == CHECKSUM_PARTIAL)
535 copy_and_csum_skb(skb, nla_data(nla));
536 else
537 skb_copy_bits(skb, 0, nla_data(nla), skb->len);
538
982b8810
BP
539 err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
540 if (err)
541 goto err_kfree_skbs;
856081f6
BP
542
543 kfree_skb(skb);
cb5087ca
BP
544 skb = nskb;
545 } while (skb);
546 return 0;
547
548err_kfree_skbs:
549 kfree_skb(skb);
550 while ((skb = nskb) != NULL) {
551 nskb = skb->next;
552 kfree_skb(skb);
553 }
554 return err;
555}
556
ed099e92 557/* Called with genl_mutex. */
254f2dc8 558static int flush_flows(int dp_ifindex)
064af421 559{
9c52546b 560 struct tbl *old_table;
8d5ebd83 561 struct tbl *new_table;
9c52546b 562 struct datapath *dp;
9c52546b 563
254f2dc8 564 dp = get_dp(dp_ifindex);
9c52546b 565 if (!dp)
ed099e92 566 return -ENODEV;
8d5ebd83 567
9c52546b 568 old_table = get_table_protected(dp);
c6fadeb1 569 new_table = tbl_create(TBL_MIN_BUCKETS);
8d5ebd83 570 if (!new_table)
ed099e92 571 return -ENOMEM;
8d5ebd83
JG
572
573 rcu_assign_pointer(dp->table, new_table);
574
575 tbl_deferred_destroy(old_table, flow_free_tbl);
576
ed099e92 577 return 0;
064af421
BP
578}
579
37a1300c 580static int validate_actions(const struct nlattr *attr)
064af421 581{
23cad98c
BP
582 const struct nlattr *a;
583 int rem;
584
37a1300c 585 nla_for_each_nested(a, attr, rem) {
7aec165d
BP
586 static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
587 [ODP_ACTION_ATTR_OUTPUT] = 4,
588 [ODP_ACTION_ATTR_CONTROLLER] = 8,
589 [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
590 [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
591 [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
592 [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
593 [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
594 [ODP_ACTION_ATTR_SET_NW_DST] = 4,
595 [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
596 [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
597 [ODP_ACTION_ATTR_SET_TP_DST] = 2,
598 [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
599 [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
600 [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
601 [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
23cad98c
BP
602 };
603 int type = nla_type(a);
604
7aec165d 605 if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
23cad98c
BP
606 return -EINVAL;
607
608 switch (type) {
7aec165d 609 case ODP_ACTION_ATTR_UNSPEC:
cdee00fd 610 return -EINVAL;
064af421 611
7aec165d
BP
612 case ODP_ACTION_ATTR_CONTROLLER:
613 case ODP_ACTION_ATTR_STRIP_VLAN:
614 case ODP_ACTION_ATTR_SET_DL_SRC:
615 case ODP_ACTION_ATTR_SET_DL_DST:
616 case ODP_ACTION_ATTR_SET_NW_SRC:
617 case ODP_ACTION_ATTR_SET_NW_DST:
618 case ODP_ACTION_ATTR_SET_TP_SRC:
619 case ODP_ACTION_ATTR_SET_TP_DST:
620 case ODP_ACTION_ATTR_SET_TUNNEL:
621 case ODP_ACTION_ATTR_SET_PRIORITY:
622 case ODP_ACTION_ATTR_POP_PRIORITY:
623 case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
23cad98c
BP
624 /* No validation needed. */
625 break;
626
7aec165d 627 case ODP_ACTION_ATTR_OUTPUT:
23cad98c
BP
628 if (nla_get_u32(a) >= DP_MAX_PORTS)
629 return -EINVAL;
3b1fc5f3 630 break;
cdee00fd 631
7aec165d 632 case ODP_ACTION_ATTR_SET_DL_TCI:
cdee00fd 633 if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
064af421 634 return -EINVAL;
23cad98c 635 break;
064af421 636
7aec165d 637 case ODP_ACTION_ATTR_SET_NW_TOS:
23cad98c
BP
638 if (nla_get_u8(a) & INET_ECN_MASK)
639 return -EINVAL;
640 break;
064af421 641
23cad98c
BP
642 default:
643 return -EOPNOTSUPP;
644 }
645 }
3c5f6de3 646
23cad98c
BP
647 if (rem > 0)
648 return -EINVAL;
064af421 649
23cad98c 650 return 0;
064af421 651}
064af421
BP
652static void clear_stats(struct sw_flow *flow)
653{
6bfafa55 654 flow->used = 0;
064af421 655 flow->tcp_flags = 0;
064af421
BP
656 flow->packet_count = 0;
657 flow->byte_count = 0;
658}
659
ed099e92 660/* Called with genl_mutex. */
8d5ebd83
JG
661static int expand_table(struct datapath *dp)
662{
9abaf6b3 663 struct tbl *old_table = get_table_protected(dp);
8d5ebd83
JG
664 struct tbl *new_table;
665
666 new_table = tbl_expand(old_table);
667 if (IS_ERR(new_table))
668 return PTR_ERR(new_table);
669
670 rcu_assign_pointer(dp->table, new_table);
671 tbl_deferred_destroy(old_table, NULL);
672
d6569377 673 return 0;
8d5ebd83
JG
674}
675
982b8810 676static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 677{
982b8810
BP
678 struct odp_header *odp_header = info->userhdr;
679 struct nlattr **a = info->attrs;
680 struct sk_buff *packet;
36956a7d 681 struct sw_flow_key key;
f7cd0081 682 struct datapath *dp;
d6569377
BP
683 struct ethhdr *eth;
684 bool is_frag;
3f19d399 685 int len;
d6569377 686 int err;
064af421 687
f7cd0081
BP
688 err = -EINVAL;
689 if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
690 nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
e5cad958 691 goto err;
064af421 692
37a1300c 693 err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
f7cd0081 694 if (err)
e5cad958 695 goto err;
f7cd0081 696
3f19d399
BP
697 len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
698 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
f7cd0081
BP
699 err = -ENOMEM;
700 if (!packet)
e5cad958 701 goto err;
3f19d399
BP
702 skb_reserve(packet, NET_IP_ALIGN);
703
704 memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);
8d5ebd83 705
f7cd0081
BP
706 skb_reset_mac_header(packet);
707 eth = eth_hdr(packet);
064af421 708
d6569377
BP
709 /* Normally, setting the skb 'protocol' field would be handled by a
710 * call to eth_type_trans(), but it assumes there's a sending
711 * device, which we may not have. */
712 if (ntohs(eth->h_proto) >= 1536)
f7cd0081 713 packet->protocol = eth->h_proto;
d6569377 714 else
f7cd0081 715 packet->protocol = htons(ETH_P_802_2);
d3c54451 716
f7cd0081 717 err = flow_extract(packet, -1, &key, &is_frag);
d6569377 718 if (err)
e5cad958 719 goto err_kfree_skb;
064af421 720
d6569377 721 rcu_read_lock();
254f2dc8 722 dp = get_dp(odp_header->dp_ifindex);
f7cd0081 723 err = -ENODEV;
e5cad958
BP
724 if (!dp)
725 goto err_unlock;
726 err = execute_actions(dp, packet, &key,
727 nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
728 nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
d6569377 729 rcu_read_unlock();
e5cad958 730 return err;
064af421 731
e5cad958
BP
732err_unlock:
733 rcu_read_unlock();
734err_kfree_skb:
735 kfree_skb(packet);
736err:
d6569377 737 return err;
064af421
BP
738}
739
982b8810
BP
740static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
741 [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
742 [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
743};
744
745static struct genl_ops dp_packet_genl_ops[] = {
746 { .cmd = ODP_PACKET_CMD_EXECUTE,
747 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
748 .policy = packet_policy,
749 .doit = odp_packet_cmd_execute
750 }
751};
752
d6569377 753static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
064af421 754{
d6569377 755 int i;
064af421 756
d6569377
BP
757 stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
758 for_each_possible_cpu(i) {
759 const struct dp_stats_percpu *percpu_stats;
760 struct dp_stats_percpu local_stats;
761 unsigned seqcount;
44e05eca 762
d6569377 763 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 764
d6569377
BP
765 do {
766 seqcount = read_seqcount_begin(&percpu_stats->seqlock);
767 local_stats = *percpu_stats;
768 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
064af421 769
d6569377
BP
770 stats->n_frags += local_stats.n_frags;
771 stats->n_hit += local_stats.n_hit;
772 stats->n_missed += local_stats.n_missed;
773 stats->n_lost += local_stats.n_lost;
774 }
775}
064af421 776
ed099e92
BP
777/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
778 * Called with RTNL lock.
779 */
d6569377
BP
780int dp_min_mtu(const struct datapath *dp)
781{
782 struct vport *p;
783 int mtu = 0;
784
785 ASSERT_RTNL();
786
ed099e92 787 list_for_each_entry (p, &dp->port_list, node) {
d6569377
BP
788 int dev_mtu;
789
790 /* Skip any internal ports, since that's what we're trying to
791 * set. */
792 if (is_internal_vport(p))
793 continue;
794
795 dev_mtu = vport_get_mtu(p);
f915f1a8
BP
796 if (!dev_mtu)
797 continue;
d6569377
BP
798 if (!mtu || dev_mtu < mtu)
799 mtu = dev_mtu;
800 }
801
802 return mtu ? mtu : ETH_DATA_LEN;
064af421
BP
803}
804
ed099e92
BP
805/* Sets the MTU of all datapath devices to the minimum of the ports
806 * Called with RTNL lock.
807 */
d6569377 808void set_internal_devs_mtu(const struct datapath *dp)
064af421 809{
d6569377
BP
810 struct vport *p;
811 int mtu;
064af421 812
d6569377
BP
813 ASSERT_RTNL();
814
815 mtu = dp_min_mtu(dp);
44e05eca 816
ed099e92 817 list_for_each_entry (p, &dp->port_list, node) {
d6569377
BP
818 if (is_internal_vport(p))
819 vport_set_mtu(p, mtu);
820 }
064af421
BP
821}
822
d6569377
BP
823static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
824 [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
825 [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
826 [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
d6569377 827};
36956a7d 828
37a1300c
BP
829static struct genl_family dp_flow_genl_family = {
830 .id = GENL_ID_GENERATE,
831 .hdrsize = sizeof(struct odp_header),
832 .name = ODP_FLOW_FAMILY,
833 .version = 1,
834 .maxattr = ODP_FLOW_ATTR_MAX
835};
ed099e92 836
37a1300c
BP
837static struct genl_multicast_group dp_flow_multicast_group = {
838 .name = ODP_FLOW_MCGROUP
839};
840
841/* Called with genl_lock. */
842static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
843 struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
d6569377 844{
37a1300c 845 const int skb_orig_len = skb->len;
d6569377
BP
846 const struct sw_flow_actions *sf_acts;
847 struct odp_flow_stats stats;
37a1300c 848 struct odp_header *odp_header;
d6569377
BP
849 struct nlattr *nla;
850 unsigned long used;
851 u8 tcp_flags;
852 int err;
064af421 853
d6569377 854 sf_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 855 lockdep_genl_is_held());
064af421 856
37a1300c
BP
857 odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
858 if (!odp_header)
859 return -EMSGSIZE;
d6569377 860
254f2dc8 861 odp_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
862
863 nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
864 if (!nla)
865 goto nla_put_failure;
866 err = flow_to_nlattrs(&flow->key, skb);
867 if (err)
37a1300c 868 goto error;
d6569377
BP
869 nla_nest_end(skb, nla);
870
871 spin_lock_bh(&flow->lock);
872 used = flow->used;
873 stats.n_packets = flow->packet_count;
874 stats.n_bytes = flow->byte_count;
875 tcp_flags = flow->tcp_flags;
876 spin_unlock_bh(&flow->lock);
877
878 if (used)
ec58547a 879 NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));
d6569377
BP
880
881 if (stats.n_packets)
882 NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
883
884 if (tcp_flags)
885 NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
886
30053024
BP
887 /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
888 * this is the first flow to be dumped into 'skb'. This is unusual for
889 * Netlink but individual action lists can be longer than
890 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
891 * The userspace caller can always fetch the actions separately if it
892 * really wants them. (Most userspace callers in fact don't care.)
893 *
894 * This can only fail for dump operations because the skb is always
895 * properly sized for single flows.
896 */
897 err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
898 sf_acts->actions);
899 if (err < 0 && skb_orig_len)
900 goto error;
37a1300c
BP
901
902 return genlmsg_end(skb, odp_header);
d6569377
BP
903
904nla_put_failure:
905 err = -EMSGSIZE;
37a1300c
BP
906error:
907 genlmsg_cancel(skb, odp_header);
d6569377 908 return err;
44e05eca
BP
909}
910
37a1300c 911static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
44e05eca 912{
37a1300c
BP
913 const struct sw_flow_actions *sf_acts;
914 int len;
d6569377 915
37a1300c
BP
916 sf_acts = rcu_dereference_protected(flow->sf_acts,
917 lockdep_genl_is_held());
d6569377 918
37a1300c
BP
919 len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
920 len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
921 len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
922 len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
923 len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
924 return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
925}
8d5ebd83 926
37a1300c
BP
927static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
928 u32 pid, u32 seq, u8 cmd)
929{
930 struct sk_buff *skb;
931 int retval;
d6569377 932
37a1300c
BP
933 skb = odp_flow_cmd_alloc_info(flow);
934 if (!skb)
935 return ERR_PTR(-ENOMEM);
d6569377 936
37a1300c
BP
937 retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
938 BUG_ON(retval < 0);
d6569377 939 return skb;
064af421
BP
940}
941
37a1300c 942static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
064af421 943{
37a1300c
BP
944 struct nlattr **a = info->attrs;
945 struct odp_header *odp_header = info->userhdr;
bc4a05c6 946 struct tbl_node *flow_node;
37a1300c 947 struct sw_flow_key key;
d6569377 948 struct sw_flow *flow;
37a1300c 949 struct sk_buff *reply;
9c52546b 950 struct datapath *dp;
d6569377
BP
951 struct tbl *table;
952 u32 hash;
bc4a05c6 953 int error;
064af421 954
37a1300c
BP
955 /* Extract key. */
956 error = -EINVAL;
957 if (!a[ODP_FLOW_ATTR_KEY])
958 goto error;
959 error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
960 if (error)
961 goto error;
064af421 962
37a1300c
BP
963 /* Validate actions. */
964 if (a[ODP_FLOW_ATTR_ACTIONS]) {
965 error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
966 if (error)
967 goto error;
968 } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
969 error = -EINVAL;
970 goto error;
971 }
972
254f2dc8 973 dp = get_dp(odp_header->dp_ifindex);
d6569377 974 error = -ENODEV;
9c52546b 975 if (!dp)
37a1300c 976 goto error;
704a1e09 977
37a1300c 978 hash = flow_hash(&key);
d6569377 979 table = get_table_protected(dp);
37a1300c 980 flow_node = tbl_lookup(table, &key, hash, flow_cmp);
d6569377
BP
981 if (!flow_node) {
982 struct sw_flow_actions *acts;
983
984 /* Bail out if we're not allowed to create a new flow. */
985 error = -ENOENT;
37a1300c
BP
986 if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
987 goto error;
d6569377
BP
988
989 /* Expand table, if necessary, to make room. */
990 if (tbl_count(table) >= tbl_n_buckets(table)) {
991 error = expand_table(dp);
992 if (error)
37a1300c 993 goto error;
d6569377
BP
994 table = get_table_protected(dp);
995 }
996
997 /* Allocate flow. */
998 flow = flow_alloc();
999 if (IS_ERR(flow)) {
1000 error = PTR_ERR(flow);
37a1300c 1001 goto error;
d6569377 1002 }
37a1300c 1003 flow->key = key;
d6569377
BP
1004 clear_stats(flow);
1005
1006 /* Obtain actions. */
37a1300c 1007 acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
d6569377
BP
1008 error = PTR_ERR(acts);
1009 if (IS_ERR(acts))
1010 goto error_free_flow;
1011 rcu_assign_pointer(flow->sf_acts, acts);
1012
d6569377
BP
1013 /* Put flow in bucket. */
1014 error = tbl_insert(table, &flow->tbl_node, hash);
1015 if (error)
1016 goto error_free_flow;
37a1300c
BP
1017
1018 reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
1019 info->snd_seq, ODP_FLOW_CMD_NEW);
d6569377
BP
1020 } else {
1021 /* We found a matching flow. */
1022 struct sw_flow_actions *old_acts;
1023
1024 /* Bail out if we're not allowed to modify an existing flow.
1025 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1026 * because Generic Netlink treats the latter as a dump
1027 * request. We also accept NLM_F_EXCL in case that bug ever
1028 * gets fixed.
1029 */
1030 error = -EEXIST;
37a1300c
BP
1031 if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
1032 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1033 goto error;
d6569377
BP
1034
1035 /* Update actions. */
1036 flow = flow_cast(flow_node);
1037 old_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 1038 lockdep_genl_is_held());
37a1300c
BP
1039 if (a[ODP_FLOW_ATTR_ACTIONS] &&
1040 (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
1041 memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
1042 old_acts->actions_len))) {
d6569377
BP
1043 struct sw_flow_actions *new_acts;
1044
37a1300c 1045 new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
d6569377
BP
1046 error = PTR_ERR(new_acts);
1047 if (IS_ERR(new_acts))
37a1300c 1048 goto error;
d6569377
BP
1049
1050 rcu_assign_pointer(flow->sf_acts, new_acts);
1051 flow_deferred_free_acts(old_acts);
1052 }
1053
37a1300c
BP
1054 reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
1055 info->snd_seq, ODP_FLOW_CMD_NEW);
d6569377
BP
1056
1057 /* Clear stats. */
37a1300c 1058 if (a[ODP_FLOW_ATTR_CLEAR]) {
d6569377
BP
1059 spin_lock_bh(&flow->lock);
1060 clear_stats(flow);
1061 spin_unlock_bh(&flow->lock);
1062 }
9c52546b 1063 }
37a1300c
BP
1064
1065 if (!IS_ERR(reply))
1066 genl_notify(reply, genl_info_net(info), info->snd_pid,
1067 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1068 else
1069 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1070 dp_flow_multicast_group.id, PTR_ERR(reply));
d6569377 1071 return 0;
704a1e09 1072
d6569377
BP
1073error_free_flow:
1074 flow_put(flow);
37a1300c 1075error:
9c52546b 1076 return error;
704a1e09
BP
1077}
1078
37a1300c 1079static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
704a1e09 1080{
37a1300c
BP
1081 struct nlattr **a = info->attrs;
1082 struct odp_header *odp_header = info->userhdr;
1083 struct sw_flow_key key;
d6569377 1084 struct tbl_node *flow_node;
37a1300c 1085 struct sk_buff *reply;
704a1e09 1086 struct sw_flow *flow;
9c52546b
BP
1087 struct datapath *dp;
1088 struct tbl *table;
9c52546b 1089 int err;
704a1e09 1090
37a1300c
BP
1091 if (!a[ODP_FLOW_ATTR_KEY])
1092 return -EINVAL;
1093 err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
1094 if (err)
1095 return err;
704a1e09 1096
254f2dc8 1097 dp = get_dp(odp_header->dp_ifindex);
9c52546b 1098 if (!dp)
ed099e92 1099 return -ENODEV;
704a1e09 1100
9c52546b 1101 table = get_table_protected(dp);
37a1300c 1102 flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
d6569377 1103 if (!flow_node)
ed099e92 1104 return -ENOENT;
d6569377 1105
d6569377 1106 flow = flow_cast(flow_node);
37a1300c
BP
1107 reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
1108 if (IS_ERR(reply))
1109 return PTR_ERR(reply);
36956a7d 1110
37a1300c 1111 return genlmsg_reply(reply, info);
d6569377 1112}
9c52546b 1113
37a1300c 1114static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
d6569377 1115{
37a1300c
BP
1116 struct nlattr **a = info->attrs;
1117 struct odp_header *odp_header = info->userhdr;
1118 struct sw_flow_key key;
d6569377 1119 struct tbl_node *flow_node;
37a1300c 1120 struct sk_buff *reply;
d6569377 1121 struct sw_flow *flow;
d6569377 1122 struct datapath *dp;
37a1300c 1123 struct tbl *table;
d6569377 1124 int err;
36956a7d 1125
37a1300c 1126 if (!a[ODP_FLOW_ATTR_KEY])
254f2dc8 1127 return flush_flows(odp_header->dp_ifindex);
37a1300c
BP
1128 err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
1129 if (err)
1130 return err;
d6569377 1131
254f2dc8 1132 dp = get_dp(odp_header->dp_ifindex);
d6569377 1133 if (!dp)
37a1300c 1134 return -ENODEV;
d6569377 1135
37a1300c
BP
1136 table = get_table_protected(dp);
1137 flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
d6569377 1138 if (!flow_node)
37a1300c 1139 return -ENOENT;
d6569377 1140 flow = flow_cast(flow_node);
d6569377 1141
37a1300c
BP
1142 reply = odp_flow_cmd_alloc_info(flow);
1143 if (!reply)
1144 return -ENOMEM;
1145
1146 err = tbl_remove(table, flow_node);
1147 if (err) {
1148 kfree_skb(reply);
1149 return err;
1150 }
1151
1152 err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
1153 info->snd_seq, 0, ODP_FLOW_CMD_DEL);
1154 BUG_ON(err < 0);
1155
1156 flow_deferred_free(flow);
1157
1158 genl_notify(reply, genl_info_net(info), info->snd_pid,
1159 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1160 return 0;
1161}
1162
1163static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1164{
1165 struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
1166 struct datapath *dp;
1167
254f2dc8 1168 dp = get_dp(odp_header->dp_ifindex);
37a1300c
BP
1169 if (!dp)
1170 return -ENODEV;
1171
1172 for (;;) {
1173 struct tbl_node *flow_node;
1174 struct sw_flow *flow;
1175 u32 bucket, obj;
1176
1177 bucket = cb->args[0];
1178 obj = cb->args[1];
1179 flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
1180 if (!flow_node)
1181 break;
1182
1183 flow = flow_cast(flow_node);
1184 if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
1185 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1186 ODP_FLOW_CMD_NEW) < 0)
1187 break;
1188
1189 cb->args[0] = bucket;
1190 cb->args[1] = obj;
1191 }
1192 return skb->len;
704a1e09
BP
1193}
1194
37a1300c
BP
/* Generic Netlink operations for the flow family.  GET is open to
 * unprivileged users; every mutating command requires CAP_NET_ADMIN.
 * NEW and SET share one handler that dispatches on genlhdr->cmd. */
static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = ODP_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set
	},
	{ .cmd = ODP_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_del
	},
	{ .cmd = ODP_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_get,
	  .dumpit = odp_flow_cmd_dump
	},
	{ .cmd = ODP_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set,
	},
};
1218
/* Netlink attribute policy for datapath commands.  The NAME entry is
 * only usable where the kernel supports NLA_NUL_STRING; without it the
 * name is validated by odp_dp_cmd_validate() via CHECK_NUL_STRING. */
static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
	[ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};
1226
aaff4b55
BP
/* Generic Netlink family for datapath management.  Every message
 * carries a struct odp_header (dp_ifindex) as its fixed header. */
static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_DATAPATH_FAMILY,
	.version = 1,
	.maxattr = ODP_DP_ATTR_MAX
};

/* Multicast group on which datapath add/change/delete events are broadcast. */
static struct genl_multicast_group dp_datapath_multicast_group = {
	.name = ODP_DATAPATH_MCGROUP
};
1238
/* Fills @skb with one ODP_DP_CMD_* message describing @dp.
 *
 * Returns the result of genlmsg_end() on success or -EMSGSIZE if the
 * message did not fit.  Note that the NLA_PUT* macros below jump to
 * nla_put_failure on failure, which cancels the partial message. */
static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct odp_header *odp_header;
	struct nlattr *nla;
	int err;

	odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!odp_header)
		goto error;

	odp_header->dp_ifindex = dp->dp_ifindex;

	/* dp_name() presumably requires RCU protection — hence the
	 * rcu_read_lock around just this call.  TODO(review): confirm. */
	rcu_read_lock();
	err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	/* Reserve space first, then fill the stats in place. */
	nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
	if (!nla)
		goto nla_put_failure;
	get_dp_stats(dp, nla_data(nla));

	NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
		    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

	/* Sampling attribute is only reported when sampling is enabled. */
	if (dp->sflow_probability)
		NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

	/* Nested list of per-command packet multicast group IDs. */
	nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
	if (!nla)
		goto nla_put_failure;
	NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	genlmsg_cancel(skb, odp_header);
error:
	return -EMSGSIZE;
}
1285
aaff4b55
BP
1286static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
1287 u32 seq, u8 cmd)
d6569377 1288{
d6569377 1289 struct sk_buff *skb;
aaff4b55 1290 int retval;
d6569377 1291
aaff4b55 1292 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
064af421 1293 if (!skb)
d6569377 1294 return ERR_PTR(-ENOMEM);
659586ef 1295
aaff4b55
BP
1296 retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1297 if (retval < 0) {
1298 kfree_skb(skb);
1299 return ERR_PTR(retval);
1300 }
1301 return skb;
1302}
9dca7bd5 1303
aaff4b55
BP
1304static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1305{
d6569377
BP
1306 if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
1307 u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
9dca7bd5 1308
d6569377 1309 if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
aaff4b55 1310 return -EINVAL;
d6569377
BP
1311 }
1312
9cb8d24d 1313 return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
d6569377
BP
1314}
1315
ed099e92 1316/* Called with genl_mutex and optionally with RTNL lock also. */
aaff4b55 1317static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
d6569377 1318{
254f2dc8
BP
1319 struct datapath *dp;
1320
1321 if (!a[ODP_DP_ATTR_NAME])
1322 dp = get_dp(odp_header->dp_ifindex);
1323 else {
d6569377 1324 struct vport *vport;
d6569377 1325
057dd6d2 1326 rcu_read_lock();
d6569377 1327 vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
254f2dc8 1328 dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
057dd6d2 1329 rcu_read_unlock();
d6569377 1330 }
254f2dc8 1331 return dp ? dp : ERR_PTR(-ENODEV);
d6569377
BP
1332}
1333
ed099e92 1334/* Called with genl_mutex. */
d6569377
BP
1335static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1336{
1337 if (a[ODP_DP_ATTR_IPV4_FRAGS])
1338 dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
1339 if (a[ODP_DP_ATTR_SAMPLING])
1340 dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
1341}
1342
aaff4b55 1343static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1344{
aaff4b55 1345 struct nlattr **a = info->attrs;
d6569377 1346 struct vport_parms parms;
aaff4b55 1347 struct sk_buff *reply;
d6569377
BP
1348 struct datapath *dp;
1349 struct vport *vport;
d6569377 1350 int err;
d6569377 1351
d6569377
BP
1352 err = -EINVAL;
1353 if (!a[ODP_DP_ATTR_NAME])
aaff4b55
BP
1354 goto err;
1355
1356 err = odp_dp_cmd_validate(a);
1357 if (err)
1358 goto err;
d6569377
BP
1359
1360 rtnl_lock();
d6569377
BP
1361 err = -ENODEV;
1362 if (!try_module_get(THIS_MODULE))
ed099e92 1363 goto err_unlock_rtnl;
d6569377 1364
d6569377
BP
1365 err = -ENOMEM;
1366 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1367 if (dp == NULL)
1368 goto err_put_module;
1369 INIT_LIST_HEAD(&dp->port_list);
d6569377
BP
1370
1371 /* Initialize kobject for bridge. This will be added as
1372 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1373 dp->ifobj.kset = NULL;
1374 kobject_init(&dp->ifobj, &dp_ktype);
1375
1376 /* Allocate table. */
1377 err = -ENOMEM;
1378 rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
1379 if (!dp->table)
1380 goto err_free_dp;
1381
1382 /* Set up our datapath device. */
1383 parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
1384 parms.type = ODP_VPORT_TYPE_INTERNAL;
1385 parms.options = NULL;
1386 parms.dp = dp;
1387 parms.port_no = ODPP_LOCAL;
1388 vport = new_vport(&parms);
1389 if (IS_ERR(vport)) {
1390 err = PTR_ERR(vport);
1391 if (err == -EBUSY)
1392 err = -EEXIST;
1393
1394 goto err_destroy_table;
1395 }
254f2dc8 1396 dp->dp_ifindex = vport_get_ifindex(vport);
d6569377
BP
1397
1398 dp->drop_frags = 0;
1399 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1400 if (!dp->stats_percpu) {
1401 err = -ENOMEM;
1402 goto err_destroy_local_port;
1403 }
1404
1405 change_datapath(dp, a);
1406
aaff4b55
BP
1407 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1408 err = PTR_ERR(reply);
1409 if (IS_ERR(reply))
1410 goto err_destroy_local_port;
1411
254f2dc8 1412 list_add_tail(&dp->list_node, &dps);
d6569377
BP
1413 dp_sysfs_add_dp(dp);
1414
d6569377
BP
1415 rtnl_unlock();
1416
aaff4b55
BP
1417 genl_notify(reply, genl_info_net(info), info->snd_pid,
1418 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
d6569377
BP
1419 return 0;
1420
1421err_destroy_local_port:
1422 dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
1423err_destroy_table:
1424 tbl_destroy(get_table_protected(dp), NULL);
1425err_free_dp:
d6569377
BP
1426 kfree(dp);
1427err_put_module:
1428 module_put(THIS_MODULE);
ed099e92 1429err_unlock_rtnl:
d6569377 1430 rtnl_unlock();
d6569377 1431err:
064af421
BP
1432 return err;
1433}
1434
/* Genl doit handler for ODP_DP_CMD_DEL: tears down a datapath — detaches
 * all ports (local port last), unlinks it from the global list, and
 * frees it via RCU after the RTNL lock is dropped.  Broadcasts an
 * ODP_DP_CMD_DEL notification on success. */
static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct vport *vport, *next_vport;
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		goto exit;

	rtnl_lock();
	dp = lookup_datapath(info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto exit_unlock;

	/* Build the notification up front so an allocation failure leaves
	 * the datapath fully intact. */
	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	/* Detach all non-local ports first; the local port goes last
	 * because it represents the datapath itself. */
	list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
		if (vport->port_no != ODPP_LOCAL)
			dp_detach_port(vport);

	dp_sysfs_del_dp(dp);
	list_del(&dp->list_node);
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

	/* rtnl_unlock() will wait until all the references to devices that
	 * are pending unregistration have been dropped.  We do it here to
	 * ensure that any internal devices (which contain DP pointers) are
	 * fully destroyed before freeing the datapath.
	 */
	rtnl_unlock();

	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

	return 0;

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}
1485
aaff4b55 1486static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1487{
aaff4b55 1488 struct sk_buff *reply;
d6569377 1489 struct datapath *dp;
d6569377 1490 int err;
064af421 1491
aaff4b55
BP
1492 err = odp_dp_cmd_validate(info->attrs);
1493 if (err)
1494 return err;
38c6ecbc 1495
aaff4b55 1496 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1497 if (IS_ERR(dp))
aaff4b55 1498 return PTR_ERR(dp);
38c6ecbc 1499
aaff4b55 1500 change_datapath(dp, info->attrs);
38c6ecbc 1501
aaff4b55
BP
1502 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1503 if (IS_ERR(reply)) {
1504 err = PTR_ERR(reply);
1505 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1506 dp_datapath_multicast_group.id, err);
1507 return 0;
1508 }
1509
1510 genl_notify(reply, genl_info_net(info), info->snd_pid,
1511 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1512 return 0;
064af421
BP
1513}
1514
aaff4b55 1515static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1516{
aaff4b55 1517 struct sk_buff *reply;
d6569377 1518 struct datapath *dp;
d6569377 1519 int err;
1dcf111b 1520
aaff4b55
BP
1521 err = odp_dp_cmd_validate(info->attrs);
1522 if (err)
1523 return err;
1dcf111b 1524
aaff4b55 1525 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1526 if (IS_ERR(dp))
aaff4b55 1527 return PTR_ERR(dp);
1dcf111b 1528
aaff4b55
BP
1529 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1530 if (IS_ERR(reply))
1531 return PTR_ERR(reply);
1532
1533 return genlmsg_reply(reply, info);
1dcf111b
JP
1534}
1535
aaff4b55 1536static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1537{
254f2dc8
BP
1538 struct datapath *dp;
1539 int skip = cb->args[0];
1540 int i = 0;
a7786963 1541
254f2dc8
BP
1542 list_for_each_entry (dp, &dps, list_node) {
1543 if (i < skip)
d6569377 1544 continue;
aaff4b55
BP
1545 if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1546 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1547 ODP_DP_CMD_NEW) < 0)
1548 break;
254f2dc8 1549 i++;
a7786963 1550 }
aaff4b55 1551
254f2dc8
BP
1552 cb->args[0] = i;
1553
aaff4b55 1554 return skb->len;
c19e6535
BP
1555}
1556
aaff4b55
BP
/* Generic Netlink operations for the datapath family.  GET is open to
 * unprivileged users; every mutating command requires CAP_NET_ADMIN. */
static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = ODP_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_new
	},
	{ .cmd = ODP_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_del
	},
	{ .cmd = ODP_DP_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_get,
	  .dumpit = odp_dp_cmd_dump
	},
	{ .cmd = ODP_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_set,
	},
};
1580
/* Netlink attribute policy for vport commands.
 *
 * NOTE(review): the PORT_NO and TYPE entries sit inside the
 * HAVE_NLA_NUL_STRING branch only, so kernels lacking NLA_NUL_STRING get
 * no policy for them at all (the compat #else branch only re-declares
 * STATS/ADDRESS with .minlen).  Looks intentional for the compat layer,
 * but worth confirming. */
static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
	[ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
	[ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
	[ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
	[ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};
1595
f0fef760
BP
/* Generic Netlink family for vport management.  Every message carries a
 * struct odp_header (dp_ifindex) as its fixed header. */
static struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_VPORT_FAMILY,
	.version = 1,
	.maxattr = ODP_VPORT_ATTR_MAX
};

/* Multicast group on which vport add/change/delete events are broadcast. */
static struct genl_multicast_group dp_vport_multicast_group = {
	.name = ODP_VPORT_MCGROUP
};
1607
/* Called with RTNL lock or RCU read lock.
 *
 * Fills @skb with one ODP_VPORT_CMD_* message describing @vport.
 * Returns the result of genlmsg_end() on success, -EMSGSIZE if the
 * message did not fit, or a vport_get_options() error.  The NLA_PUT*
 * macros jump to nla_put_failure on failure. */
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct odp_header *odp_header;
	struct nlattr *nla;
	int ifindex, iflink;
	int mtu;
	int err;

	odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!odp_header)
		return -EMSGSIZE;

	odp_header->dp_ifindex = vport->dp->dp_ifindex;

	NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
	NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
	NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

	/* Reserve the stats attribute, fill it in place, and trim it back
	 * off again if the vport cannot provide stats. */
	nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
	if (!nla)
		goto nla_put_failure;
	if (vport_get_stats(vport, nla_data(nla)))
		__skb_trim(skb, skb->len - nla->nla_len);

	NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

	/* An MTU of 0 apparently means "no MTU to report" — the attribute
	 * is omitted in that case. */
	mtu = vport_get_mtu(vport);
	if (mtu)
		NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);

	err = vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	/* ifindex/iflink are only meaningful for vports backed by real
	 * net devices; non-positive values are omitted. */
	ifindex = vport_get_ifindex(vport);
	if (ifindex > 0)
		NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

	iflink = vport_get_iflink(vport);
	if (iflink > 0)
		NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, odp_header);
	return err;
}
1661
f0fef760
BP
1662/* Called with RTNL lock or RCU read lock. */
1663static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
1664 u32 seq, u8 cmd)
064af421 1665{
c19e6535 1666 struct sk_buff *skb;
f0fef760 1667 int retval;
c19e6535 1668
f0fef760 1669 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1670 if (!skb)
1671 return ERR_PTR(-ENOMEM);
1672
f0fef760
BP
1673 retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1674 if (retval < 0) {
1675 kfree_skb(skb);
1676 return ERR_PTR(retval);
1677 }
c19e6535 1678 return skb;
f0fef760 1679}
c19e6535 1680
f0fef760
BP
1681static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1682{
9cb8d24d 1683 return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
c19e6535 1684}
51d4d598 1685
ed099e92 1686/* Called with RTNL lock or RCU read lock. */
f0fef760 1687static struct vport *lookup_vport(struct odp_header *odp_header,
c19e6535
BP
1688 struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1689{
1690 struct datapath *dp;
1691 struct vport *vport;
1692
1693 if (a[ODP_VPORT_ATTR_NAME]) {
c19e6535 1694 vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
ed099e92 1695 if (!vport)
c19e6535 1696 return ERR_PTR(-ENODEV);
c19e6535
BP
1697 return vport;
1698 } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
1699 u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1700
1701 if (port_no >= DP_MAX_PORTS)
f0fef760 1702 return ERR_PTR(-EFBIG);
c19e6535 1703
254f2dc8 1704 dp = get_dp(odp_header->dp_ifindex);
c19e6535
BP
1705 if (!dp)
1706 return ERR_PTR(-ENODEV);
f2459fe7 1707
c19e6535 1708 vport = get_vport_protected(dp, port_no);
ed099e92 1709 if (!vport)
c19e6535 1710 return ERR_PTR(-ENOENT);
c19e6535
BP
1711 return vport;
1712 } else
1713 return ERR_PTR(-EINVAL);
064af421
BP
1714}
1715
ed099e92 1716/* Called with RTNL lock. */
c19e6535 1717static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
064af421 1718{
c19e6535
BP
1719 int err = 0;
1720 if (a[ODP_VPORT_ATTR_STATS])
1721 err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
1722 if (!err && a[ODP_VPORT_ATTR_ADDRESS])
1723 err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
1724 if (!err && a[ODP_VPORT_ATTR_MTU])
1725 err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
1726 return err;
1727}
1728
/* Genl doit handler for ODP_VPORT_CMD_NEW: attaches a new port to a
 * datapath, either at an explicitly requested port number or at the
 * first free slot, applies the optional attributes, and broadcasts the
 * new port.  If attribute application or notification building fails,
 * the freshly created port is detached again before returning. */
static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	/* Name and type are mandatory for creation. */
	err = -EINVAL;
	if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
		goto exit;

	err = odp_vport_cmd_validate(a);
	if (err)
		goto exit;

	rtnl_lock();
	dp = get_dp(odp_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock;

	if (a[ODP_VPORT_ATTR_PORT_NO]) {
		/* Caller chose the port number; it must be in range and free. */
		port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

		err = -EFBIG;
		if (port_no >= DP_MAX_PORTS)
			goto exit_unlock;

		vport = get_vport_protected(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock;
	} else {
		/* Pick the first free port number (skipping ODPP_LOCAL at 0). */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock;
			}
			vport = get_vport_protected(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
	parms.options = a[ODP_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	set_internal_devs_mtu(dp);
	dp_sysfs_add_if(vport);

	/* Apply optional stats/address/MTU; on any failure, roll back by
	 * detaching the port we just created. */
	err = change_vport(vport, a);
	if (!err) {
		reply = odp_vport_cmd_build_info(vport, info->snd_pid,
						 info->snd_seq, ODP_VPORT_CMD_NEW);
		if (IS_ERR(reply))
			err = PTR_ERR(reply);
	}
	if (err) {
		dp_detach_port(vport);
		goto exit_unlock;
	}
	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);


exit_unlock:
	rtnl_unlock();
exit:
	return err;
}
1811
f0fef760 1812static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1813{
f0fef760
BP
1814 struct nlattr **a = info->attrs;
1815 struct sk_buff *reply;
c19e6535 1816 struct vport *vport;
c19e6535 1817 int err;
44e05eca 1818
f0fef760
BP
1819 err = odp_vport_cmd_validate(a);
1820 if (err)
c19e6535
BP
1821 goto exit;
1822
1823 rtnl_lock();
f0fef760 1824 vport = lookup_vport(info->userhdr, a);
c19e6535
BP
1825 err = PTR_ERR(vport);
1826 if (IS_ERR(vport))
f0fef760 1827 goto exit_unlock;
44e05eca 1828
c19e6535
BP
1829 err = 0;
1830 if (a[ODP_VPORT_ATTR_OPTIONS])
1831 err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
1832 if (!err)
1833 err = change_vport(vport, a);
1834
f0fef760
BP
1835 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1836 ODP_VPORT_CMD_NEW);
1837 if (IS_ERR(reply)) {
1838 err = PTR_ERR(reply);
1839 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1840 dp_vport_multicast_group.id, err);
1841 return 0;
1842 }
1843
1844 genl_notify(reply, genl_info_net(info), info->snd_pid,
1845 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1846
1847exit_unlock:
c19e6535
BP
1848 rtnl_unlock();
1849exit:
1850 return err;
064af421
BP
1851}
1852
f0fef760 1853static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1854{
f0fef760
BP
1855 struct nlattr **a = info->attrs;
1856 struct sk_buff *reply;
c19e6535 1857 struct vport *vport;
c19e6535
BP
1858 int err;
1859
f0fef760
BP
1860 err = odp_vport_cmd_validate(a);
1861 if (err)
c19e6535
BP
1862 goto exit;
1863
1864 rtnl_lock();
f0fef760 1865 vport = lookup_vport(info->userhdr, a);
c19e6535 1866 err = PTR_ERR(vport);
f0fef760
BP
1867 if (IS_ERR(vport))
1868 goto exit_unlock;
c19e6535 1869
f0fef760
BP
1870 if (vport->port_no == ODPP_LOCAL) {
1871 err = -EINVAL;
1872 goto exit_unlock;
1873 }
1874
1875 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1876 ODP_VPORT_CMD_DEL);
1877 err = PTR_ERR(reply);
1878 if (IS_ERR(reply))
1879 goto exit_unlock;
1880
1881 err = dp_detach_port(vport);
1882
1883 genl_notify(reply, genl_info_net(info), info->snd_pid,
1884 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1885
1886exit_unlock:
c19e6535
BP
1887 rtnl_unlock();
1888exit:
1889 return err;
7c40efc9
BP
1890}
1891
f0fef760 1892static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1893{
f0fef760
BP
1894 struct nlattr **a = info->attrs;
1895 struct odp_header *odp_header = info->userhdr;
ed099e92 1896 struct sk_buff *reply;
c19e6535 1897 struct vport *vport;
c19e6535
BP
1898 int err;
1899
f0fef760
BP
1900 err = odp_vport_cmd_validate(a);
1901 if (err)
1902 goto exit;
c19e6535 1903
ed099e92 1904 rcu_read_lock();
f0fef760 1905 vport = lookup_vport(odp_header, a);
c19e6535
BP
1906 err = PTR_ERR(vport);
1907 if (IS_ERR(vport))
f0fef760 1908 goto exit_unlock;
c19e6535 1909
f0fef760
BP
1910 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1911 ODP_VPORT_CMD_NEW);
ed099e92
BP
1912 err = PTR_ERR(reply);
1913 if (IS_ERR(reply))
f0fef760 1914 goto exit_unlock;
ed099e92 1915
f0fef760 1916 err = genlmsg_reply(reply, info);
ed099e92 1917
f0fef760 1918exit_unlock:
ed099e92 1919 rcu_read_unlock();
f0fef760 1920exit:
c19e6535
BP
1921 return err;
1922}
1923
f0fef760 1924static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1925{
f0fef760 1926 struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535
BP
1927 struct datapath *dp;
1928 u32 port_no;
f0fef760 1929 int retval;
c19e6535 1930
254f2dc8 1931 dp = get_dp(odp_header->dp_ifindex);
c19e6535 1932 if (!dp)
f0fef760 1933 return -ENODEV;
ed099e92
BP
1934
1935 rcu_read_lock();
f0fef760 1936 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
ed099e92 1937 struct vport *vport;
ed099e92
BP
1938
1939 vport = get_vport_protected(dp, port_no);
1940 if (!vport)
1941 continue;
1942
f0fef760
BP
1943 if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1944 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1945 ODP_VPORT_CMD_NEW) < 0)
1946 break;
c19e6535 1947 }
ed099e92 1948 rcu_read_unlock();
c19e6535 1949
f0fef760
BP
1950 cb->args[0] = port_no;
1951 retval = skb->len;
1952
1953 return retval;
7c40efc9
BP
1954}
1955
f0fef760
BP
/* Generic Netlink operations for the vport family.  GET is open to
 * unprivileged users; every mutating command requires CAP_NET_ADMIN. */
static struct genl_ops dp_vport_genl_ops[] = {
	{ .cmd = ODP_VPORT_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = odp_vport_cmd_new
	},
	{ .cmd = ODP_VPORT_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = odp_vport_cmd_del
	},
	{ .cmd = ODP_VPORT_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = vport_policy,
	  .doit = odp_vport_cmd_get,
	  .dumpit = odp_vport_cmd_dump
	},
	{ .cmd = ODP_VPORT_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = odp_vport_cmd_set,
	},
};
1979
982b8810
BP
/* Bundles a Generic Netlink family with its operations array and its
 * (optional) multicast group so they can be registered and unregistered
 * together by dp_register_genl()/dp_unregister_genl(). */
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;	/* NULL if the family has no group. */
};
ed099e92 1986
/* All Generic Netlink families exported by this module.  The packet
 * family registers its multicast groups separately (see
 * packet_register_mc_groups() in dp_register_genl()), hence NULL here. */
static const struct genl_family_and_ops dp_genl_families[] = {
	{ &dp_datapath_genl_family,
	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
	  &dp_datapath_multicast_group },
	{ &dp_vport_genl_family,
	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
	  &dp_vport_multicast_group },
	{ &dp_flow_genl_family,
	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
	  &dp_flow_multicast_group },
	{ &dp_packet_genl_family,
	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
	  NULL },
};
ed099e92 2001
982b8810
BP
2002static void dp_unregister_genl(int n_families)
2003{
2004 int i;
ed099e92 2005
982b8810
BP
2006 for (i = 0; i < n_families; i++) {
2007 genl_unregister_family(dp_genl_families[i].family);
2008 }
ed099e92
BP
2009}
2010
982b8810 2011static int dp_register_genl(void)
064af421 2012{
982b8810
BP
2013 int n_registered;
2014 int err;
2015 int i;
064af421 2016
982b8810
BP
2017 n_registered = 0;
2018 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2019 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 2020
982b8810
BP
2021 err = genl_register_family_with_ops(f->family, f->ops,
2022 f->n_ops);
2023 if (err)
2024 goto error;
2025 n_registered++;
e22d4953 2026
982b8810
BP
2027 if (f->group) {
2028 err = genl_register_mc_group(f->family, f->group);
2029 if (err)
2030 goto error;
2031 }
2032 }
9cc8b4e4 2033
982b8810
BP
2034 err = packet_register_mc_groups();
2035 if (err)
2036 goto error;
2037 return 0;
064af421
BP
2038
2039error:
982b8810
BP
2040 dp_unregister_genl(n_registered);
2041 return err;
064af421
BP
2042}
2043
22d24ebf
BP
2044static int __init dp_init(void)
2045{
f2459fe7 2046 struct sk_buff *dummy_skb;
22d24ebf
BP
2047 int err;
2048
f2459fe7 2049 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 2050
f2459fe7 2051 printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421
BP
2052
2053 err = flow_init();
2054 if (err)
2055 goto error;
2056
f2459fe7 2057 err = vport_init();
064af421
BP
2058 if (err)
2059 goto error_flow_exit;
2060
f2459fe7
JG
2061 err = register_netdevice_notifier(&dp_device_notifier);
2062 if (err)
2063 goto error_vport_exit;
2064
982b8810
BP
2065 err = dp_register_genl();
2066 if (err < 0)
37a1300c 2067 goto error_unreg_notifier;
982b8810 2068
064af421
BP
2069 return 0;
2070
2071error_unreg_notifier:
2072 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2073error_vport_exit:
2074 vport_exit();
064af421
BP
2075error_flow_exit:
2076 flow_exit();
2077error:
2078 return err;
2079}
2080
/*
 * Module unload entry point: tears down the subsystems brought up by
 * dp_init(), in reverse order.
 */
static void dp_cleanup(void)
{
	/* Wait for all outstanding RCU callbacks to run before the code
	 * and data they reference are unloaded with the module. */
	rcu_barrier();
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_netdevice_notifier(&dp_device_notifier);
	vport_exit();
	flow_exit();
}
2089
/* Module load/unload hooks and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");