/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>
#include <linux/compat.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "loop_counter.h"
#include "table.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact with
 * each other.
 */

/* Protected by genl_mutex. */
static struct datapath __rcu *dps[256];

static struct vport *new_vport(const struct vport_parms *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_idx)
{
        if (dp_idx < 0 || dp_idx >= ARRAY_SIZE(dps))
                return NULL;

        return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
                                     lockdep_rtnl_is_held() ||
                                     lockdep_genl_is_held());
}
EXPORT_SYMBOL_GPL(get_dp);

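/* Illustration only -- not in the original file.  A lockless reader applies
 * the locking scheme documented above by bracketing get_dp() with
 * rcu_read_lock(); get_dp() checks exactly this via rcu_dereference_check().
 * The helper name is hypothetical. */
static bool example_dp_exists(int dp_idx)
{
        bool exists;

        rcu_read_lock();
        exists = get_dp(dp_idx) != NULL;
        rcu_read_unlock();

        return exists;
}
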
/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
        return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
        return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
        return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

064af421
BP
113static inline size_t br_nlmsg_size(void)
114{
115 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
116 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
117 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
118 + nla_total_size(4) /* IFLA_MASTER */
119 + nla_total_size(4) /* IFLA_MTU */
120 + nla_total_size(4) /* IFLA_LINK */
121 + nla_total_size(1); /* IFLA_OPERSTATE */
122}
123
ed099e92 124/* Caller must hold RTNL lock. */
064af421 125static int dp_fill_ifinfo(struct sk_buff *skb,
e779d8d9 126 const struct vport *port,
064af421
BP
127 int event, unsigned int flags)
128{
027f9007 129 struct datapath *dp = port->dp;
e779d8d9
BP
130 int ifindex = vport_get_ifindex(port);
131 int iflink = vport_get_iflink(port);
064af421
BP
132 struct ifinfomsg *hdr;
133 struct nlmsghdr *nlh;
134
f2459fe7
JG
135 if (ifindex < 0)
136 return ifindex;
137
138 if (iflink < 0)
139 return iflink;
140
064af421
BP
141 nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
142 if (nlh == NULL)
143 return -EMSGSIZE;
144
145 hdr = nlmsg_data(nlh);
146 hdr->ifi_family = AF_BRIDGE;
147 hdr->__ifi_pad = 0;
f2459fe7
JG
148 hdr->ifi_type = ARPHRD_ETHER;
149 hdr->ifi_index = ifindex;
e779d8d9 150 hdr->ifi_flags = vport_get_flags(port);
064af421
BP
151 hdr->ifi_change = 0;
152
e779d8d9 153 NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
ad919711 154 NLA_PUT_U32(skb, IFLA_MASTER,
1452b28c 155 vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
e779d8d9 156 NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
064af421
BP
157#ifdef IFLA_OPERSTATE
158 NLA_PUT_U8(skb, IFLA_OPERSTATE,
e779d8d9
BP
159 vport_is_running(port)
160 ? vport_get_operstate(port)
f2459fe7 161 : IF_OPER_DOWN);
064af421
BP
162#endif
163
e779d8d9 164 NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
064af421 165
f2459fe7
JG
166 if (ifindex != iflink)
 167         NLA_PUT_U32(skb, IFLA_LINK, iflink);
064af421
BP
168
169 return nlmsg_end(skb, nlh);
170
171nla_put_failure:
172 nlmsg_cancel(skb, nlh);
173 return -EMSGSIZE;
174}
175
ed099e92 176/* Caller must hold RTNL lock. */
e779d8d9 177static void dp_ifinfo_notify(int event, struct vport *port)
064af421 178{
064af421
BP
179 struct sk_buff *skb;
180 int err = -ENOBUFS;
181
182 skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
183 if (skb == NULL)
184 goto errout;
185
186 err = dp_fill_ifinfo(skb, port, event, 0);
187 if (err < 0) {
188 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
189 WARN_ON(err == -EMSGSIZE);
190 kfree_skb(skb);
191 goto errout;
192 }
f2459fe7 193 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
cfe7c1f5 194 return;
064af421
BP
195errout:
196 if (err < 0)
f2459fe7 197 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
064af421
BP
198}
199
static void release_dp(struct kobject *kobj)
{
        struct datapath *dp = container_of(kobj, struct datapath, ifobj);
        kfree(dp);
}

static struct kobj_type dp_ktype = {
        .release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
        free_percpu(dp->stats_percpu);
        kobject_put(&dp->ifobj);
}

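/* Added commentary, not in the original: destroy_dp_rcu() is the RCU callback
 * used when a datapath is deleted.  The delete path later in this file first
 * unhooks the datapath under RTNL/genl_mutex and only then defers the free
 * past a grace period:
 *
 *         rcu_assign_pointer(dps[dp->dp_idx], NULL);
 *         call_rcu(&dp->rcu, destroy_dp_rcu);
 */
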
ed099e92 219/* Called with RTNL lock and genl_lock. */
c19e6535 220static struct vport *new_vport(const struct vport_parms *parms)
064af421 221{
f2459fe7 222 struct vport *vport;
f2459fe7 223
c19e6535
BP
224 vport = vport_add(parms);
225 if (!IS_ERR(vport)) {
226 struct datapath *dp = parms->dp;
064af421 227
c19e6535 228 rcu_assign_pointer(dp->ports[parms->port_no], vport);
ed099e92 229 list_add(&vport->node, &dp->port_list);
064af421 230
c19e6535
BP
231 dp_ifinfo_notify(RTM_NEWLINK, vport);
232 }
064af421 233
c19e6535 234 return vport;
064af421
BP
235}
236
ed099e92 237/* Called with RTNL lock. */
e779d8d9 238int dp_detach_port(struct vport *p)
064af421
BP
239{
240 ASSERT_RTNL();
241
2e7dd8ec 242 if (p->port_no != ODPP_LOCAL)
0515ceb3 243 dp_sysfs_del_if(p);
064af421
BP
244 dp_ifinfo_notify(RTM_DELLINK, p);
245
064af421 246 /* First drop references to device. */
ed099e92 247 list_del(&p->node);
064af421 248 rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
f2459fe7 249
7237e4f4 250 /* Then destroy it. */
057dd6d2 251 return vport_del(p);
064af421
BP
252}
253
8819fac7 254/* Must be called with rcu_read_lock. */
e779d8d9 255void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
256{
257 struct datapath *dp = p->dp;
258 struct dp_stats_percpu *stats;
8819fac7 259 int stats_counter_off;
55574bb0
BP
260 struct sw_flow_actions *acts;
261 struct loop_counter *loop;
4c1ad233 262 int error;
064af421 263
e779d8d9 264 OVS_CB(skb)->vport = p;
a063b0df 265
3976f6d5 266 if (!OVS_CB(skb)->flow) {
36956a7d 267 struct sw_flow_key key;
3976f6d5 268 struct tbl_node *flow_node;
b7a31ec1 269 bool is_frag;
4c1ad233 270
3976f6d5 271 /* Extract flow from 'skb' into 'key'. */
c75d4dcf 272 error = flow_extract(skb, p->port_no, &key, &is_frag);
3976f6d5
JG
273 if (unlikely(error)) {
274 kfree_skb(skb);
275 return;
276 }
064af421 277
b7a31ec1 278 if (is_frag && dp->drop_frags) {
3976f6d5
JG
279 kfree_skb(skb);
280 stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
281 goto out;
282 }
283
284 /* Look up flow. */
285 flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
286 flow_hash(&key), flow_cmp);
287 if (unlikely(!flow_node)) {
856081f6
BP
288 struct dp_upcall_info upcall;
289
982b8810 290 upcall.cmd = ODP_PACKET_CMD_MISS;
856081f6
BP
291 upcall.key = &key;
292 upcall.userdata = 0;
293 upcall.sample_pool = 0;
294 upcall.actions = NULL;
295 upcall.actions_len = 0;
296 dp_upcall(dp, skb, &upcall);
3976f6d5
JG
297 stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
298 goto out;
299 }
300
301 OVS_CB(skb)->flow = flow_cast(flow_node);
55574bb0
BP
302 }
303
f267de8a 304 stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
3976f6d5 305 flow_used(OVS_CB(skb)->flow, skb);
55574bb0 306
3976f6d5 307 acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
55574bb0
BP
308
309 /* Check whether we've looped too much. */
7eaa9830
JG
310 loop = loop_get_counter();
311 if (unlikely(++loop->count > MAX_LOOPS))
55574bb0
BP
312 loop->looping = true;
313 if (unlikely(loop->looping)) {
7eaa9830 314 loop_suppress(dp, acts);
f267de8a 315 kfree_skb(skb);
55574bb0 316 goto out_loop;
064af421 317 }
8819fac7 318
55574bb0 319 /* Execute actions. */
3976f6d5 320 execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
cdee00fd 321 acts->actions_len);
55574bb0
BP
322
323 /* Check whether sub-actions looped too much. */
324 if (unlikely(loop->looping))
7eaa9830 325 loop_suppress(dp, acts);
55574bb0
BP
326
327out_loop:
328 /* Decrement loop counter. */
329 if (!--loop->count)
330 loop->looping = false;
7eaa9830 331 loop_put_counter();
55574bb0 332
8819fac7 333out:
55574bb0 334 /* Update datapath statistics. */
8819fac7
JG
335 local_bh_disable();
336 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
38c6ecbc
JG
337
338 write_seqcount_begin(&stats->seqlock);
8819fac7 339 (*(u64 *)((u8 *)stats + stats_counter_off))++;
38c6ecbc
JG
340 write_seqcount_end(&stats->seqlock);
341
8819fac7 342 local_bh_enable();
064af421
BP
343}
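
/* Illustration only -- not in the original file.  A vport receive hook (such
 * as vport_receive() in vport.c) hands packets to the datapath under
 * rcu_read_lock(), as required by the comment above; the function name and
 * the explicit flow reset are hypothetical. */
static void example_vport_rx(struct vport *vport, struct sk_buff *skb)
{
        rcu_read_lock();
        OVS_CB(skb)->flow = NULL;       /* force a flow-table lookup */
        dp_process_received_packet(vport, skb);
        rcu_read_unlock();
}
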
344
856081f6
BP
345static void copy_and_csum_skb(struct sk_buff *skb, void *to)
346{
347 u16 csum_start, csum_offset;
348 __wsum csum;
349
350 get_skb_csum_pointers(skb, &csum_start, &csum_offset);
351 csum_start -= skb_headroom(skb);
352 BUG_ON(csum_start >= skb_headlen(skb));
353
354 skb_copy_bits(skb, 0, to, csum_start);
355
356 csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
357 skb->len - csum_start, 0);
358 *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
359}
360
982b8810
BP
361static struct genl_family dp_packet_genl_family;
362#define PACKET_N_MC_GROUPS 16
363
364static int packet_mc_group(struct datapath *dp, u8 cmd)
365{
366 BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
367 return jhash_2words(dp->dp_idx, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
368}
369
370/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
371 * 'upcall_info'. There will be only one packet unless we broke up a GSO
372 * packet.
373 */
856081f6
BP
374static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
375 const struct dp_upcall_info *upcall_info)
cb5087ca 376{
982b8810 377 u32 group = packet_mc_group(dp, upcall_info->cmd);
cb5087ca
BP
378 struct sk_buff *nskb;
379 int port_no;
380 int err;
381
e779d8d9
BP
382 if (OVS_CB(skb)->vport)
383 port_no = OVS_CB(skb)->vport->port_no;
f2459fe7
JG
384 else
385 port_no = ODPP_LOCAL;
cb5087ca
BP
386
387 do {
982b8810 388 struct odp_header *upcall;
856081f6
BP
389 struct sk_buff *user_skb; /* to be queued to userspace */
390 struct nlattr *nla;
391 unsigned int len;
cb5087ca
BP
392
393 nskb = skb->next;
394 skb->next = NULL;
395
982b8810 396 len = sizeof(struct odp_header);
856081f6
BP
397 len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
398 len += nla_total_size(skb->len);
399 len += nla_total_size(FLOW_BUFSIZE);
400 if (upcall_info->userdata)
401 len += nla_total_size(8);
402 if (upcall_info->sample_pool)
403 len += nla_total_size(4);
404 if (upcall_info->actions_len)
405 len += nla_total_size(upcall_info->actions_len);
406
982b8810
BP
407 user_skb = genlmsg_new(len, GFP_ATOMIC);
408 if (!user_skb) {
409 netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
cb5087ca 410 goto err_kfree_skbs;
982b8810 411 }
cb5087ca 412
982b8810 413 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
856081f6
BP
414 upcall->dp_idx = dp->dp_idx;
415
856081f6
BP
416 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
417 flow_to_nlattrs(upcall_info->key, user_skb);
418 nla_nest_end(user_skb, nla);
cb5087ca 419
856081f6
BP
420 if (upcall_info->userdata)
421 nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
422 if (upcall_info->sample_pool)
423 nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
424 if (upcall_info->actions_len) {
425 const struct nlattr *actions = upcall_info->actions;
426 u32 actions_len = upcall_info->actions_len;
427
428 nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
429 memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
430 nla_nest_end(user_skb, nla);
431 }
432
433 nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
434 if (skb->ip_summed == CHECKSUM_PARTIAL)
435 copy_and_csum_skb(skb, nla_data(nla));
436 else
437 skb_copy_bits(skb, 0, nla_data(nla), skb->len);
438
982b8810
BP
439 err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
440 if (err)
441 goto err_kfree_skbs;
856081f6
BP
442
443 kfree_skb(skb);
cb5087ca
BP
444 skb = nskb;
445 } while (skb);
446 return 0;
447
448err_kfree_skbs:
449 kfree_skb(skb);
450 while ((skb = nskb) != NULL) {
451 nskb = skb->next;
452 kfree_skb(skb);
453 }
454 return err;
455}
456
/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_PACKET_FAMILY,
        .version = 1,
        .maxattr = ODP_PACKET_ATTR_MAX
};

static int packet_register_mc_groups(void)
{
        int i;

        for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
                struct genl_multicast_group *group = &packet_mc_groups[i];
                int error;

                sprintf(group->name, "packet%d", i);
                error = genl_register_mc_group(&dp_packet_genl_family, group);
                if (error)
                        return error;
        }
        return 0;
}

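/* Illustration only -- not in the original file.  Because groups are shared
 * (see the comment above), each datapath's three upcall destinations are just
 * hashes into packet_mc_groups[]; the same indices are reported to userspace
 * in ODP_DP_ATTR_MCGROUPS.  The helper name is hypothetical. */
static void example_report_upcall_groups(struct datapath *dp)
{
        pr_info("dp%d: miss group %d, action group %d, sample group %d\n",
                dp->dp_idx,
                packet_mc_group(dp, ODP_PACKET_CMD_MISS),
                packet_mc_group(dp, ODP_PACKET_CMD_ACTION),
                packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
}
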
int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        WARN_ON_ONCE(skb_shared(skb));

        forward_ip_summed(skb);

        err = vswitch_skb_checksum_setup(skb);
        if (err)
                goto err_kfree_skb;

        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

                kfree_skb(skb);
                skb = nskb;
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
                        goto err;
                }
        }

        return queue_control_packets(dp, skb, upcall_info);

err_kfree_skb:
        kfree_skb(skb);
err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();

        return err;
}
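
/* Illustration only -- not in the original file.  An ODPAT_CONTROLLER action
 * would queue the packet to userspace much like the flow-miss upcall in
 * dp_process_received_packet(); the real caller lives in actions.c, and this
 * helper name and its exact field values are illustrative. */
static int example_controller_upcall(struct datapath *dp, struct sk_buff *skb,
                                     struct sw_flow_key *key, u64 arg)
{
        struct dp_upcall_info upcall;

        upcall.cmd = ODP_PACKET_CMD_ACTION;
        upcall.key = key;
        upcall.userdata = arg;
        upcall.sample_pool = 0;
        upcall.actions = NULL;
        upcall.actions_len = 0;

        return dp_upcall(dp, skb, &upcall);
}
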
/* Called with genl_mutex. */
static int flush_flows(int dp_idx)
{
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;

        dp = get_dp(dp_idx);
        if (!dp)
                return -ENODEV;

        old_table = get_table_protected(dp);
        new_table = tbl_create(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        tbl_deferred_destroy(old_table, flow_free_tbl);

        return 0;
}

static int validate_actions(const struct nlattr *actions, u32 actions_len)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_attr(a, actions, actions_len, rem) {
                static const u32 action_lens[ODPAT_MAX + 1] = {
                        [ODPAT_OUTPUT] = 4,
                        [ODPAT_CONTROLLER] = 8,
                        [ODPAT_SET_DL_TCI] = 2,
                        [ODPAT_STRIP_VLAN] = 0,
                        [ODPAT_SET_DL_SRC] = ETH_ALEN,
                        [ODPAT_SET_DL_DST] = ETH_ALEN,
                        [ODPAT_SET_NW_SRC] = 4,
                        [ODPAT_SET_NW_DST] = 4,
                        [ODPAT_SET_NW_TOS] = 1,
                        [ODPAT_SET_TP_SRC] = 2,
                        [ODPAT_SET_TP_DST] = 2,
                        [ODPAT_SET_TUNNEL] = 8,
                        [ODPAT_SET_PRIORITY] = 4,
                        [ODPAT_POP_PRIORITY] = 0,
                        [ODPAT_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);

                if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;

                switch (type) {
                case ODPAT_UNSPEC:
                        return -EINVAL;

                case ODPAT_CONTROLLER:
                case ODPAT_STRIP_VLAN:
                case ODPAT_SET_DL_SRC:
                case ODPAT_SET_DL_DST:
                case ODPAT_SET_NW_SRC:
                case ODPAT_SET_NW_DST:
                case ODPAT_SET_TP_SRC:
                case ODPAT_SET_TP_DST:
                case ODPAT_SET_TUNNEL:
                case ODPAT_SET_PRIORITY:
                case ODPAT_POP_PRIORITY:
                case ODPAT_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;

                case ODPAT_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case ODPAT_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;

                case ODPAT_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}
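
/* Illustration only -- not in the original file.  A minimal action list that
 * passes validate_actions(): a single ODPAT_OUTPUT attribute whose 4-byte
 * payload is the destination port number.  The helper and its use of a
 * scratch skb are hypothetical; real action lists arrive from userspace in
 * ODP_FLOW_ATTR_ACTIONS or ODP_PACKET_ATTR_ACTIONS. */
static int example_build_output_action(struct sk_buff *scratch, u32 port_no)
{
        int err = nla_put_u32(scratch, ODPAT_OUTPUT, port_no);

        if (err)
                return err;
        return validate_actions((struct nlattr *)scratch->data, scratch->len);
}
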
632
d6569377
BP
633struct dp_flowcmd {
634 u32 nlmsg_flags;
635 u32 dp_idx;
636 u32 total_len;
637 struct sw_flow_key key;
638 const struct nlattr *actions;
639 u32 actions_len;
640 bool clear;
641 u64 state;
642};
643
644static struct sw_flow_actions *get_actions(const struct dp_flowcmd *flowcmd)
064af421
BP
645{
646 struct sw_flow_actions *actions;
064af421 647
d6569377
BP
648 actions = flow_actions_alloc(flowcmd->actions_len);
649 if (!IS_ERR(actions) && flowcmd->actions_len)
650 memcpy(actions->actions, flowcmd->actions, flowcmd->actions_len);
064af421 651 return actions;
064af421
BP
652}
653
654static void clear_stats(struct sw_flow *flow)
655{
6bfafa55 656 flow->used = 0;
064af421 657 flow->tcp_flags = 0;
064af421
BP
658 flow->packet_count = 0;
659 flow->byte_count = 0;
660}
661
ed099e92 662/* Called with genl_mutex. */
8d5ebd83
JG
663static int expand_table(struct datapath *dp)
664{
9abaf6b3 665 struct tbl *old_table = get_table_protected(dp);
8d5ebd83
JG
666 struct tbl *new_table;
667
668 new_table = tbl_expand(old_table);
669 if (IS_ERR(new_table))
670 return PTR_ERR(new_table);
671
672 rcu_assign_pointer(dp->table, new_table);
673 tbl_deferred_destroy(old_table, NULL);
674
d6569377 675 return 0;
8d5ebd83
JG
676}
677
982b8810 678static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
064af421 679{
982b8810
BP
680 struct odp_header *odp_header = info->userhdr;
681 struct nlattr **a = info->attrs;
682 struct sk_buff *packet;
f7cd0081
BP
683 unsigned int actions_len;
684 struct nlattr *actions;
36956a7d 685 struct sw_flow_key key;
f7cd0081 686 struct datapath *dp;
d6569377
BP
687 struct ethhdr *eth;
688 bool is_frag;
689 int err;
064af421 690
f7cd0081
BP
691 err = -EINVAL;
692 if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
693 nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
982b8810 694 goto exit;
064af421 695
f7cd0081
BP
696 actions = nla_data(a[ODP_PACKET_ATTR_ACTIONS]);
697 actions_len = nla_len(a[ODP_PACKET_ATTR_ACTIONS]);
698 err = validate_actions(actions, actions_len);
699 if (err)
982b8810 700 goto exit;
f7cd0081
BP
701
702 packet = skb_clone(skb, GFP_KERNEL);
703 err = -ENOMEM;
704 if (!packet)
982b8810 705 goto exit;
f7cd0081
BP
706 packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
707 packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
8d5ebd83 708
f7cd0081
BP
709 skb_reset_mac_header(packet);
710 eth = eth_hdr(packet);
064af421 711
d6569377
BP
712 /* Normally, setting the skb 'protocol' field would be handled by a
713 * call to eth_type_trans(), but it assumes there's a sending
714 * device, which we may not have. */
715 if (ntohs(eth->h_proto) >= 1536)
f7cd0081 716 packet->protocol = eth->h_proto;
d6569377 717 else
f7cd0081 718 packet->protocol = htons(ETH_P_802_2);
d3c54451 719
f7cd0081 720 err = flow_extract(packet, -1, &key, &is_frag);
d6569377 721 if (err)
982b8810 722 goto exit;
064af421 723
d6569377 724 rcu_read_lock();
982b8810 725 dp = get_dp(odp_header->dp_idx);
f7cd0081
BP
726 err = -ENODEV;
727 if (dp)
728 err = execute_actions(dp, packet, &key, actions, actions_len);
d6569377 729 rcu_read_unlock();
064af421 730
982b8810 731exit:
d6569377 732 return err;
064af421
BP
733}
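/* Added commentary, not in the original: the 1536 comparison above mirrors
 * eth_type_trans() -- EtherType values of 0x0600 (1536) and up name a
 * protocol, while smaller values are 802.3 length fields, so such frames are
 * treated as 802.2 LLC (ETH_P_802_2). */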
734
982b8810
BP
735static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
736 [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
737 [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
738};
739
740static struct genl_ops dp_packet_genl_ops[] = {
741 { .cmd = ODP_PACKET_CMD_EXECUTE,
742 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
743 .policy = packet_policy,
744 .doit = odp_packet_cmd_execute
745 }
746};
747
d6569377 748static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
064af421 749{
d6569377 750 int i;
064af421 751
d6569377
BP
752 stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
753 for_each_possible_cpu(i) {
754 const struct dp_stats_percpu *percpu_stats;
755 struct dp_stats_percpu local_stats;
756 unsigned seqcount;
44e05eca 757
d6569377 758 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
064af421 759
d6569377
BP
760 do {
761 seqcount = read_seqcount_begin(&percpu_stats->seqlock);
762 local_stats = *percpu_stats;
763 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
064af421 764
d6569377
BP
765 stats->n_frags += local_stats.n_frags;
766 stats->n_hit += local_stats.n_hit;
767 stats->n_missed += local_stats.n_missed;
768 stats->n_lost += local_stats.n_lost;
769 }
770}
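/* Added commentary, not in the original: the read loop above pairs with
 * write_seqcount_begin()/write_seqcount_end() in the packet and upcall hot
 * paths, so this slow path can sum the per-CPU counters without taking a
 * lock or disturbing packet processing. */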
064af421 771
/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu = 0;

        ASSERT_RTNL();

        list_for_each_entry (p, &dp->port_list, node) {
                int dev_mtu;

                /* Skip any internal ports, since that's what we're trying to
                 * set. */
                if (is_internal_vport(p))
                        continue;

                dev_mtu = vport_get_mtu(p);
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }

        return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all datapath devices to the minimum of the ports
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu;

        ASSERT_RTNL();

        mtu = dp_min_mtu(dp);

        list_for_each_entry (p, &dp->port_list, node) {
                if (is_internal_vport(p))
                        vport_set_mtu(p, mtu);
        }
}
815
d6569377
BP
816static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
817 [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
818 [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
819 [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
820 [ODP_FLOW_ATTR_STATE] = { .type = NLA_U64 },
821};
36956a7d 822
ed099e92 823
d6569377
BP
824static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp,
825 struct sw_flow *flow, u32 total_len, u64 state)
826{
827 const struct sw_flow_actions *sf_acts;
828 struct odp_flow_stats stats;
829 struct odp_flow *odp_flow;
830 struct sk_buff *skb;
831 struct nlattr *nla;
832 unsigned long used;
833 u8 tcp_flags;
834 int err;
064af421 835
d6569377 836 sf_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 837 lockdep_genl_is_held());
064af421 838
d6569377
BP
839 skb = alloc_skb(128 + FLOW_BUFSIZE + sf_acts->actions_len, GFP_KERNEL);
840 err = -ENOMEM;
841 if (!skb)
842 goto exit;
843
d6569377
BP
844 odp_flow = (struct odp_flow*)__skb_put(skb, sizeof(struct odp_flow));
845 odp_flow->dp_idx = dp->dp_idx;
846 odp_flow->total_len = total_len;
847
848 nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
849 if (!nla)
850 goto nla_put_failure;
851 err = flow_to_nlattrs(&flow->key, skb);
852 if (err)
ed099e92 853 goto exit_free;
d6569377
BP
854 nla_nest_end(skb, nla);
855
856 nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
857 if (!nla || skb_tailroom(skb) < sf_acts->actions_len)
858 goto nla_put_failure;
859 memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
860 nla_nest_end(skb, nla);
861
862 spin_lock_bh(&flow->lock);
863 used = flow->used;
864 stats.n_packets = flow->packet_count;
865 stats.n_bytes = flow->byte_count;
866 tcp_flags = flow->tcp_flags;
867 spin_unlock_bh(&flow->lock);
868
869 if (used)
870 NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
871
872 if (stats.n_packets)
873 NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
874
875 if (tcp_flags)
876 NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
877
878 if (state)
879 NLA_PUT_U64(skb, ODP_FLOW_ATTR_STATE, state);
880
881 if (skb->len > total_len)
882 goto nla_put_failure;
883
884 odp_flow->len = skb->len;
885 err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0;
ed099e92 886 goto exit_free;
d6569377
BP
887
888nla_put_failure:
889 err = -EMSGSIZE;
ed099e92 890exit_free:
d6569377
BP
891 kfree_skb(skb);
892exit:
893 return err;
44e05eca
BP
894}
895
ed099e92 896/* Called with genl_mutex. */
d6569377
BP
897static struct sk_buff *copy_flow_from_user(struct odp_flow __user *uodp_flow,
898 struct dp_flowcmd *flowcmd)
44e05eca 899{
d6569377
BP
900 struct nlattr *a[ODP_FLOW_ATTR_MAX + 1];
901 struct odp_flow *odp_flow;
902 struct sk_buff *skb;
903 u32 len;
904 int err;
44e05eca 905
d6569377
BP
906 if (get_user(len, &uodp_flow->len))
907 return ERR_PTR(-EFAULT);
908 if (len < sizeof(struct odp_flow))
909 return ERR_PTR(-EINVAL);
44e05eca 910
d6569377
BP
911 skb = alloc_skb(len, GFP_KERNEL);
912 if (!skb)
913 return ERR_PTR(-ENOMEM);
9c52546b 914
d6569377
BP
915 err = -EFAULT;
916 if (copy_from_user(__skb_put(skb, len), uodp_flow, len))
917 goto error_free_skb;
918
919 odp_flow = (struct odp_flow *)skb->data;
920 err = -EINVAL;
921 if (odp_flow->len != len)
922 goto error_free_skb;
923
924 flowcmd->nlmsg_flags = odp_flow->nlmsg_flags;
925 flowcmd->dp_idx = odp_flow->dp_idx;
926 flowcmd->total_len = odp_flow->total_len;
927
928 err = nla_parse(a, ODP_FLOW_ATTR_MAX,
929 (struct nlattr *)(skb->data + sizeof(struct odp_flow)),
930 skb->len - sizeof(struct odp_flow), flow_policy);
931 if (err)
932 goto error_free_skb;
933
934 /* ODP_FLOW_ATTR_KEY. */
935 if (a[ODP_FLOW_ATTR_KEY]) {
936 err = flow_from_nlattrs(&flowcmd->key, a[ODP_FLOW_ATTR_KEY]);
937 if (err)
938 goto error_free_skb;
939 } else
940 memset(&flowcmd->key, 0, sizeof(struct sw_flow_key));
941
942 /* ODP_FLOW_ATTR_ACTIONS. */
943 if (a[ODP_FLOW_ATTR_ACTIONS]) {
944 flowcmd->actions = nla_data(a[ODP_FLOW_ATTR_ACTIONS]);
945 flowcmd->actions_len = nla_len(a[ODP_FLOW_ATTR_ACTIONS]);
946 err = validate_actions(flowcmd->actions, flowcmd->actions_len);
947 if (err)
948 goto error_free_skb;
949 } else {
950 flowcmd->actions = NULL;
951 flowcmd->actions_len = 0;
9c52546b 952 }
8d5ebd83 953
d6569377
BP
954 flowcmd->clear = a[ODP_FLOW_ATTR_CLEAR] != NULL;
955
956 flowcmd->state = a[ODP_FLOW_ATTR_STATE] ? nla_get_u64(a[ODP_FLOW_ATTR_STATE]) : 0;
957
958 return skb;
959
960error_free_skb:
961 kfree_skb(skb);
962 return ERR_PTR(err);
064af421
BP
963}
964
d6569377 965static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
064af421 966{
bc4a05c6 967 struct tbl_node *flow_node;
d6569377
BP
968 struct dp_flowcmd flowcmd;
969 struct sw_flow *flow;
970 struct sk_buff *skb;
9c52546b 971 struct datapath *dp;
d6569377
BP
972 struct tbl *table;
973 u32 hash;
bc4a05c6 974 int error;
064af421 975
d6569377
BP
976 skb = copy_flow_from_user(uodp_flow, &flowcmd);
977 error = PTR_ERR(skb);
978 if (IS_ERR(skb))
979 goto exit;
064af421 980
ed099e92 981 dp = get_dp(flowcmd.dp_idx);
d6569377 982 error = -ENODEV;
9c52546b 983 if (!dp)
ed099e92 984 goto exit;
704a1e09 985
d6569377
BP
986 hash = flow_hash(&flowcmd.key);
987 table = get_table_protected(dp);
988 flow_node = tbl_lookup(table, &flowcmd.key, hash, flow_cmp);
989 if (!flow_node) {
990 struct sw_flow_actions *acts;
991
992 /* Bail out if we're not allowed to create a new flow. */
993 error = -ENOENT;
994 if (cmd == ODP_FLOW_SET)
ed099e92 995 goto exit;
d6569377
BP
996
997 /* Expand table, if necessary, to make room. */
998 if (tbl_count(table) >= tbl_n_buckets(table)) {
999 error = expand_table(dp);
1000 if (error)
ed099e92 1001 goto exit;
d6569377
BP
1002 table = get_table_protected(dp);
1003 }
1004
1005 /* Allocate flow. */
1006 flow = flow_alloc();
1007 if (IS_ERR(flow)) {
1008 error = PTR_ERR(flow);
ed099e92 1009 goto exit;
d6569377
BP
1010 }
1011 flow->key = flowcmd.key;
1012 clear_stats(flow);
1013
1014 /* Obtain actions. */
1015 acts = get_actions(&flowcmd);
1016 error = PTR_ERR(acts);
1017 if (IS_ERR(acts))
1018 goto error_free_flow;
1019 rcu_assign_pointer(flow->sf_acts, acts);
1020
1021 error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1022 if (error)
1023 goto error_free_flow;
1024
1025 /* Put flow in bucket. */
1026 error = tbl_insert(table, &flow->tbl_node, hash);
1027 if (error)
1028 goto error_free_flow;
1029 } else {
1030 /* We found a matching flow. */
1031 struct sw_flow_actions *old_acts;
1032
1033 /* Bail out if we're not allowed to modify an existing flow.
1034 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1035 * because Generic Netlink treats the latter as a dump
1036 * request. We also accept NLM_F_EXCL in case that bug ever
1037 * gets fixed.
1038 */
1039 error = -EEXIST;
1040 if (flowcmd.nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1041 goto error_kfree_skb;
1042
1043 /* Update actions. */
1044 flow = flow_cast(flow_node);
1045 old_acts = rcu_dereference_protected(flow->sf_acts,
ed099e92 1046 lockdep_genl_is_held());
d6569377
BP
1047 if (flowcmd.actions &&
1048 (old_acts->actions_len != flowcmd.actions_len ||
1049 memcmp(old_acts->actions, flowcmd.actions,
1050 flowcmd.actions_len))) {
1051 struct sw_flow_actions *new_acts;
1052
1053 new_acts = get_actions(&flowcmd);
1054 error = PTR_ERR(new_acts);
1055 if (IS_ERR(new_acts))
1056 goto error_kfree_skb;
1057
1058 rcu_assign_pointer(flow->sf_acts, new_acts);
1059 flow_deferred_free_acts(old_acts);
1060 }
1061
1062 error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1063 if (error)
1064 goto error_kfree_skb;
1065
1066 /* Clear stats. */
1067 if (flowcmd.clear) {
1068 spin_lock_bh(&flow->lock);
1069 clear_stats(flow);
1070 spin_unlock_bh(&flow->lock);
1071 }
9c52546b 1072 }
d6569377 1073 kfree_skb(skb);
d6569377 1074 return 0;
704a1e09 1075
d6569377
BP
1076error_free_flow:
1077 flow_put(flow);
d6569377
BP
1078error_kfree_skb:
1079 kfree_skb(skb);
1080exit:
9c52546b 1081 return error;
704a1e09
BP
1082}
1083
d6569377 1084static int get_or_del_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
704a1e09 1085{
d6569377
BP
1086 struct tbl_node *flow_node;
1087 struct dp_flowcmd flowcmd;
704a1e09 1088 struct sw_flow *flow;
d6569377 1089 struct sk_buff *skb;
9c52546b
BP
1090 struct datapath *dp;
1091 struct tbl *table;
9c52546b 1092 int err;
704a1e09 1093
d6569377 1094 skb = copy_flow_from_user(uodp_flow, &flowcmd);
d6569377 1095 if (IS_ERR(skb))
ed099e92 1096 return PTR_ERR(skb);
704a1e09 1097
ed099e92 1098 dp = get_dp(flowcmd.dp_idx);
9c52546b 1099 if (!dp)
ed099e92 1100 return -ENODEV;
704a1e09 1101
9c52546b 1102 table = get_table_protected(dp);
d6569377 1103 flow_node = tbl_lookup(table, &flowcmd.key, flow_hash(&flowcmd.key), flow_cmp);
d6569377 1104 if (!flow_node)
ed099e92 1105 return -ENOENT;
d6569377
BP
1106
1107 if (cmd == ODP_FLOW_DEL) {
1108 err = tbl_remove(table, flow_node);
1109 if (err)
ed099e92 1110 return err;
9c52546b 1111 }
704a1e09 1112
d6569377
BP
1113 flow = flow_cast(flow_node);
1114 err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
1115 if (!err && cmd == ODP_FLOW_DEL)
1116 flow_deferred_free(flow);
36956a7d 1117
d6569377
BP
1118 return err;
1119}
9c52546b 1120
d6569377
BP
1121static int dump_flow(struct odp_flow __user *uodp_flow)
1122{
1123 struct tbl_node *flow_node;
1124 struct dp_flowcmd flowcmd;
1125 struct sw_flow *flow;
1126 struct sk_buff *skb;
1127 struct datapath *dp;
1128 u32 bucket, obj;
1129 int err;
36956a7d 1130
d6569377
BP
1131 skb = copy_flow_from_user(uodp_flow, &flowcmd);
1132 err = PTR_ERR(skb);
1133 if (IS_ERR(skb))
1134 goto exit;
1135
ed099e92 1136 dp = get_dp(flowcmd.dp_idx);
d6569377
BP
1137 err = -ENODEV;
1138 if (!dp)
ed099e92 1139 goto exit_kfree_skb;
d6569377
BP
1140
1141 bucket = flowcmd.state >> 32;
1142 obj = flowcmd.state;
ed099e92 1143 flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
d6569377
BP
1144 err = -ENODEV;
1145 if (!flow_node)
ed099e92 1146 goto exit_kfree_skb;
d6569377
BP
1147
1148 flow = flow_cast(flow_node);
1149 err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len,
1150 ((u64)bucket << 32) | obj);
1151
ed099e92 1152exit_kfree_skb:
d6569377 1153 kfree_skb(skb);
9c52546b
BP
1154exit:
1155 return err;
704a1e09
BP
1156}
1157
d6569377 1158static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
aaff4b55 1159#ifdef HAVE_NLA_NUL_STRING
d6569377 1160 [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
aaff4b55 1161#endif
d6569377
BP
1162 [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
1163 [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
1164};
1165
aaff4b55
BP
1166static struct genl_family dp_datapath_genl_family = {
1167 .id = GENL_ID_GENERATE,
1168 .hdrsize = sizeof(struct odp_header),
1169 .name = ODP_DATAPATH_FAMILY,
1170 .version = 1,
1171 .maxattr = ODP_DP_ATTR_MAX
1172};
1173
1174static struct genl_multicast_group dp_datapath_multicast_group = {
1175 .name = ODP_DATAPATH_MCGROUP
1176};
1177
1178static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1179 u32 pid, u32 seq, u32 flags, u8 cmd)
064af421 1180{
aaff4b55 1181 struct odp_header *odp_header;
d6569377 1182 struct nlattr *nla;
064af421
BP
1183 int err;
1184
aaff4b55
BP
1185 odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
1186 flags, cmd);
1187 if (!odp_header)
1188 goto error;
064af421 1189
aaff4b55 1190 odp_header->dp_idx = dp->dp_idx;
064af421 1191
d6569377
BP
1192 rcu_read_lock();
1193 err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
1194 rcu_read_unlock();
064af421 1195 if (err)
d6569377 1196 goto nla_put_failure;
064af421 1197
d6569377
BP
1198 nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
1199 if (!nla)
1200 goto nla_put_failure;
1201 get_dp_stats(dp, nla_data(nla));
1202
1203 NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
1204 dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);
1205
1206 if (dp->sflow_probability)
1207 NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);
1208
982b8810
BP
1209 nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
1210 if (!nla)
1211 goto nla_put_failure;
1212 NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
1213 NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
1214 NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
1215 nla_nest_end(skb, nla);
1216
aaff4b55 1217 return genlmsg_end(skb, odp_header);
d6569377
BP
1218
1219nla_put_failure:
aaff4b55
BP
1220 genlmsg_cancel(skb, odp_header);
1221error:
1222 return -EMSGSIZE;
d6569377
BP
1223}
1224
aaff4b55
BP
1225static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
1226 u32 seq, u8 cmd)
d6569377 1227{
d6569377 1228 struct sk_buff *skb;
aaff4b55 1229 int retval;
d6569377 1230
aaff4b55 1231 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
064af421 1232 if (!skb)
d6569377 1233 return ERR_PTR(-ENOMEM);
659586ef 1234
aaff4b55
BP
1235 retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1236 if (retval < 0) {
1237 kfree_skb(skb);
1238 return ERR_PTR(retval);
1239 }
1240 return skb;
1241}
9dca7bd5 1242
aaff4b55
BP
1243static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1244{
d6569377
BP
1245 if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
1246 u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
9dca7bd5 1247
d6569377 1248 if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
aaff4b55 1249 return -EINVAL;
d6569377
BP
1250 }
1251
aaff4b55 1252 return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
d6569377
BP
1253}
1254
ed099e92 1255/* Called with genl_mutex and optionally with RTNL lock also. */
aaff4b55 1256static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
d6569377 1257{
d6569377 1258 if (!a[ODP_DP_ATTR_NAME]) {
aaff4b55 1259 struct datapath *dp = get_dp(odp_header->dp_idx);
d6569377
BP
1260 if (!dp)
1261 return ERR_PTR(-ENODEV);
d6569377
BP
1262 return dp;
1263 } else {
d6569377
BP
1264 struct vport *vport;
1265 int dp_idx;
1266
057dd6d2 1267 rcu_read_lock();
d6569377
BP
1268 vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
1269 dp_idx = vport && vport->port_no == ODPP_LOCAL ? vport->dp->dp_idx : -1;
057dd6d2 1270 rcu_read_unlock();
d6569377
BP
1271
1272 if (dp_idx < 0)
1273 return ERR_PTR(-ENODEV);
ed099e92 1274 return vport->dp;
d6569377
BP
1275 }
1276}
1277
ed099e92 1278/* Called with genl_mutex. */
d6569377
BP
1279static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
1280{
1281 if (a[ODP_DP_ATTR_IPV4_FRAGS])
1282 dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
1283 if (a[ODP_DP_ATTR_SAMPLING])
1284 dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
1285}
1286
aaff4b55 1287static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
d6569377 1288{
aaff4b55
BP
1289 struct nlattr **a = info->attrs;
1290 struct odp_header *odp_header = info->userhdr;
d6569377 1291 struct vport_parms parms;
aaff4b55 1292 struct sk_buff *reply;
d6569377
BP
1293 struct datapath *dp;
1294 struct vport *vport;
1295 int dp_idx;
1296 int err;
d6569377 1297
d6569377
BP
1298 err = -EINVAL;
1299 if (!a[ODP_DP_ATTR_NAME])
aaff4b55
BP
1300 goto err;
1301
1302 err = odp_dp_cmd_validate(a);
1303 if (err)
1304 goto err;
d6569377
BP
1305
1306 rtnl_lock();
d6569377
BP
1307 err = -ENODEV;
1308 if (!try_module_get(THIS_MODULE))
ed099e92 1309 goto err_unlock_rtnl;
d6569377 1310
aaff4b55 1311 dp_idx = odp_header->dp_idx;
d6569377
BP
1312 if (dp_idx < 0) {
1313 err = -EFBIG;
1314 for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
1315 if (get_dp(dp_idx))
1316 continue;
1317 err = 0;
1318 break;
1319 }
1320 } else if (dp_idx < ARRAY_SIZE(dps))
1321 err = get_dp(dp_idx) ? -EBUSY : 0;
1322 else
1323 err = -EINVAL;
1324 if (err)
1325 goto err_put_module;
1326
1327 err = -ENOMEM;
1328 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1329 if (dp == NULL)
1330 goto err_put_module;
1331 INIT_LIST_HEAD(&dp->port_list);
d6569377 1332 dp->dp_idx = dp_idx;
d6569377
BP
1333
1334 /* Initialize kobject for bridge. This will be added as
1335 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1336 dp->ifobj.kset = NULL;
1337 kobject_init(&dp->ifobj, &dp_ktype);
1338
1339 /* Allocate table. */
1340 err = -ENOMEM;
1341 rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
1342 if (!dp->table)
1343 goto err_free_dp;
1344
1345 /* Set up our datapath device. */
1346 parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
1347 parms.type = ODP_VPORT_TYPE_INTERNAL;
1348 parms.options = NULL;
1349 parms.dp = dp;
1350 parms.port_no = ODPP_LOCAL;
1351 vport = new_vport(&parms);
1352 if (IS_ERR(vport)) {
1353 err = PTR_ERR(vport);
1354 if (err == -EBUSY)
1355 err = -EEXIST;
1356
1357 goto err_destroy_table;
1358 }
1359
1360 dp->drop_frags = 0;
1361 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1362 if (!dp->stats_percpu) {
1363 err = -ENOMEM;
1364 goto err_destroy_local_port;
1365 }
1366
1367 change_datapath(dp, a);
1368
aaff4b55
BP
1369 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1370 err = PTR_ERR(reply);
1371 if (IS_ERR(reply))
1372 goto err_destroy_local_port;
1373
d6569377
BP
1374 rcu_assign_pointer(dps[dp_idx], dp);
1375 dp_sysfs_add_dp(dp);
1376
d6569377
BP
1377 rtnl_unlock();
1378
aaff4b55
BP
1379 genl_notify(reply, genl_info_net(info), info->snd_pid,
1380 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
d6569377
BP
1381 return 0;
1382
1383err_destroy_local_port:
1384 dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
1385err_destroy_table:
1386 tbl_destroy(get_table_protected(dp), NULL);
1387err_free_dp:
d6569377
BP
1388 kfree(dp);
1389err_put_module:
1390 module_put(THIS_MODULE);
ed099e92 1391err_unlock_rtnl:
d6569377 1392 rtnl_unlock();
d6569377 1393err:
064af421
BP
1394 return err;
1395}
1396
aaff4b55 1397static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
44e05eca 1398{
ed099e92 1399 struct vport *vport, *next_vport;
aaff4b55 1400 struct sk_buff *reply;
9c52546b 1401 struct datapath *dp;
d6569377 1402 int err;
44e05eca 1403
aaff4b55
BP
1404 err = odp_dp_cmd_validate(info->attrs);
1405 if (err)
d6569377 1406 goto exit;
44e05eca 1407
d6569377 1408 rtnl_lock();
aaff4b55 1409 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377
BP
1410 err = PTR_ERR(dp);
1411 if (IS_ERR(dp))
aaff4b55
BP
1412 goto exit_unlock;
1413
1414 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
1415 err = PTR_ERR(reply);
1416 if (IS_ERR(reply))
1417 goto exit_unlock;
9c52546b 1418
ed099e92
BP
1419 list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
1420 if (vport->port_no != ODPP_LOCAL)
1421 dp_detach_port(vport);
1422
1423 dp_sysfs_del_dp(dp);
1424 rcu_assign_pointer(dps[dp->dp_idx], NULL);
1425 dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
1426
1427 call_rcu(&dp->rcu, destroy_dp_rcu);
1428 module_put(THIS_MODULE);
1429
aaff4b55
BP
1430 genl_notify(reply, genl_info_net(info), info->snd_pid,
1431 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
d6569377
BP
1432 err = 0;
1433
aaff4b55 1434exit_unlock:
d6569377
BP
1435 rtnl_unlock();
1436exit:
1437 return err;
44e05eca
BP
1438}
1439
aaff4b55 1440static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
064af421 1441{
aaff4b55 1442 struct sk_buff *reply;
d6569377 1443 struct datapath *dp;
d6569377 1444 int err;
064af421 1445
aaff4b55
BP
1446 err = odp_dp_cmd_validate(info->attrs);
1447 if (err)
1448 return err;
38c6ecbc 1449
aaff4b55 1450 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1451 if (IS_ERR(dp))
aaff4b55 1452 return PTR_ERR(dp);
38c6ecbc 1453
aaff4b55 1454 change_datapath(dp, info->attrs);
38c6ecbc 1455
aaff4b55
BP
1456 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1457 if (IS_ERR(reply)) {
1458 err = PTR_ERR(reply);
1459 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1460 dp_datapath_multicast_group.id, err);
1461 return 0;
1462 }
1463
1464 genl_notify(reply, genl_info_net(info), info->snd_pid,
1465 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1466 return 0;
064af421
BP
1467}
1468
aaff4b55 1469static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1dcf111b 1470{
aaff4b55 1471 struct sk_buff *reply;
d6569377 1472 struct datapath *dp;
d6569377 1473 int err;
1dcf111b 1474
aaff4b55
BP
1475 err = odp_dp_cmd_validate(info->attrs);
1476 if (err)
1477 return err;
1dcf111b 1478
aaff4b55 1479 dp = lookup_datapath(info->userhdr, info->attrs);
d6569377 1480 if (IS_ERR(dp))
aaff4b55 1481 return PTR_ERR(dp);
1dcf111b 1482
aaff4b55
BP
1483 reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
1484 if (IS_ERR(reply))
1485 return PTR_ERR(reply);
1486
1487 return genlmsg_reply(reply, info);
1dcf111b
JP
1488}
1489
aaff4b55 1490static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
a7786963 1491{
d6569377 1492 u32 dp_idx;
a7786963 1493
aaff4b55 1494 for (dp_idx = cb->args[0]; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
d6569377
BP
1495 struct datapath *dp = get_dp(dp_idx);
1496 if (!dp)
1497 continue;
aaff4b55
BP
1498 if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1499 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1500 ODP_DP_CMD_NEW) < 0)
1501 break;
a7786963 1502 }
aaff4b55
BP
1503
1504 cb->args[0] = dp_idx;
1505 return skb->len;
c19e6535
BP
1506}
1507
aaff4b55
BP
1508static struct genl_ops dp_datapath_genl_ops[] = {
1509 { .cmd = ODP_DP_CMD_NEW,
1510 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1511 .policy = datapath_policy,
1512 .doit = odp_dp_cmd_new
1513 },
1514 { .cmd = ODP_DP_CMD_DEL,
1515 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1516 .policy = datapath_policy,
1517 .doit = odp_dp_cmd_del
1518 },
1519 { .cmd = ODP_DP_CMD_GET,
1520 .flags = 0, /* OK for unprivileged users. */
1521 .policy = datapath_policy,
1522 .doit = odp_dp_cmd_get,
1523 .dumpit = odp_dp_cmd_dump
1524 },
1525 { .cmd = ODP_DP_CMD_SET,
1526 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1527 .policy = datapath_policy,
1528 .doit = odp_dp_cmd_set,
1529 },
1530};
1531
c19e6535 1532static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
f0fef760 1533#ifdef HAVE_NLA_NUL_STRING
c19e6535
BP
1534 [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1535 [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1536 [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1537 [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1538 [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
f0fef760
BP
1539#else
1540 [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
1541 [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1542#endif
c19e6535
BP
1543 [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
1544 [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1545};
1546
f0fef760
BP
1547static struct genl_family dp_vport_genl_family = {
1548 .id = GENL_ID_GENERATE,
1549 .hdrsize = sizeof(struct odp_header),
1550 .name = ODP_VPORT_FAMILY,
1551 .version = 1,
1552 .maxattr = ODP_VPORT_ATTR_MAX
1553};
1554
1555static struct genl_multicast_group dp_vport_multicast_group = {
1556 .name = ODP_VPORT_MCGROUP
1557};
1558
1559/* Called with RTNL lock or RCU read lock. */
1560static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1561 u32 pid, u32 seq, u32 flags, u8 cmd)
064af421 1562{
f0fef760 1563 struct odp_header *odp_header;
c19e6535
BP
1564 struct nlattr *nla;
1565 int ifindex, iflink;
1566 int err;
1567
f0fef760
BP
1568 odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1569 flags, cmd);
1570 if (!odp_header)
1571 return -EMSGSIZE;
c19e6535 1572
f0fef760 1573 odp_header->dp_idx = vport->dp->dp_idx;
c19e6535
BP
1574
1575 NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
1576 NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
1577 NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));
1578
1579 nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1580 if (!nla)
1581 goto nla_put_failure;
1582 if (vport_get_stats(vport, nla_data(nla)))
1583 __skb_trim(skb, skb->len - nla->nla_len);
1584
1585 NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1586
1587 NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
1588
1589 err = vport_get_options(vport, skb);
f0fef760
BP
1590 if (err == -EMSGSIZE)
1591 goto error;
c19e6535
BP
1592
1593 ifindex = vport_get_ifindex(vport);
1594 if (ifindex > 0)
1595 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);
1596
1597 iflink = vport_get_iflink(vport);
1598 if (iflink > 0)
1599 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);
1600
f0fef760 1601 return genlmsg_end(skb, odp_header);
c19e6535
BP
1602
1603nla_put_failure:
1604 err = -EMSGSIZE;
f0fef760
BP
1605error:
1606 genlmsg_cancel(skb, odp_header);
1607 return err;
064af421
BP
1608}
1609
f0fef760
BP
1610/* Called with RTNL lock or RCU read lock. */
1611static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
1612 u32 seq, u8 cmd)
064af421 1613{
c19e6535 1614 struct sk_buff *skb;
f0fef760 1615 int retval;
c19e6535 1616
f0fef760 1617 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
c19e6535
BP
1618 if (!skb)
1619 return ERR_PTR(-ENOMEM);
1620
f0fef760
BP
1621 retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1622 if (retval < 0) {
1623 kfree_skb(skb);
1624 return ERR_PTR(retval);
1625 }
c19e6535 1626 return skb;
f0fef760 1627}
c19e6535 1628
f0fef760
BP
1629static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1630{
1631 return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
c19e6535 1632}
51d4d598 1633
ed099e92 1634/* Called with RTNL lock or RCU read lock. */
f0fef760 1635static struct vport *lookup_vport(struct odp_header *odp_header,
c19e6535
BP
1636 struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1637{
1638 struct datapath *dp;
1639 struct vport *vport;
1640
1641 if (a[ODP_VPORT_ATTR_NAME]) {
c19e6535 1642 vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
ed099e92 1643 if (!vport)
c19e6535 1644 return ERR_PTR(-ENODEV);
c19e6535
BP
1645 return vport;
1646 } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
1647 u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1648
1649 if (port_no >= DP_MAX_PORTS)
f0fef760 1650 return ERR_PTR(-EFBIG);
c19e6535 1651
f0fef760 1652 dp = get_dp(odp_header->dp_idx);
c19e6535
BP
1653 if (!dp)
1654 return ERR_PTR(-ENODEV);
f2459fe7 1655
c19e6535 1656 vport = get_vport_protected(dp, port_no);
ed099e92 1657 if (!vport)
c19e6535 1658 return ERR_PTR(-ENOENT);
c19e6535
BP
1659 return vport;
1660 } else
1661 return ERR_PTR(-EINVAL);
064af421
BP
1662}
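/* Added commentary, not in the original: lookup_vport() accepts either
 * ODP_VPORT_ATTR_NAME, resolved through vport_locate(), or the datapath index
 * from the Netlink header combined with ODP_VPORT_ATTR_PORT_NO; if neither
 * attribute is present it fails with ERR_PTR(-EINVAL). */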
1663
ed099e92 1664/* Called with RTNL lock. */
c19e6535 1665static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
064af421 1666{
c19e6535
BP
1667 int err = 0;
1668 if (a[ODP_VPORT_ATTR_STATS])
1669 err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
1670 if (!err && a[ODP_VPORT_ATTR_ADDRESS])
1671 err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
1672 if (!err && a[ODP_VPORT_ATTR_MTU])
1673 err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
1674 return err;
1675}
1676
f0fef760 1677static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1678{
f0fef760
BP
1679 struct nlattr **a = info->attrs;
1680 struct odp_header *odp_header = info->userhdr;
c19e6535 1681 struct vport_parms parms;
ed099e92 1682 struct sk_buff *reply;
c19e6535 1683 struct vport *vport;
c19e6535 1684 struct datapath *dp;
b0ec0f27 1685 u32 port_no;
c19e6535 1686 int err;
b0ec0f27 1687
c19e6535
BP
1688 err = -EINVAL;
1689 if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
f0fef760
BP
1690 goto exit;
1691
1692 err = odp_vport_cmd_validate(a);
1693 if (err)
1694 goto exit;
51d4d598 1695
c19e6535 1696 rtnl_lock();
f0fef760 1697 dp = get_dp(odp_header->dp_idx);
c19e6535
BP
1698 err = -ENODEV;
1699 if (!dp)
ed099e92 1700 goto exit_unlock;
c19e6535
BP
1701
1702 if (a[ODP_VPORT_ATTR_PORT_NO]) {
1703 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1704
1705 err = -EFBIG;
1706 if (port_no >= DP_MAX_PORTS)
ed099e92 1707 goto exit_unlock;
c19e6535
BP
1708
1709 vport = get_vport_protected(dp, port_no);
1710 err = -EBUSY;
1711 if (vport)
ed099e92 1712 goto exit_unlock;
c19e6535
BP
1713 } else {
1714 for (port_no = 1; ; port_no++) {
1715 if (port_no >= DP_MAX_PORTS) {
1716 err = -EFBIG;
ed099e92 1717 goto exit_unlock;
c19e6535
BP
1718 }
1719 vport = get_vport_protected(dp, port_no);
1720 if (!vport)
1721 break;
51d4d598 1722 }
064af421 1723 }
b0ec0f27 1724
c19e6535
BP
1725 parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
1726 parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
1727 parms.options = a[ODP_VPORT_ATTR_OPTIONS];
1728 parms.dp = dp;
1729 parms.port_no = port_no;
1730
1731 vport = new_vport(&parms);
1732 err = PTR_ERR(vport);
1733 if (IS_ERR(vport))
ed099e92 1734 goto exit_unlock;
c19e6535
BP
1735
1736 set_internal_devs_mtu(dp);
1737 dp_sysfs_add_if(vport);
1738
1739 err = change_vport(vport, a);
f0fef760
BP
1740 if (!err) {
1741 reply = odp_vport_cmd_build_info(vport, info->snd_pid,
1742 info->snd_seq, ODP_VPORT_CMD_NEW);
1743 if (IS_ERR(reply))
1744 err = PTR_ERR(reply);
1745 }
c19e6535
BP
1746 if (err) {
1747 dp_detach_port(vport);
ed099e92 1748 goto exit_unlock;
c19e6535 1749 }
f0fef760
BP
1750 genl_notify(reply, genl_info_net(info), info->snd_pid,
1751 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
c19e6535 1752
c19e6535 1753
ed099e92 1754exit_unlock:
c19e6535 1755 rtnl_unlock();
c19e6535
BP
1756exit:
1757 return err;
44e05eca
BP
1758}
1759
f0fef760 1760static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1761{
f0fef760
BP
1762 struct nlattr **a = info->attrs;
1763 struct sk_buff *reply;
c19e6535 1764 struct vport *vport;
c19e6535 1765 int err;
44e05eca 1766
f0fef760
BP
1767 err = odp_vport_cmd_validate(a);
1768 if (err)
c19e6535
BP
1769 goto exit;
1770
1771 rtnl_lock();
f0fef760 1772 vport = lookup_vport(info->userhdr, a);
c19e6535
BP
1773 err = PTR_ERR(vport);
1774 if (IS_ERR(vport))
f0fef760 1775 goto exit_unlock;
44e05eca 1776
c19e6535
BP
1777 err = 0;
1778 if (a[ODP_VPORT_ATTR_OPTIONS])
1779 err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
1780 if (!err)
1781 err = change_vport(vport, a);
1782
f0fef760
BP
1783 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1784 ODP_VPORT_CMD_NEW);
1785 if (IS_ERR(reply)) {
1786 err = PTR_ERR(reply);
1787 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1788 dp_vport_multicast_group.id, err);
1789 return 0;
1790 }
1791
1792 genl_notify(reply, genl_info_net(info), info->snd_pid,
1793 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1794
1795exit_unlock:
c19e6535
BP
1796 rtnl_unlock();
1797exit:
1798 return err;
064af421
BP
1799}
1800
f0fef760 1801static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1802{
f0fef760
BP
1803 struct nlattr **a = info->attrs;
1804 struct sk_buff *reply;
c19e6535 1805 struct vport *vport;
c19e6535
BP
1806 int err;
1807
f0fef760
BP
1808 err = odp_vport_cmd_validate(a);
1809 if (err)
c19e6535
BP
1810 goto exit;
1811
1812 rtnl_lock();
f0fef760 1813 vport = lookup_vport(info->userhdr, a);
c19e6535 1814 err = PTR_ERR(vport);
f0fef760
BP
1815 if (IS_ERR(vport))
1816 goto exit_unlock;
c19e6535 1817
f0fef760
BP
1818 if (vport->port_no == ODPP_LOCAL) {
1819 err = -EINVAL;
1820 goto exit_unlock;
1821 }
1822
1823 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1824 ODP_VPORT_CMD_DEL);
1825 err = PTR_ERR(reply);
1826 if (IS_ERR(reply))
1827 goto exit_unlock;
1828
1829 err = dp_detach_port(vport);
1830
1831 genl_notify(reply, genl_info_net(info), info->snd_pid,
1832 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1833
1834exit_unlock:
c19e6535
BP
1835 rtnl_unlock();
1836exit:
1837 return err;
7c40efc9
BP
1838}
1839
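/* ODP_VPORT_CMD_GET (single lookup): reply with the current configuration
 * of one vport.  Read-only, so RCU protection is sufficient. */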
f0fef760 1840static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1841{
f0fef760
BP
1842 struct nlattr **a = info->attrs;
1843 struct odp_header *odp_header = info->userhdr;
ed099e92 1844 struct sk_buff *reply;
c19e6535 1845 struct vport *vport;
c19e6535
BP
1846 int err;
1847
f0fef760
BP
1848 err = odp_vport_cmd_validate(a);
1849 if (err)
1850 goto exit;
c19e6535 1851
ed099e92 1852 rcu_read_lock();
f0fef760 1853 vport = lookup_vport(odp_header, a);
c19e6535
BP
1854 err = PTR_ERR(vport);
1855 if (IS_ERR(vport))
f0fef760 1856 goto exit_unlock;
c19e6535 1857
f0fef760
BP
1858 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1859 ODP_VPORT_CMD_NEW);
ed099e92
BP
1860 err = PTR_ERR(reply);
1861 if (IS_ERR(reply))
f0fef760 1862 goto exit_unlock;
ed099e92 1863
f0fef760 1864 err = genlmsg_reply(reply, info);
ed099e92 1865
f0fef760 1866exit_unlock:
ed099e92 1867 rcu_read_unlock();
f0fef760 1868exit:
c19e6535
BP
1869 return err;
1870}
1871
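/* Dump handler for ODP_VPORT_CMD_GET: walk every port number in the
 * datapath, emitting one NLM_F_MULTI message per vport.  The next port
 * number to visit is carried between calls in cb->args[0]. */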
f0fef760 1872static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1873{
f0fef760 1874 struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535
BP
1875 struct datapath *dp;
1876 u32 port_no;
f0fef760 1877 int retval;
c19e6535 1878
f0fef760 1879 dp = get_dp(odp_header->dp_idx);
c19e6535 1880 if (!dp)
f0fef760 1881 return -ENODEV;
ed099e92
BP
1882
1883 rcu_read_lock();
f0fef760 1884 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
ed099e92 1885 struct vport *vport;
ed099e92
BP
1886
1887 vport = get_vport_protected(dp, port_no);
1888 if (!vport)
1889 continue;
1890
f0fef760
BP
1891 if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1892 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1893 ODP_VPORT_CMD_NEW) < 0)
1894 break;
c19e6535 1895 }
ed099e92 1896 rcu_read_unlock();
c19e6535 1897
f0fef760
BP
1898 cb->args[0] = port_no;
1899 retval = skb->len;
1900
1901 return retval;
7c40efc9
BP
1902}
1903
f0fef760
BP
1904static struct genl_ops dp_vport_genl_ops[] = {
1905 { .cmd = ODP_VPORT_CMD_NEW,
1906 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1907 .policy = vport_policy,
1908 .doit = odp_vport_cmd_new
1909 },
1910 { .cmd = ODP_VPORT_CMD_DEL,
1911 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1912 .policy = vport_policy,
1913 .doit = odp_vport_cmd_del
1914 },
1915 { .cmd = ODP_VPORT_CMD_GET,
1916 .flags = 0, /* OK for unprivileged users. */
1917 .policy = vport_policy,
1918 .doit = odp_vport_cmd_get,
1919 .dumpit = odp_vport_cmd_dump
1920 },
1921 { .cmd = ODP_VPORT_CMD_SET,
1922 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1923 .policy = vport_policy,
1924 .doit = odp_vport_cmd_set,
1925 },
1926};
1927
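/*
 * Illustrative sketch only, not part of this file: one way a userspace
 * client could query a vport over generic netlink once the family above is
 * registered.  It assumes libnl-3 and that ODP_VPORT_FAMILY (from
 * datapath-protocol.h) names the family; the datapath index, vport name and
 * genl version below are placeholders.
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	struct nl_msg *msg = nlmsg_alloc();
 *	struct odp_header *hdr;
 *	int family;
 *
 *	genl_connect(sk);
 *	family = genl_ctrl_resolve(sk, ODP_VPORT_FAMILY);
 *	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *			  sizeof(*hdr), 0, ODP_VPORT_CMD_GET, 1);
 *	hdr->dp_idx = 0;
 *	nla_put_string(msg, ODP_VPORT_ATTR_NAME, "eth0");
 *	nl_send_auto(sk, msg);
 *
 * The kernel's reply can then be read back with nl_recvmsgs_default(sk).
 */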
064af421
BP
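/* Legacy ioctl entry point: only the flow operations still go through the
 * character device; datapath, vport and packet operations are handled over
 * generic netlink. */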
1928static long openvswitch_ioctl(struct file *f, unsigned int cmd,
1929 unsigned long argp)
1930{
064af421
BP
1931 int err;
1932
ed099e92 1933 genl_lock();
064af421 1934 switch (cmd) {
9c52546b
BP
1935 case ODP_FLOW_FLUSH:
1936 err = flush_flows(argp);
1937 goto exit;
1938
d6569377
BP
1939 case ODP_FLOW_NEW:
1940 case ODP_FLOW_SET:
1941 err = new_flow(cmd, (struct odp_flow __user *)argp);
9c52546b
BP
1942 goto exit;
1943
1944 case ODP_FLOW_GET:
d6569377
BP
1945 case ODP_FLOW_DEL:
1946 err = get_or_del_flow(cmd, (struct odp_flow __user *)argp);
9c52546b
BP
1947 goto exit;
1948
1949 case ODP_FLOW_DUMP:
d6569377 1950 err = dump_flow((struct odp_flow __user *)argp);
9c52546b
BP
1951 goto exit;
1952
064af421
BP
1953 default:
1954 err = -ENOIOCTLCMD;
1955 break;
1956 }
e86c8696 1957exit:
ed099e92 1958 genl_unlock();
064af421
BP
1959 return err;
1960}
1961
3fbd517a 1962#ifdef CONFIG_COMPAT
3fbd517a
BP
1963static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
1964{
3fbd517a 1965 switch (cmd) {
3fbd517a
BP
1966 case ODP_FLOW_FLUSH:
1967 /* Ioctls that don't need any translation at all. */
1968 return openvswitch_ioctl(f, cmd, argp);
1969
d6569377
BP
1970 case ODP_FLOW_NEW:
1971 case ODP_FLOW_DEL:
1972 case ODP_FLOW_GET:
1973 case ODP_FLOW_SET:
1974 case ODP_FLOW_DUMP:
3fbd517a
BP
1975 /* Ioctls that just need their pointer argument extended. */
1976 return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
3fbd517a 1977
3fbd517a 1978 default:
9c52546b 1979 return -ENOIOCTLCMD;
3fbd517a 1980 }
3fbd517a
BP
1981}
1982#endif
1983
982b8810
BP
1984 static const struct file_operations openvswitch_fops = {
1985 .owner = THIS_MODULE,
1986 .unlocked_ioctl = openvswitch_ioctl,
1987#ifdef CONFIG_COMPAT
1988 .compat_ioctl = openvswitch_compat_ioctl,
1989#endif
1990};
ed099e92 1991
982b8810 1992static int major;
ed099e92 1993
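/* Ties a generic netlink family to its operations and optional multicast
 * group so that registration and teardown can be table driven. */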
982b8810
BP
1994struct genl_family_and_ops {
1995 struct genl_family *family;
1996 struct genl_ops *ops;
1997 int n_ops;
1998 struct genl_multicast_group *group;
1999};
ed099e92 2000
982b8810 2001static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
2002 { &dp_datapath_genl_family,
2003 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
2004 &dp_datapath_multicast_group },
f0fef760
BP
2005 { &dp_vport_genl_family,
2006 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
2007 &dp_vport_multicast_group },
982b8810
BP
2008 { &dp_packet_genl_family,
2009 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
2010 NULL },
2011};
ed099e92 2012
982b8810
BP
2013static void dp_unregister_genl(int n_families)
2014{
2015 int i;
ed099e92 2016
982b8810
BP
2017 for (i = 0; i < n_families; i++)
2018 genl_unregister_family(dp_genl_families[i].family);
ed099e92
BP
2020}
2021
982b8810 2022static int dp_register_genl(void)
064af421 2023{
982b8810
BP
2024 int n_registered;
2025 int err;
2026 int i;
064af421 2027
982b8810
BP
2028 n_registered = 0;
2029 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2030 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 2031
982b8810
BP
2032 err = genl_register_family_with_ops(f->family, f->ops,
2033 f->n_ops);
2034 if (err)
2035 goto error;
2036 n_registered++;
e22d4953 2037
982b8810
BP
2038 if (f->group) {
2039 err = genl_register_mc_group(f->family, f->group);
2040 if (err)
2041 goto error;
2042 }
2043 }
9cc8b4e4 2044
982b8810
BP
2045 err = packet_register_mc_groups();
2046 if (err)
2047 goto error;
2048 return 0;
064af421
BP
2049
2050error:
982b8810
BP
2051 dp_unregister_genl(n_registered);
2052 return err;
064af421
BP
2053}
2054
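/* Module init: bring up the flow and vport subsystems, hook the netdevice
 * notifier, register the legacy character device, and finally register the
 * generic netlink families.  Each step unwinds the earlier ones on failure. */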
22d24ebf
BP
2055static int __init dp_init(void)
2056{
f2459fe7 2057 struct sk_buff *dummy_skb;
22d24ebf
BP
2058 int err;
2059
f2459fe7 2060 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 2061
f2459fe7 2062 pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421
BP
2063
2064 err = flow_init();
2065 if (err)
2066 goto error;
2067
f2459fe7 2068 err = vport_init();
064af421
BP
2069 if (err)
2070 goto error_flow_exit;
2071
f2459fe7
JG
2072 err = register_netdevice_notifier(&dp_device_notifier);
2073 if (err)
2074 goto error_vport_exit;
2075
064af421
BP
2076 err = major = register_chrdev(0, "openvswitch", &openvswitch_fops);
2077 if (err < 0)
2078 goto error_unreg_notifier;
2079
982b8810
BP
2080 err = dp_register_genl();
2081 if (err < 0)
2082 goto error_unreg_chrdev;
2083
064af421
BP
2084 return 0;
2085
982b8810
BP
2086error_unreg_chrdev:
2087 unregister_chrdev(major, "openvswitch");
064af421
BP
2088error_unreg_notifier:
2089 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2090error_vport_exit:
2091 vport_exit();
064af421
BP
2092error_flow_exit:
2093 flow_exit();
2094error:
2095 return err;
2096}
2097
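/* Module exit: wait for outstanding RCU callbacks with rcu_barrier(), then
 * tear everything down in the reverse order of dp_init(). */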
2098static void dp_cleanup(void)
2099{
2100 rcu_barrier();
982b8810 2101 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
064af421
BP
2102 unregister_chrdev(major, "openvswitch");
2103 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7 2104 vport_exit();
064af421 2105 flow_exit();
064af421
BP
2106}
2107
2108module_init(dp_init);
2109module_exit(dp_cleanup);
2110
2111MODULE_DESCRIPTION("Open vSwitch switching datapath");
2112MODULE_LICENSE("GPL");